#ifndef g3eigen #define g3eigen #include "../../eigen/Eigen/Dense" #include "../timing_decorator.hpp" #include "../output/frame.hpp" #include "camera.hpp" #include #include #include #include #include #include #include #ifdef SSE #include #endif constexpr int Dim = 3; template class Octree { public: using PointType = Eigen::Matrix; using BoundingBox = std::pair; struct NodeData { T data; PointType position; bool active; bool visible; float size; Eigen::Vector3f color; ///TODO: dont add these just yet. bool light; float emittance; //amount of light to emit float refraction; float reflection; NodeData(const T& data, const PointType& pos, bool visible, Eigen::Vector3f color, float size = 0.01f, bool active = true) : data(data), position(pos), active(active), visible(visible), color(color), size(size) {} }; struct OctreeNode { BoundingBox bounds; std::vector> points; std::array, 8> children; PointType center; bool isLeaf; OctreeNode(const PointType& min, const PointType& max) : bounds(min,max), isLeaf(true) { for (std::unique_ptr& child : children) { child = nullptr; } center = (bounds.first + bounds.second) * 0.5; } bool contains(const PointType& point) const { for (int i = 0; i < Dim; ++i) { if (point[i] < bounds.first[i] || point[i] > bounds.second[i]) { return false; } } return true; } bool isEmpty() const { return points.empty(); } }; private: std::unique_ptr root_; size_t maxDepth; size_t size; size_t maxPointsPerNode; uint8_t getOctant(const PointType& point, const PointType& center) const { int octant = 0; for (int i = 0; i < Dim; ++i) { if (point[i] >= center[i]) octant |= (1 << i); } return octant; } BoundingBox createChildBounds(const OctreeNode* node, uint8_t octant) const { PointType childMin, childMax; PointType center = node->center; for (int i = 0; i < Dim; ++i) { bool high = (octant >> i) & 1; childMin[i] = high ? center[i] : node->bounds.first[i]; childMax[i] = high ? node->bounds.second[i] : center[i]; } return {childMin, childMax}; } void splitNode(OctreeNode* node, int depth) { if (depth >= maxDepth) return; for (int i = 0; i < 8; ++i) { BoundingBox childBounds = createChildBounds(node, i); node->children[i] = std::make_unique(childBounds.first, childBounds.second); } for (const auto& pointData : node->points) { int octant = getOctant(pointData->position, node->center); node->children[octant]->points.emplace_back(pointData); } node->points.clear(); node->isLeaf = false; for (int i = 0; i < 8; ++i) { if (node->children[i]->points.size() > maxPointsPerNode) { splitNode(node->children[i].get(), depth + 1); } } } bool insertRecursive(OctreeNode* node, const std::shared_ptr& pointData, int depth) { if (!node->contains(pointData->position)) return false; if (node->isLeaf) { node->points.emplace_back(pointData); if (node->points.size() > maxPointsPerNode && depth < maxDepth) { splitNode(node, depth); } return true; } else { int octant = getOctant(pointData->position, node->center); if (node->children[octant]) { return insertRecursive(node->children[octant].get(), pointData, depth + 1); } } return false; } void bitonic_sort_8(std::array, 8>& arr) noexcept { #ifdef SSE alignas(32) float values[8]; alignas(32) uint32_t indices[8]; for (int i = 0; i < 8; i++) { values[i] = arr[i].second; indices[i] = arr[i].first; } __m256 val = _mm256_load_ps(values); __m256i idx = _mm256_load_si256((__m256i*)indices); __m256 swapped1 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(2, 3, 0, 1)); __m256i swapped_idx1 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(2, 3, 0, 1)); __m256 mask1 = _mm256_cmp_ps(val, swapped1, _CMP_GT_OQ); val = _mm256_blendv_ps(val, swapped1, mask1); idx = _mm256_castps_si256(_mm256_blendv_ps( _mm256_castsi256_ps(idx), _mm256_castsi256_ps(swapped_idx1), mask1)); __m256 swapped2 = _mm256_permute2f128_ps(val, val, 0x01); __m256i swapped_idx2 = _mm256_permute2f128_si256(idx, idx, 0x01); __m256 mask2 = _mm256_cmp_ps(val, swapped2, _CMP_GT_OQ); val = _mm256_blendv_ps(val, swapped2, mask2); idx = _mm256_castps_si256(_mm256_blendv_ps( _mm256_castsi256_ps(idx), _mm256_castsi256_ps(swapped_idx2), mask2)); __m256 swapped3 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(1, 0, 3, 2)); __m256i swapped_idx3 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(1, 0, 3, 2)); __m256 mask3 = _mm256_cmp_ps(val, swapped3, _CMP_GT_OQ); val = _mm256_blendv_ps(val, swapped3, mask3); idx = _mm256_castps_si256(_mm256_blendv_ps( _mm256_castsi256_ps(idx), _mm256_castsi256_ps(swapped_idx3), mask3)); __m256 swapped4 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(2, 3, 0, 1)); __m256i swapped_idx4 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(2, 3, 0, 1)); __m256 mask4 = _mm256_cmp_ps(val, swapped4, _CMP_GT_OQ); val = _mm256_blendv_ps(val, swapped4, mask4); idx = _mm256_castps_si256(_mm256_blendv_ps( _mm256_castsi256_ps(idx), _mm256_castsi256_ps(swapped_idx4), mask4)); _mm256_store_ps(values, val); _mm256_store_si256((__m256i*)indices, idx); for (int i = 0; i < 8; i++) { arr[i].second = values[i]; arr[i].first = (uint8_t)indices[i]; } #else auto a0 = arr[0], a1 = arr[1], a2 = arr[2], a3 = arr[3]; auto a4 = arr[4], a5 = arr[5], a6 = arr[6], a7 = arr[7]; if (a0.second > a1.second) std::swap(a0, a1); if (a2.second < a3.second) std::swap(a2, a3); if (a4.second > a5.second) std::swap(a4, a5); if (a6.second < a7.second) std::swap(a6, a7); if (a0.second > a2.second) std::swap(a0, a2); if (a1.second > a3.second) std::swap(a1, a3); if (a0.second > a1.second) std::swap(a0, a1); if (a2.second > a3.second) std::swap(a2, a3); if (a4.second < a6.second) std::swap(a4, a6); if (a5.second < a7.second) std::swap(a5, a7); if (a4.second < a5.second) std::swap(a4, a5); if (a6.second < a7.second) std::swap(a6, a7); if (a0.second > a4.second) std::swap(a0, a4); if (a1.second > a5.second) std::swap(a1, a5); if (a2.second > a6.second) std::swap(a2, a6); if (a3.second > a7.second) std::swap(a3, a7); if (a0.second > a2.second) std::swap(a0, a2); if (a1.second > a3.second) std::swap(a1, a3); if (a4.second > a6.second) std::swap(a4, a6); if (a5.second > a7.second) std::swap(a5, a7); if (a0.second > a1.second) std::swap(a0, a1); if (a2.second > a3.second) std::swap(a2, a3); if (a4.second > a5.second) std::swap(a4, a5); if (a6.second > a7.second) std::swap(a6, a7); arr[0] = a0; arr[1] = a1; arr[2] = a2; arr[3] = a3; arr[4] = a4; arr[5] = a5; arr[6] = a6; arr[7] = a7; #endif } bool rayBoxIntersect(const PointType& origin, const PointType& dir, const BoundingBox& box, float& tMin, float& tMax) const { tMin = 0.0f; tMax = std::numeric_limits::max(); for (int i = 0; i < Dim; ++i) { if (std::abs(dir[i]) < std::numeric_limits::epsilon()) { if (origin[i] < box.first[i] || origin[i] > box.second[i]) return false; } else { float invD = 1.f / dir[i]; float t1 = (box.first[i] - origin[i]) * invD; float t2 = (box.second[i] - origin[i]) * invD; if (t1>t2) std::swap(t1,t2); tMin = std::max(tMin, t1); tMax = std::min(tMax, t2); if (tMin > tMax) return false; } } return true; } public: Octree(const PointType& minBound, const PointType& maxBound, size_t maxPointsPerNode=16, size_t maxDepth = 16) : root_(std::make_unique(minBound, maxBound)), maxPointsPerNode(maxPointsPerNode), maxDepth(maxDepth), size(0) {} bool set(const T& data, const PointType& pos, bool visible, Eigen::Vector3f color, float size, bool active) { auto pointData = std::make_shared(data, pos, visible, color, size, active); if (insertRecursive(root_.get(), pointData, 0)) { size++; return true; } return false; } std::vector> voxelTraverse(const PointType& origin, const PointType& direction, float maxDist, bool stopAtFirstHit) { std::vector> hits; if (empty()) return hits; std::function traverseNode = [&](OctreeNode* node, const PointType& origin, const PointType& dir, float tMin, float tMax) { if (!node || tMin > tMax) return; if (node->isLeaf) { for (const auto& pointData : node->points) { PointType toPoint = pointData->position - origin; float projection = toPoint.dot(dir); if (projection >= 0 && projection <= maxDist) { PointType closestPoint = origin + dir * projection; float distSq = (pointData->position - closestPoint).squaredNorm(); if (distSq < pointData->size * pointData->size) { hits.emplace_back(pointData); if (stopAtFirstHit) return; } } } } else { std::array, 8> childOrder; PointType center = node->center; for (int i = 0; i < 8; ++i) { if (node->children[i]) { PointType childCenter = node->children[i]->center; float dist = (childCenter - origin).dot(dir); childOrder[i] = {i, dist}; } else { childOrder[i] = {i, std::numeric_limits::lowest()}; } } bitonic_sort_8(childOrder); for (int i = 0; i < 8; ++i) { int childIdx = childOrder[i].first; if (node->children[childIdx]) { const auto& childBounds = node->children[childIdx]->bounds; float childtMin = tMin; float childtMax = tMax; if (rayBoxIntersect(origin, dir, childBounds, childtMin, childtMax)) { traverseNode(node->children[childIdx].get(), origin, dir, childtMin, childtMax); if (stopAtFirstHit && !hits.empty()) return; } } } } }; float tMin, tMax; if (rayBoxIntersect(origin, direction, root_->bounds, tMin, tMax)) { tMax = std::min(tMax, maxDist); if (tMin <= tMax) { traverseNode(root_.get(), origin, direction, tMin, tMax); } } return hits; } frame renderFrame(const Camera& cam, int height, int width, frame::colormap colorformat = frame::colormap::RGB) { PointType origin = cam.origin; PointType dir = cam.direction.normalized(); PointType up = cam.up.normalized(); PointType right = cam.right(); frame outFrame(width, height, colorformat); std::vector colorBuffer; int channels; if (colorformat == frame::colormap::B) { channels = 1; } else if (colorformat == frame::colormap::RGB || colorformat == frame::colormap::BGR) { channels = 3; } else { //BGRA and RGBA channels = 4; } colorBuffer.resize(width * height * channels); float aspect = static_cast(width) / height; float fovRad = cam.fovRad(); float tanHalfFov = tan(fovRad * 0.5f); float tanfovy = tanHalfFov; float tanfovx = tanHalfFov * aspect; const Eigen::Vector3f defaultColor(0.1f, 0.2f, 0.4f); #pragma omp parallel for schedule(dynamic) collapse(2) for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { float px = (2.0f * (x + 0.5f) / width - 1.0f) * tanfovx; float py = (1.0f - 2.0f * (y + 0.5f) / height) * tanfovy; PointType rayDir = dir + (right * px) + (up * py); rayDir.normalize(); std::vector> hits = voxelTraverse( origin, rayDir, std::numeric_limits::max(), true); Eigen::Vector3f color = hits.empty() ? defaultColor : hits[0]->color; color = color.cwiseMax(0.0f).cwiseMin(1.0f); int idx = (y * width + x) * channels; switch(colorformat) { case frame::colormap::B: colorBuffer[idx ] = static_cast(color.mean() * 255.0f); break; case frame::colormap::RGB: colorBuffer[idx ] = static_cast(color[0] * 255.0f); colorBuffer[idx + 1] = static_cast(color[1] * 255.0f); colorBuffer[idx + 2] = static_cast(color[2] * 255.0f); break; case frame::colormap::BGR: colorBuffer[idx ] = static_cast(color[2] * 255.0f); colorBuffer[idx + 1] = static_cast(color[1] * 255.0f); colorBuffer[idx + 2] = static_cast(color[0] * 255.0f); break; case frame::colormap::RGBA: colorBuffer[idx ] = static_cast(color[0] * 255.0f); colorBuffer[idx + 1] = static_cast(color[1] * 255.0f); colorBuffer[idx + 2] = static_cast(color[2] * 255.0f); colorBuffer[idx + 3] = 255; break; case frame::colormap::BGRA: colorBuffer[idx ] = static_cast(color[2] * 255.0f); colorBuffer[idx + 1] = static_cast(color[1] * 255.0f); colorBuffer[idx + 2] = static_cast(color[0] * 255.0f); colorBuffer[idx + 3] = 255; break; } } } outFrame.setData(colorBuffer); return outFrame; } bool empty() const { return size == 0; } }; #endif