usually works. need to figure out what causes it to not to sometimes.

This commit is contained in:
yggdrasil75
2026-01-26 20:02:01 -05:00
parent ed6b625826
commit c3916d146a
10 changed files with 743 additions and 200 deletions

410
util/grid/grid3eigen.hpp Normal file
View File

@@ -0,0 +1,410 @@
#ifndef g3eigen
#define g3eigen
#include "../../eigen/Eigen/Dense"
#include "../timing_decorator.hpp"
#include "../output/frame.hpp"
#ifdef SSE
#include <immintrin.h>
#endif
constexpr int Dim = 3;
template<typename T>
class Octree {
public:
using PointType = Eigen::Matrix<float, Dim, 1>;
using BoundingBox = std::pair<PointType, PointType>;
struct NodeData {
T data;
PointType position;
bool active;
bool visible;
float size;
Eigen::Vector3f color;
///TODO: dont add these just yet.
bool light;
float emittance; //amount of light to emit
float refraction;
float reflection;
NodeData(const T& data, const PointType& pos, Eigen::Vector3f color, float size = 0.01f, bool active = true) :
data(data), position(pos), active(active), color(color), size(size) {}
};
struct OctreeNode {
BoundingBox bounds;
std::vector<std::shared_ptr<NodeData>> points;
std::array<std::unique_ptr<OctreeNode>, 8> children;
PointType center;
bool isLeaf;
OctreeNode(const PointType& min, const PointType& max) : bounds(min,max), isLeaf(true) {
for (std::unique_ptr<OctreeNode>& child : children) {
child = nullptr;
}
center = (bounds.first + bounds.second) * 0.5;
}
bool contains(const PointType& point) const {
for (int i = 0; i < Dim; ++i) {
if (point[i] < bounds.first[i] || point[i] > bounds.second[i]) {
return false;
}
}
return true;
}
bool isEmpty() const {
return points.empty();
}
};
private:
std::unique_ptr<OctreeNode> root_;
size_t maxDepth;
size_t size;
size_t maxPointsPerNode;
uint8_t getOctant(const PointType& point, const PointType& center) const {
int octant = 0;
for (int i = 0; i < Dim; ++i) {
if (point[i] >= center[i]) octant |= (1 << i);
}
return octant;
}
BoundingBox createChildBounds(const OctreeNode* node, uint8_t octant) const {
PointType childMin, childMax;
PointType center = node->center;
for (int i = 0; i < Dim; ++i) {
bool high = (octant >> i) & 1;
childMin[i] = high ? center[i] : node->bounds.first[i];
childMax[i] = high ? node->bounds.second[i] : center[i];
}
return {childMin, childMax};
}
void splitNode(OctreeNode* node, int depth) {
if (depth >= maxDepth) return;
for (int i = 0; i < 8; ++i) {
BoundingBox childBounds = createChildBounds(node, i);
node->children[i] = std::make_unique<OctreeNode>(childBounds.first, childBounds.second);
}
for (const auto& pointData : node->points) {
int octant = getOctant(pointData->position, node->center);
node->children[octant]->points.emplace_back(pointData);
}
node->points.clear();
node->isLeaf = false;
for (int i = 0; i < 8; ++i) {
if (node->children[i]->points.size() > maxPointsPerNode) {
splitNode(node->children[i].get(), depth + 1);
}
}
}
bool insertRecursive(OctreeNode* node, const std::shared_ptr<NodeData>& pointData, int depth) {
if (!node->contains(pointData->position)) return false;
if (node->isLeaf) {
node->points.emplace_back(pointData);
if (node->points.size() > maxPointsPerNode && depth < maxDepth) {
splitNode(node,depth);
}
return true;
} else {
int octant = getOctant(pointData->position, node->center);
if (node->children[octant]) {
return insertRecursive(node->children[octant].get(), pointData, depth + 1);
}
}
return false;
}
void bitonic_sort_8(std::array<std::pair<int, float>, 8>& arr) noexcept {
#ifdef SSE
alignas(32) float values[8];
alignas(32) uint32_t indices[8];
for (int i = 0; i < 8; i++) {
values[i] = arr[i].second;
indices[i] = arr[i].first;
}
__m256 val = _mm256_load_ps(values);
__m256i idx = _mm256_load_si256((__m256i*)indices);
__m256 swapped1 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(2, 3, 0, 1));
__m256i swapped_idx1 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(2, 3, 0, 1));
__m256 mask1 = _mm256_cmp_ps(val, swapped1, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped1, mask1);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx1),
mask1));
__m256 swapped2 = _mm256_permute2f128_ps(val, val, 0x01);
__m256i swapped_idx2 = _mm256_permute2f128_si256(idx, idx, 0x01);
__m256 mask2 = _mm256_cmp_ps(val, swapped2, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped2, mask2);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx2),
mask2));
__m256 swapped3 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(1, 0, 3, 2));
__m256i swapped_idx3 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(1, 0, 3, 2));
__m256 mask3 = _mm256_cmp_ps(val, swapped3, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped3, mask3);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx3),
mask3));
__m256 swapped4 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(2, 3, 0, 1));
__m256i swapped_idx4 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(2, 3, 0, 1));
__m256 mask4 = _mm256_cmp_ps(val, swapped4, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped4, mask4);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx4),
mask4));
_mm256_store_ps(values, val);
_mm256_store_si256((__m256i*)indices, idx);
for (int i = 0; i < 8; i++) {
arr[i].second = values[i];
arr[i].first = (uint8_t)indices[i];
}
#else
auto a0 = arr[0], a1 = arr[1], a2 = arr[2], a3 = arr[3];
auto a4 = arr[4], a5 = arr[5], a6 = arr[6], a7 = arr[7];
if (a0.second > a1.second) std::swap(a0, a1);
if (a2.second < a3.second) std::swap(a2, a3);
if (a4.second > a5.second) std::swap(a4, a5);
if (a6.second < a7.second) std::swap(a6, a7);
if (a0.second > a2.second) std::swap(a0, a2);
if (a1.second > a3.second) std::swap(a1, a3);
if (a0.second > a1.second) std::swap(a0, a1);
if (a2.second > a3.second) std::swap(a2, a3);
if (a4.second < a6.second) std::swap(a4, a6);
if (a5.second < a7.second) std::swap(a5, a7);
if (a4.second < a5.second) std::swap(a4, a5);
if (a6.second < a7.second) std::swap(a6, a7);
if (a0.second > a4.second) std::swap(a0, a4);
if (a1.second > a5.second) std::swap(a1, a5);
if (a2.second > a6.second) std::swap(a2, a6);
if (a3.second > a7.second) std::swap(a3, a7);
if (a0.second > a2.second) std::swap(a0, a2);
if (a1.second > a3.second) std::swap(a1, a3);
if (a4.second > a6.second) std::swap(a4, a6);
if (a5.second > a7.second) std::swap(a5, a7);
if (a0.second > a1.second) std::swap(a0, a1);
if (a2.second > a3.second) std::swap(a2, a3);
if (a4.second > a5.second) std::swap(a4, a5);
if (a6.second > a7.second) std::swap(a6, a7);
arr[0] = a0; arr[1] = a1; arr[2] = a2; arr[3] = a3;
arr[4] = a4; arr[5] = a5; arr[6] = a6; arr[7] = a7;
#endif
}
bool rayBoxIntersect(const PointType& origin, const PointType& dir, const BoundingBox& box,
float& tMin, float& tMax) const {
tMin = 0.0f;
tMax = std::numeric_limits<float>::max();
for (int i = 0; i < Dim; ++i) {
if (std::abs(dir[i]) < std::numeric_limits<float>::epsilon()) {
if (origin[i] < box.first[i] || origin[i] > box.second[i]) return false;
} else {
float invD = 1.f / dir[i];
float t1 = (box.first[i] - origin[i]) * invD;
float t2 = (box.second[i] - origin[i]) * invD;
if (t1>t2) std::swap(t1,t2);
tMin = std::max(tMin, t1);
tMax = std::min(tMax, t2);
if (tMin > tMax) return false;
}
}
return true;
}
public:
Octree(const PointType& minBound, const PointType& maxBound, size_t maxPointsPerNode=16, size_t maxDepth = 16) :
root_(std::make_unique<OctreeNode>(minBound, maxBound)), maxPointsPerNode(maxPointsPerNode),
maxDepth(maxDepth), size(size) {}
bool set(const T& data, const PointType& pos, Eigen::Vector3f color, float size, bool active) {
auto pointData = std::make_shared<NodeData>(data, pos, color, size, active);
if (insertRecursive(root_.get(), pointData, 0)) {
size++;
return true;
}
return false;
}
std::vector<std::shared_ptr<NodeData>> voxelTraverse(const PointType& origin, const PointType& direction,
float maxDist, bool stopAtFirstHit) {
std::vector<std::shared_ptr<NodeData>> hits;
if (empty()) return hits;
std::function<void(OctreeNode*, const PointType&, const PointType&, float, float)> traverseNode =
[&](OctreeNode* node, const PointType& origin, const PointType& dir, float tMin, float tMax) {
if (!node || tMin > tMax) return;
if (node->isLeaf) {
for (const auto& pointData : node->points) {
//if (!pointData->active) continue;
// Calculate intersection with axis-aligned cube
float halfSize = pointData->size * 0.5f;
PointType cubeMin = pointData->position - PointType::Constant(halfSize);
PointType cubeMax = pointData->position + PointType::Constant(halfSize);
// Ray-cube intersection test
float cubeTMin = tMin;
float cubeTMax = tMax;
if (rayBoxIntersect(origin, dir, {cubeMin, cubeMax}, cubeTMin, cubeTMax)) {
// Check if intersection is within max distance
if (cubeTMin <= maxDist) {
hits.emplace_back(pointData);
if (stopAtFirstHit) return;
}
}
}
} else {
std::array<std::pair<int, float>, 8> childOrder;
PointType center = node->center;
for (int i = 0; i < 8; ++i) {
if (node->children[i]) {
PointType childCenter = node->children[i]->center;
float dist = (childCenter - origin).dot(dir);
childOrder[i] = {i, dist};
} else {
childOrder[i] = {i, std::numeric_limits<float>::lowest()};
}
}
bitonic_sort_8(childOrder);
for (int i = 0; i < 8; ++i) {
int childIdx = childOrder[i].first;
if (node->children[childIdx]) {
const auto& childBounds = node->children[childIdx]->bounds;
float childtMin = tMin;
float childtMax = tMax;
if (rayBoxIntersect(origin, dir, childBounds, childtMin, childtMax)) {
traverseNode(node->children[childIdx].get(), origin, dir, childtMin, childtMax);
if (stopAtFirstHit && !hits.empty()) return;
}
}
}
}
};
float tMin, tMax;
if (rayBoxIntersect(origin, direction, root_->bounds, tMin, tMax)) {
tMax = std::min(tMax, maxDist);
if (tMin <= tMax) {
traverseNode(root_.get(), origin, direction, tMin, tMax);
}
}
return hits;
}
frame renderFrame(const PointType& origin, PointType& dir, PointType& up, PointType& right, int height,
int width, frame::colormap colorformat = frame::colormap::RGB) {
frame outFrame(width, height, colorformat);
std::vector<uint8_t> colorBuffer;
int channels;
if (colorformat == frame::colormap::B) {
channels = 1;
} else if (colorformat == frame::colormap::RGB || colorformat == frame::colormap::BGR) {
channels = 3;
} else { //BGRA and RGBA
channels = 4;
}
colorBuffer.resize(width * height * channels);
PointType viewDir = dir.normalized();
PointType upn = up.normalized();
PointType rightn = right.normalized();
float aspect = static_cast<float>(width) / height;
float tanfovy = 1.f;
float tanfovx = tanfovy * aspect;
const Eigen::Vector3f defaultColor(0.1f, 0.2f, 0.4f);
#pragma omp parallel for schedule(dynamic) collapse(2)
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
float px = (2.0f * (x + 0.5f) / width - 1.0f) * tanfovx;
float py = (1.0f - 2.0f * (y + 0.5f) / height) * tanfovy;
PointType rayDir = viewDir + (rightn * px) + (upn * py);
rayDir.normalize();
std::vector<std::shared_ptr<NodeData>> hits = voxelTraverse(
origin, rayDir, std::numeric_limits<float>::max(), true);
Eigen::Vector3f color = hits.empty() ? defaultColor : hits[0]->color;
color = color.cwiseMax(0.0f).cwiseMin(1.0f);
int idx = (y * width + x) * channels;
switch(colorformat) {
case frame::colormap::B:
colorBuffer[idx ] = static_cast<uint8_t>(color.mean() * 255.0f);
break;
case frame::colormap::RGB:
colorBuffer[idx ] = static_cast<uint8_t>(color[0] * 255.0f);
colorBuffer[idx + 1] = static_cast<uint8_t>(color[1] * 255.0f);
colorBuffer[idx + 2] = static_cast<uint8_t>(color[2] * 255.0f);
break;
case frame::colormap::BGR:
colorBuffer[idx ] = static_cast<uint8_t>(color[2] * 255.0f);
colorBuffer[idx + 1] = static_cast<uint8_t>(color[1] * 255.0f);
colorBuffer[idx + 2] = static_cast<uint8_t>(color[0] * 255.0f);
break;
case frame::colormap::RGBA:
colorBuffer[idx ] = static_cast<uint8_t>(color[0] * 255.0f);
colorBuffer[idx + 1] = static_cast<uint8_t>(color[1] * 255.0f);
colorBuffer[idx + 2] = static_cast<uint8_t>(color[2] * 255.0f);
colorBuffer[idx + 3] = 255;
break;
case frame::colormap::BGRA:
colorBuffer[idx ] = static_cast<uint8_t>(color[2] * 255.0f);
colorBuffer[idx + 1] = static_cast<uint8_t>(color[1] * 255.0f);
colorBuffer[idx + 2] = static_cast<uint8_t>(color[0] * 255.0f);
colorBuffer[idx + 3] = 255;
break;
}
}
}
outFrame.setData(colorBuffer);
return outFrame;
}
bool empty() const { return size == 0; }
};
#endif