This commit is contained in:
Yggdrasil75
2026-01-29 09:15:02 -05:00
parent 670ff42b82
commit c282acd725
3 changed files with 86 additions and 113 deletions

View File

@@ -7,13 +7,31 @@ STB_DIR := ./stb
# Compiler and flags # Compiler and flags
CXX := g++ CXX := g++
CXXFLAGS = -std=c++23 -O3 -march=native -I$(IMGUI_DIR) -I$(IMGUI_DIR)/backends -I$(STB_DIR) BASE_CXXFLAGS = -std=c++23 -O3 -I$(IMGUI_DIR) -I$(IMGUI_DIR)/backends -I$(STB_DIR)
CXXFLAGS += `pkg-config --cflags glfw3` BASE_CXXFLAGS += `pkg-config --cflags glfw3`
CFLAGS = $(CXXFLAGS) CFLAGS = $(BASE_CXXFLAGS)
LDFLAGS := -L./imgui -limgui -lGL LDFLAGS := -L./imgui -limgui -lGL
LINUX_GL_LIBS = -lGL -ltbb LINUX_GL_LIBS = -lGL -ltbb
PKG_FLAGS := $(LINUX_GL_LIBS) `pkg-config --static --cflags --libs glfw3` PKG_FLAGS := $(LINUX_GL_LIBS) `pkg-config --static --cflags --libs glfw3`
CXXFLAGS += $(PKG_FLAGS) BASE_CXXFLAGS += $(PKG_FLAGS)
# Detect SIMD support of the build host by asking the compiler which feature
# macros -march=native turns on. Probing whether the compiler merely accepts
# -mavx says nothing about the CPU and does not cover the -mavx2/-mfma flags
# that are actually enabled below. Preprocess-only (-E) also avoids linking an
# output file over /dev/null. Evaluated once (:=) and the result is stored.
SIMD_MACROS := $(shell $(CXX) -march=native -dM -E -x c++ /dev/null 2>/dev/null)
# The AVX path needs both AVX2 and FMA because both flags are passed below.
AVX_SUPPORTED := $(if $(and $(filter __AVX2__,$(SIMD_MACROS)),$(filter __FMA__,$(SIMD_MACROS))),yes,no)
SSE2_SUPPORTED := $(if $(filter __SSE2__,$(SIMD_MACROS)),yes,no)
# Set SIMD flags based on detection
ifeq ($(AVX_SUPPORTED),yes)
SIMD_CXXFLAGS = -mavx2 -mfma -DAVX
$(info Building with AVX2/FMA support)
else ifeq ($(SSE2_SUPPORTED),yes)
SIMD_CXXFLAGS = -msse2 -DSSE
$(info Building with SSE2 support (no AVX2/FMA))
else
# Probe failed or reported no x86 SIMD features: fall back to scalar code.
SIMD_CXXFLAGS = -DNO_SIMD
$(warning No SIMD support detected, building scalar version)
endif
# Final compile flags: common flags plus the selected SIMD flags.
CXXFLAGS = $(BASE_CXXFLAGS) $(SIMD_CXXFLAGS)
# Source files # Source files
SRC := $(SRC_DIR)/g3etest.cpp SRC := $(SRC_DIR)/g3etest.cpp
@@ -43,7 +61,7 @@ $(OBJ_DIR)/%.o: $(STB_DIR)/%.cpp
$(CXX) $(CXXFLAGS) -c -o $@ $< $(CXX) $(CXXFLAGS) -c -o $@ $<
all: $(EXE) all: $(EXE)
@echo Build complete for $(ECHO_MESSAGE) @echo "Build complete for $(UNAME_S)"
$(EXE): $(OBJS) $(EXE): $(OBJS)
$(CXX) -o $@ $^ $(CXXFLAGS) $(LIBS) $(CXX) -o $@ $^ $(CXXFLAGS) $(LIBS)

View File

@@ -53,7 +53,7 @@ struct ceilingdefaults {
float color[3] = {1.0f, 1.0f, 1.0f}; // White light float color[3] = {1.0f, 1.0f, 1.0f}; // White light
float emittance = 5.0f; // Brightness float emittance = 5.0f; // Brightness
float voxelSize = 2.0f; float voxelSize = 2.0f;
bool enabled = true; bool enabled = false;
}; };
std::mutex PreviewMutex; std::mutex PreviewMutex;
@@ -113,7 +113,7 @@ void createSphere(const defaults& config, const spheredefaults& sconfig, Octree<
pos.y() >= 0 && pos.y() < config.gridSizecube && pos.y() >= 0 && pos.y() < config.gridSizecube &&
pos.z() >= 0 && pos.z() < config.gridSizecube) { pos.z() >= 0 && pos.z() < config.gridSizecube) {
grid.set(1, pos, true, colorVec, finalSize, true, grid.set(1, pos, true, colorVec, finalSize, true, 1,
sconfig.light, sconfig.emittance, sconfig.refraction, sconfig.reflection); sconfig.light, sconfig.emittance, sconfig.refraction, sconfig.reflection);
} }
} }
@@ -136,7 +136,7 @@ void addCeilingLight(const defaults& config, const ceilingdefaults& ceilingconf,
PointType pos(x, ceilingconf.yLevel, z); PointType pos(x, ceilingconf.yLevel, z);
grid.set(2, pos, true, colorVec, ceilingconf.voxelSize, true, true, ceilingconf.emittance, 0.0f, 0.0f); grid.set(2, pos, true, colorVec, ceilingconf.voxelSize, true, 2, true, ceilingconf.emittance, 0.0f, 0.0f);
} }
} }
grid.printStats(); grid.printStats();

View File

@@ -30,6 +30,7 @@ public:
struct NodeData { struct NodeData {
T data; T data;
PointType position; PointType position;
int objectId;
bool active; bool active;
bool visible; bool visible;
float size; float size;
@@ -40,12 +41,12 @@ public:
float reflection; float reflection;
NodeData(const T& data, const PointType& pos, bool visible, Eigen::Vector3f color, float size = 0.01f, NodeData(const T& data, const PointType& pos, bool visible, Eigen::Vector3f color, float size = 0.01f,
bool active = true, bool light = false, float emittance = 0.0f, float refraction = 0.0f, bool active = true, int objectId = -1, bool light = false, float emittance = 0.0f, float refraction = 0.0f,
float reflection = 0.0f) : data(data), position(pos), active(active), visible(visible), float reflection = 0.0f) : data(data), position(pos), objectId(objectId), active(active), visible(visible),
color(color), size(size), light(light), emittance(emittance), refraction(refraction), color(color), size(size), light(light), emittance(emittance), refraction(refraction),
reflection(reflection) {} reflection(reflection) {}
NodeData() : active(false), visible(false), size(0.0f), light(false), NodeData() : objectId(-1), active(false), visible(false), size(0.0f), light(false),
emittance(0.0f), refraction(0.0f), reflection(0.0f) {} emittance(0.0f), refraction(0.0f), reflection(0.0f) {}
}; };
@@ -175,6 +176,7 @@ private:
writeVal(out, pt->data); writeVal(out, pt->data);
// Write properties // Write properties
writeVec3(out, pt->position); writeVec3(out, pt->position);
writeVal(out, pt->objectId);
writeVal(out, pt->active); writeVal(out, pt->active);
writeVal(out, pt->visible); writeVal(out, pt->visible);
writeVal(out, pt->size); writeVal(out, pt->size);
@@ -217,6 +219,7 @@ private:
auto pt = std::make_shared<NodeData>(); auto pt = std::make_shared<NodeData>();
readVal(in, pt->data); readVal(in, pt->data);
readVec3(in, pt->position); readVec3(in, pt->position);
readVal(in, pt->objectId);
readVal(in, pt->active); readVal(in, pt->active);
readVal(in, pt->visible); readVal(in, pt->visible);
readVal(in, pt->size); readVal(in, pt->size);
@@ -253,62 +256,6 @@ private:
} }
void bitonic_sort_8(std::array<std::pair<int, float>, 8>& arr) const noexcept { void bitonic_sort_8(std::array<std::pair<int, float>, 8>& arr) const noexcept {
#ifdef SSE
alignas(32) float values[8];
alignas(32) uint32_t indices[8];
for (int i = 0; i < 8; i++) {
values[i] = arr[i].second;
indices[i] = arr[i].first;
}
__m256 val = _mm256_load_ps(values);
__m256i idx = _mm256_load_si256((__m256i*)indices);
__m256 swapped1 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(2, 3, 0, 1));
__m256i swapped_idx1 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(2, 3, 0, 1));
__m256 mask1 = _mm256_cmp_ps(val, swapped1, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped1, mask1);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx1),
mask1));
__m256 swapped2 = _mm256_permute2f128_ps(val, val, 0x01);
__m256i swapped_idx2 = _mm256_permute2f128_si256(idx, idx, 0x01);
__m256 mask2 = _mm256_cmp_ps(val, swapped2, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped2, mask2);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx2),
mask2));
__m256 swapped3 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(1, 0, 3, 2));
__m256i swapped_idx3 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(1, 0, 3, 2));
__m256 mask3 = _mm256_cmp_ps(val, swapped3, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped3, mask3);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx3),
mask3));
__m256 swapped4 = _mm256_shuffle_ps(val, val, _MM_SHUFFLE(2, 3, 0, 1));
__m256i swapped_idx4 = _mm256_shuffle_epi32(idx, _MM_SHUFFLE(2, 3, 0, 1));
__m256 mask4 = _mm256_cmp_ps(val, swapped4, _CMP_GT_OQ);
val = _mm256_blendv_ps(val, swapped4, mask4);
idx = _mm256_castps_si256(_mm256_blendv_ps(
_mm256_castsi256_ps(idx),
_mm256_castsi256_ps(swapped_idx4),
mask4));
_mm256_store_ps(values, val);
_mm256_store_si256((__m256i*)indices, idx);
for (int i = 0; i < 8; i++) {
arr[i].second = values[i];
arr[i].first = (uint8_t)indices[i];
}
#else
auto a0 = arr[0], a1 = arr[1], a2 = arr[2], a3 = arr[3]; auto a0 = arr[0], a1 = arr[1], a2 = arr[2], a3 = arr[3];
auto a4 = arr[4], a5 = arr[5], a6 = arr[6], a7 = arr[7]; auto a4 = arr[4], a5 = arr[5], a6 = arr[6], a7 = arr[7];
@@ -344,7 +291,6 @@ private:
arr[0] = a0; arr[1] = a1; arr[2] = a2; arr[3] = a3; arr[0] = a0; arr[1] = a1; arr[2] = a2; arr[3] = a3;
arr[4] = a4; arr[5] = a5; arr[6] = a6; arr[7] = a7; arr[4] = a4; arr[5] = a5; arr[6] = a6; arr[7] = a7;
#endif
} }
bool rayBoxIntersect(const PointType& origin, const PointType& dir, const BoundingBox& box, bool rayBoxIntersect(const PointType& origin, const PointType& dir, const BoundingBox& box,
@@ -402,8 +348,8 @@ public:
Octree() : root_(nullptr), maxPointsPerNode(16), maxDepth(16), size(0) {} Octree() : root_(nullptr), maxPointsPerNode(16), maxDepth(16), size(0) {}
bool set(const T& data, const PointType& pos, bool visible, Eigen::Vector3f color, float size, bool active, bool set(const T& data, const PointType& pos, bool visible, Eigen::Vector3f color, float size, bool active,
bool light = false, float emittance = 0.0f, float refraction = 0.0f, float reflection = 0.0f) { int objectId = -1, bool light = false, float emittance = 0.0f, float refraction = 0.0f, float reflection = 0.0f) {
auto pointData = std::make_shared<NodeData>(data, pos, visible, color, size, active, auto pointData = std::make_shared<NodeData>(data, pos, visible, color, size, active, objectId,
light, emittance, refraction, reflection); light, emittance, refraction, reflection);
if (insertRecursive(root_.get(), pointData, 0)) { if (insertRecursive(root_.get(), pointData, 0)) {
this->size++; this->size++;
@@ -504,7 +450,7 @@ public:
if (found) { if (found) {
node->points.erase(it, node->points.end()); node->points.erase(it, node->points.end());
size--; // Decrement size counter size--;
return true; return true;
} }
return false; return false;
@@ -520,7 +466,7 @@ public:
return removeNode(root_.get()); return removeNode(root_.get());
} }
std::vector<std::shared_ptr<NodeData>> findInRadius(const PointType& center, float radius) { std::vector<std::shared_ptr<NodeData>> findInRadius(const PointType& center, float radius) const {
std::vector<std::shared_ptr<NodeData>> results; std::vector<std::shared_ptr<NodeData>> results;
if (!root_) return results; if (!root_) return results;
@@ -538,8 +484,8 @@ public:
} }
float distSq = (closestPoint - center).squaredNorm(); float distSq = (closestPoint - center).squaredNorm();
if (distSq > (radius + boxHalfSize.norm()) * (radius + boxHalfSize.norm())) { if (distSq > radiusSq) {
return; // No intersection return;
} }
if (node->isLeaf) { if (node->isLeaf) {
@@ -566,7 +512,7 @@ public:
bool update(const PointType& oldPos, const PointType& newPos, const T& newData = T(), bool newVisible = true, bool update(const PointType& oldPos, const PointType& newPos, const T& newData = T(), bool newVisible = true,
Eigen::Vector3f newColor = Eigen::Vector3f(1.0f, 1.0f, 1.0f), float newSize = 0.01f, bool newActive = true, Eigen::Vector3f newColor = Eigen::Vector3f(1.0f, 1.0f, 1.0f), float newSize = 0.01f, bool newActive = true,
bool newLight = false, float newEmittance = 0.0f, float newRefraction = 0.0f, float newReflection = 0.0f, int newObjectId = -2, bool newLight = false, float newEmittance = 0.0f, float newRefraction = 0.0f, float newReflection = 0.0f,
float tolerance = 0.0001f) { float tolerance = 0.0001f) {
// Find the existing point // Find the existing point
@@ -582,6 +528,7 @@ public:
bool visibleCopy = pointData->visible; bool visibleCopy = pointData->visible;
Eigen::Vector3f colorCopy = pointData->color; Eigen::Vector3f colorCopy = pointData->color;
float sizeCopy = pointData->size; float sizeCopy = pointData->size;
int objectIdCopy = pointData->objectId;
bool lightCopy = pointData->light; bool lightCopy = pointData->light;
float emittanceCopy = pointData->emittance; float emittanceCopy = pointData->emittance;
float refractionCopy = pointData->refraction; float refractionCopy = pointData->refraction;
@@ -598,6 +545,7 @@ public:
newColor != Eigen::Vector3f(1.0f, 1.0f, 1.0f) ? newColor : colorCopy, newColor != Eigen::Vector3f(1.0f, 1.0f, 1.0f) ? newColor : colorCopy,
newSize > 0 ? newSize : sizeCopy, newSize > 0 ? newSize : sizeCopy,
newActive ? newActive : activeCopy, newActive ? newActive : activeCopy,
newObjectId != -2 ? newObjectId : objectIdCopy,
newLight ? newLight : lightCopy, newLight ? newLight : lightCopy,
newEmittance > 0 ? newEmittance : emittanceCopy, newEmittance > 0 ? newEmittance : emittanceCopy,
newRefraction >= 0 ? newRefraction : refractionCopy, newRefraction >= 0 ? newRefraction : refractionCopy,
@@ -609,6 +557,7 @@ public:
pointData->visible = newVisible; pointData->visible = newVisible;
pointData->color = newColor; pointData->color = newColor;
pointData->size = newSize; pointData->size = newSize;
if (newObjectId != -2) pointData->objectId = newObjectId;
pointData->active = newActive; pointData->active = newActive;
pointData->light = newLight; pointData->light = newLight;
pointData->emittance = newEmittance; pointData->emittance = newEmittance;
@@ -618,6 +567,13 @@ public:
} }
} }
// Assigns `objectId` to the point that find(pos, tolerance) locates.
// Returns true when a point was found and updated, false when find()
// yields nothing (in which case nothing is modified).
bool setObjectId(const PointType& pos, int objectId, float tolerance = 0.0001f) {
    if (auto target = find(pos, tolerance)) {
        target->objectId = objectId;
        return true;
    }
    return false;
}
bool updateData(const PointType& pos, const T& newData, float tolerance = 0.0001f) { bool updateData(const PointType& pos, const T& newData, float tolerance = 0.0001f) {
auto pointData = find(pos, tolerance); auto pointData = find(pos, tolerance);
if (!pointData) return false; if (!pointData) return false;
@@ -675,7 +631,7 @@ public:
} }
std::vector<std::shared_ptr<NodeData>> voxelTraverse(const PointType& origin, const PointType& direction, std::vector<std::shared_ptr<NodeData>> voxelTraverse(const PointType& origin, const PointType& direction,
float maxDist, bool stopAtFirstHit) { float maxDist, bool stopAtFirstHit) const {
std::vector<std::shared_ptr<NodeData>> hits; std::vector<std::shared_ptr<NodeData>> hits;
if (empty()) return hits; if (empty()) return hits;
@@ -763,7 +719,7 @@ public:
float tanfovy = tanHalfFov; float tanfovy = tanHalfFov;
float tanfovx = tanHalfFov * aspect; float tanfovx = tanHalfFov * aspect;
PointType space(0,0,0); PointType space(0,0,0);
if (globalIllumination) space = {0.1,0.1,0.1}; if (globalIllumination) space = {0.1f, 0.1f, 0.1f};
const Eigen::Vector3f defaultColor(0.01f, 0.01f, 0.01f); const Eigen::Vector3f defaultColor(0.01f, 0.01f, 0.01f);
float rayLength = std::numeric_limits<float>::max(); float rayLength = std::numeric_limits<float>::max();
@@ -917,7 +873,6 @@ public:
size_t maxPointsInLeaf = 0; size_t maxPointsInLeaf = 0;
size_t minPointsInLeaf = std::numeric_limits<size_t>::max(); size_t minPointsInLeaf = std::numeric_limits<size_t>::max();
// Recursive lambda to gather stats
std::function<void(const OctreeNode*, size_t)> traverse = std::function<void(const OctreeNode*, size_t)> traverse =
[&](const OctreeNode* node, size_t depth) { [&](const OctreeNode* node, size_t depth) {
if (!node) return; if (!node) return;