From 0aeed604a75016f7e0ba44efa3cd8453bc9a82c5 Mon Sep 17 00:00:00 2001 From: Yggdrasil75 Date: Wed, 21 Jan 2026 15:01:22 -0500 Subject: [PATCH] pushing home. will need to correct some things. idea: precalculate regions of steps --- util/basicdefines.hpp | 5 +- util/grid/grid3.hpp | 128 ++++++++-------------- util/vectorlogic/vec3.hpp | 219 ++++++++++++++++++++++++-------------- 3 files changed, 188 insertions(+), 164 deletions(-) diff --git a/util/basicdefines.hpp b/util/basicdefines.hpp index f9baba7..b404f19 100644 --- a/util/basicdefines.hpp +++ b/util/basicdefines.hpp @@ -9,6 +9,8 @@ #define INF 2 ^ 31 - 1 #endif +#ifndef MARCHTABLES +#define MARCHTABLES int edgeTable[256] = { 0x0, 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c, 0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00, @@ -299,4 +301,5 @@ int triTable[256][16] = {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; \ No newline at end of file + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; +#endif \ No newline at end of file diff --git a/util/grid/grid3.hpp b/util/grid/grid3.hpp index bd84be0..f8e72b0 100644 --- a/util/grid/grid3.hpp +++ b/util/grid/grid3.hpp @@ -13,6 +13,7 @@ #include "../output/frame.hpp" #include "../noise/pnoise2.hpp" #include "../vecmat/mat4.hpp" +//#include "../vecmat/mat3.hpp" #include #include #include "../basicdefines.hpp" @@ -108,16 +109,6 @@ struct Camera { } }; -struct Vertex { - Vec3f position; - Vec3f normal; - Vec3ui8 color; - Vec2f texCoord; - - Vertex() = default; - Vertex(Vec3f pos, Vec3f norm, Vec3ui8 colr, Vec2f tex = Vec2f(0,0)) : position(pos), normal(norm), color(colr), texCoord(tex) {} -}; - struct Chunk { Voxel reprVoxel; //average of all voxels in chunk for LOD rendering //std::vector voxels; //list of all voxels in chunk. @@ -138,9 +129,9 @@ private: float radians(float rads) { return rads * (M_PI / 180); } - + Vec3i getChunkCoord(const Vec3i& voxelPos) const { - return Vec3i(voxelPos.x / CHUNK_THRESHOLD, voxelPos.y / CHUNK_THRESHOLD, voxelPos.z / CHUNK_THRESHOLD); + return voxelPos / CHUNK_THRESHOLD; } void updateChunkStatus(const Vec3i& pos, bool isActive) { @@ -221,19 +212,34 @@ public: void set(int x, int y, int z, bool active, Vec3ui8 color, float alpha = 1) { set(Vec3i(x,y,z), active, color, alpha); } - - void set(Vec3i pos, bool active, Vec3ui8 color, float alpha = 1) { - if (pos.x >= 0 && pos.y >= 0 && pos.z >= 0) { - if (!(pos.x < gridSize.x)) { - resize(pos.x, gridSize.y, gridSize.z); + + void set(Vec3i pos, bool active, Vec3ui8 color, float alpha = 1.f) { + if (pos.AllGTE(0)) { + if (pos.AnyGTE(gridSize)) { + resize(gridSize.max(pos)); } - else if (!(pos.y < gridSize.y)) { - resize(gridSize.x, pos.y, gridSize.z); - } - else if (!(pos.z < gridSize.z)) { - resize(gridSize.x, gridSize.y, pos.z); - } - + + Voxel& v = get(pos); + v.active = active; + v.color = color; + v.alpha = alpha; + updateChunkStatus(pos, active); + } + } + + void setBatch(const std::vector& positions, bool active, Vec3ui8 color, float alpha = 1.0f) { + // First, ensure grid is large enough + Vec3i maxPos(0,0,0); + for (const auto& pos : positions) { + maxPos = maxPos.max(pos); + } + + if (maxPos.AnyGTE(gridSize)) { + resize(maxPos); + } + + // Set all positions + for (const auto& pos : positions) { Voxel& v = get(pos); v.active = active; v.color = color; @@ -242,21 +248,15 @@ public: } } - void set(Vec3i pos, Vec4ui8 rgbaval, float alpha = 1) { - set(pos, static_cast(rgbaval.a / 255), rgbaval.toVec3(), alpha); - } - - template - bool inGrid(Vec3 voxl) const { - return (voxl >= 0 && voxl.x < gridSize.x && voxl.y < gridSize.y && voxl.z < gridSize.z); + bool inGrid(Vec3i voxl) const { + return voxl.AllGTE(0) && voxl.AllLT(gridSize); } void voxelTraverse(const Vec3f& origin, const Vec3f& end, Voxel& outVoxel, int maxDist = 10000000) const { Vec3i cv = origin.floorToI(); Vec3i lv = end.floorToI(); Vec3f ray = end - origin; - Vec3i step = Vec3i(ray.x >= 0 ? 1 : -1, ray.y >= 0 ? 1 : -1, ray.z >= 0 ? 1 : -1); - + Vec3i step = end.mask([](float v, float zero) { return v >= zero; }, 0.0f) * 2 - Vec3i(1); Vec3f tDelta = Vec3f(ray.x != 0 ? std::abs(1.0f / ray.x) : INF, ray.y != 0 ? std::abs(1.0f / ray.y) : INF, ray.z != 0 ? std::abs(1.0f / ray.z) : INF); @@ -282,7 +282,6 @@ public: float dist = 0.0f; outVoxel.alpha = 0.0; - //Vec3f newC = (outVoxel.color / 255).toFloat(); while (lv != cv && dist < 1.f && inGrid(cv) && outVoxel.alpha < 1.f) { @@ -341,18 +340,26 @@ public: TIME_FUNCTION; Vec3f forward = cam.forward(); Vec3f right = cam.right(); - //Vec3f upCor = right.cross(forward).normalized(); + Vec3f up = cam.up; + float aspect = resolution.aspect(); float fovRad = cam.fovRad(); - float viewH = tan(cam.fov / 2.f); + float viewH = tan(cam.fov * 0.5f); float viewW = viewH * aspect; float maxDist = std::sqrt(gridSize.lengthSquared()); + frame outFrame(resolution.x, resolution.y, colorformat); - std::vector colorBuffer(resolution.x * resolution.y * 3); + std::vector colorBuffer; + if (colorformat == frame::colormap::RGB) { + colorBuffer.resize(resolution.x * resolution.y * 3); + } else { + colorBuffer.resize(resolution.x * resolution.y * 4); + } + #pragma omp parallel for for (int y = 0; y < resolution.y; y++) { float v = (1.f - 2.f * (y+0.5f) / resolution.y) * viewH; - Vec3f vup = cam.up * v; + Vec3f vup = up * v; for (int x = 0; x < resolution.x; x++) { Voxel outVoxel(0, false, 0.f, Vec3ui8(10, 10, 255)); float u = (2.f * (x+0.5f)/resolution.x - 1.f) * viewW; @@ -374,7 +381,7 @@ public: case frame::colormap::RGB: default: { int idx = (y * resolution.y + x) * 3; - colorBuffer[idx + 0] = hitColor.x; + colorBuffer[idx] = hitColor.x; colorBuffer[idx + 1] = hitColor.y; colorBuffer[idx + 2] = hitColor.z; break; @@ -411,48 +418,7 @@ public: std::cout << "Memory usage (approx): " << (voxels.size() * sizeof(Voxel)) / 1024 << " KB" << std::endl; std::cout << "============================" << std::endl; } - -private: - // Helper function to check if a voxel is on the surface - bool isSurfaceVoxel(int x, int y, int z) const { - if (!inGrid(Vec3i(x, y, z))) return false; - if (!get(x, y, z).active) return false; - - // Check all 6 neighbors - static const std::array neighbors = {{ - Vec3i(1, 0, 0), Vec3i(-1, 0, 0), - Vec3i(0, 1, 0), Vec3i(0, -1, 0), - Vec3i(0, 0, 1), Vec3i(0, 0, -1) - }}; - - for (const auto& n : neighbors) { - Vec3i neighborPos(x + n.x, y + n.y, z + n.z); - if (!inGrid(neighborPos) || !get(neighborPos).active) { - return true; // At least one empty neighbor means this is a surface voxel - } - } - - return false; - } - // Get normal for a surface voxel - Vec3f calculateVoxelNormal(int x, int y, int z) const { - Vec3f normal(0, 0, 0); - - // Simple gradient-based normal calculation - if (inGrid(Vec3i(x+1, y, z)) && !get(x+1, y, z).active) normal.x += 1; - if (inGrid(Vec3i(x-1, y, z)) && !get(x-1, y, z).active) normal.x -= 1; - if (inGrid(Vec3i(x, y+1, z)) && !get(x, y+1, z).active) normal.y += 1; - if (inGrid(Vec3i(x, y-1, z)) && !get(x, y-1, z).active) normal.y -= 1; - if (inGrid(Vec3i(x, y, z+1)) && !get(x, y, z+1).active) normal.z += 1; - if (inGrid(Vec3i(x, y, z-1)) && !get(x, y, z-1).active) normal.z -= 1; - - if (normal.lengthSquared() > 0) { - return normal.normalized(); - } - return Vec3f(0, 1, 0); // Default up normal - } -public: std::vector genSlices(frame::colormap colorFormat = frame::colormap::RGB) const { TIME_FUNCTION; int colors; @@ -534,8 +500,6 @@ public: } return outframes; } - - }; //#include "g3_serialization.hpp" needed to be usable diff --git a/util/vectorlogic/vec3.hpp b/util/vectorlogic/vec3.hpp index 03fe95e..883023b 100644 --- a/util/vectorlogic/vec3.hpp +++ b/util/vectorlogic/vec3.hpp @@ -7,14 +7,16 @@ #include #include #include +#include #include "vec2.hpp" +#include "../basicdefines.hpp" #ifdef __SSE__ #include #endif template -class Vec3 { +class alignas(16) Vec3 { public: struct{ T x, y, z; }; @@ -22,8 +24,11 @@ public: Vec3(T x, T y, T z) : x(x), y(y), z(z) {} Vec3(T scalar) : x(scalar), y(scalar), z(scalar) {} Vec3(float acd[3]) : x(acd[0]), y(acd[1]), z(acd[2]) {} - - Vec3(const class Vec2& vec2, T z = 0); + template + Vec3(const Vec3& other) : x(static_cast(other.x)), y(static_cast(other.y)), z(static_cast(other.z)) {} + + template + Vec3(const class Vec2& vec2, U z = 0) : x(static_cast(vec2.x)), y(static_cast(vec2.y)), z(static_cast(z)) {} Vec3& move(const Vec3& newpos) { x = newpos.x; @@ -37,13 +42,6 @@ public: Vec3 operator+(const Vec3& other) const { return Vec3(x + other.x, y + other.y, z + other.z); } - - Vec3 addMulti(Vec3* result, const Vec3* a, const Vec3* b, size_t count) noexcept { - for (size_t i = 0; i < count; ++i) { - result[i] = a[i] + b[i]; - } - return *this; - } template Vec3 operator-(const Vec3& other) const { @@ -77,7 +75,8 @@ public: } Vec3 operator/(T scalar) const { - return Vec3(x / scalar, y / scalar, z / scalar); + T invScalar = T(1) / scalar; + return Vec3(x * invScalar, y * invScalar, z * invScalar); } Vec3& operator=(T scalar) { @@ -135,9 +134,10 @@ public: } Vec3& operator/=(T scalar) { - x /= scalar; - y /= scalar; - z /= scalar; + T invScalar = T(1) / scalar; + x *= invScalar; + y *= invScalar; + z *= invScalar; return *this; } @@ -155,7 +155,6 @@ public: T length() const { return std::sqrt(x * x + y * y + z * z); - //return static_cast(std::sqrt(static_cast(x * x + y * y + z * z))); } // Fast inverse length (Quake III algorithm) @@ -165,21 +164,21 @@ public: // Fast inverse square root approximation const T half = T(0.5) * lenSq; - T y = lenSq; + T o = lenSq; // Type punning for float/double if constexpr (std::is_same_v) { - long i = *(long*)&y; + long i = *(long*)&o; i = 0x5f3759df - (i >> 1); - y = *(float*)&i; + o = *(float*)&i; } else if constexpr (std::is_same_v) { - long long i = *(long long*)&y; + long long i = *(long long*)&o; i = 0x5fe6eb50c7b537a9 - (i >> 1); - y = *(double*)&i; + o = *(double*)&i; } - y = y * (T(1.5) - (half * y * y)); - return y; + o = o * (T(1.5) - (half * o * o)); + return o; } T lengthSquared() const { @@ -192,13 +191,28 @@ public: T distanceSquared(const Vec3& other) const { Vec3 diff = *this - other; - return diff.x * diff.x + diff.y * diff.y + diff.z * diff.z; + return diff.lengthSquared(); } + // Normalized with SSE optimization Vec3 normalized() const { const T invLen = invLength(); if (invLen > 0) { - return Vec3(x * invLen, y * invLen, z * invLen); + #ifdef __SSE__ + if constexpr (std::is_same_v) { + __m128 vec = _mm_set_ps(0.0f, z, y, x); + __m128 inv = _mm_set1_ps(invLen); + __m128 result = _mm_mul_ps(vec, inv); + + alignas(16) float components[4]; + _mm_store_ps(components, result); + return Vec3(components[0], components[1], components[2]); + } else + #endif + { + // Fallback to scalar operations + return Vec3(x * invLen, y * invLen, z * invLen); + } } return *this; } @@ -243,35 +257,35 @@ public: return (x >= scalar && y >= scalar && z >= scalar); } - bool AllLT(const Vec3& other) { + bool AllLT(const Vec3& other) const { return x < other.x && y < other.y && z < other.z; } - bool AllGT(const Vec3& other) { + bool AllGT(const Vec3& other) const { return x > other.x && y > other.y && z > other.z; } - bool AllLTE(const Vec3& other) { + bool AllLTE(const Vec3& other) const { return x <= other.x && y <= other.y && z <= other.z; } - bool AllGTE(const Vec3& other) { + bool AllGTE(const Vec3& other) const { return x >= other.x && y >= other.y && z >= other.z; } - bool AnyLT(const Vec3& other) { + bool AnyLT(const Vec3& other) const { return x < other.x || y < other.y || z < other.z; } - bool AnyGT(const Vec3& other) { + bool AnyGT(const Vec3& other) const { return x > other.x || y > other.y || z > other.z; } - bool AnyLTE(const Vec3& other) { + bool AnyLTE(const Vec3& other) const { return x <= other.x || y <= other.y || z <= other.z; } - bool AnyGTE(const Vec3& other) { + bool AnyGTE(const Vec3& other) const { return x >= other.x || y >= other.y || z >= other.z; } @@ -298,11 +312,11 @@ public: } Vec3 floorToI8() const { - return Vec3(static_cast(std::floor(x)), static_cast(std::floor(y)), static_cast(std::floor(z))); + return Vec3(static_cast(std::max(T(0), std::floor(x))), static_cast(std::max(T(0), std::floor(y))), static_cast(std::max(T(0), std::floor(z)))); } Vec3 floorToT() const { - return Vec3(static_cast(std::floor(x)), static_cast(std::floor(y)), static_cast(std::floor(z))); + return Vec3(static_cast(std::max(T(0), std::floor(x))), static_cast(std::max(T(0), std::floor(y))), static_cast(std::max(T(0), std::floor(z)))); } Vec3 toFloat() const { @@ -330,23 +344,16 @@ public: } Vec3 clamp(const Vec3& minVal, const Vec3& maxVal) const { - return Vec3( - std::clamp(x, minVal.x, maxVal.x), - std::clamp(y, minVal.y, maxVal.y), - std::clamp(z, minVal.z, maxVal.z) - ); + return this->max(minVal).min(maxVal); } Vec3 clamp(T minVal, T maxVal) const { - return Vec3( - std::clamp(x, minVal, maxVal), - std::clamp(y, minVal, maxVal), - std::clamp(z, minVal, maxVal) - ); + return this->max(Vec3(minVal)).min(Vec3(maxVal)); } - bool isZero(float epsilon = 1e-10f) const { - return std::abs(x) < epsilon && std::abs(y) < epsilon && std::abs(z) < epsilon; + bool isZero() const { + return length() < EPSILON; + //return std::abs(x) < epsilon && std::abs(y) < epsilon && std::abs(z) < epsilon; } bool equals(const Vec3& other, float epsilon = 1e-10f) const { @@ -388,37 +395,49 @@ public: } Vec3 lerp(const Vec3& other, T t) const { - t = std::clamp(t, 0.0f, 1.0f); + t = std::clamp(t, T(0), T(1)); return *this + (other - *this) * t; } + Vec3 fastLerp(const Vec3& other, T t) const { + return *this + (other - *this) * t; + } + + Vec3 fmaLerp(const Vec3& other, T t) const { + return Vec3( + std::fma(t, other.x - x, x), + std::fma(t, other.y - y, y), + std::fma(t, other.z - z, z) + ); + } + Vec3 slerp(const Vec3& other, T t) const { - t = std::clamp(t, 0.0f, 1.0f); - T dot = this->dot(other); - dot = std::clamp(dot, -1.0f, 1.0f); + t = std::clamp(t, T(0), T(1)); + T dotVal = this->dot(other); + dotVal = std::clamp(dotVal, T(-1), T(1)); - T theta = std::acos(dot) * t; - Vec3 relative = other - *this * dot; + T theta = std::acos(dotVal) * t; + Vec3 relative = other - *this * dotVal; relative = relative.normalized(); return (*this * std::cos(theta)) + (relative * std::sin(theta)); } - Vec3 rotateX(float angle) const { - float cosA = std::cos(angle); - float sinA = std::sin(angle); + Vec3 rotateX(T angle) const { + T cosA = std::cos(angle); + T sinA = std::sin(angle); return Vec3(x, y * cosA - z * sinA, y * sinA + z * cosA); } - Vec3 rotateY(float angle) const { - float cosA = std::cos(angle); - float sinA = std::sin(angle); + Vec3 rotateY(T angle) const { + T cosA = std::cos(angle); + T sinA = std::sin(angle); return Vec3(x * cosA + z * sinA, y, -x * sinA + z * cosA); } - Vec3 rotateZ(float angle) const { - float cosA = std::cos(angle); - float sinA = std::sin(angle); + Vec3 rotateZ(T angle) const { + T cosA = std::cos(angle); + T sinA = std::sin(angle); return Vec3(x * cosA - y * sinA, x * sinA + y * cosA, z); } @@ -461,27 +480,27 @@ public: return (&x)[index]; } - Vec3 safeInverse(float epsilon = 1e-10f) const { + Vec3 safeInverse() const { return Vec3( - 1 / (std::abs(x) < epsilon ? std::copysign(epsilon, x) : x), - 1 / (std::abs(y) < epsilon ? std::copysign(epsilon, y) : y), - 1 / (std::abs(z) < epsilon ? std::copysign(epsilon, z) : z) + 1 / (std::abs(x) < EPSILON ? std::copysign(EPSILON, x) : x), + 1 / (std::abs(y) < EPSILON ? std::copysign(EPSILON, y) : y), + 1 / (std::abs(z) < EPSILON ? std::copysign(EPSILON, z) : z) ); } uint8_t calculateOctantMask() const { uint8_t mask = 0; - if (x > 0.0f) mask |= 1; - if (y > 0.0f) mask |= 2; - if (z > 0.0f) mask |= 4; + if (x > 0.f) mask |= 1; + if (y > 0.f) mask |= 2; + if (z > 0.f) mask |= 4; return mask; } - float maxComp() const { + T maxComp() const { return std::max({x, y, z}); } - float minComp() const { + T minComp() const { return std::min({x, y, z}); } @@ -496,12 +515,12 @@ public: }; Vec2 toLatLon() const { - float r = length(); - if (r == 0) return Vec2(0, 0); - float θ = std::acos(z / r); - float lat = static_cast(M_PI/2.0 - θ); - - float lon = static_cast(std::atan2(y, x)); + T r = length(); + if (r == T(0)) return Vec2(0, 0); + T θ = std::acos(z / r); + T lat = static_cast(M_PI/2.0) - θ; + + T lon = std::atan2(y, x); return Vec2(lat, lon); } @@ -519,12 +538,50 @@ public: } }; -//use a smaller format first instead of larger format. -#ifdef std::float16_t -using Vec3f = Vec3; -#else -using Vec3f = Vec3; +#ifdef __SSE__ +// SSE-optimized version for float types +template<> +inline Vec3 Vec3::normalized() const { + float lenSq = lengthSquared(); + if (lenSq > 0.0f) { + // Load vector into SSE register + __m128 vec = _mm_set_ps(0.0f, z, y, x); // w=0, z, y, x + + // Fast inverse square root using SSE + __m128 lenSq128 = _mm_set1_ps(lenSq); + + // Quake III fast inverse sqrt SSE version + __m128 half = _mm_mul_ps(lenSq128, _mm_set1_ps(0.5f)); + __m128 three = _mm_set1_ps(1.5f); + + __m128 y = lenSq128; + __m128i i = _mm_castps_si128(y); + i = _mm_sub_epi32(_mm_set1_epi32(0x5f3759df), + _mm_srai_epi32(i, 1)); + y = _mm_castsi128_ps(i); + + y = _mm_mul_ps(y, _mm_sub_ps(three, _mm_mul_ps(half, _mm_mul_ps(y, y)))); + + // Multiply vector by inverse length + __m128 invLen128 = y; + __m128 result = _mm_mul_ps(vec, invLen128); + + // Extract results + alignas(16) float resultArr[4]; + _mm_store_ps(resultArr, result); + + return Vec3(resultArr[0], resultArr[1], resultArr[2]); + } + return *this; +}; #endif + +//use a smaller format first instead of larger format. +//#ifdef std::float16_t +//using Vec3f = Vec3; +//#else +using Vec3f = Vec3; +//#endif using Vec3d = Vec3; using Vec3i = Vec3; using Vec3i32 = Vec3;