diff --git a/util/grid/grid3.hpp b/util/grid/grid3.hpp
index 7b84a6f..bd84be0 100644
--- a/util/grid/grid3.hpp
+++ b/util/grid/grid3.hpp
@@ -351,11 +351,12 @@ public:
         std::vector colorBuffer(resolution.x * resolution.y * 3);
         #pragma omp parallel for
         for (int y = 0; y < resolution.y; y++) {
-            float v = (1.f - 2.f * (y+0.5f) / resolution.y) * viewH;
+            float v = (1.f - 2.f * (y+0.5f) / resolution.y) * viewH;
+            Vec3f vup = cam.up * v;
             for (int x = 0; x < resolution.x; x++) {
                 Voxel outVoxel(0, false, 0.f, Vec3ui8(10, 10, 255));
                 float u = (2.f * (x+0.5f)/resolution.x - 1.f) * viewW;
-                Vec3f rayDirWorld = (forward + right * u + cam.up * v).normalized();
+                Vec3f rayDirWorld = (forward + right * u + vup).normalized();
                 Vec3f rayStartGrid = cam.posfor.origin;
                 Vec3f rayEnd = rayStartGrid + rayDirWorld * maxDist;
                 voxelTraverse(rayStartGrid, rayEnd, outVoxel, maxDist);
diff --git a/util/vectorlogic/vec3.hpp b/util/vectorlogic/vec3.hpp
index c3afc3d..4b600b5 100644
--- a/util/vectorlogic/vec3.hpp
+++ b/util/vectorlogic/vec3.hpp
@@ -8,6 +8,14 @@
 #include
 #include "vec2.hpp"
 
+#include <cstdint>
+#include <cstring>
+#include <type_traits>
+
+#ifdef __SSE__
+#include
+#endif
+
 template
 class Vec3 {
 public:
@@ -32,6 +40,15 @@ public:
     Vec3 operator+(const Vec3& other) const {
         return Vec3(x + other.x, y + other.y, z + other.z);
     }
+
+    // Element-wise sum of two arrays: result[i] = a[i] + b[i] for i in [0, count).
+    // Static and void: the routine reads no instance state, and a non-void
+    // signature without a return statement is undefined behaviour.
+    static void addMulti(Vec3* result, const Vec3* a, const Vec3* b, size_t count) noexcept {
+        for (size_t i = 0; i < count; ++i) {
+            result[i] = a[i] + b[i];
+        }
+    }
 
     template
     Vec3 operator-(const Vec3& other) const {
@@ -142,7 +159,36 @@ public:
     }
 
     T length() const {
-        return static_cast(std::sqrt(static_cast(x * x + y * y + z * z)));
+        return static_cast<T>(std::sqrt(x * x + y * y + z * z));
+    }
+
+    // Approximate 1 / length(). float and double use the "fast inverse square
+    // root" bit trick refined by one Newton-Raphson step (relative error up to
+    // roughly 0.2%); every other T falls back to 1 / sqrt. A zero vector gives 0.
+    T invLength() const {
+        const T lenSq = x * x + y * y + z * z;
+        if (lenSq == 0) return 0;
+
+        if constexpr (std::is_same_v<T, float>) {
+            // Pun through a same-width integer with memcpy: `long` is 8 bytes on
+            // LP64 targets, so casting a float* to long* reads past the float.
+            std::uint32_t bits;
+            std::memcpy(&bits, &lenSq, sizeof bits);
+            bits = 0x5f3759dfu - (bits >> 1);
+            float est;
+            std::memcpy(&est, &bits, sizeof est);
+            return est * (1.5f - 0.5f * lenSq * est * est);
+        } else if constexpr (std::is_same_v<T, double>) {
+            std::uint64_t bits;
+            std::memcpy(&bits, &lenSq, sizeof bits);
+            bits = 0x5fe6eb50c7b537a9ull - (bits >> 1);
+            double est;
+            std::memcpy(&est, &bits, sizeof est);
+            return est * (1.5 - 0.5 * lenSq * est * est);
+        } else {
+            // No bit-level shortcut for this T; integral T truncates the result.
+            return static_cast<T>(1 / std::sqrt(lenSq));
+        }
     }
 
     T lengthSquared() const {
@@ -159,9 +205,18 @@ public:
     }
 
     Vec3 normalized() const {
-        T len = length();
-        if (len > 0) {
-            return *this / len;
+        if constexpr (std::is_floating_point_v<T>) {
+            // Approximate: invLength() trades a little accuracy for speed.
+            const T invLen = invLength();
+            if (invLen > 0) {
+                return Vec3(x * invLen, y * invLen, z * invLen);
+            }
+        } else {
+            // Integral T keeps the exact divide; the reciprocal would truncate.
+            const T len = length();
+            if (len > 0) {
+                return *this / len;
+            }
         }
         return *this;
     }
@@ -257,15 +312,15 @@ public:
     }
 
     Vec3 floorToI() const {
-        return Vec3(static_cast(std::floor(x)), static_cast(std::floor(x)), static_cast(std::floor(z)));
+        return Vec3(static_cast(std::floor(x)), static_cast(std::floor(y)), static_cast(std::floor(z)));
     }
 
     Vec3 floorToI8() const {
-        return Vec3(static_cast(std::floor(x)), static_cast(std::floor(x)), static_cast(std::floor(z)));
+        return Vec3(static_cast(std::floor(x)), static_cast(std::floor(y)), static_cast(std::floor(z)));
     }
 
     Vec3 floorToT() const {
-        return Vec3(static_cast(std::floor(x)), static_cast(std::floor(x)), static_cast(std::floor(z)));
+        return Vec3(static_cast(std::floor(x)), static_cast(std::floor(y)), static_cast(std::floor(z)));
     }
 
     Vec3 toFloat() const {
@@ -482,6 +537,82 @@ public:
     }
 };
 
+// #ifdef __SSE__
+// template<>
+// class Vec3 {
+//     union {
+//         __m128 simd;
+//         struct { float x, y, z, w; };
+//     }
+// public:
+
+//     Vec3() noexcept : simd(_mm_setzero_ps()) {}
+
+//     Vec3(float x, float y, float z) noexcept {
+//         simd = _mm_set_ps(0.0f, z, y, x);
+//     }
+
+//     Vec3(float scalar) noexcept {
+//         simd = _mm_set_ps(0.0f, scalar, scalar, scalar);
+//     }
+
+//     Vec3(const Vec3& other) noexcept : simd(other.simd) {}
+
+//     Vec3& operator=(const Vec3& other) noexcept {
+//         simd = other.simd;
+//         return *this;
+//     }
+
+//     Vec3 operator+(const Vec3& other) const noexcept {
+//         Vec3 result;
+//         result.simd = _mm_add_ps(simd, other.simd);
+//         return result;
+//     }
+
+//     Vec3 operator-(const Vec3& other) const noexcept {
+//         Vec3 result;
+//         result.simd = _mm_sub_ps(simd, other.simd);
+//         return result;
+//     }
+
+//     Vec3 operator*(const Vec3& other) const noexcept {
+//         Vec3 result;
+//         result.simd = _mm_mul_ps(simd, other.simd);
+//         return result;
+//     }
+
+//     Vec3 operator*(float scalar) const noexcept {
+//         Vec3 result;
+//         __m128 scalar_vec = _mm_set1_ps(scalar);
+//         result.simd = _mm_mul_ps(simd, scalar_vec);
+//         return result;
+//     }
+
+//     float dot(const Vec3& other) const noexcept {
+//         __m128 mul = _mm_mul_ps(simd, other.simd);
+//         __m128 shuf = _mm_movehdup_ps(mul);
+//         __m128 sums = _mm_add_ps(mul, shuf);
+//         shuf = _mm_movehl_ps(shuf, sums);
+//         sums = _mm_add_ss(sums, shuf);
+//         return _mm_cvtss_f32(sums);
+//     }
+
+//     // Add other necessary methods for the specialization
+//     float length() const {
+//         float len_sq = dot(*this);
+//         return std::sqrt(len_sq);
+//     }
+
+//     Vec3 normalized() const {
+//         float len = length();
+//         if (len > 0) {
+//             return *this * (1.0f / len);
+//         }
+//         return *this;
+//     }
+// };
+// #endif
+
 using Vec3f = Vec3;
 using Vec3d = Vec3;
 using Vec3i = Vec3;