Computing tMax from the precomputed table was slower, so it is now calculated per ray inside voxelTraverse.
This commit is contained in:
@@ -112,6 +112,7 @@ struct Camera {
|
|||||||
|
|
||||||
struct Chunk {
|
struct Chunk {
|
||||||
Voxel reprVoxel; //average of all voxels in chunk for LOD rendering
|
Voxel reprVoxel; //average of all voxels in chunk for LOD rendering
|
||||||
|
std::vector<bool> activeVoxels; //use this to specify active voxels in this chunk.
|
||||||
//std::vector<Voxel> voxels; //list of all voxels in chunk.
|
//std::vector<Voxel> voxels; //list of all voxels in chunk.
|
||||||
std::vector<Chunk> children; //list of all chunks in chunk
|
std::vector<Chunk> children; //list of all chunks in chunk
|
||||||
bool active; //active if any child chunk or child voxel is active. used to efficiently find active voxels by only going down when an active chunk is found.
|
bool active; //active if any child chunk or child voxel is active. used to efficiently find active voxels by only going down when an active chunk is found.
|
||||||
@@ -125,7 +126,6 @@ private:
|
|||||||
Vec3i gridSize;
|
Vec3i gridSize;
|
||||||
std::vector<Voxel> voxels;
|
std::vector<Voxel> voxels;
|
||||||
std::unordered_map<Vec3i, Chunk, Vec3i::Hash> chunkList;
|
std::unordered_map<Vec3i, Chunk, Vec3i::Hash> chunkList;
|
||||||
std::unordered_map<Vec3i, bool, Vec3i::Hash> activeChunks;
|
|
||||||
int xyPlane;
|
int xyPlane;
|
||||||
|
|
||||||
float radians(float rads) {
|
float radians(float rads) {
|
||||||
@@ -141,9 +141,138 @@ private:
|
|||||||
|
|
||||||
if (isActive) {
|
if (isActive) {
|
||||||
chunkList[chunkCoord].active = true;
|
chunkList[chunkCoord].active = true;
|
||||||
activeChunks[chunkCoord] = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Reference Morton (Z-order) encoder: interleaves the low 21 bits of x, y and z
// one bit at a time. Bit i of x lands at bit 3*i of the result, bit i of y at
// 3*i + 1 and bit i of z at 3*i + 2. Bits above the 21st of each coordinate are
// ignored.
//
// x, y, z : coordinates; only bits 0..20 contribute.
// returns : the interleaved code (up to 63 significant bits).
size_t mortonEncode1(int x, int y, int z) const {
    //TIME_FUNCTION;
    // Widen before shifting. In plain int arithmetic the shifts below (up to
    // 42 positions) exceed the width of int, which is undefined behaviour and
    // drops the high bits of the code.
    const uint64_t ux = static_cast<uint64_t>(x);
    const uint64_t uy = static_cast<uint64_t>(y);
    const uint64_t uz = static_cast<uint64_t>(z);

    uint64_t code = 0;
    for (int i = 0; i < 21; ++i) {
        const uint64_t bit = uint64_t{1} << i;
        code |= ((ux & bit) << (2 * i)) |
                ((uy & bit) << (2 * i + 1)) |
                ((uz & bit) << (2 * i + 2));
    }
    return static_cast<size_t>(code);
}
|
||||||
|
// Morton (Z-order) encoder using the shift-and-mask "magic bits" sequence.
// Each coordinate is truncated to 21 bits and then dilated so that two zero
// bits separate every original bit; the three dilated lanes are finally
// offset by 0, 1 and 2 bits and OR-ed together.
//
// x, y, z : coordinates; only bits 0..20 contribute.
// returns : the interleaved code (x in bits 0,3,6..., y in 1,4,7..., z in 2,5,8...).
size_t mortonEncode2(int x, int y, int z) const {
    //TIME_FUNCTION;
    // Truncate to 21 bits per axis so the three lanes fit in 63 bits.
    uint64_t sx = x & 0x1FFFFF;
    uint64_t sy = y & 0x1FFFFF;
    uint64_t sz = z & 0x1FFFFF;

    // Stage 1: split off the top 5 bits (gap of 32).
    sx = (sx | (sx << 32)) & 0x1F00000000FFFF;
    sy = (sy | (sy << 32)) & 0x1F00000000FFFF;
    sz = (sz | (sz << 32)) & 0x1F00000000FFFF;

    // Stage 2: groups of 8 bits (gap of 16).
    sx = (sx | (sx << 16)) & 0x1F0000FF0000FF;
    sy = (sy | (sy << 16)) & 0x1F0000FF0000FF;
    sz = (sz | (sz << 16)) & 0x1F0000FF0000FF;

    // Stage 3: groups of 4 bits (gap of 8).
    sx = (sx | (sx << 8)) & 0x100F00F00F00F00F;
    sy = (sy | (sy << 8)) & 0x100F00F00F00F00F;
    sz = (sz | (sz << 8)) & 0x100F00F00F00F00F;

    // Stage 4: groups of 2 bits (gap of 4).
    sx = (sx | (sx << 4)) & 0x10C30C30C30C30C3;
    sy = (sy | (sy << 4)) & 0x10C30C30C30C30C3;
    sz = (sz | (sz << 4)) & 0x10C30C30C30C30C3;

    // Stage 5: single bits, one every third position.
    sx = (sx | (sx << 2)) & 0x1249249249249249;
    sy = (sy | (sy << 2)) & 0x1249249249249249;
    sz = (sz | (sz << 2)) & 0x1249249249249249;

    // Offset the y and z lanes and merge.
    return sx | (sy << 1) | (sz << 2);
}
|
||||||
|
// Morton (Z-order) encoder, multiply-based variant of the magic-bits spread.
//
// Each stage computes n * (1 + 2^k) instead of n | (n << k). The two are equal
// here because, after the preceding mask, n and n << k never share a set bit,
// so the addition inside the multiplication produces no carries.
//
// The previous constants did not form a valid spread (the first stage discarded
// the low bits, and two masks were wider than 64 bits), so the result was not a
// Morton code.
//
// x, y, z : coordinates; only bits 0..20 contribute.
// returns : the interleaved code (x in bits 0,3,6..., y in 1,4,7..., z in 2,5,8...).
size_t mortonEncode3(int x, int y, int z) const {
    //TIME_FUNCTION;
    uint64_t xx = x & 0x1FFFFF; // 21 bits: 2,097,152 values
    uint64_t yy = y & 0x1FFFFF;
    uint64_t zz = z & 0x1FFFFF;

    // Dilate x: multiplier (1 + 2^k) for k = 32, 16, 8, 4, 2.
    xx = (xx * 0x100000001ULL) & 0x1F00000000FFFF;
    xx = (xx * 0x10001ULL) & 0x1F0000FF0000FF;
    xx = (xx * 0x101ULL) & 0x100F00F00F00F00F;
    xx = (xx * 0x11ULL) & 0x10C30C30C30C30C3;
    xx = (xx * 0x5ULL) & 0x1249249249249249;

    // Dilate y.
    yy = (yy * 0x100000001ULL) & 0x1F00000000FFFF;
    yy = (yy * 0x10001ULL) & 0x1F0000FF0000FF;
    yy = (yy * 0x101ULL) & 0x100F00F00F00F00F;
    yy = (yy * 0x11ULL) & 0x10C30C30C30C30C3;
    yy = (yy * 0x5ULL) & 0x1249249249249249;

    // Dilate z.
    zz = (zz * 0x100000001ULL) & 0x1F00000000FFFF;
    zz = (zz * 0x10001ULL) & 0x1F0000FF0000FF;
    zz = (zz * 0x101ULL) & 0x100F00F00F00F00F;
    zz = (zz * 0x11ULL) & 0x10C30C30C30C30C3;
    zz = (zz * 0x5ULL) & 0x1249249249249249;

    return static_cast<size_t>(xx | (yy << 1) | (zz << 2));
}
|
||||||
|
// Morton (Z-order) encoder that factors the 21-bit dilation into one local
// helper and applies it to each axis.
//
// x, y, z : coordinates; only bits 0..20 contribute.
// returns : the interleaved code (x in bits 0,3,6..., y in 1,4,7..., z in 2,5,8...).
size_t mortonEncode4(int x, int y, int z) const {
    //TIME_FUNCTION;
    // Spreads the low 21 bits of `v` so that each lands on every third bit.
    const auto dilate = [](uint64_t v) -> uint64_t {
        v &= 0x1FFFFF; // Keep only 21 bits
        v |= v << 32;
        v &= 0x1F00000000FFFF;
        v |= v << 16;
        v &= 0x1F0000FF0000FF;
        v |= v << 8;
        v &= 0x100F00F00F00F00F;
        v |= v << 4;
        v &= 0x10C30C30C30C30C3;
        v |= v << 2;
        v &= 0x1249249249249249;
        return v;
    };

    const uint64_t laneX = dilate(x);
    const uint64_t laneY = dilate(y) << 1;
    const uint64_t laneZ = dilate(z) << 2;
    return laneX | laneY | laneZ;
}
|
||||||
|
// Morton (Z-order) encoder that uses the BMI2 PDEP instruction when the
// translation unit is compiled with BMI2 enabled (__BMI2__ defined), and the
// shift-and-mask spread otherwise. Both paths return the same value.
//
// x, y, z : coordinates; only bits 0..20 contribute.
// returns : the interleaved code (x in bits 0,3,6..., y in 1,4,7..., z in 2,5,8...).
size_t mortonEncode5(int x, int y, int z) const {
    //TIME_FUNCTION;
    const uint64_t xx = x & 0x1FFFFF;
    const uint64_t yy = y & 0x1FFFFF;
    const uint64_t zz = z & 0x1FFFFF;

#ifdef __BMI2__
    // Deposit the 21 source bits onto every third bit (bits 0, 3, ..., 60).
    // Same lane mask as the final stage of the manual spread below.
    constexpr uint64_t kLaneMask = 0x1249249249249249;
    const uint64_t spread_x = _pdep_u64(xx, kLaneMask);
    const uint64_t spread_y = _pdep_u64(yy, kLaneMask);
    const uint64_t spread_z = _pdep_u64(zz, kLaneMask);
    return spread_x | (spread_y << 1) | (spread_z << 2);
#else
    // Fallback to manual bit spreading
    const auto spread = [](uint64_t n) -> uint64_t {
        n = (n | (n << 32)) & 0x1F00000000FFFF;
        n = (n | (n << 16)) & 0x1F0000FF0000FF;
        n = (n | (n << 8)) & 0x100F00F00F00F00F;
        n = (n | (n << 4)) & 0x10C30C30C30C30C3;
        n = (n | (n << 2)) & 0x1249249249249249;
        return n;
    };
    return spread(xx) | (spread(yy) << 1) | (spread(zz) << 2);
#endif
}
|
||||||
|
// Row-major (linear) index of (x, y, z): z * xyPlane + y * gridSize.x + x.
// Despite the name, this is not a Morton code; it is the plain linear layout
// kept as an alternative to the mortonEncodeN variants.
//
// x, y, z : grid coordinates.
// returns : the linear index.
size_t mortonEncodefallback(int x, int y, int z) const {
    TIME_FUNCTION;
    // Widen every operand to size_t before multiplying so the products cannot
    // overflow int on large grids. Unsigned arithmetic is modular, so the
    // result matches the int computation whenever that one does not overflow.
    return static_cast<size_t>(z) * static_cast<size_t>(xyPlane)
         + static_cast<size_t>(y) * static_cast<size_t>(gridSize.x)
         + static_cast<size_t>(x);
}
|
||||||
|
|
||||||
|
// Maps grid coordinates (x, y, z) to the index used to address `voxels`.
// Forwards to mortonEncode2, the variant with the lowest total time in the
// measurements recorded below.
//
// Recorded timings (5 runs each):
//                  Total (s)   Avg (s)    Min (s)    Median (s) P99 (s)    P99.9 (s)  Max (s)
//   mortonEncode1  119.849897  23.969979  23.405616  23.535808  25.063036  25.063036  25.063036
//   mortonEncode2   51.146427  10.229285   9.930608  10.030483  11.166704  11.166704  11.166704
//   mortonEncode3  broken
//   mortonEncode4   55.926195  11.185239  10.567710  10.856774  12.258461  12.258461  12.258461
//   mortonEncode5   53.964580  10.792916  10.475732  10.680918  11.422500  11.422500  11.422500
//
// Other candidates left unmeasured here: mortonEncodefallback(x,y,z), or the
// inline linear form z * xyPlane + y * gridSize.x + x.
//
// NOTE(review): the resize code visible in this file allocates
// newW * newH * newD voxels and copies rows using linear offsets. A Morton
// code is only guaranteed to stay below that count when the grid is a
// power-of-two cube, so this may index out of range for other sizes - TODO
// confirm against the allocation and copy paths.
size_t mortonEncode(int x, int y, int z) const {
    return mortonEncode2(x, y, z);
}
|
||||||
|
|
||||||
// Slab method for AABB intersection
|
// Slab method for AABB intersection
|
||||||
bool intersectRayAABB(const Vec3f& origin, const Vec3f& dir, const Vec3f& boxMin, const Vec3f& boxMax, float& tNear, float& tFar) const {
|
bool intersectRayAABB(const Vec3f& origin, const Vec3f& dir, const Vec3f& boxMin, const Vec3f& boxMax, float& tNear, float& tFar) const {
|
||||||
@@ -187,11 +316,11 @@ public:
|
|||||||
static std::unique_ptr<VoxelGrid> deserializeFromFile(const std::string& filename);
|
static std::unique_ptr<VoxelGrid> deserializeFromFile(const std::string& filename);
|
||||||
|
|
||||||
// Returns a mutable reference to the voxel stored for grid coordinates
// (x, y, z). No bounds check is performed here.
// NOTE(review): this span is side-by-side diff residue. The row-major
// `return` appears to be the pre-change line and the mortonEncode `return`
// the post-change line - confirm against the real file.
Voxel& get(int x, int y, int z) {
|
Voxel& get(int x, int y, int z) {
|
||||||
return voxels[z * xyPlane + y * gridSize.x + x];
|
return voxels[mortonEncode(x,y,z)];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a read-only reference to the voxel stored for grid coordinates
// (x, y, z). No bounds check is performed here.
// NOTE(review): this span is side-by-side diff residue. The row-major
// `return` appears to be the pre-change line and the mortonEncode `return`
// the post-change line - confirm against the real file.
const Voxel& get(int x, int y, int z) const {
|
const Voxel& get(int x, int y, int z) const {
|
||||||
return voxels[z * xyPlane + y * gridSize.x + x];
|
return voxels[mortonEncode(x,y,z)];
|
||||||
}
|
}
|
||||||
|
|
||||||
Voxel& get(const Vec3i& xyz) {
|
Voxel& get(const Vec3i& xyz) {
|
||||||
@@ -206,7 +335,6 @@ public:
|
|||||||
std::vector<Voxel> newVoxels(newW * newH * newD);
|
std::vector<Voxel> newVoxels(newW * newH * newD);
|
||||||
|
|
||||||
std::unordered_map<Vec3i, Chunk, Vec3i::Hash> chunklist;
|
std::unordered_map<Vec3i, Chunk, Vec3i::Hash> chunklist;
|
||||||
std::unordered_map<Vec3i, bool, Vec3i::Hash> newActiveChunks;
|
|
||||||
|
|
||||||
int copyW = std::min(static_cast<int>(gridSize.x), newW);
|
int copyW = std::min(static_cast<int>(gridSize.x), newW);
|
||||||
int copyH = std::min(static_cast<int>(gridSize.y), newH);
|
int copyH = std::min(static_cast<int>(gridSize.y), newH);
|
||||||
@@ -225,13 +353,11 @@ public:
|
|||||||
for (int x = 0; x < copyW; ++x) {
|
for (int x = 0; x < copyW; ++x) {
|
||||||
if (voxels[oldRowStart + x].active) {
|
if (voxels[oldRowStart + x].active) {
|
||||||
Vec3i cc(x / CHUNK_THRESHOLD, y / CHUNK_THRESHOLD, z / CHUNK_THRESHOLD);
|
Vec3i cc(x / CHUNK_THRESHOLD, y / CHUNK_THRESHOLD, z / CHUNK_THRESHOLD);
|
||||||
newActiveChunks[cc] = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
voxels = std::move(newVoxels);
|
voxels = std::move(newVoxels);
|
||||||
activeChunks = std::move(newActiveChunks);
|
|
||||||
gridSize = Vec3i(newW, newH, newD);
|
gridSize = Vec3i(newW, newH, newD);
|
||||||
xyPlane = gridSize.x * gridSize.y;
|
xyPlane = gridSize.x * gridSize.y;
|
||||||
}
|
}
|
||||||
@@ -283,14 +409,34 @@ public:
|
|||||||
return voxl.AllGTE(0) && voxl.AllLT(gridSize);
|
return voxl.AllGTE(0) && voxl.AllLT(gridSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void voxelTraverse(const Vec3f& origin, const Vec3f& end, Voxel& outVoxel, Vec3i& step, const Vec3f& ray, Vec3f& tMax, int maxDist = 10000000) const {
|
void voxelTraverse(const Vec3f& origin, const Vec3f& end, Voxel& outVoxel, Vec3i& step, int maxDist = 10000000) const {
|
||||||
Vec3i cv = origin.floorToI();
|
Vec3i cv = origin.floorToI();
|
||||||
Vec3i lv = end.floorToI();
|
Vec3i lv = end.floorToI();
|
||||||
|
Vec3f ray = end - origin;
|
||||||
step = Vec3i(ray.x >= 0 ? 1 : -1, ray.y >= 0 ? 1 : -1, ray.z >= 0 ? 1 : -1);
|
step = Vec3i(ray.x >= 0 ? 1 : -1, ray.y >= 0 ? 1 : -1, ray.z >= 0 ? 1 : -1);
|
||||||
Vec3f tDelta = Vec3f(ray.x != 0 ? std::abs(1.0f / ray.x) : INF,
|
Vec3f tDelta = Vec3f(ray.x != 0 ? std::abs(1.0f / ray.x) : INF,
|
||||||
ray.y != 0 ? std::abs(1.0f / ray.y) : INF,
|
ray.y != 0 ? std::abs(1.0f / ray.y) : INF,
|
||||||
ray.z != 0 ? std::abs(1.0f / ray.z) : INF);
|
ray.z != 0 ? std::abs(1.0f / ray.z) : INF);
|
||||||
|
|
||||||
|
Vec3f tMax;
|
||||||
|
if (ray.x > 0) {
|
||||||
|
tMax.x = (std::floor(origin.x) + 1.0f - origin.x) / ray.x;
|
||||||
|
} else if (ray.x < 0) {
|
||||||
|
tMax.x = (origin.x - std::floor(origin.x)) / -ray.x;
|
||||||
|
} else tMax.x = INF;
|
||||||
|
|
||||||
|
if (ray.y > 0) {
|
||||||
|
tMax.y = (std::floor(origin.y) + 1.0f - origin.y) / ray.y;
|
||||||
|
} else if (ray.y < 0) {
|
||||||
|
tMax.y = (origin.y - std::floor(origin.y)) / -ray.y;
|
||||||
|
} else tMax.y = INF;
|
||||||
|
|
||||||
|
if (ray.z > 0) {
|
||||||
|
tMax.z = (std::floor(origin.z) + 1.0f - origin.z) / ray.z;
|
||||||
|
} else if (ray.z < 0) {
|
||||||
|
tMax.z = (origin.z - std::floor(origin.z)) / -ray.z;
|
||||||
|
} else tMax.z = INF;
|
||||||
|
|
||||||
float dist = 0.0f;
|
float dist = 0.0f;
|
||||||
outVoxel.alpha = 0.0;
|
outVoxel.alpha = 0.0;
|
||||||
|
|
||||||
@@ -373,18 +519,18 @@ public:
|
|||||||
precomputedSteps[7] = Vec3i(-1, -1, -1);// ---
|
precomputedSteps[7] = Vec3i(-1, -1, -1);// ---
|
||||||
std::array<Vec3f, 8> precomputedTMax;
|
std::array<Vec3f, 8> precomputedTMax;
|
||||||
|
|
||||||
Vec3f floored = cam.posfor.origin.floor();
|
// Vec3f floored = cam.posfor.origin.floor();
|
||||||
Vec3f dNext = floored + 1.f - cam.posfor.origin;
|
// Vec3f dNext = floored + 1.f - cam.posfor.origin;
|
||||||
Vec3f dPrev = cam.posfor.origin - floored;
|
// Vec3f dPrev = cam.posfor.origin - floored;
|
||||||
|
|
||||||
precomputedTMax[0] = Vec3f(dNext.x, dNext.y, dNext.z);
|
// precomputedTMax[0] = Vec3f(dNext.x, dNext.y, dNext.z);
|
||||||
precomputedTMax[1] = Vec3f(dPrev.x, dNext.y, dNext.z);
|
// precomputedTMax[1] = Vec3f(dPrev.x, dNext.y, dNext.z);
|
||||||
precomputedTMax[2] = Vec3f(dNext.x, dPrev.y, dNext.z);
|
// precomputedTMax[2] = Vec3f(dNext.x, dPrev.y, dNext.z);
|
||||||
precomputedTMax[3] = Vec3f(dPrev.x, dPrev.y, dNext.z);
|
// precomputedTMax[3] = Vec3f(dPrev.x, dPrev.y, dNext.z);
|
||||||
precomputedTMax[4] = Vec3f(dNext.x, dNext.y, dPrev.z);
|
// precomputedTMax[4] = Vec3f(dNext.x, dNext.y, dPrev.z);
|
||||||
precomputedTMax[5] = Vec3f(dPrev.x, dNext.y, dPrev.z);
|
// precomputedTMax[5] = Vec3f(dPrev.x, dNext.y, dPrev.z);
|
||||||
precomputedTMax[6] = Vec3f(dNext.x, dPrev.y, dPrev.z);
|
// precomputedTMax[6] = Vec3f(dNext.x, dPrev.y, dPrev.z);
|
||||||
precomputedTMax[7] = Vec3f(dPrev.x, dPrev.y, dPrev.z);
|
// precomputedTMax[7] = Vec3f(dPrev.x, dPrev.y, dPrev.z);
|
||||||
|
|
||||||
frame outFrame(resolution.x, resolution.y, colorformat);
|
frame outFrame(resolution.x, resolution.y, colorformat);
|
||||||
std::vector<uint8_t> colorBuffer;
|
std::vector<uint8_t> colorBuffer;
|
||||||
@@ -437,15 +583,15 @@ public:
|
|||||||
int xQuad = yQuad;
|
int xQuad = yQuad;
|
||||||
if (u < 0) xQuad ^= 1;
|
if (u < 0) xQuad ^= 1;
|
||||||
step = precomputedSteps[xQuad];
|
step = precomputedSteps[xQuad];
|
||||||
Vec3f tMaxBase = precomputedTMax[xQuad];
|
//Vec3f tMaxBase = precomputedTMax[xQuad];
|
||||||
Vec3f ray = rayEnd - rayStartGrid;
|
Vec3f ray = rayEnd - rayStartGrid;
|
||||||
Vec3f tMax(
|
// Vec3f tMax(
|
||||||
ray.x != 0 ? tMaxBase.x / std::abs(ray.x) : INF,
|
// ray.x != 0 ? tMaxBase.x / std::abs(ray.x) : INF,
|
||||||
ray.y != 0 ? tMaxBase.y / std::abs(ray.y) : INF,
|
// ray.y != 0 ? tMaxBase.y / std::abs(ray.y) : INF,
|
||||||
ray.z != 0 ? tMaxBase.z / std::abs(ray.z) : INF
|
// ray.z != 0 ? tMaxBase.z / std::abs(ray.z) : INF
|
||||||
);
|
// );
|
||||||
|
|
||||||
voxelTraverse(rayStartGrid, rayEnd, outVoxel, step, ray, tMax, maxDist);
|
voxelTraverse(rayStartGrid, rayEnd, outVoxel, step, maxDist);
|
||||||
Vec3ui8 hitColor = outVoxel.color;
|
Vec3ui8 hitColor = outVoxel.color;
|
||||||
// Set pixel color in buffer
|
// Set pixel color in buffer
|
||||||
switch (colorformat) {
|
switch (colorformat) {
|
||||||
@@ -492,7 +638,6 @@ public:
|
|||||||
std::cout << "Total voxels: " << totalVoxels << std::endl;
|
std::cout << "Total voxels: " << totalVoxels << std::endl;
|
||||||
std::cout << "Active voxels: " << activeVoxels << std::endl;
|
std::cout << "Active voxels: " << activeVoxels << std::endl;
|
||||||
std::cout << "Inactive voxels: " << (totalVoxels - activeVoxels) << std::endl;
|
std::cout << "Inactive voxels: " << (totalVoxels - activeVoxels) << std::endl;
|
||||||
std::cout << "Active chunks (map size): " << activeChunks.size() << std::endl;
|
|
||||||
std::cout << "Active percentage: " << activePercentage << "%" << std::endl;
|
std::cout << "Active percentage: " << activePercentage << "%" << std::endl;
|
||||||
std::cout << "Memory usage (approx): " << (voxels.size() * sizeof(Voxel)) / 1024 << " KB" << std::endl;
|
std::cout << "Memory usage (approx): " << (voxels.size() * sizeof(Voxel)) / 1024 << " KB" << std::endl;
|
||||||
std::cout << "============================" << std::endl;
|
std::cout << "============================" << std::endl;
|
||||||
|
|||||||
Reference in New Issue
Block a user