Well, I think it's decent. I actually need to work on something else with this now.
This commit is contained in:
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@@ -90,7 +90,8 @@
|
|||||||
"__hash_table": "cpp",
|
"__hash_table": "cpp",
|
||||||
"__split_buffer": "cpp",
|
"__split_buffer": "cpp",
|
||||||
"__tree": "cpp",
|
"__tree": "cpp",
|
||||||
"stack": "cpp"
|
"stack": "cpp",
|
||||||
|
"future": "cpp"
|
||||||
},
|
},
|
||||||
"files.exclude": {
|
"files.exclude": {
|
||||||
"**/*.rpyc": true,
|
"**/*.rpyc": true,
|
||||||
|
|||||||
@@ -11,9 +11,9 @@
|
|||||||
#include "../util/timing_decorator.cpp"
|
#include "../util/timing_decorator.cpp"
|
||||||
|
|
||||||
struct AnimationConfig {
|
struct AnimationConfig {
|
||||||
int width = 4096;
|
int width = 2048;
|
||||||
int height = 4096;
|
int height = 2048;
|
||||||
int totalFrames = 4800;
|
int totalFrames = 480;
|
||||||
float fps = 30.0f;
|
float fps = 30.0f;
|
||||||
int numSeeds = 8;
|
int numSeeds = 8;
|
||||||
};
|
};
|
||||||
@@ -75,6 +75,7 @@ void expandPixel(Grid2& grid, AnimationConfig config, std::vector<std::tuple<siz
|
|||||||
visitedThisFrame.insert(std::get<0>(seed));
|
visitedThisFrame.insert(std::get<0>(seed));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const std::tuple<size_t, Vec2, Vec4>& seed : seeds) {
|
for (const std::tuple<size_t, Vec2, Vec4>& seed : seeds) {
|
||||||
size_t id = std::get<0>(seed);
|
size_t id = std::get<0>(seed);
|
||||||
Vec2 seedPOS = std::get<1>(seed);
|
Vec2 seedPOS = std::get<1>(seed);
|
||||||
|
|||||||
@@ -146,6 +146,7 @@ public:
|
|||||||
Vec2 maxGrid = worldToGrid(center + Vec2(radius, radius));
|
Vec2 maxGrid = worldToGrid(center + Vec2(radius, radius));
|
||||||
|
|
||||||
// Check all relevant grid cells
|
// Check all relevant grid cells
|
||||||
|
//#pragma omp parallel for
|
||||||
for (int x = minGrid.x; x <= maxGrid.x; ++x) {
|
for (int x = minGrid.x; x <= maxGrid.x; ++x) {
|
||||||
for (int y = minGrid.y; y <= maxGrid.y; ++y) {
|
for (int y = minGrid.y; y <= maxGrid.y; ++y) {
|
||||||
Vec2 gridPos(x, y);
|
Vec2 gridPos(x, y);
|
||||||
@@ -366,6 +367,7 @@ public:
|
|||||||
//bulk update positions
|
//bulk update positions
|
||||||
void bulkUpdatePositions(const std::unordered_map<size_t, Vec2>& newPositions) {
|
void bulkUpdatePositions(const std::unordered_map<size_t, Vec2>& newPositions) {
|
||||||
TIME_FUNCTION;
|
TIME_FUNCTION;
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const auto& [id, newPos] : newPositions) {
|
for (const auto& [id, newPos] : newPositions) {
|
||||||
Vec2 oldPosition = Positions.at(id);
|
Vec2 oldPosition = Positions.at(id);
|
||||||
Positions.at(id).move(newPos);
|
Positions.at(id).move(newPos);
|
||||||
@@ -377,6 +379,7 @@ public:
|
|||||||
// Bulk update colors
|
// Bulk update colors
|
||||||
void bulkUpdateColors(const std::unordered_map<size_t, Vec4>& newColors) {
|
void bulkUpdateColors(const std::unordered_map<size_t, Vec4>& newColors) {
|
||||||
TIME_FUNCTION;
|
TIME_FUNCTION;
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const auto& [id, newColor] : newColors) {
|
for (const auto& [id, newColor] : newColors) {
|
||||||
auto it = Colors.find(id);
|
auto it = Colors.find(id);
|
||||||
if (it != Colors.end()) {
|
if (it != Colors.end()) {
|
||||||
@@ -388,6 +391,7 @@ public:
|
|||||||
// Bulk update sizes
|
// Bulk update sizes
|
||||||
void bulkUpdateSizes(const std::unordered_map<size_t, float>& newSizes) {
|
void bulkUpdateSizes(const std::unordered_map<size_t, float>& newSizes) {
|
||||||
TIME_FUNCTION;
|
TIME_FUNCTION;
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const auto& [id, newSize] : newSizes) {
|
for (const auto& [id, newSize] : newSizes) {
|
||||||
auto it = Sizes.find(id);
|
auto it = Sizes.find(id);
|
||||||
if (it != Sizes.end()) {
|
if (it != Sizes.end()) {
|
||||||
@@ -412,7 +416,7 @@ public:
|
|||||||
Sizes.reserve(Sizes.size() + objects.size());
|
Sizes.reserve(Sizes.size() + objects.size());
|
||||||
|
|
||||||
// Batch insertion
|
// Batch insertion
|
||||||
#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
for (size_t i = 0; i < objects.size(); ++i) {
|
for (size_t i = 0; i < objects.size(); ++i) {
|
||||||
const auto& [pos, color, size] = objects[i];
|
const auto& [pos, color, size] = objects[i];
|
||||||
size_t id = Positions.set(pos);
|
size_t id = Positions.set(pos);
|
||||||
@@ -440,7 +444,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Batch insertion
|
// Batch insertion
|
||||||
#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
for (size_t i = 0; i < poses.size(); ++i) {
|
for (size_t i = 0; i < poses.size(); ++i) {
|
||||||
size_t id = Positions.set(poses[i]);
|
size_t id = Positions.set(poses[i]);
|
||||||
Colors[id] = colors[i];
|
Colors[id] = colors[i];
|
||||||
@@ -487,6 +491,7 @@ public:
|
|||||||
rgbData.resize(width * height * 3, 0);
|
rgbData.resize(width * height * 3, 0);
|
||||||
|
|
||||||
// For each position in the grid, find the corresponding pixel
|
// For each position in the grid, find the corresponding pixel
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const auto& [id, pos] : Positions) {
|
for (const auto& [id, pos] : Positions) {
|
||||||
if (pos.x >= minCorner.x && pos.x < maxCorner.x &&
|
if (pos.x >= minCorner.x && pos.x < maxCorner.x &&
|
||||||
pos.y >= minCorner.y && pos.y < maxCorner.y) {
|
pos.y >= minCorner.y && pos.y < maxCorner.y) {
|
||||||
@@ -529,6 +534,7 @@ public:
|
|||||||
bgrData.resize(width * height * 3, 0);
|
bgrData.resize(width * height * 3, 0);
|
||||||
|
|
||||||
// For each position in the grid, find the corresponding pixel
|
// For each position in the grid, find the corresponding pixel
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const auto& [id, pos] : Positions) {
|
for (const auto& [id, pos] : Positions) {
|
||||||
if (pos.x >= minCorner.x && pos.x < maxCorner.x &&
|
if (pos.x >= minCorner.x && pos.x < maxCorner.x &&
|
||||||
pos.y >= minCorner.y && pos.y < maxCorner.y) {
|
pos.y >= minCorner.y && pos.y < maxCorner.y) {
|
||||||
@@ -601,6 +607,7 @@ public:
|
|||||||
std::vector<uint8_t> rgbaData;
|
std::vector<uint8_t> rgbaData;
|
||||||
rgbaData.reserve(width * height * 4);
|
rgbaData.reserve(width * height * 4);
|
||||||
|
|
||||||
|
//#pragma omp parallel for
|
||||||
for (size_t i = 0; i < rgbData.size(); i += 3) {
|
for (size_t i = 0; i < rgbData.size(); i += 3) {
|
||||||
rgbaData.push_back(rgbData[i]); // R
|
rgbaData.push_back(rgbData[i]); // R
|
||||||
rgbaData.push_back(rgbData[i + 1]); // G
|
rgbaData.push_back(rgbData[i + 1]); // G
|
||||||
@@ -624,6 +631,7 @@ public:
|
|||||||
std::vector<uint8_t> bgraData;
|
std::vector<uint8_t> bgraData;
|
||||||
bgraData.reserve(width * height * 4);
|
bgraData.reserve(width * height * 4);
|
||||||
|
|
||||||
|
//#pragma omp parallel for
|
||||||
for (size_t i = 0; i < bgrData.size(); i += 3) {
|
for (size_t i = 0; i < bgrData.size(); i += 3) {
|
||||||
bgraData.push_back(bgrData[i]); // B
|
bgraData.push_back(bgrData[i]); // B
|
||||||
bgraData.push_back(bgrData[i + 1]); // G
|
bgraData.push_back(bgrData[i + 1]); // G
|
||||||
@@ -646,6 +654,7 @@ public:
|
|||||||
std::vector<uint8_t> grayData;
|
std::vector<uint8_t> grayData;
|
||||||
grayData.reserve(width * height);
|
grayData.reserve(width * height);
|
||||||
|
|
||||||
|
//#pragma omp parallel for
|
||||||
for (size_t i = 0; i < rgbData.size(); i += 3) {
|
for (size_t i = 0; i < rgbData.size(); i += 3) {
|
||||||
uint8_t r = rgbData[i];
|
uint8_t r = rgbData[i];
|
||||||
uint8_t g = rgbData[i + 1];
|
uint8_t g = rgbData[i + 1];
|
||||||
@@ -734,6 +743,7 @@ public:
|
|||||||
maxCorner = it->second;
|
maxCorner = it->second;
|
||||||
|
|
||||||
// Find min and max coordinates
|
// Find min and max coordinates
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const auto& [id, pos] : Positions) {
|
for (const auto& [id, pos] : Positions) {
|
||||||
minCorner.x = std::min(minCorner.x, pos.x);
|
minCorner.x = std::min(minCorner.x, pos.x);
|
||||||
minCorner.y = std::min(minCorner.y, pos.y);
|
minCorner.y = std::min(minCorner.y, pos.y);
|
||||||
@@ -764,6 +774,7 @@ public:
|
|||||||
neighborMap.clear();
|
neighborMap.clear();
|
||||||
|
|
||||||
// For each object, find nearby neighbors
|
// For each object, find nearby neighbors
|
||||||
|
//#pragma omp parallel for
|
||||||
for (const auto& [id1, pos1] : Positions) {
|
for (const auto& [id1, pos1] : Positions) {
|
||||||
std::vector<size_t> neighbors;
|
std::vector<size_t> neighbors;
|
||||||
float radiusSq = neighborRadius * neighborRadius;
|
float radiusSq = neighborRadius * neighborRadius;
|
||||||
|
|||||||
@@ -12,6 +12,9 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <future>
|
||||||
|
#include <mutex>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
class frame {
|
class frame {
|
||||||
private:
|
private:
|
||||||
@@ -142,17 +145,35 @@ public:
|
|||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
const size_t chunksize = 65535;
|
const size_t chunksize = 65535;
|
||||||
size_t dsize = _data.size();
|
size_t dsize = _data.size();
|
||||||
std::vector<uint8_t>::iterator dbegin = _data.begin();
|
|
||||||
|
|
||||||
//try to optimize space usage without losing speed
|
// Thread-safe storage with mutex protection
|
||||||
std::vector<std::vector<uint8_t>> matches128plus;
|
struct ThreadSafeMatches {
|
||||||
std::vector<std::vector<uint8_t>> matches64plus;
|
std::mutex mutex;
|
||||||
std::vector<std::vector<uint8_t>> matches32plus;
|
std::vector<std::vector<uint8_t>> matches128plus;
|
||||||
std::vector<std::vector<uint8_t>> matchesAll;
|
std::vector<std::vector<uint8_t>> matches64plus;
|
||||||
|
std::vector<std::vector<uint8_t>> matches32plus;
|
||||||
|
std::vector<std::vector<uint8_t>> matchesAll;
|
||||||
|
|
||||||
while (pos < dsize && matches128plus.size() < 65534) {
|
void addMatch(std::vector<uint8_t>&& match, size_t length) {
|
||||||
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
|
if (length >= 128) {
|
||||||
|
if (matches128plus.size() < 65534) matches128plus.push_back(std::move(match));
|
||||||
|
} else if (length >= 64) {
|
||||||
|
if (matches64plus.size() < 65534) matches64plus.push_back(std::move(match));
|
||||||
|
} else if (length >= 32) {
|
||||||
|
if (matches32plus.size() < 65534) matches32plus.push_back(std::move(match));
|
||||||
|
} else {
|
||||||
|
if (matchesAll.size() < 65534) matchesAll.push_back(std::move(match));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ThreadSafeMatches threadMatches;
|
||||||
|
|
||||||
|
while (pos < dsize && result.size() < 65534) {
|
||||||
size_t chunk_end = std::min(pos + chunksize, dsize);
|
size_t chunk_end = std::min(pos + chunksize, dsize);
|
||||||
std::vector<uint8_t> chunk(dbegin + pos, dbegin + chunk_end);
|
std::vector<uint8_t> chunk(_data.begin() + pos, _data.begin() + chunk_end);
|
||||||
|
|
||||||
if (chunk.size() <= 4) {
|
if (chunk.size() <= 4) {
|
||||||
pos = chunk_end;
|
pos = chunk_end;
|
||||||
continue;
|
continue;
|
||||||
@@ -162,87 +183,85 @@ public:
|
|||||||
result.push_back(chunk);
|
result.push_back(chunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> ffour;
|
std::vector<uint8_t> ffour(chunk.begin(), chunk.begin() + 4);
|
||||||
ffour.assign(chunk.begin(), chunk.begin() + 4);
|
|
||||||
size_t searchpos = chunk_end;
|
|
||||||
while (searchpos + 4 <= dsize) {
|
|
||||||
bool match_found = true;
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
|
||||||
if (_data[searchpos + i] != ffour[i]) {
|
|
||||||
match_found = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (match_found) {
|
// Split the search space across multiple threads
|
||||||
size_t matchlength = 4;
|
const size_t num_threads = std::thread::hardware_concurrency();
|
||||||
size_t chunk_compare_pos = 4;
|
const size_t search_range = dsize - chunk_end - 3;
|
||||||
size_t input_compare_pos = searchpos + 4;
|
const size_t block_size = (search_range + num_threads - 1) / num_threads;
|
||||||
|
|
||||||
while (chunk_compare_pos < chunk.size() && input_compare_pos < dsize && _data[input_compare_pos] == chunk[chunk_compare_pos]) {
|
std::vector<std::future<void>> futures;
|
||||||
matchlength++;
|
|
||||||
chunk_compare_pos++;
|
|
||||||
input_compare_pos++;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint8_t> matchsequence(dbegin + searchpos, dbegin + searchpos + matchlength);
|
for (size_t t = 0; t < num_threads; ++t) {
|
||||||
|
size_t start = chunk_end + t * block_size;
|
||||||
|
size_t end = std::min(start + block_size, dsize - 3);
|
||||||
|
|
||||||
// Categorize matches by length
|
if (start >= end) continue;
|
||||||
if (matchlength >= 128) {
|
|
||||||
if (matches128plus.size() < 65534) {
|
futures.push_back(std::async(std::launch::async,
|
||||||
matches128plus.push_back(matchsequence);
|
[&, start, end, chunk, ffour]() {
|
||||||
}
|
size_t searchpos = start;
|
||||||
} else if (matchlength >= 64) {
|
while (searchpos <= end) {
|
||||||
if (matches64plus.size() < 65534) {
|
// Check first 4 bytes
|
||||||
matches64plus.push_back(matchsequence);
|
if (_data[searchpos] == ffour[0] &&
|
||||||
}
|
_data[searchpos + 1] == ffour[1] &&
|
||||||
} else if (matchlength >= 32) {
|
_data[searchpos + 2] == ffour[2] &&
|
||||||
if (matches32plus.size() < 65534) {
|
_data[searchpos + 3] == ffour[3]) {
|
||||||
matches32plus.push_back(matchsequence);
|
|
||||||
}
|
// Found match, calculate length
|
||||||
} else {
|
size_t matchlength = 4;
|
||||||
if (matchesAll.size() < 65534) {
|
size_t chunk_compare_pos = 4;
|
||||||
matchesAll.push_back(matchsequence);
|
size_t input_compare_pos = searchpos + 4;
|
||||||
|
|
||||||
|
while (chunk_compare_pos < chunk.size() &&
|
||||||
|
input_compare_pos < dsize &&
|
||||||
|
_data[input_compare_pos] == chunk[chunk_compare_pos]) {
|
||||||
|
matchlength++;
|
||||||
|
chunk_compare_pos++;
|
||||||
|
input_compare_pos++;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<uint8_t> matchsequence(
|
||||||
|
_data.begin() + searchpos,
|
||||||
|
_data.begin() + searchpos + matchlength
|
||||||
|
);
|
||||||
|
|
||||||
|
threadMatches.addMatch(std::move(matchsequence), matchlength);
|
||||||
|
searchpos += matchlength;
|
||||||
|
} else {
|
||||||
|
searchpos++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
searchpos += matchlength;
|
// Wait for all threads to complete
|
||||||
} else {
|
for (auto& future : futures) {
|
||||||
searchpos++;
|
future.get();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = chunk_end;
|
pos = chunk_end;
|
||||||
}
|
}
|
||||||
for (const auto& match : matches128plus) {
|
|
||||||
|
// Merge results (same priority order as original)
|
||||||
|
for (const auto& match : threadMatches.matches128plus) {
|
||||||
result.push_back(match);
|
result.push_back(match);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then add 64+ matches if we still have space
|
for (const auto& match : threadMatches.matches64plus) {
|
||||||
for (const auto& match : matches64plus) {
|
if (result.size() < 65534) result.push_back(match);
|
||||||
if (result.size() < 65534) {
|
else break;
|
||||||
result.push_back(match);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then add 32+ matches if we still have space
|
for (const auto& match : threadMatches.matches32plus) {
|
||||||
for (const auto& match : matches32plus) {
|
if (result.size() < 65534) result.push_back(match);
|
||||||
if (result.size() < 65534) {
|
else break;
|
||||||
result.push_back(match);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finally add all other matches if we still have space
|
for (const auto& match : threadMatches.matchesAll) {
|
||||||
for (const auto& match : matchesAll) {
|
if (result.size() < 65534) result.push_back(match);
|
||||||
if (result.size() < 65534) {
|
else break;
|
||||||
result.push_back(match);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|||||||
Reference in New Issue
Block a user