From db47f2d731ee6145422e3c76a84beac356e214e4 Mon Sep 17 00:00:00 2001 From: yggdrasil75 Date: Sun, 16 Nov 2025 17:25:24 -0500 Subject: [PATCH] well, I think its decent. I actually need to work on something else with this now. --- .vscode/settings.json | 3 +- tests/g2chromatic2.cpp | 7 +- util/grid/grid22.hpp | 15 +++- util/output/frame.hpp | 167 +++++++++++++++++++++++------------------ 4 files changed, 112 insertions(+), 80 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index e563d9d..a0db1a6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -90,7 +90,8 @@ "__hash_table": "cpp", "__split_buffer": "cpp", "__tree": "cpp", - "stack": "cpp" + "stack": "cpp", + "future": "cpp" }, "files.exclude": { "**/*.rpyc": true, diff --git a/tests/g2chromatic2.cpp b/tests/g2chromatic2.cpp index 2432382..27191b0 100644 --- a/tests/g2chromatic2.cpp +++ b/tests/g2chromatic2.cpp @@ -11,9 +11,9 @@ #include "../util/timing_decorator.cpp" struct AnimationConfig { - int width = 4096; - int height = 4096; - int totalFrames = 4800; + int width = 2048; + int height = 2048; + int totalFrames = 480; float fps = 30.0f; int numSeeds = 8; }; @@ -75,6 +75,7 @@ void expandPixel(Grid2& grid, AnimationConfig config, std::vector(seed)); } + //#pragma omp parallel for for (const std::tuple& seed : seeds) { size_t id = std::get<0>(seed); Vec2 seedPOS = std::get<1>(seed); diff --git a/util/grid/grid22.hpp b/util/grid/grid22.hpp index aedf511..96f75b2 100644 --- a/util/grid/grid22.hpp +++ b/util/grid/grid22.hpp @@ -146,6 +146,7 @@ public: Vec2 maxGrid = worldToGrid(center + Vec2(radius, radius)); // Check all relevant grid cells + //#pragma omp parallel for for (int x = minGrid.x; x <= maxGrid.x; ++x) { for (int y = minGrid.y; y <= maxGrid.y; ++y) { Vec2 gridPos(x, y); @@ -366,6 +367,7 @@ public: //bulk update positions void bulkUpdatePositions(const std::unordered_map& newPositions) { TIME_FUNCTION; + //#pragma omp parallel for for (const auto& [id, newPos] : newPositions) { Vec2 oldPosition = Positions.at(id); Positions.at(id).move(newPos); @@ -377,6 +379,7 @@ public: // Bulk update colors void bulkUpdateColors(const std::unordered_map& newColors) { TIME_FUNCTION; + //#pragma omp parallel for for (const auto& [id, newColor] : newColors) { auto it = Colors.find(id); if (it != Colors.end()) { @@ -388,6 +391,7 @@ public: // Bulk update sizes void bulkUpdateSizes(const std::unordered_map& newSizes) { TIME_FUNCTION; + //#pragma omp parallel for for (const auto& [id, newSize] : newSizes) { auto it = Sizes.find(id); if (it != Sizes.end()) { @@ -412,7 +416,7 @@ public: Sizes.reserve(Sizes.size() + objects.size()); // Batch insertion - #pragma omp parallel for + //#pragma omp parallel for for (size_t i = 0; i < objects.size(); ++i) { const auto& [pos, color, size] = objects[i]; size_t id = Positions.set(pos); @@ -440,7 +444,7 @@ public: } // Batch insertion - #pragma omp parallel for + //#pragma omp parallel for for (size_t i = 0; i < poses.size(); ++i) { size_t id = Positions.set(poses[i]); Colors[id] = colors[i]; @@ -487,6 +491,7 @@ public: rgbData.resize(width * height * 3, 0); // For each position in the grid, find the corresponding pixel + //#pragma omp parallel for for (const auto& [id, pos] : Positions) { if (pos.x >= minCorner.x && pos.x < maxCorner.x && pos.y >= minCorner.y && pos.y < maxCorner.y) { @@ -529,6 +534,7 @@ public: bgrData.resize(width * height * 3, 0); // For each position in the grid, find the corresponding pixel + //#pragma omp parallel for for (const auto& [id, pos] : Positions) { if (pos.x >= minCorner.x && pos.x < maxCorner.x && pos.y >= minCorner.y && pos.y < maxCorner.y) { @@ -601,6 +607,7 @@ public: std::vector rgbaData; rgbaData.reserve(width * height * 4); + //#pragma omp parallel for for (size_t i = 0; i < rgbData.size(); i += 3) { rgbaData.push_back(rgbData[i]); // R rgbaData.push_back(rgbData[i + 1]); // G @@ -624,6 +631,7 @@ public: std::vector bgraData; bgraData.reserve(width * height * 4); + //#pragma omp parallel for for (size_t i = 0; i < bgrData.size(); i += 3) { bgraData.push_back(bgrData[i]); // B bgraData.push_back(bgrData[i + 1]); // G @@ -646,6 +654,7 @@ public: std::vector grayData; grayData.reserve(width * height); + //#pragma omp parallel for for (size_t i = 0; i < rgbData.size(); i += 3) { uint8_t r = rgbData[i]; uint8_t g = rgbData[i + 1]; @@ -734,6 +743,7 @@ public: maxCorner = it->second; // Find min and max coordinates + //#pragma omp parallel for for (const auto& [id, pos] : Positions) { minCorner.x = std::min(minCorner.x, pos.x); minCorner.y = std::min(minCorner.y, pos.y); @@ -764,6 +774,7 @@ public: neighborMap.clear(); // For each object, find nearby neighbors + //#pragma omp parallel for for (const auto& [id1, pos1] : Positions) { std::vector neighbors; float radiusSq = neighborRadius * neighborRadius; diff --git a/util/output/frame.hpp b/util/output/frame.hpp index 1431d85..7a9a536 100644 --- a/util/output/frame.hpp +++ b/util/output/frame.hpp @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include class frame { private: @@ -142,18 +145,36 @@ public: size_t pos = 0; const size_t chunksize = 65535; size_t dsize = _data.size(); - std::vector::iterator dbegin = _data.begin(); - //try to optimize space usage without losing speed - std::vector> matches128plus; - std::vector> matches64plus; - std::vector> matches32plus; - std::vector> matchesAll; + // Thread-safe storage with mutex protection + struct ThreadSafeMatches { + std::mutex mutex; + std::vector> matches128plus; + std::vector> matches64plus; + std::vector> matches32plus; + std::vector> matchesAll; + + void addMatch(std::vector&& match, size_t length) { + std::lock_guard lock(mutex); + if (length >= 128) { + if (matches128plus.size() < 65534) matches128plus.push_back(std::move(match)); + } else if (length >= 64) { + if (matches64plus.size() < 65534) matches64plus.push_back(std::move(match)); + } else if (length >= 32) { + if (matches32plus.size() < 65534) matches32plus.push_back(std::move(match)); + } else { + if (matchesAll.size() < 65534) matchesAll.push_back(std::move(match)); + } + } + }; - while (pos < dsize && matches128plus.size() < 65534) { + ThreadSafeMatches threadMatches; + + while (pos < dsize && result.size() < 65534) { size_t chunk_end = std::min(pos + chunksize, dsize); - std::vector chunk(dbegin + pos, dbegin + chunk_end); - if (chunk.size() <= 4) { + std::vector chunk(_data.begin() + pos, _data.begin() + chunk_end); + + if (chunk.size() <= 4) { pos = chunk_end; continue; } @@ -162,87 +183,85 @@ public: result.push_back(chunk); } - std::vector ffour; - ffour.assign(chunk.begin(), chunk.begin() + 4); - size_t searchpos = chunk_end; - while (searchpos + 4 <= dsize) { - bool match_found = true; - for (int i = 0; i < 4; ++i) { - if (_data[searchpos + i] != ffour[i]) { - match_found = false; - break; - } - } + std::vector ffour(chunk.begin(), chunk.begin() + 4); + + // Split the search space across multiple threads + const size_t num_threads = std::thread::hardware_concurrency(); + const size_t search_range = dsize - chunk_end - 3; + const size_t block_size = (search_range + num_threads - 1) / num_threads; + + std::vector> futures; + + for (size_t t = 0; t < num_threads; ++t) { + size_t start = chunk_end + t * block_size; + size_t end = std::min(start + block_size, dsize - 3); - if (match_found) { - size_t matchlength = 4; - size_t chunk_compare_pos = 4; - size_t input_compare_pos = searchpos + 4; + if (start >= end) continue; + + futures.push_back(std::async(std::launch::async, + [&, start, end, chunk, ffour]() { + size_t searchpos = start; + while (searchpos <= end) { + // Check first 4 bytes + if (_data[searchpos] == ffour[0] && + _data[searchpos + 1] == ffour[1] && + _data[searchpos + 2] == ffour[2] && + _data[searchpos + 3] == ffour[3]) { + + // Found match, calculate length + size_t matchlength = 4; + size_t chunk_compare_pos = 4; + size_t input_compare_pos = searchpos + 4; - while (chunk_compare_pos < chunk.size() && input_compare_pos < dsize && _data[input_compare_pos] == chunk[chunk_compare_pos]) { - matchlength++; - chunk_compare_pos++; - input_compare_pos++; - } + while (chunk_compare_pos < chunk.size() && + input_compare_pos < dsize && + _data[input_compare_pos] == chunk[chunk_compare_pos]) { + matchlength++; + chunk_compare_pos++; + input_compare_pos++; + } - std::vector matchsequence(dbegin + searchpos, dbegin + searchpos + matchlength); - - // Categorize matches by length - if (matchlength >= 128) { - if (matches128plus.size() < 65534) { - matches128plus.push_back(matchsequence); - } - } else if (matchlength >= 64) { - if (matches64plus.size() < 65534) { - matches64plus.push_back(matchsequence); - } - } else if (matchlength >= 32) { - if (matches32plus.size() < 65534) { - matches32plus.push_back(matchsequence); - } - } else { - if (matchesAll.size() < 65534) { - matchesAll.push_back(matchsequence); + std::vector matchsequence( + _data.begin() + searchpos, + _data.begin() + searchpos + matchlength + ); + + threadMatches.addMatch(std::move(matchsequence), matchlength); + searchpos += matchlength; + } else { + searchpos++; + } } } - - searchpos += matchlength; - } else { - searchpos++; - } + )); + } + + // Wait for all threads to complete + for (auto& future : futures) { + future.get(); } pos = chunk_end; } - for (const auto& match : matches128plus) { + + // Merge results (same priority order as original) + for (const auto& match : threadMatches.matches128plus) { result.push_back(match); } - // Then add 64+ matches if we still have space - for (const auto& match : matches64plus) { - if (result.size() < 65534) { - result.push_back(match); - } else { - break; - } + for (const auto& match : threadMatches.matches64plus) { + if (result.size() < 65534) result.push_back(match); + else break; } - // Then add 32+ matches if we still have space - for (const auto& match : matches32plus) { - if (result.size() < 65534) { - result.push_back(match); - } else { - break; - } + for (const auto& match : threadMatches.matches32plus) { + if (result.size() < 65534) result.push_back(match); + else break; } - // Finally add all other matches if we still have space - for (const auto& match : matchesAll) { - if (result.size() < 65534) { - result.push_back(match); - } else { - break; - } + for (const auto& match : threadMatches.matchesAll) { + if (result.size() < 65534) result.push_back(match); + else break; } return result;