From db47f2d731ee6145422e3c76a84beac356e214e4 Mon Sep 17 00:00:00 2001
From: yggdrasil75 <cblackburn7557@gmail.com>
Date: Sun, 16 Nov 2025 17:25:24 -0500
Subject: [PATCH] well, I think it's decent. I actually need to work on
 something else with this now.

---
 .vscode/settings.json  |   3 +-
 tests/g2chromatic2.cpp |   7 +-
 util/grid/grid22.hpp   |  15 +++-
 util/output/frame.hpp  | 167 +++++++++++++++++++++++------------------
 4 files changed, 112 insertions(+), 80 deletions(-)
diff --git a/.vscode/settings.json b/.vscode/settings.json
index e563d9d..a0db1a6 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -90,7 +90,8 @@
         "__hash_table": "cpp",
         "__split_buffer": "cpp",
         "__tree": "cpp",
-        "stack": "cpp"
+        "stack": "cpp",
+        "future": "cpp"
     },
     "files.exclude": {
         "**/*.rpyc": true,
diff --git a/tests/g2chromatic2.cpp b/tests/g2chromatic2.cpp
index 2432382..27191b0 100644
--- a/tests/g2chromatic2.cpp
+++ b/tests/g2chromatic2.cpp
@@ -11,9 +11,9 @@
 #include "../util/timing_decorator.cpp"
 
 struct AnimationConfig {
-    int width = 4096;
-    int height = 4096;
-    int totalFrames = 4800;
+    int width = 2048;
+    int height = 2048;
+    int totalFrames = 480;
     float fps = 30.0f;
     int numSeeds = 8;
 };
@@ -75,6 +75,7 @@ void expandPixel(Grid2& grid, AnimationConfig config, std::vector<std::tuple<siz
         visitedThisFrame.insert(std::get<0>(seed));
     }
 
+    //#pragma omp parallel for
     for (const std::tuple<size_t, Vec2, Vec4>& seed : seeds) {
         size_t id = std::get<0>(seed);
         Vec2 seedPOS = std::get<1>(seed);
diff --git a/util/grid/grid22.hpp b/util/grid/grid22.hpp
index aedf511..96f75b2 100644
--- a/util/grid/grid22.hpp
+++ b/util/grid/grid22.hpp
@@ -146,6 +146,7 @@ public:
         Vec2 maxGrid = worldToGrid(center + Vec2(radius, radius));
         
         // Check all relevant grid cells
+        //#pragma omp parallel for
         for (int x = minGrid.x; x <= maxGrid.x; ++x) {
             for (int y = minGrid.y; y <= maxGrid.y; ++y) {
                 Vec2 gridPos(x, y);
@@ -366,6 +367,7 @@ public:
     //bulk update positions
     void bulkUpdatePositions(const std::unordered_map<size_t, Vec2>& newPositions) {
         TIME_FUNCTION;
+        //#pragma omp parallel for
         for (const auto& [id, newPos] : newPositions) {
             Vec2 oldPosition = Positions.at(id);
             Positions.at(id).move(newPos);
@@ -377,6 +379,7 @@ public:
     // Bulk update colors
     void bulkUpdateColors(const std::unordered_map<size_t, Vec4>& newColors) {
         TIME_FUNCTION;
+        //#pragma omp parallel for
         for (const auto& [id, newColor] : newColors) {
             auto it = Colors.find(id);
             if (it != Colors.end()) {
@@ -388,6 +391,7 @@ public:
     // Bulk update sizes
     void bulkUpdateSizes(const std::unordered_map<size_t, float>& newSizes) {
         TIME_FUNCTION;
+        //#pragma omp parallel for
         for (const auto& [id, newSize] : newSizes) {
             auto it = Sizes.find(id);
             if (it != Sizes.end()) {
@@ -412,7 +416,7 @@ public:
         Sizes.reserve(Sizes.size() + objects.size());
         
         // Batch insertion
-        #pragma omp parallel for
+        //#pragma omp parallel for
         for (size_t i = 0; i < objects.size(); ++i) {
             const auto& [pos, color, size] = objects[i];
             size_t id = Positions.set(pos);
@@ -440,7 +444,7 @@ public:
         }
         
         // Batch insertion
-        #pragma omp parallel for
+        //#pragma omp parallel for
         for (size_t i = 0; i < poses.size(); ++i) {
             size_t id = Positions.set(poses[i]);
             Colors[id] = colors[i];
@@ -487,6 +491,7 @@ public:
         rgbData.resize(width * height * 3, 0);
         
         // For each position in the grid, find the corresponding pixel
+        //#pragma omp parallel for
         for (const auto& [id, pos] : Positions) {
             if (pos.x >= minCorner.x && pos.x < maxCorner.x &&
                 pos.y >= minCorner.y && pos.y < maxCorner.y) {
@@ -529,6 +534,7 @@ public:
         bgrData.resize(width * height * 3, 0);
         
         // For each position in the grid, find the corresponding pixel
+        //#pragma omp parallel for
         for (const auto& [id, pos] : Positions) {
             if (pos.x >= minCorner.x && pos.x < maxCorner.x &&
                 pos.y >= minCorner.y && pos.y < maxCorner.y) {
@@ -601,6 +607,7 @@ public:
         std::vector<uint8_t> rgbaData;
         rgbaData.reserve(width * height * 4);
         
+        //#pragma omp parallel for
         for (size_t i = 0; i < rgbData.size(); i += 3) {
             rgbaData.push_back(rgbData[i]);     // R
             rgbaData.push_back(rgbData[i + 1]); // G
@@ -624,6 +631,7 @@ public:
         std::vector<uint8_t> bgraData;
         bgraData.reserve(width * height * 4);
         
+        //#pragma omp parallel for
         for (size_t i = 0; i < bgrData.size(); i += 3) {
             bgraData.push_back(bgrData[i]);     // B
             bgraData.push_back(bgrData[i + 1]); // G
@@ -646,6 +654,7 @@ public:
         std::vector<uint8_t> grayData;
         grayData.reserve(width * height);
         
+        //#pragma omp parallel for
         for (size_t i = 0; i < rgbData.size(); i += 3) {
             uint8_t r = rgbData[i];
             uint8_t g = rgbData[i + 1];
@@ -734,6 +743,7 @@ public:
         maxCorner = it->second;
         
         // Find min and max coordinates
+        //#pragma omp parallel for
         for (const auto& [id, pos] : Positions) {
             minCorner.x = std::min(minCorner.x, pos.x);
             minCorner.y = std::min(minCorner.y, pos.y);
@@ -764,6 +774,7 @@ public:
         neighborMap.clear();
         
         // For each object, find nearby neighbors
+        //#pragma omp parallel for
         for (const auto& [id1, pos1] : Positions) {
             std::vector<size_t> neighbors;
             float radiusSq = neighborRadius * neighborRadius;
diff --git a/util/output/frame.hpp b/util/output/frame.hpp
index 1431d85..7a9a536 100644
--- a/util/output/frame.hpp
+++ b/util/output/frame.hpp
@@ -12,6 +12,9 @@
 #include <stdexcept>
 #include <string>
 #include <iostream>
+#include <future>
+#include <mutex>
+#include <atomic>
 
 class frame {
 private:
@@ -142,18 +145,36 @@ public:
         size_t pos = 0;
         const size_t chunksize = 65535;
         size_t dsize = _data.size();
-        std::vector<uint8_t>::iterator dbegin = _data.begin();
         
-        //try to optimize space usage without losing speed
-        std::vector<std::vector<uint8_t>> matches128plus;
-        std::vector<std::vector<uint8_t>> matches64plus;
-        std::vector<std::vector<uint8_t>> matches32plus;
-        std::vector<std::vector<uint8_t>> matchesAll;
+        // Match buckets filled by the async search tasks; all inserts go through addMatch(), which holds `mutex`.
+        struct ThreadSafeMatches {
+            std::mutex mutex;                                  // serializes addMatch() calls
+            std::vector<std::vector<uint8_t>> matches128plus;  // matches with length >= 128
+            std::vector<std::vector<uint8_t>> matches64plus;   // matches with length in [64, 128)
+            std::vector<std::vector<uint8_t>> matches32plus;   // matches with length in [32, 64)
+            std::vector<std::vector<uint8_t>> matchesAll;      // matches with length < 32
+            // Moves `match` into the bucket selected by `length`; a bucket already holding 65534 entries drops it.
+            void addMatch(std::vector<uint8_t>&& match, size_t length) {
+                std::lock_guard<std::mutex> lock(mutex); // held for the whole bucket selection and push_back
+                if (length >= 128) {
+                    if (matches128plus.size() < 65534) matches128plus.push_back(std::move(match));
+                } else if (length >= 64) {
+                    if (matches64plus.size() < 65534) matches64plus.push_back(std::move(match));
+                } else if (length >= 32) {
+                    if (matches32plus.size() < 65534) matches32plus.push_back(std::move(match));
+                } else {
+                    if (matchesAll.size() < 65534) matchesAll.push_back(std::move(match));
+                }
+            }
+        };
         
-        while (pos < dsize && matches128plus.size() < 65534) {
+        ThreadSafeMatches threadMatches;
+        
+        while (pos < dsize && result.size() < 65534) {
             size_t chunk_end = std::min(pos + chunksize, dsize);
-            std::vector<uint8_t> chunk(dbegin + pos, dbegin + chunk_end);
-            if (chunk.size() <= 4) { 
+            std::vector<uint8_t> chunk(_data.begin() + pos, _data.begin() + chunk_end);
+            
+            if (chunk.size() <= 4) {
                 pos = chunk_end;
                 continue;
             }
@@ -162,87 +183,85 @@ public:
                 result.push_back(chunk);
             }
 
-            std::vector<uint8_t> ffour;
-            ffour.assign(chunk.begin(), chunk.begin() + 4);
-            size_t searchpos = chunk_end;            
-            while (searchpos + 4 <= dsize) {
-                bool match_found = true;
-                for (int i = 0; i < 4; ++i) {
-                    if (_data[searchpos + i] != ffour[i]) {
-                        match_found = false;
-                        break;
-                    }
-                }
+            std::vector<uint8_t> ffour(chunk.begin(), chunk.begin() + 4);
+            
+            // Split the search space across multiple threads
+            const size_t num_threads = std::max<size_t>(1, std::thread::hardware_concurrency()); // hardware_concurrency() may report 0; never divide by zero below
+            const size_t search_range = (dsize - chunk_end > 3) ? dsize - chunk_end - 3 : 0; // number of candidate start offsets; clamped so the unsigned subtraction cannot wrap
+            const size_t block_size = (search_range + num_threads - 1) / num_threads; // ceiling division so the per-thread blocks cover the whole range
+            
+            std::vector<std::future<void>> futures;
+            
+            for (size_t t = 0; t < num_threads; ++t) {
+                size_t start = chunk_end + t * block_size;
+                size_t end = std::min(start + block_size, dsize - 3);
                 
-                if (match_found) {
-                    size_t matchlength = 4;
-                    size_t chunk_compare_pos = 4;
-                    size_t input_compare_pos = searchpos + 4;
+                if (start >= end) continue;
+                
+                futures.push_back(std::async(std::launch::async, 
+                    [&, start, end, chunk, ffour]() {
+                        size_t searchpos = start;
+                        while (searchpos < end) { // half-open block [start, end): keeps searchpos + 3 < dsize and stops adjacent blocks from both scanning offset `end`
+                            // Check first 4 bytes
+                            if (_data[searchpos] == ffour[0] &&
+                                _data[searchpos + 1] == ffour[1] &&
+                                _data[searchpos + 2] == ffour[2] &&
+                                _data[searchpos + 3] == ffour[3]) {
+                                
+                                // Found match, calculate length
+                                size_t matchlength = 4;
+                                size_t chunk_compare_pos = 4;
+                                size_t input_compare_pos = searchpos + 4;
 
-                    while (chunk_compare_pos < chunk.size() && input_compare_pos < dsize && _data[input_compare_pos] == chunk[chunk_compare_pos]) {
-                        matchlength++;
-                        chunk_compare_pos++;
-                        input_compare_pos++;
-                    }
+                                while (chunk_compare_pos < chunk.size() && 
+                                    input_compare_pos < dsize && 
+                                    _data[input_compare_pos] == chunk[chunk_compare_pos]) {
+                                    matchlength++;
+                                    chunk_compare_pos++;
+                                    input_compare_pos++;
+                                }
 
-                    std::vector<uint8_t> matchsequence(dbegin + searchpos, dbegin + searchpos + matchlength);
-                    
-                    // Categorize matches by length
-                    if (matchlength >= 128) {
-                        if (matches128plus.size() < 65534) {
-                            matches128plus.push_back(matchsequence);
-                        }
-                    } else if (matchlength >= 64) {
-                        if (matches64plus.size() < 65534) {
-                            matches64plus.push_back(matchsequence);
-                        }
-                    } else if (matchlength >= 32) {
-                        if (matches32plus.size() < 65534) {
-                            matches32plus.push_back(matchsequence);
-                        }
-                    } else {
-                        if (matchesAll.size() < 65534) {
-                            matchesAll.push_back(matchsequence);
+                                std::vector<uint8_t> matchsequence(
+                                    _data.begin() + searchpos, 
+                                    _data.begin() + searchpos + matchlength
+                                );
+                                
+                                threadMatches.addMatch(std::move(matchsequence), matchlength);
+                                searchpos += matchlength;
+                            } else {
+                                searchpos++;
+                            }
                         }
                     }
-                    
-                    searchpos += matchlength;
-                } else {
-                    searchpos++;
-                }
+                ));
+            }
+            
+            // Wait for all threads to complete
+            for (auto& future : futures) {
+                future.get();
             }
             
             pos = chunk_end;
         }
-        for (const auto& match : matches128plus) {
+        
+        // Merge results (same priority order as original)
+        for (const auto& match : threadMatches.matches128plus) {
             result.push_back(match);
         }
         
-        // Then add 64+ matches if we still have space
-        for (const auto& match : matches64plus) {
-            if (result.size() < 65534) {
-                result.push_back(match);
-            } else {
-                break;
-            }
+        for (const auto& match : threadMatches.matches64plus) {
+            if (result.size() < 65534) result.push_back(match);
+            else break;
         }
         
-        // Then add 32+ matches if we still have space
-        for (const auto& match : matches32plus) {
-            if (result.size() < 65534) {
-                result.push_back(match);
-            } else {
-                break;
-            }
+        for (const auto& match : threadMatches.matches32plus) {
+            if (result.size() < 65534) result.push_back(match);
+            else break;
         }
         
-        // Finally add all other matches if we still have space
-        for (const auto& match : matchesAll) {
-            if (result.size() < 65534) {
-                result.push_back(match);
-            } else {
-                break;
-            }
+        for (const auto& match : threadMatches.matchesAll) {
+            if (result.size() < 65534) result.push_back(match);
+            else break;
         }
         
         return result;