diff --git a/tests/g2chromatic2.cpp b/tests/g2chromatic2.cpp index 701fbbb..8d0c8bf 100644 --- a/tests/g2chromatic2.cpp +++ b/tests/g2chromatic2.cpp @@ -13,7 +13,7 @@ struct AnimationConfig { int width = 1024; int height = 1024; - int totalFrames = 4800; + int totalFrames = 5; float fps = 30.0f; int numSeeds = 8; }; @@ -109,6 +109,7 @@ void expandPixel(Grid2& grid, AnimationConfig config, std::vector _data; - std::unordered_map overheadmap; + std::unordered_map> overheadmap; size_t ratio = 1; size_t sourceSize = 0; size_t width = 0; @@ -82,7 +82,7 @@ public: cformat = compresstype::DIFFRLE; } else if (cformat == compresstype::RLE) { return *this; - } else { + } else if (cformat == compresstype::RAW) { cformat = compresstype::RLE; } @@ -100,9 +100,10 @@ public: } } - ratio = compressedData.size() - _data.size(); + ratio = compressedData.size() / _data.size(); sourceSize = _data.size(); _data.clear(); + _data.shrink_to_fit(); _data = compressedData; return *this; } @@ -127,31 +128,205 @@ public: return *this; } + std::vector> getRepeats() { + TIME_FUNCTION; + std::vector> result; + size_t pos = 0; + const size_t chunksize = 255; + size_t dsize = _data.size(); + std::vector::iterator dbegin = _data.begin(); + uint8_t minlen = 128; + while (pos < dsize && result.size() < 254){ + size_t chunk_end = std::min(pos + chunksize, dsize); + std::vector chunk(_data.begin() + pos, dbegin + chunk_end); + if (chunk.size() <= 4) { pos = chunk_end; } + result.push_back(chunk); + + std::vector ffour; + ffour.assign(chunk.begin(), chunk.begin() + 4); + size_t searchpos = chunk_end; + while (searchpos + 4 <= dsize) { + bool match_found = true; + for (int i = 0; i < 4; ++i) { + if (_data[searchpos + i] != ffour[i]) { + match_found = false; + break; + } + } + if (match_found) { + size_t matchlength = 4; + size_t chunk_compare_pos = 4; + size_t input_compare_pos = searchpos + 4; + + while (chunk_compare_pos < chunk.size() && input_compare_pos < dsize && _data[input_compare_pos] == chunk[chunk_compare_pos]) { + matchlength++; + chunk_compare_pos++; + input_compare_pos++; + } + + std::vector matchsequence(dbegin + searchpos, dbegin+searchpos+matchlength); + result.push_back(matchsequence); + searchpos += matchlength; + } else { + searchpos++; + } + } + pos = chunk_end; + } + return result; + } + + std::vector> sortvecs(std::vector> source) { + std::sort(source.begin(), source.end(), [](const std::vector & a, const std::vector & b) {return a.size() > b.size();}); + return source; + } + + // LZ78 compression + frame& compressFrameLZ78() { + TIME_FUNCTION; + if (_data.empty()) { + return *this; + } + if (cformat != compresstype::RAW) { + throw std::runtime_error("LZ78 compression can only be applied to raw data"); + } + + std::vector> repeats = getRepeats(); + repeats = sortvecs(repeats); + uint8_t nextDict = 1; + + std::vector compressed; + size_t cpos = 0; + for (const auto& rseq : repeats) { + if (!rseq.empty() && rseq.size() > 1 && overheadmap.size() < 255) { + overheadmap[nextDict] = rseq; + nextDict++; + } + } + + while (cpos < _data.size()) { + bool found_match = false; + uint8_t best_dict_index = 0; + size_t best_match_length = 0; + + // Iterate through dictionary in priority order (longest patterns first) + for (uint8_t dict_idx = 1; dict_idx <= overheadmap.size(); dict_idx++) { + const auto& dict_seq = overheadmap[dict_idx]; + + // Quick length check - if remaining data is shorter than pattern, skip + if (dict_seq.size() > (_data.size() - cpos)) { + continue; + } + + // Check if this pattern matches at current position + bool match = true; + for (size_t i = 0; i < dict_seq.size(); ++i) { + if (_data[cpos + i] != dict_seq[i]) { + match = false; + break; + } + } + + if (match) { + // Found a match - use it immediately (first match is best due to sorting) + best_dict_index = dict_idx; + best_match_length = dict_seq.size(); + found_match = true; + break; // Stop searching - we found our match + } + } + + if (found_match && best_match_length > 1) { + // Write dictionary reference + compressed.push_back(best_dict_index); + cpos += best_match_length; + } else { + // Write literal: 0 followed by the literal byte + compressed.push_back(0); + compressed.push_back(_data[cpos]); + cpos++; + } + } + + ratio = compressed.size() / _data.size(); + sourceSize = _data.size(); + uint32_t original_size = static_cast(_data.size()); + compressed.insert(compressed.begin(), reinterpret_cast(&original_size), + reinterpret_cast(&original_size) + sizeof(original_size)); + + _data = std::move(compressed); + _data.shrink_to_fit(); + cformat = compresstype::LZ78; + + return *this; + } + + frame& decompressFrameLZ78() { + TIME_FUNCTION; + if (cformat != compresstype::LZ78) { + throw std::runtime_error("Data is not LZ78 compressed"); + } + + // Extract original size from beginning of compressed data + uint32_t original_size; + //std::memcpy(&original_size, _data.data(), sizeof(original_size)); + + std::vector decompressedData; + decompressedData.reserve(original_size); + + size_t cpos = sizeof(uint32_t); // Skip the size header + + while (cpos < _data.size()) { + uint8_t token = _data[cpos++]; + + if (token == 0) { + // Literal byte + if (cpos < _data.size()) { + decompressedData.push_back(_data[cpos++]); + } + } else { + // Dictionary reference + auto it = overheadmap.find(token); + if (it != overheadmap.end()) { + const std::vector& dict_entry = it->second; + decompressedData.insert(decompressedData.end(), dict_entry.begin(), dict_entry.end()); + } else { + throw std::runtime_error("Invalid dictionary reference in compressed data"); + } + } + } + + _data = std::move(decompressedData); + cformat = compresstype::RAW; + + return *this; + } + // Differential compression frame& compressFrameDiff() { // TODO - std::logic_error("Function not yet implemented"); + throw std::logic_error("Function not yet implemented"); } frame& decompressFrameDiff() { // TODO - std::logic_error("Function not yet implemented"); + throw std::logic_error("Function not yet implemented"); } // Huffman compression frame& compressFrameHuffman() { // TODO - std::logic_error("Function not yet implemented"); + throw std::logic_error("Function not yet implemented"); } // Combined compression methods frame& compressFrameZigZagRLE() { // TODO - std::logic_error("Function not yet implemented"); + throw std::logic_error("Function not yet implemented"); } frame& compressFrameDiffRLE() { // TODO - std::logic_error("Function not yet implemented"); + throw std::logic_error("Function not yet implemented"); } // Generic decompression that detects compression type @@ -169,6 +344,9 @@ public: cformat = compresstype::DIFF; return decompressFrameDiff(); break; + case compresstype::LZ78: + return decompressFrameLZ78(); + break; case compresstype::HUFFMAN: // Huffman decompression would be implemented here throw std::runtime_error("Huffman decompression not fully implemented"); @@ -201,6 +379,7 @@ public: case compresstype::RLE: std::cout << "RLE"; break; case compresstype::DIFF: std::cout << "DIFF"; break; case compresstype::DIFFRLE: std::cout << "DIFF + RLE"; break; + case compresstype::LZ78: std::cout << "LZ78 (kinda)"; break; case compresstype::HUFFMAN: std::cout << "HUFFMAN"; break; case compresstype::RAW: std::cout << "RAW (uncompressed)"; break; default: std::cout << "UNKNOWN"; break; @@ -215,6 +394,11 @@ public: double savings = (1.0 - (1.0 / getCompressionRatio())) * 100.0; std::cout << "Space Savings: " << savings << "%" << std::endl; } + + // Show dictionary size for LZ78 + if (cformat == compresstype::LZ78) { + std::cout << "Dictionary Size: " << overheadmap.size() << " entries" << std::endl; + } } // Print compression information in a compact format @@ -230,6 +414,7 @@ public: case compresstype::RLE: return "RLE"; case compresstype::DIFF: return "DIFF"; case compresstype::DIFFRLE: return "DIFF+RLE"; + case compresstype::LZ78: return "LZ78"; case compresstype::HUFFMAN: return "HUFFMAN"; case compresstype::RAW: return "RAW"; default: return "UNKNOWN"; @@ -240,7 +425,7 @@ public: return cformat; } - const std::unordered_map& getOverheadMap() const { + const std::unordered_map>& getOverheadMap() const { return overheadmap; }