#ifndef FRAME_HPP
#define FRAME_HPP

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <future>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

// NOTE(review): TIME_FUNCTION is used below as a statement-like macro but is
// not defined in this file. It presumably comes from a project profiling
// header - restore that include here if it was lost. Until then, fall back to
// a no-op when nothing has defined the macro so this header compiles on its own.
#ifndef TIME_FUNCTION
#define TIME_FUNCTION
#endif

// A single image frame that holds its pixel bytes either raw or in one of
// several compressed representations.
//
// Raw bytes live in `_data`; compressed output lives in `_compressedData` as
// 16-bit words. Compressing moves the payload from the former to the latter
// and records the scheme in `cformat`; decompress() reverses that.
class frame {
private:
    std::vector<uint8_t> _data;             // raw (uncompressed) bytes
    std::vector<uint16_t> _compressedData;  // compressed stream, 16-bit words
    // Never populated by the code in this file; only cleared and measured.
    std::unordered_map<uint16_t, std::vector<uint8_t>> overheadmap;
    size_t ratio = 1;       // truncated integer words/bytes ratio; written, never read here
    size_t sourceSize = 0;  // byte count of the data as of the last compression
    size_t width = 0;
    size_t height = 0;

    // Largest run length / dictionary index representable in one 16-bit word.
    static constexpr size_t kMaxRun = 65535;
    static constexpr size_t kMaxDictEntries = 65535;
    // Upper bound on the number of sequences collected by getRepeats().
    static constexpr size_t kMaxRepeats = 65534;
    // Number of leading bytes that must match before getRepeats() extends a match.
    static constexpr size_t kSeedLength = 4;

public:
    enum class colormap {
        RGB,
        RGBA,
        BGR,
        BGRA,
        B
    };

    enum class compresstype {
        RLE,
        DIFF,
        DIFFRLE,
        LZ78,
        HUFFMAN,
        RAW
    };

    colormap colorFormat = colormap::RGB;
    compresstype cformat = compresstype::RAW;

    size_t getWidth() const {
        return width;
    }

    size_t getHeight() const {
        return height;
    }

    frame() = default;

    // Creates a zero-filled raw frame of w * h pixels. The number of bytes per
    // pixel follows from `format`: 4 for RGBA/BGRA, 1 for B, otherwise 3.
    frame(size_t w, size_t h, colormap format = colormap::RGB)
        : width(w), height(h), colorFormat(format), cformat(compresstype::RAW) {
        size_t channels = 3;
        switch (format) {
            case colormap::RGBA:
            case colormap::BGRA:
                channels = 4;
                break;
            case colormap::B:
                channels = 1;
                break;
            case colormap::RGB:
            case colormap::BGR:
            default:
                channels = 3;
                break;
        }
        _data.resize(width * height * channels);
    }

    // Replaces the raw bytes and discards any compressed representation.
    void setData(const std::vector<uint8_t>& data) {
        _data = data;
        cformat = compresstype::RAW;
        _compressedData.clear();
        _compressedData.shrink_to_fit();
        overheadmap.clear();
    }

    // Raw bytes; empty while the frame is held in compressed form.
    const std::vector<uint8_t>& getData() const {
        return _data;
    }

    // Run-Length Encoding (RLE) compression.
    //
    // Encodes `_data` as (run length, value) word pairs, with runs capped at
    // 65535 so the length fits in one word. RAW becomes RLE and DIFF becomes
    // DIFFRLE. Does nothing when there is no raw data or the frame is already
    // run-length encoded.
    //
    // Throws std::runtime_error if raw data is present but tagged with a
    // format RLE cannot be layered on (LZ78, HUFFMAN).
    frame& compressFrameRLE() {
        TIME_FUNCTION;
        if (_data.empty()) {
            return *this;
        }

        switch (cformat) {
            case compresstype::RLE:
            case compresstype::DIFFRLE:
                return *this;  // already run-length encoded
            case compresstype::RAW:
            case compresstype::DIFF:
                break;
            default:
                throw std::runtime_error("RLE compression can only be applied to raw or diff data");
        }

        std::vector<uint16_t> packed;
        packed.reserve(_data.size() * 2);  // worst case: every byte is its own run

        size_t runLength = 1;
        for (size_t i = 0; i < _data.size(); ++i) {
            const bool runContinues = i + 1 < _data.size()
                                      && _data[i] == _data[i + 1]
                                      && runLength < kMaxRun;
            if (runContinues) {
                ++runLength;
                continue;
            }
            packed.push_back(static_cast<uint16_t>(runLength));
            packed.push_back(_data[i]);
            runLength = 1;
        }
        packed.shrink_to_fit();

        ratio = packed.size() / _data.size();
        sourceSize = _data.size();
        cformat = (cformat == compresstype::DIFF) ? compresstype::DIFFRLE : compresstype::RLE;
        _compressedData = std::move(packed);
        _data.clear();
        _data.shrink_to_fit();
        return *this;
    }

    // LZ78 compression.
    //
    // Emits (prefix index, next byte) word pairs. Index 0 denotes the empty
    // prefix; each emitted pair defines the next dictionary index until 65534
    // entries exist. If the input ends in the middle of a known phrase, a
    // final (prefix, 0) pair is written; the padding byte it introduces is
    // dropped on decompression using the recorded source size.
    //
    // Throws std::runtime_error if the frame is not in RAW form.
    frame& compressFrameLZ78() {
        TIME_FUNCTION;
        if (_data.empty()) {
            return *this;
        }
        if (cformat != compresstype::RAW) {
            throw std::runtime_error("LZ78 compression can only be applied to raw data");
        }

        // Key is (prefix index << 8) | byte. It needs more than 16 bits because
        // the prefix index alone can use all 16.
        std::unordered_map<uint32_t, uint16_t> dict;
        std::vector<uint16_t> compressed;
        uint16_t nextIndex = 1;
        uint16_t prefix = 0;

        for (const uint8_t byte : _data) {
            const uint32_t key = (static_cast<uint32_t>(prefix) << 8) | byte;
            const auto hit = dict.find(key);
            if (hit != dict.end()) {
                prefix = hit->second;  // keep extending the current phrase
                continue;
            }
            compressed.push_back(prefix);
            compressed.push_back(byte);
            if (nextIndex < kMaxDictEntries) {
                dict.emplace(key, nextIndex++);
            }
            prefix = 0;  // start a new phrase only after emitting one
        }

        if (prefix != 0) {
            compressed.push_back(prefix);
            compressed.push_back(0);
        }

        ratio = compressed.size() / _data.size();
        sourceSize = _data.size();
        _compressedData = std::move(compressed);
        _compressedData.shrink_to_fit();

        // Clear uncompressed data
        _data.clear();
        _data.shrink_to_fit();

        cformat = compresstype::LZ78;
        return *this;
    }

    // Differential compression
    frame& compressFrameDiff() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    // Huffman compression
    frame& compressFrameHuffman() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    // Combined compression methods
    frame& compressFrameZigZagRLE() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    frame& compressFrameDiffRLE() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    // Generic decompression that dispatches on the current compression type.
    // RAW frames are returned unchanged. HUFFMAN throws std::runtime_error;
    // DIFF and DIFFRLE end in decompressFrameDiff(), which currently throws
    // std::logic_error.
    frame& decompress() {
        switch (cformat) {
            case compresstype::RLE:
                return decompressFrameRLE();
            case compresstype::DIFF:
                return decompressFrameDiff();
            case compresstype::DIFFRLE:
                // For combined methods, first decompress RLE then the base method
                decompressFrameRLE();
                cformat = compresstype::DIFF;
                return decompressFrameDiff();
            case compresstype::LZ78:
                return decompressFrameLZ78();
            case compresstype::HUFFMAN:
                // Huffman decompression would be implemented here
                throw std::runtime_error("Huffman decompression not fully implemented");
            case compresstype::RAW:
            default:
                return *this;  // Already decompressed
        }
    }

    // Size in bytes attributed to the dictionary. This is the size of the
    // (never populated) map object itself, not of any stored entries.
    size_t getDictionarySize() const {
        return sizeof(overheadmap);
    }

    // Compressed size in bytes, including dictionary overhead for LZ78.
    size_t getTotalCompressedSize() const {
        size_t totalBytes = getCompressedDataSize() * sizeof(uint16_t);
        if (cformat == compresstype::LZ78) {
            totalBytes += getDictionarySize();
        }
        return totalBytes;
    }

    // Source bytes divided by total compressed bytes; 0.0 when nothing is compressed.
    double getCompressionRatio() const {
        if (_compressedData.empty() || sourceSize == 0) {
            return 0.0;
        }
        return static_cast<double>(sourceSize) / getTotalCompressedSize();
    }

    size_t getSourceSize() const {
        return sourceSize;
    }

    // Number of 16-bit words in the compressed stream (not bytes).
    size_t getCompressedDataSize() const {
        return _compressedData.size();
    }

    // Writes a multi-line compression report to std::cout.
    void printCompressionInfo() const {
        std::cout << "Compression Type: ";
        switch (cformat) {
            case compresstype::RLE:     std::cout << "RLE"; break;
            case compresstype::DIFF:    std::cout << "DIFF"; break;
            case compresstype::DIFFRLE: std::cout << "DIFF + RLE"; break;
            case compresstype::LZ78:    std::cout << "LZ78 (kinda)"; break;
            case compresstype::HUFFMAN: std::cout << "HUFFMAN"; break;
            case compresstype::RAW:     std::cout << "RAW (uncompressed)"; break;
            default:                    std::cout << "UNKNOWN"; break;
        }
        std::cout << std::endl;

        std::cout << "Source Size: " << getSourceSize() << " bytes" << std::endl;
        std::cout << "Compressed data Size: " << getCompressedDataSize() << " 16-bit words" << std::endl;
        std::cout << "Compressed Size: " << getCompressedDataSize() * 2 << " bytes" << std::endl;

        if (cformat == compresstype::LZ78) {
            std::cout << "Dictionary Size: " << getDictionarySize() << " bytes" << std::endl;
            std::cout << "Dictionary Entries: " << overheadmap.size() << std::endl;
        }
        std::cout << "Total Compressed Size: " << getTotalCompressedSize() << " bytes" << std::endl;

        const double compressionRatio = getCompressionRatio();
        std::cout << "Compression Ratio: " << compressionRatio << ":1" << std::endl;

        if (compressionRatio > 1.0) {
            const double savings = (1.0 - (1.0 / compressionRatio)) * 100.0;
            std::cout << "Space Savings: " << savings << "%" << std::endl;
        }
    }

    // Writes a one-line compression summary to std::cout.
    void printCompressionStats() const {
        if (cformat == compresstype::LZ78) {
            std::cout << "[" << getCompressionTypeString() << "] "
                      << "Source Size: " << getSourceSize() << " bytes -> "
                      << getTotalCompressedSize() << "B "
                      << "(ratio: " << getCompressionRatio() << ":1)" << std::endl;
        } else {
            std::cout << "[" << getCompressionTypeString() << "] "
                      << getSourceSize() << "B -> " << getTotalCompressedSize() << "B "
                      << "(ratio: " << getCompressionRatio() << ":1)" << std::endl;
        }
    }

    // Get compression type as string
    std::string getCompressionTypeString() const {
        switch (cformat) {
            case compresstype::RLE:     return "RLE";
            case compresstype::DIFF:    return "DIFF";
            case compresstype::DIFFRLE: return "DIFF+RLE";
            case compresstype::LZ78:    return "LZ78";
            case compresstype::HUFFMAN: return "HUFFMAN";
            case compresstype::RAW:     return "RAW";
            default:                    return "UNKNOWN";
        }
    }

    compresstype getCompressionType() const {
        return cformat;
    }

    bool isCompressed() const {
        return cformat != compresstype::RAW;
    }

    // Releases all buffers. clear() + shrink_to_fit() is only a non-binding
    // request to give memory back, so swap each container with an empty
    // temporary instead: the old storage is freed when the temporary dies.
    void free() {
        decltype(overheadmap)().swap(overheadmap);
        decltype(_compressedData)().swap(_compressedData);
        decltype(_data)().swap(_data);
    }

private:
    // Decompression is private to prevent breaking stuff from external calls.

    // Returns `source` ordered from longest to shortest sequence.
    std::vector<std::vector<uint8_t>> sortvecs(std::vector<std::vector<uint8_t>> source) {
        std::sort(source.begin(), source.end(),
                  [](const std::vector<uint8_t>& a, const std::vector<uint8_t>& b) {
                      return a.size() > b.size();
                  });
        return source;
    }

    // Inverse of compressFrameLZ78(): rebuilds the dictionary while reading
    // (prefix index, byte) pairs and restores the raw bytes.
    //
    // Throws std::runtime_error if the frame is not LZ78 compressed or the
    // stream is malformed (odd word count, unknown prefix index).
    frame& decompressFrameLZ78() {
        TIME_FUNCTION;
        if (cformat != compresstype::LZ78) {
            throw std::runtime_error("Data is not LZ78 compressed");
        }
        if (_compressedData.size() % 2 != 0) {
            throw std::runtime_error("LZ78 stream is truncated (odd word count)");
        }

        // Each phrase is its parent phrase plus one byte; index 0 is the empty phrase.
        struct Phrase {
            uint16_t parent;
            uint8_t last;
        };
        std::vector<Phrase> phrases;
        phrases.push_back({0, 0});

        std::vector<uint8_t> restored;
        restored.reserve(sourceSize);

        for (size_t i = 0; i < _compressedData.size(); i += 2) {
            const uint16_t prefix = _compressedData[i];
            const uint8_t byte = static_cast<uint8_t>(_compressedData[i + 1]);
            if (prefix >= phrases.size()) {
                throw std::runtime_error("LZ78 stream references an unknown dictionary entry");
            }

            // Walking the parent chain yields the phrase back to front.
            const size_t phraseStart = restored.size();
            for (uint16_t k = prefix; k != 0; k = phrases[k].parent) {
                restored.push_back(phrases[k].last);
            }
            std::reverse(restored.begin() + phraseStart, restored.end());
            restored.push_back(byte);

            // Mirror the compressor's dictionary growth limit.
            if (phrases.size() < kMaxDictEntries) {
                phrases.push_back({prefix, byte});
            }
        }

        // Drop the padding byte written when the input ended inside a phrase.
        if (restored.size() > sourceSize) {
            restored.resize(sourceSize);
        }

        _data = std::move(restored);
        _compressedData.clear();
        cformat = compresstype::RAW;
        return *this;
    }

    // Expands (run length, value) word pairs back into raw bytes and marks the
    // frame RAW. Throws std::runtime_error on an odd word count.
    frame& decompressFrameRLE() {
        TIME_FUNCTION;
        std::vector<uint8_t> decompressed;
        decompressed.reserve(sourceSize);

        if (_compressedData.size() % 2 != 0) {
            throw std::runtime_error("something broke (decompressFrameRLE)");
        }

        for (size_t i = 0; i < _compressedData.size(); i += 2) {
            const uint16_t runLength = _compressedData[i];
            const uint8_t value = static_cast<uint8_t>(_compressedData[i + 1]);
            decompressed.insert(decompressed.end(), runLength, value);
        }

        _data = std::move(decompressed);
        _compressedData.clear();
        cformat = compresstype::RAW;
        return *this;
    }

    // Collects candidate repeated byte sequences from `_data`.
    //
    // The data is walked in chunks of up to 65535 bytes. Each chunk longer
    // than four bytes is added to the result, and the bytes after the chunk
    // are then scanned (split across async tasks) for positions whose first
    // four bytes equal the chunk's first four. A hit is extended while it
    // keeps matching the chunk; matches of at least 64 bytes are kept, longest
    // class (128+) first. The result is capped at 65534 sequences.
    std::vector<std::vector<uint8_t>> getRepeats() {
        TIME_FUNCTION;
        constexpr size_t kChunkSize = 65535;

        std::vector<std::vector<uint8_t>> result;
        const size_t dsize = _data.size();

        // Match storage shared by the worker tasks; guarded by `mutex`.
        struct ThreadSafeMatches {
            std::mutex mutex;
            std::vector<std::vector<uint8_t>> matches128plus;
            std::vector<std::vector<uint8_t>> matches64plus;

            void addMatch(std::vector<uint8_t>&& match, size_t length) {
                std::lock_guard<std::mutex> lock(mutex);
                if (length >= 128) {
                    if (matches128plus.size() < kMaxRepeats) matches128plus.push_back(std::move(match));
                } else if (length >= 64) {
                    if (matches64plus.size() < kMaxRepeats) matches64plus.push_back(std::move(match));
                }
            }
        };
        ThreadSafeMatches threadMatches;

        // hardware_concurrency() may return 0 when the value is not computable.
        const size_t num_threads = std::max<size_t>(1, std::thread::hardware_concurrency());

        size_t pos = 0;
        while (pos < dsize && result.size() < kMaxRepeats) {
            const size_t chunk_end = std::min(pos + kChunkSize, dsize);
            const size_t chunk_len = chunk_end - pos;
            if (chunk_len <= kSeedLength) {
                pos = chunk_end;
                continue;
            }
            result.emplace_back(_data.begin() + pos, _data.begin() + chunk_end);

            // A seed needs kSeedLength bytes, so the last valid start is
            // dsize - kSeedLength; search_end is one past it. dsize exceeds
            // kSeedLength here because chunk_len does, so this cannot underflow.
            const size_t search_end = dsize - (kSeedLength - 1);
            if (chunk_end < search_end) {
                const size_t search_range = search_end - chunk_end;
                const size_t block_size = (search_range + num_threads - 1) / num_threads;

                std::vector<std::future<void>> futures;
                for (size_t t = 0; t < num_threads; ++t) {
                    const size_t start = chunk_end + t * block_size;
                    if (start >= search_end) {
                        break;
                    }
                    const size_t end = std::min(start + block_size, search_end);

                    // Every task is joined below before this iteration ends, so
                    // capturing `this` and `threadMatches` by reference is safe.
                    futures.push_back(std::async(std::launch::async,
                        [this, &threadMatches, pos, chunk_len, dsize, start, end]() {
                            size_t searchpos = start;
                            while (searchpos < end) {
                                const bool seedMatches = std::equal(
                                    _data.begin() + pos,
                                    _data.begin() + pos + kSeedLength,
                                    _data.begin() + searchpos);
                                if (!seedMatches) {
                                    ++searchpos;
                                    continue;
                                }

                                // Found match, calculate length
                                size_t matchlength = kSeedLength;
                                while (matchlength < chunk_len
                                       && searchpos + matchlength < dsize
                                       && _data[searchpos + matchlength] == _data[pos + matchlength]) {
                                    ++matchlength;
                                }

                                threadMatches.addMatch(
                                    std::vector<uint8_t>(_data.begin() + searchpos,
                                                         _data.begin() + searchpos + matchlength),
                                    matchlength);
                                searchpos += matchlength;
                            }
                        }));
                }

                // Wait for all tasks to complete
                for (auto& pending : futures) {
                    pending.get();
                }
            }

            pos = chunk_end;
        }

        // Merge results to main, longest class first, honouring the cap.
        const auto mergeInto = [&result](std::vector<std::vector<uint8_t>>& bucket) {
            for (auto& match : bucket) {
                if (result.size() >= kMaxRepeats) {
                    break;
                }
                result.push_back(std::move(match));
            }
        };
        mergeInto(threadMatches.matches128plus);
        mergeInto(threadMatches.matches64plus);

        return result;
    }

    frame& decompressFrameDiff() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }
};

#endif