moved a bunch around

This commit is contained in:
Yggdrasil75
2025-11-17 14:44:24 -05:00
parent a7183256cc
commit e7bf6adfa6
5 changed files with 400 additions and 510 deletions

View File

@@ -11,8 +11,8 @@
#include "../util/timing_decorator.cpp"
struct AnimationConfig {
int width = 2048;
int height = 2048;
int width = 1024;
int height = 1024;
int totalFrames = 480;
float fps = 30.0f;
int numSeeds = 8;

View File

@@ -73,7 +73,9 @@ public:
// Empties Positions and ƨnoiƚiƨoꟼ and resets next_id to 0.
void clear() {
Positions.clear();
// rehash(0) after clear() asks the container to shrink its bucket array too.
// NOTE(review): assumes both members are unordered containers - confirm against their declarations.
Positions.rehash(0);
ƨnoiƚiƨoꟼ.clear();
ƨnoiƚiƨoꟼ.rehash(0);
next_id = 0;
}
@@ -165,6 +167,7 @@ public:
// Removes every entry from grid, then asks it to shrink its bucket array.
void clear() {
grid.clear();
// NOTE(review): assumes grid is an unordered container (rehash is called on it) - confirm.
grid.rehash(0);
}
};
@@ -767,6 +770,9 @@ public:
Sizes.clear();
spatialGrid.clear();
neighborMap.clear();
Colors.rehash(0);
Sizes.rehash(0);
neighborMap.rehash(0);
}
// neighbor map

34
util/grid/sprite2.hpp Normal file
View File

@@ -0,0 +1,34 @@
#ifndef SPRITE2_HPP
#define SPRITE2_HPP
#include "grid2.hpp"
#include "../output/frame.hpp"
// Grid2 extension that stores a sprite frame, a layer value and an orientation
// for each object id handed out by addObject().
class SpriteMap2 : public Grid2 {
private:
    // id, sprite
    //std::unordered_map<size_t, std::shared_ptr<Grid2>> Sprites;
    std::unordered_map<size_t, frame> spritesComped;
    // id, layer
    std::unordered_map<size_t, int> Layers;
    // id, orientation
    std::unordered_map<size_t, float> Orientations;

public:
    using Grid2::Grid2;

    // Adds an object at `pos` (with a Vec4(0,0,0,0) colour argument) and records
    // `sprite`, `layer` and `orientation` under the id returned by addObject().
    // Returns that id so the caller can look the sprite up again.
    size_t addSprite(const Vec2& pos, frame sprite, int layer = 0, float orientation = 0.0f) {
        size_t id = addObject(pos, Vec4(0,0,0,0));
        spritesComped[id] = sprite;
        Layers[id] = layer;
        Orientations[id] = orientation;
        // The function is declared size_t; falling off the end without a return
        // is undefined behaviour.
        return id;
    }

    // Returns a copy of the sprite stored for `id`.
    // Throws std::out_of_range (from unordered_map::at) when the id is unknown.
    frame getSprite(size_t id) {
        return spritesComped.at(id);
    }
};
#endif

View File

@@ -8,6 +8,7 @@
#include <algorithm>
#include <filesystem>
#include <chrono>
#include <tuple>
#include <iostream>
#include "frame.hpp"
@@ -188,37 +189,8 @@ private:
return paddedFrame;
}
public:
// Original method for vector of raw frame data
static bool saveAVI(const std::string& filename,
const std::vector<std::vector<uint8_t>>& frames,
int width, int height, float fps = 30.0f) {
TIME_FUNCTION;
if (frames.empty() || width <= 0 || height <= 0 || fps <= 0) {
return false;
}
// Validate frame sizes
size_t expectedFrameSize = width * height * 3;
for (const auto& frame : frames) {
if (frame.size() != expectedFrameSize) {
return false;
}
}
// Create directory if needed
if (!createDirectoryIfNeeded(filename)) {
return false;
}
std::ofstream file(filename, std::ios::binary);
if (!file) {
return false;
}
uint32_t frameCount = static_cast<uint32_t>(frames.size());
uint32_t microSecPerFrame = static_cast<uint32_t>(1000000.0f / fps);
static std::tuple<uint32_t, uint32_t, uint32_t, uint32_t> writeheader(int width, int height, float fps, std::ofstream& file, uint32_t frameCount, uint32_t microSecPerFrame) {
// Calculate padding for each frame (BMP-style row padding)
uint32_t rowSize = (width * 3 + 3) & ~3;
uint32_t frameSize = rowSize * height;
@@ -316,9 +288,62 @@ public:
uint32_t moviListStart = static_cast<uint32_t>(file.tellp());
writeList(file, 0x69766F6D, nullptr, 0); // 'movi' - we'll fill size later
return {moviListStart, frameSize, rowSize, riffStartPos};
}
static void writeFooter(std::ofstream& file, uint32_t moviListStart, uint32_t riffStartPos, std::vector<AVIWriter::AVIIndexEntry>& indexEntries) {
// Update movi list size
uint32_t moviListEnd = static_cast<uint32_t>(file.tellp());
file.seekp(moviListStart + 4);
uint32_t moviListSize = moviListEnd - moviListStart - 8;
file.write(reinterpret_cast<const char*>(&moviListSize), 4);
file.seekp(moviListEnd);
// idx1 chunk - index
uint32_t idx1Size = static_cast<uint32_t>(indexEntries.size() * sizeof(AVIIndexEntry));
writeChunk(file, 0x31786469, indexEntries.data(), idx1Size); // 'idx1'
// Update RIFF chunk size
uint32_t fileEnd = static_cast<uint32_t>(file.tellp());
file.seekp(riffStartPos + 4);
uint32_t riffSize = fileEnd - riffStartPos - 8;
file.write(reinterpret_cast<const char*>(&riffSize), 4);
}
public:
// Original method for vector of raw frame data
static bool saveAVI(const std::string& filename,
const std::vector<std::vector<uint8_t>>& frames,
int width, int height, float fps = 30.0f) {
TIME_FUNCTION;
if (frames.empty() || width <= 0 || height <= 0 || fps <= 0) {
return false;
}
// Validate frame sizes
size_t expectedFrameSize = width * height * 3;
for (const auto& frame : frames) {
if (frame.size() != expectedFrameSize) {
return false;
}
}
// Create directory if needed
createDirectoryIfNeeded(filename);
std::ofstream file(filename, std::ios::binary);
if (!file) {
return false;
}
uint32_t frameCount = static_cast<uint32_t>(frames.size());
uint32_t microSecPerFrame = static_cast<uint32_t>(1000000.0f / fps);
auto [moviListStart, frameSize, rowSize, riffStartPos] = writeheader(width, height, fps, file, frameCount, microSecPerFrame);
std::vector<AVIIndexEntry> indexEntries;
indexEntries.reserve(frameCount);
// Write frames
for (uint32_t i = 0; i < frameCount; ++i) {
uint32_t frameStart = static_cast<uint32_t>(file.tellp()) - moviListStart - 4;
@@ -348,71 +373,14 @@ public:
indexEntries.push_back(entry);
}
// Update movi list size
uint32_t moviListEnd = static_cast<uint32_t>(file.tellp());
file.seekp(moviListStart + 4);
uint32_t moviListSize = moviListEnd - moviListStart - 8;
file.write(reinterpret_cast<const char*>(&moviListSize), 4);
file.seekp(moviListEnd);
// idx1 chunk - index
uint32_t idx1Size = static_cast<uint32_t>(indexEntries.size() * sizeof(AVIIndexEntry));
writeChunk(file, 0x31786469, indexEntries.data(), idx1Size); // 'idx1'
// Update RIFF chunk size
uint32_t fileEnd = static_cast<uint32_t>(file.tellp());
file.seekp(riffStartPos + 4);
uint32_t riffSize = fileEnd - riffStartPos - 8;
file.write(reinterpret_cast<const char*>(&riffSize), 4);
writeFooter(file, moviListStart, riffStartPos, indexEntries);
return true;
}
// Convenience function to save from individual frame files.
//
// Each entry of frameFiles is read whole and treated as a BMP file: the file
// must start with "BM" and be at least 54 bytes long. Everything from the pixel
// data offset (little-endian 32-bit value at byte 10) to the end of the file is
// taken as one frame and the collected frames are handed to saveAVI().
//
// Returns false if any file cannot be opened, measured or fully read, fails the
// header checks, or if saveAVI() itself returns false.
//
// NOTE(review): the pixel bytes are forwarded untouched; BMP row padding or
// trailing bytes are not stripped here - confirm the inputs match what
// saveAVI() expects for width/height.
static bool saveAVIFromFrames(const std::string& filename,
                              const std::vector<std::string>& frameFiles,
                              int width, int height,
                              float fps = 30.0f) {
    TIME_FUNCTION;

    std::vector<std::vector<uint8_t>> frames;
    frames.reserve(frameFiles.size());

    for (const auto& frameFile : frameFiles) {
        std::ifstream file(frameFile, std::ios::binary);
        if (!file) {
            return false;
        }

        // Read BMP file and extract pixel data
        file.seekg(0, std::ios::end);
        const std::streamoff endPos = file.tellg();
        // tellg() reports failure as -1; converting that to size_t would request
        // an enormous buffer. The same test rejects files shorter than a BMP header.
        if (endPos < 54) {
            return false;
        }
        const size_t fileSize = static_cast<size_t>(endPos);
        file.seekg(0, std::ios::beg);

        std::vector<uint8_t> buffer(fileSize);
        file.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(fileSize));
        if (!file) {
            // Short read: the buffer would be partly zero-filled.
            return false;
        }

        // Simple BMP parsing - assumes 24-bit uncompressed BMP
        if (buffer[0] != 'B' || buffer[1] != 'M') {
            return false;
        }

        // Extract pixel data offset from BMP header. The field is little-endian
        // and sits at an unaligned offset, so assemble it byte by byte instead
        // of casting the buffer to uint32_t*.
        const uint32_t dataOffset = static_cast<uint32_t>(buffer[10])
                                  | (static_cast<uint32_t>(buffer[11]) << 8)
                                  | (static_cast<uint32_t>(buffer[12]) << 16)
                                  | (static_cast<uint32_t>(buffer[13]) << 24);
        if (dataOffset >= fileSize) {
            return false;
        }

        // Read pixel data (BGR format); built directly inside `frames` to avoid
        // a second copy of every frame.
        frames.emplace_back(buffer.begin() + dataOffset, buffer.end());
    }

    return saveAVI(filename, frames, width, height, fps);
}
// New method for streaming decompression of frame objects
static bool saveAVIFromCompressedFrames(const std::string& filename,
const std::vector<frame>& frames,
std::vector<frame> frames,
int width, int height,
float fps = 30.0f) {
TIME_FUNCTION;
@@ -433,112 +401,18 @@ public:
uint32_t frameCount = static_cast<uint32_t>(frames.size());
uint32_t microSecPerFrame = static_cast<uint32_t>(1000000.0f / fps);
// Calculate padding for each frame (BMP-style row padding)
uint32_t rowSize = (width * 3 + 3) & ~3;
uint32_t frameSize = rowSize * height;
// RIFF AVI header
RIFFChunk riffHeader;
riffHeader.chunkId = 0x46464952; // 'RIFF'
riffHeader.format = 0x20495641; // 'AVI '
// We'll come back and write the size at the end
uint32_t riffStartPos = static_cast<uint32_t>(file.tellp());
file.write(reinterpret_cast<const char*>(&riffHeader), sizeof(riffHeader));
// hdrl list
uint32_t hdrlListStart = static_cast<uint32_t>(file.tellp());
writeList(file, 0x6C726468, nullptr, 0); // 'hdrl' - we'll fill size later
// avih chunk
AVIMainHeader mainHeader;
mainHeader.microSecPerFrame = microSecPerFrame;
mainHeader.maxBytesPerSec = frameSize * static_cast<uint32_t>(fps);
mainHeader.paddingGranularity = 0;
mainHeader.flags = 0x000010; // HASINDEX flag
mainHeader.totalFrames = frameCount;
mainHeader.initialFrames = 0;
mainHeader.streams = 1;
mainHeader.suggestedBufferSize = frameSize;
mainHeader.width = width;
mainHeader.height = height;
mainHeader.reserved[0] = 0;
mainHeader.reserved[1] = 0;
mainHeader.reserved[2] = 0;
mainHeader.reserved[3] = 0;
writeChunk(file, 0x68697661, &mainHeader, sizeof(mainHeader)); // 'avih'
// strl list
uint32_t strlListStart = static_cast<uint32_t>(file.tellp());
writeList(file, 0x6C727473, nullptr, 0); // 'strl' - we'll fill size later
// strh chunk
AVIStreamHeader streamHeader;
streamHeader.type = 0x73646976; // 'vids'
streamHeader.handler = 0x00000000; // Uncompressed
streamHeader.flags = 0;
streamHeader.priority = 0;
streamHeader.language = 0;
streamHeader.initialFrames = 0;
streamHeader.scale = 1;
streamHeader.rate = static_cast<uint32_t>(fps);
streamHeader.start = 0;
streamHeader.length = frameCount;
streamHeader.suggestedBufferSize = frameSize;
streamHeader.quality = 0xFFFFFFFF; // Default quality
streamHeader.sampleSize = 0;
streamHeader.rcFrame.left = 0;
streamHeader.rcFrame.top = 0;
streamHeader.rcFrame.right = width;
streamHeader.rcFrame.bottom = height;
writeChunk(file, 0x68727473, &streamHeader, sizeof(streamHeader)); // 'strh'
// strf chunk
BITMAPINFOHEADER bitmapInfo;
bitmapInfo.size = sizeof(BITMAPINFOHEADER);
bitmapInfo.width = width;
bitmapInfo.height = height;
bitmapInfo.planes = 1;
bitmapInfo.bitCount = 24;
bitmapInfo.compression = 0; // BI_RGB - uncompressed
bitmapInfo.sizeImage = frameSize;
bitmapInfo.xPelsPerMeter = 0;
bitmapInfo.yPelsPerMeter = 0;
bitmapInfo.clrUsed = 0;
bitmapInfo.clrImportant = 0;
writeChunk(file, 0x66727473, &bitmapInfo, sizeof(bitmapInfo)); // 'strf'
// Update strl list size
uint32_t strlListEnd = static_cast<uint32_t>(file.tellp());
file.seekp(strlListStart + 4);
uint32_t strlListSize = strlListEnd - strlListStart - 8;
file.write(reinterpret_cast<const char*>(&strlListSize), 4);
file.seekp(strlListEnd);
// Update hdrl list size
uint32_t hdrlListEnd = static_cast<uint32_t>(file.tellp());
file.seekp(hdrlListStart + 4);
uint32_t hdrlListSize = hdrlListEnd - hdrlListStart - 8;
file.write(reinterpret_cast<const char*>(&hdrlListSize), 4);
file.seekp(hdrlListEnd);
// movi list
uint32_t moviListStart = static_cast<uint32_t>(file.tellp());
writeList(file, 0x69766F6D, nullptr, 0); // 'movi' - we'll fill size later
auto [moviListStart, frameSize, rowSize, riffStartPos] = writeheader(width, height, fps, file, frameCount, microSecPerFrame);
std::vector<AVIIndexEntry> indexEntries;
indexEntries.reserve(frameCount);
// Write frames with streaming decompression
for (uint32_t i = 0; i < frameCount; ++i) {
while (frameCount > 0) {
uint32_t frameStart = static_cast<uint32_t>(file.tellp()) - moviListStart - 4;
// Prepare frame data (decompresses if necessary and converts to RGB)
std::vector<uint8_t> paddedFrame = prepareFrameData(frames[i], width, height, rowSize);
std::vector<uint8_t> paddedFrame = prepareFrameData(frames[0], width, height, rowSize);
//frames[i].free();
// Write frame as '00db' chunk
writeChunk(file, 0x62643030, paddedFrame.data(), frameSize); // '00db'
@@ -550,25 +424,12 @@ public:
entry.size = frameSize;
indexEntries.push_back(entry);
paddedFrame.clear();
frames.erase(frames.begin());
paddedFrame.shrink_to_fit();
frameCount = frames.size();
}
// Update movi list size
uint32_t moviListEnd = static_cast<uint32_t>(file.tellp());
file.seekp(moviListStart + 4);
uint32_t moviListSize = moviListEnd - moviListStart - 8;
file.write(reinterpret_cast<const char*>(&moviListSize), 4);
file.seekp(moviListEnd);
// idx1 chunk - index
uint32_t idx1Size = static_cast<uint32_t>(indexEntries.size() * sizeof(AVIIndexEntry));
writeChunk(file, 0x31786469, indexEntries.data(), idx1Size); // 'idx1'
// Update RIFF chunk size
uint32_t fileEnd = static_cast<uint32_t>(file.tellp());
file.seekp(riffStartPos + 4);
uint32_t riffSize = fileEnd - riffStartPos - 8;
file.write(reinterpret_cast<const char*>(&riffSize), 4);
writeFooter(file, moviListStart, riffStartPos, indexEntries);
return true;
}

View File

@@ -79,10 +79,6 @@ public:
return _data;
}
const std::vector<uint16_t>& getCompressedData() const {
return _compressedData;
}
// Run-Length Encoding (RLE) compression
frame& compressFrameRLE() {
TIME_FUNCTION;
@@ -118,6 +114,295 @@ public:
return *this;
}
// LZ78 compression
//
// Builds a dictionary from the sequences returned by getRepeats() (longest
// first, only sequences longer than one byte) and rewrites _data as a stream of
// 16-bit tokens:
//   - non-zero token: key into overheadmap, stands for that whole sequence
//   - zero token:     the next word holds one literal byte
// On success _data is emptied, _compressedData/overheadmap hold the result,
// sourceSize records the original length and cformat becomes LZ78.
//
// Returns *this. Does nothing when _data is empty.
// Throws std::runtime_error when the frame is not in RAW format.
frame& compressFrameLZ78() {
    TIME_FUNCTION;
    if (_data.empty()) {
        return *this;
    }
    if (cformat != compresstype::RAW) {
        throw std::runtime_error("LZ78 compression can only be applied to raw data");
    }

    std::vector<std::vector<uint8_t>> repeats = getRepeats();
    repeats = sortvecs(repeats);

    // Token 0 is reserved for literals and tokens are 16 bits wide, which
    // leaves keys 1..65535 for dictionary entries.
    constexpr size_t kMaxDictEntries = 65535;

    // Start from an empty dictionary so keys left over from an earlier use of
    // this frame cannot be mixed in with the new ones.
    overheadmap.clear();

    // Sequences in priority order; dictionary[i] is stored under key i + 1.
    // Walking this vector avoids one hash lookup per entry per input position
    // and, unlike indexing overheadmap with operator[], can never insert keys.
    std::vector<const std::vector<uint8_t>*> dictionary;
    for (const auto& rseq : repeats) {
        if (dictionary.size() >= kMaxDictEntries) {
            break;
        }
        if (rseq.size() > 1) {
            dictionary.push_back(&rseq);
            overheadmap[static_cast<uint16_t>(dictionary.size())] = rseq;
        }
    }

    std::vector<uint16_t> compressed;
    size_t cpos = 0;

    while (cpos < _data.size()) {
        const size_t remaining = _data.size() - cpos;
        size_t matchedKey = 0;  // 0 means "no dictionary entry matched"
        size_t matchedLength = 0;

        // Iterate through dictionary in priority order (longest patterns first);
        // the first hit is therefore the longest one.
        for (size_t i = 0; i < dictionary.size(); ++i) {
            const std::vector<uint8_t>& dict_seq = *dictionary[i];

            // Quick length check - if remaining data is shorter than pattern, skip
            if (dict_seq.size() > remaining) {
                continue;
            }

            if (std::equal(dict_seq.begin(), dict_seq.end(), _data.begin() + cpos)) {
                matchedKey = i + 1;
                matchedLength = dict_seq.size();
                break;
            }
        }

        if (matchedKey != 0) {
            // Write dictionary reference
            compressed.push_back(static_cast<uint16_t>(matchedKey));
            cpos += matchedLength;
        } else {
            // Write literal: 0 followed by the literal byte
            compressed.push_back(0);
            compressed.push_back(_data[cpos]);
            cpos++;
        }
    }

    // Divide in floating point: size_t / size_t truncates (any output shorter
    // than the input would give 0).
    // NOTE(review): ratio's declared type is not visible here - confirm it is
    // a floating-point member.
    ratio = static_cast<double>(compressed.size()) / static_cast<double>(_data.size());
    sourceSize = _data.size();

    _compressedData = std::move(compressed);
    _compressedData.shrink_to_fit();

    // Clear uncompressed data
    _data.clear();
    _data.shrink_to_fit();

    cformat = compresstype::LZ78;

    return *this;
}
// Differential compression
// Placeholder: always throws std::logic_error.
frame& compressFrameDiff() {
// TODO
throw std::logic_error("Function not yet implemented");
}
// Huffman compression
// Placeholder: always throws std::logic_error.
frame& compressFrameHuffman() {
// TODO
throw std::logic_error("Function not yet implemented");
}
// Combined compression methods
// Placeholder: always throws std::logic_error.
frame& compressFrameZigZagRLE() {
// TODO
throw std::logic_error("Function not yet implemented");
}
// Placeholder: always throws std::logic_error.
frame& compressFrameDiffRLE() {
// TODO
throw std::logic_error("Function not yet implemented");
}
// Generic decompression: picks the decoder that matches the current cformat.
// Returns *this (already-RAW or unrecognised formats are returned unchanged).
// Throws std::runtime_error for HUFFMAN; the DIFF decoders may throw as well.
frame& decompress() {
    if (cformat == compresstype::RLE) {
        return decompressFrameRLE();
    }
    if (cformat == compresstype::DIFF) {
        return decompressFrameDiff();
    }
    if (cformat == compresstype::DIFFRLE) {
        // For combined methods, first decompress RLE then the base method
        decompressFrameRLE();
        cformat = compresstype::DIFF;
        return decompressFrameDiff();
    }
    if (cformat == compresstype::LZ78) {
        return decompressFrameLZ78();
    }
    if (cformat == compresstype::HUFFMAN) {
        // Huffman decompression would be implemented here
        throw std::runtime_error("Huffman decompression not fully implemented");
    }
    // RAW or anything else: already decompressed
    return *this;
}
// Calculate the size of the dictionary in bytes.
// Counts, for every entry in overheadmap, the key plus the bytes of its
// sequence. sizeof(overheadmap) would only give the fixed size of the map
// object itself, regardless of how many entries it holds.
// Container bookkeeping (nodes, buckets, vector headers) is not included.
size_t getDictionarySize() const {
    size_t dictSize = 0;
    for (const auto& entry : overheadmap) {
        dictSize += sizeof(entry.first) + entry.second.size();
    }
    return dictSize;
}
// Get compressed size in bytes, including dictionary overhead for LZ78.
// getCompressedDataSize() counts 16-bit words, so it is scaled to bytes before
// the dictionary size (already in bytes) is added.
size_t getTotalCompressedSize() const {
    size_t baseSize = getCompressedDataSize() * sizeof(uint16_t);
    if (cformat == compresstype::LZ78) {
        baseSize += getDictionarySize();
    }
    return baseSize;
}
// Ratio of sourceSize to getTotalCompressedSize().
// Returns 0.0 when there is no compressed data or no recorded source size.
double getCompressionRatio() const {
    const bool nothingToReport = _compressedData.empty() || sourceSize == 0;
    if (nothingToReport) {
        return 0.0;
    }
    const double original = static_cast<double>(sourceSize);
    return original / getTotalCompressedSize();
}
// Returns the stored sourceSize value.
size_t getSourceSize() const {
return sourceSize;
}
// Returns the number of elements in _compressedData (an element count, not a byte count).
size_t getCompressedDataSize() const {
return _compressedData.size();
}
void printCompressionInfo() const {
std::cout << "Compression Type: ";
switch (cformat) {
case compresstype::RLE: std::cout << "RLE"; break;
case compresstype::DIFF: std::cout << "DIFF"; break;
case compresstype::DIFFRLE: std::cout << "DIFF + RLE"; break;
case compresstype::LZ78: std::cout << "LZ78 (kinda)"; break;
case compresstype::HUFFMAN: std::cout << "HUFFMAN"; break;
case compresstype::RAW: std::cout << "RAW (uncompressed)"; break;
default: std::cout << "UNKNOWN"; break;
}
std::cout << std::endl;
std::cout << "Source Size: " << getSourceSize() << " bytes" << std::endl;
std::cout << "Compressed data Size: " << getCompressedDataSize() << " 16-bit words" << std::endl;
std::cout << "Compressed Size: " << getCompressedDataSize() * 2 << " bytes" << std::endl;
if (cformat == compresstype::LZ78) {
std::cout << "Dictionary Size: " << getDictionarySize() << " bytes" << std::endl;
std::cout << "Dictionary Entries: " << overheadmap.size() << std::endl;
std::cout << "Total Compressed Size: " << getTotalCompressedSize() << " bytes" << std::endl;
} else {
std::cout << "Total Compressed Size: " << getTotalCompressedSize() << " bytes" << std::endl;
}
std::cout << "Compression Ratio: " << getCompressionRatio() << ":1" << std::endl;
if (getCompressionRatio() > 1.0) {
double savings = (1.0 - (1.0 / getCompressionRatio())) * 100.0;
std::cout << "Space Savings: " << savings << "%" << std::endl;
}
}
void printCompressionStats() const {
if (cformat == compresstype::LZ78) {
std::cout << "[" << getCompressionTypeString() << "] "
<< "Source Size: " << getSourceSize() << " bytes"
<< getTotalCompressedSize() << "B "
<< "(ratio: " << getCompressionRatio() << ":1)" << std::endl;
} else {
std::cout << "[" << getCompressionTypeString() << "] "
<< getSourceSize() << "B -> " << getTotalCompressedSize() << "B "
<< "(ratio: " << getCompressionRatio() << ":1)" << std::endl;
}
}
// Short text label for the current cformat; "UNKNOWN" for unlisted values.
std::string getCompressionTypeString() const {
    const char* label = "UNKNOWN";
    switch (cformat) {
        case compresstype::RLE:     label = "RLE"; break;
        case compresstype::DIFF:    label = "DIFF"; break;
        case compresstype::DIFFRLE: label = "DIFF+RLE"; break;
        case compresstype::LZ78:    label = "LZ78"; break;
        case compresstype::HUFFMAN: label = "HUFFMAN"; break;
        case compresstype::RAW:     label = "RAW"; break;
        default: break;
    }
    return label;
}
// Returns the current compression format tag.
compresstype getCompressionType() const {
return cformat;
}
// True for every format other than RAW.
bool isCompressed() const {
return cformat != compresstype::RAW;
}
// Drops the dictionary, the compressed words and the raw bytes, and asks each
// container to give its storage back.
// NOTE(review): shrink_to_fit() and rehash(0) are requests; whether memory is
// actually returned is up to the standard library implementation.
void free() {
    // dictionary
    overheadmap.clear();
    overheadmap.rehash(0);

    // compressed token stream
    _compressedData.clear();
    _compressedData.shrink_to_fit();

    // raw bytes
    _data.clear();
    _data.shrink_to_fit();
}
private:
//moving decompression to private to prevent breaking stuff from external calls
// Returns the given sequences ordered from longest to shortest.
// The relative order of sequences with equal length is unspecified.
std::vector<std::vector<uint8_t>> sortvecs(std::vector<std::vector<uint8_t>> source) {
    const auto longerFirst = [](const std::vector<uint8_t>& lhs, const std::vector<uint8_t>& rhs) {
        return rhs.size() < lhs.size();
    };
    std::sort(source.begin(), source.end(), longerFirst);
    return source;
}
// Expands the 16-bit token stream in _compressedData back into raw bytes.
// A zero token is followed by one word carrying a literal byte; any other token
// is a key into overheadmap whose byte sequence is appended whole.
// Afterwards _data holds the result, the compressed buffer and the dictionary
// are emptied and cformat is RAW.
// Throws std::runtime_error if the frame is not LZ78 or a token has no
// dictionary entry.
frame& decompressFrameLZ78() {
    TIME_FUNCTION;
    if (cformat != compresstype::LZ78) {
        throw std::runtime_error("Data is not LZ78 compressed");
    }

    std::vector<uint8_t> restored;
    restored.reserve(sourceSize);

    const size_t wordCount = _compressedData.size();
    size_t idx = 0;
    while (idx < wordCount) {
        const uint16_t token = _compressedData[idx];
        ++idx;

        if (token == 0) {
            // Literal byte; a zero token with nothing after it is ignored.
            if (idx < wordCount) {
                restored.push_back(static_cast<uint8_t>(_compressedData[idx]));
                ++idx;
            }
            continue;
        }

        // Dictionary reference
        const auto entry = overheadmap.find(token);
        if (entry == overheadmap.end()) {
            throw std::runtime_error("Invalid dictionary reference in compressed data");
        }
        const std::vector<uint8_t>& sequence = entry->second;
        restored.insert(restored.end(), sequence.begin(), sequence.end());
    }

    _data = std::move(restored);
    _compressedData.clear();
    _compressedData.shrink_to_fit();
    overheadmap.clear();
    cformat = compresstype::RAW;

    return *this;
}
frame& decompressFrameRLE() {
TIME_FUNCTION;
std::vector<uint8_t> decompressed;
@@ -153,7 +438,7 @@ public:
std::vector<std::vector<uint8_t>> matches128plus;
std::vector<std::vector<uint8_t>> matches64plus;
//std::vector<std::vector<uint8_t>> matches32plus;
std::vector<std::vector<uint8_t>> matchesAll;
//std::vector<std::vector<uint8_t>> matchesAll;
void addMatch(std::vector<uint8_t>&& match, size_t length) {
std::lock_guard<std::mutex> lock(mutex);
@@ -165,9 +450,9 @@ public:
// else if (length >= 32) {
// if (matches32plus.size() < 65534) matches32plus.push_back(std::move(match));
// }
else {
if (matchesAll.size() < 65534) matchesAll.push_back(std::move(match));
}
// else {
// if (matchesAll.size() < 65534) matchesAll.push_back(std::move(match));
// }
}
};
@@ -270,307 +555,11 @@ public:
return result;
}
std::vector<std::vector<uint8_t>> sortvecs(std::vector<std::vector<uint8_t>> source) {
std::sort(source.begin(), source.end(), [](const std::vector<uint8_t> & a, const std::vector<uint8_t> & b) {return a.size() > b.size();});
return source;
}
// LZ78 compression
frame& compressFrameLZ78() {
TIME_FUNCTION;
if (_data.empty()) {
return *this;
}
if (cformat != compresstype::RAW) {
throw std::runtime_error("LZ78 compression can only be applied to raw data");
}
std::vector<std::vector<uint8_t>> repeats = getRepeats();
repeats = sortvecs(repeats);
uint16_t nextDict = 1;
std::vector<uint16_t> compressed;
size_t cpos = 0;
for (const auto& rseq : repeats) {
if (!rseq.empty() && rseq.size() > 1 && overheadmap.size() < 65535) {
overheadmap[nextDict] = rseq;
nextDict++;
}
}
while (cpos < _data.size()) {
bool found_match = false;
uint16_t best_dict_index = 0;
size_t best_match_length = 0;
// Iterate through dictionary in priority order (longest patterns first)
for (uint16_t dict_idx = 1; dict_idx <= overheadmap.size(); dict_idx++) {
const auto& dict_seq = overheadmap[dict_idx];
// Quick length check - if remaining data is shorter than pattern, skip
if (dict_seq.size() > (_data.size() - cpos)) {
continue;
}
// Check if this pattern matches at current position
bool match = true;
for (size_t i = 0; i < dict_seq.size(); ++i) {
if (_data[cpos + i] != dict_seq[i]) {
match = false;
break;
}
}
if (match) {
// Found a match - use it immediately (first match is best due to sorting)
best_dict_index = dict_idx;
best_match_length = dict_seq.size();
found_match = true;
break; // Stop searching - we found our match
}
}
if (found_match && best_match_length > 1) {
// Write dictionary reference
compressed.push_back(best_dict_index);
cpos += best_match_length;
} else {
// Write literal: 0 followed by the literal byte
compressed.push_back(0);
compressed.push_back(_data[cpos]);
cpos++;
}
}
ratio = compressed.size() / _data.size();
sourceSize = _data.size();
_compressedData = std::move(compressed);
_compressedData.shrink_to_fit();
// Clear uncompressed data
_data.clear();
_data.shrink_to_fit();
cformat = compresstype::LZ78;
return *this;
}
frame& decompressFrameLZ78() {
TIME_FUNCTION;
if (cformat != compresstype::LZ78) {
throw std::runtime_error("Data is not LZ78 compressed");
}
std::vector<uint8_t> decompressedData;
decompressedData.reserve(sourceSize);
size_t cpos = 0;
while (cpos < _compressedData.size()) {
uint16_t token = _compressedData[cpos++];
if (token == 0) {
// Literal byte
if (cpos < _compressedData.size()) {
decompressedData.push_back(static_cast<uint8_t>(_compressedData[cpos++]));
}
} else {
// Dictionary reference
auto it = overheadmap.find(token);
if (it != overheadmap.end()) {
const std::vector<uint8_t>& dict_entry = it->second;
decompressedData.insert(decompressedData.end(), dict_entry.begin(), dict_entry.end());
} else {
throw std::runtime_error("Invalid dictionary reference in compressed data");
}
}
}
_data = std::move(decompressedData);
_compressedData.clear();
_compressedData.shrink_to_fit();
overheadmap.clear();
cformat = compresstype::RAW;
return *this;
}
// Differential compression
frame& compressFrameDiff() {
// TODO
throw std::logic_error("Function not yet implemented");
}
frame& decompressFrameDiff() {
// TODO
throw std::logic_error("Function not yet implemented");
}
// Huffman compression
frame& compressFrameHuffman() {
// TODO
throw std::logic_error("Function not yet implemented");
}
// Combined compression methods
frame& compressFrameZigZagRLE() {
// TODO
throw std::logic_error("Function not yet implemented");
}
frame& compressFrameDiffRLE() {
// TODO
throw std::logic_error("Function not yet implemented");
}
// Generic decompression that detects compression type
frame& decompress() {
switch (cformat) {
case compresstype::RLE:
return decompressFrameRLE();
break;
case compresstype::DIFF:
return decompressFrameDiff();
break;
case compresstype::DIFFRLE:
// For combined methods, first decompress RLE then the base method
decompressFrameRLE();
cformat = compresstype::DIFF;
return decompressFrameDiff();
break;
case compresstype::LZ78:
return decompressFrameLZ78();
break;
case compresstype::HUFFMAN:
// Huffman decompression would be implemented here
throw std::runtime_error("Huffman decompression not fully implemented");
break;
case compresstype::RAW:
default:
return *this; // Already decompressed
}
}
// Calculate the size of the dictionary in bytes
size_t getDictionarySize() const {
size_t dictSize = 0;
dictSize = sizeof(overheadmap);
return dictSize;
}
// Get compressed size including dictionary overhead
size_t getTotalCompressedSize() const {
size_t baseSize = getCompressedSize() * 2; // Convert 16-bit words to bytes
if (cformat == compresstype::LZ78) {
baseSize += getDictionarySize();
}
return baseSize;
}
double getCompressionRatio() const {
if (_compressedData.empty() || sourceSize == 0) return 0.0;
return static_cast<double>(sourceSize) / getTotalCompressedSize();
}
// Get source size (uncompressed size)
size_t getSourceSize() const {
return sourceSize;
}
// Get compressed size (just the compressed data in bytes, excluding dictionary)
size_t getCompressedSize() const {
return _compressedData.size() * 2; // Convert 16-bit words to bytes
}
// Get just the compressed data size in 16-bit words
size_t getCompressedDataSize() const {
return _compressedData.size();
}
// Print compression information
void printCompressionInfo() const {
std::cout << "Compression Type: ";
switch (cformat) {
case compresstype::RLE: std::cout << "RLE"; break;
case compresstype::DIFF: std::cout << "DIFF"; break;
case compresstype::DIFFRLE: std::cout << "DIFF + RLE"; break;
case compresstype::LZ78: std::cout << "LZ78 (kinda)"; break;
case compresstype::HUFFMAN: std::cout << "HUFFMAN"; break;
case compresstype::RAW: std::cout << "RAW (uncompressed)"; break;
default: std::cout << "UNKNOWN"; break;
}
std::cout << std::endl;
std::cout << "Source Size: " << getSourceSize() << " bytes" << std::endl;
std::cout << "Compressed data Size: " << getCompressedDataSize() << " 16-bit words" << std::endl;
std::cout << "Compressed Size: " << getCompressedSize() << " bytes" << std::endl;
if (cformat == compresstype::LZ78) {
std::cout << "Dictionary Size: " << getDictionarySize() << " bytes" << std::endl;
std::cout << "Dictionary Entries: " << overheadmap.size() << std::endl;
std::cout << "Total Compressed Size: " << getTotalCompressedSize() << " bytes" << std::endl;
} else {
std::cout << "Total Compressed Size: " << getTotalCompressedSize() << " bytes" << std::endl;
}
std::cout << "Compression Ratio: " << getCompressionRatio() << ":1" << std::endl;
if (getCompressionRatio() > 1.0) {
double savings = (1.0 - (1.0 / getCompressionRatio())) * 100.0;
std::cout << "Space Savings: " << savings << "%" << std::endl;
}
}
// Print compression information in a compact format
void printCompressionStats() const {
if (cformat == compresstype::LZ78) {
std::cout << "[" << getCompressionTypeString() << "] "
<< getSourceSize() << "B -> " << getCompressedSize() << "B + "
<< getDictionarySize() << "B dict = " << getTotalCompressedSize() << "B "
<< "(ratio: " << getCompressionRatio() << ":1)" << std::endl;
} else {
std::cout << "[" << getCompressionTypeString() << "] "
<< getSourceSize() << "B -> " << getTotalCompressedSize() << "B "
<< "(ratio: " << getCompressionRatio() << ":1)" << std::endl;
}
}
// Get compression type as string
std::string getCompressionTypeString() const {
switch (cformat) {
case compresstype::RLE: return "RLE";
case compresstype::DIFF: return "DIFF";
case compresstype::DIFFRLE: return "DIFF+RLE";
case compresstype::LZ78: return "LZ78";
case compresstype::HUFFMAN: return "HUFFMAN";
case compresstype::RAW: return "RAW";
default: return "UNKNOWN";
}
}
compresstype getCompressionType() const {
return cformat;
}
const std::unordered_map<uint16_t, std::vector<uint8_t>>& getOverheadMap() const {
return overheadmap;
}
bool isCompressed() const {
return cformat != compresstype::RAW;
}
// Check if compressed data is available
bool hasCompressedData() const {
return !_compressedData.empty();
}
// Check if uncompressed data is available
bool hasUncompressedData() const {
return !_data.empty();
}
};
#endif