545 lines
18 KiB
C++
545 lines
18 KiB
C++
#ifndef FRAME_HPP
|
|
#define FRAME_HPP
|
|
|
|
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <future>
#include <iomanip>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

#include "../timing_decorator.hpp"
|
|
|
|
// A single image frame stored either as raw bytes or as a compressed stream
// of 16-bit words.
//
// Exactly one of the two buffers is populated at a time: the compress*()
// methods move the content from the raw buffer into the compressed buffer and
// decompress() moves it back. `cformat` records which encoding the compressed
// buffer currently holds.
class frame {
private:
    std::vector<uint8_t> _data;              // raw bytes; empty while the frame is compressed
    std::vector<uint16_t> _compressedData;   // compressed payload; empty while the frame is raw
    // Explicitly stored dictionary. Nothing in this class inserts into it (the
    // LZ78 decoder rebuilds its dictionary from the stream), so it only
    // contributes to the reported sizes if a future codec fills it.
    std::unordered_map<uint16_t, std::vector<uint8_t>> overheadmap;
    size_t sourceSize = 0;                   // size in bytes of the uncompressed content
    size_t width = 0;
    size_t height = 0;

public:
    enum class colormap {
        RGB,
        RGBA,
        BGR,
        BGRA,
        B
    };

    enum class compresstype {
        RLE,
        DIFF,
        DIFFRLE,
        LZ78,
        HUFFMAN,
        RAW
    };

    colormap colorFormat = colormap::RGB;
    compresstype cformat = compresstype::RAW;

    const size_t& getWidth() const {
        return width;
    }

    const size_t& getHeight() const {
        return height;
    }

    // Creates an empty, uncompressed frame with no dimensions.
    frame() = default;

    // Creates a zero-filled raw frame of w * h pixels in the given format.
    frame(size_t w, size_t h, colormap format = colormap::RGB)
        : width(w), height(h), colorFormat(format), cformat(compresstype::RAW) {
        _data.resize(width * height * channelCount(format));
        // Keep the bookkeeping consistent with setData().
        sourceSize = _data.size();
    }

    // Replaces the frame content with a copy of `data` and drops any
    // previously compressed representation.
    void setData(const std::vector<uint8_t>& data) {
        _data = data;
        cformat = compresstype::RAW;
        release(_compressedData);
        overheadmap.clear();
        sourceSize = _data.size();
    }

    // Raw bytes of the frame; empty while the frame is compressed.
    const std::vector<uint8_t>& getData() const {
        return _data;
    }

    // Run-Length Encoding (RLE) compression.
    //
    // Encodes the raw bytes as (run length, value) word pairs; a run is capped
    // at 65535 so its length fits one 16-bit word. RAW becomes RLE and DIFF
    // becomes DIFFRLE. Does nothing when there is no raw data or the frame is
    // already RLE encoded.
    // Throws std::runtime_error if raw bytes are present while `cformat`
    // claims another encoding; encoding then would leave `cformat` pointing
    // at the wrong decoder.
    frame& compressFrameRLE() {
        TIME_FUNCTION;
        if (_data.empty()) {
            return *this;
        }

        compresstype target = compresstype::RLE;
        switch (cformat) {
            case compresstype::RAW:  target = compresstype::RLE;     break;
            case compresstype::DIFF: target = compresstype::DIFFRLE; break;
            case compresstype::RLE:  return *this;
            default:
                throw std::runtime_error("RLE compression can only be applied to raw or DIFF data");
        }

        const size_t total = _data.size();
        std::vector<uint16_t> encoded;
        size_t i = 0;
        while (i < total) {
            const uint8_t value = _data[i];
            size_t run = 1;
            while (run < kMaxRunLength && i + run < total && _data[i + run] == value) {
                ++run;
            }
            encoded.push_back(static_cast<uint16_t>(run));
            encoded.push_back(value);
            i += run;
        }

        sourceSize = total;
        _compressedData = std::move(encoded);
        release(_data);
        cformat = target;
        return *this;
    }

    // LZ78 compression.
    //
    // Emits (phrase index, next byte) word pairs. Phrase index 0 is the empty
    // phrase; every emitted pair defines the next free index (1, 2, ...) until
    // 65535 indices are in use, after which the dictionary is frozen. The
    // decoder rebuilds the same dictionary from the stream, so nothing besides
    // the pairs is stored.
    // Throws std::runtime_error if the frame is not RAW.
    frame& compressFrameLZ78() {
        TIME_FUNCTION;
        if (_data.empty()) {
            return *this;
        }
        if (cformat != compresstype::RAW) {
            throw std::runtime_error("LZ78 compression can only be applied to raw data");
        }

        // Key = (phrase index << 8) | next byte. It needs 24 bits, so it must
        // not be squeezed into a 16-bit integer.
        std::unordered_map<uint32_t, uint16_t> phrases;
        std::vector<uint16_t> encoded;
        uint32_t nextIndex = 1;
        uint16_t current = 0;   // longest dictionary phrase matched so far (0 = none)
        uint16_t parent = 0;    // value of `current` before the last byte extended it

        for (const uint8_t byte : _data) {
            const uint32_t key = (static_cast<uint32_t>(current) << 8) | byte;
            const auto hit = phrases.find(key);
            if (hit != phrases.end()) {
                // Still inside a known phrase: keep extending the match.
                parent = current;
                current = hit->second;
                continue;
            }
            encoded.push_back(current);
            encoded.push_back(byte);
            if (nextIndex <= kMaxPhraseIndex) {
                phrases.emplace(key, static_cast<uint16_t>(nextIndex));
                ++nextIndex;
            }
            current = 0;
        }
        if (current != 0) {
            // The input ended in the middle of a known phrase. That phrase is
            // exactly phrase(parent) + last byte, so emit it in that form and
            // the decoder needs no padding or length information.
            encoded.push_back(parent);
            encoded.push_back(_data.back());
        }

        sourceSize = _data.size();
        _compressedData = std::move(encoded);
        release(_data);
        cformat = compresstype::LZ78;
        return *this;
    }

    // Differential compression
    frame& compressFrameDiff() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    // Huffman compression
    frame& compressFrameHuffman() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    // Combined compression methods
    frame& compressFrameZigZagRLE() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    frame& compressFrameDiffRLE() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }

    // Generic decompression that dispatches on the recorded compression type.
    // A RAW frame is returned unchanged.
    // Throws std::runtime_error for HUFFMAN or a corrupt stream, and
    // std::logic_error for DIFF / DIFFRLE, whose decoder is not implemented.
    frame& decompress() {
        switch (cformat) {
            case compresstype::RLE:
                return decompressFrameRLE();
            case compresstype::DIFF:
                return decompressFrameDiff();
            case compresstype::DIFFRLE:
                // For combined methods, first undo RLE, then the base method.
                decompressFrameRLE();
                cformat = compresstype::DIFF;
                return decompressFrameDiff();
            case compresstype::LZ78:
                return decompressFrameLZ78();
            case compresstype::HUFFMAN:
                throw std::runtime_error("Huffman decompression not fully implemented");
            case compresstype::RAW:
            default:
                return *this; // Already decompressed
        }
    }

    // Size in bytes of the explicitly stored dictionary: for every entry, its
    // 16-bit key plus the bytes of its sequence. 0 when no dictionary is stored.
    size_t getDictionarySize() const {
        size_t bytes = 0;
        for (const auto& entry : overheadmap) {
            bytes += sizeof(entry.first) + entry.second.size();
        }
        return bytes;
    }

    // Compressed size in bytes including dictionary overhead.
    size_t getTotalCompressedSize() const {
        return getCompressedDataSize() + getDictionarySize();
    }

    // Source bytes per compressed byte; 0.0 when the frame holds no
    // compressed data.
    double getCompressionRatio() const {
        if (_compressedData.empty() || sourceSize == 0) return 0.0;
        return static_cast<double>(sourceSize) / static_cast<double>(getTotalCompressedSize());
    }

    size_t getSourceSize() const {
        return sourceSize;
    }

    // Size of the compressed payload in bytes (two bytes per 16-bit word).
    size_t getCompressedDataSize() const {
        return _compressedData.size() * sizeof(uint16_t);
    }

    // Prints a multi-line compression report to std::cout.
    void printCompressionInfo() const {
        std::cout << "Compression Type: ";
        switch (cformat) {
            case compresstype::RLE:     std::cout << "RLE"; break;
            case compresstype::DIFF:    std::cout << "DIFF"; break;
            case compresstype::DIFFRLE: std::cout << "DIFF + RLE"; break;
            case compresstype::LZ78:    std::cout << "LZ78 (kinda)"; break;
            case compresstype::HUFFMAN: std::cout << "HUFFMAN"; break;
            case compresstype::RAW:     std::cout << "RAW (uncompressed)"; break;
            default:                    std::cout << "UNKNOWN"; break;
        }
        std::cout << '\n';

        std::cout << "Source Size: " << getSourceSize() << " bytes" << '\n';
        // Words and bytes are reported separately; getCompressedDataSize()
        // already returns bytes.
        std::cout << "Compressed data Size: " << _compressedData.size() << " 16-bit words" << '\n';
        std::cout << "Compressed Size: " << getCompressedDataSize() << " bytes" << '\n';

        if (cformat == compresstype::LZ78) {
            std::cout << "Dictionary Size: " << getDictionarySize() << " bytes" << '\n';
            std::cout << "Dictionary Entries: " << overheadmap.size() << '\n';
        }
        std::cout << "Total Compressed Size: " << getTotalCompressedSize() << " bytes" << '\n';

        const double compressionRatio = getCompressionRatio();
        std::cout << "Compression Ratio: " << compressionRatio << ":1" << '\n';
        if (compressionRatio > 1.0) {
            const double savings = (1.0 - (1.0 / compressionRatio)) * 100.0;
            std::cout << "Space Savings: " << savings << "%" << '\n';
        }
        std::cout << std::flush;
    }

    // Prints a one-line compression summary to std::cout.
    void printCompressionStats() const {
        std::cout << "[" << getCompressionTypeString() << "] "
                  << getSourceSize() << "B -> " << getTotalCompressedSize() << "B "
                  << "(ratio: " << getCompressionRatio() << ":1)" << std::endl;
    }

    // Get compression type as string
    std::string getCompressionTypeString() const {
        switch (cformat) {
            case compresstype::RLE:     return "RLE";
            case compresstype::DIFF:    return "DIFF";
            case compresstype::DIFFRLE: return "DIFF+RLE";
            case compresstype::LZ78:    return "LZ78";
            case compresstype::HUFFMAN: return "HUFFMAN";
            case compresstype::RAW:     return "RAW";
            default:                    return "UNKNOWN";
        }
    }

    compresstype getCompressionType() const {
        return cformat;
    }

    bool isCompressed() const {
        return cformat != compresstype::RAW;
    }

    // Drops all content and returns the memory to the allocator.
    // clear() keeps capacity and shrink_to_fit() is only a non-binding
    // request, so each container is swapped with an empty temporary instead.
    // The bookkeeping is reset too, so the frame does not keep claiming to
    // hold compressed data.
    void free() {
        release(overheadmap);
        release(_compressedData);
        release(_data);
        sourceSize = 0;
        cformat = compresstype::RAW;
    }

private:
    static constexpr size_t kMaxRunLength = 65535;      // largest run one RLE word can hold
    static constexpr uint32_t kMaxPhraseIndex = 65535;  // largest LZ78 phrase index one word can hold

    // Bytes per pixel for a color format.
    static size_t channelCount(colormap format) {
        switch (format) {
            case colormap::RGBA:
            case colormap::BGRA:
                return 4;
            case colormap::B:
                return 1;
            case colormap::RGB:
            case colormap::BGR:
            default:
                return 3;
        }
    }

    // Empties a container and gives its storage back.
    template <class Container>
    static void release(Container& c) {
        Container().swap(c);
    }

    // Returns `source` ordered from the longest sequence to the shortest.
    std::vector<std::vector<uint8_t>> sortvecs(std::vector<std::vector<uint8_t>> source) {
        std::sort(source.begin(), source.end(),
                  [](const std::vector<uint8_t>& a, const std::vector<uint8_t>& b) {
                      return a.size() > b.size();
                  });
        return source;
    }

    // Inverse of compressFrameLZ78().
    // Throws std::runtime_error if the frame is not LZ78 compressed or the
    // stream is malformed; the frame is left untouched in that case.
    frame& decompressFrameLZ78() {
        TIME_FUNCTION;
        if (cformat != compresstype::LZ78) {
            throw std::runtime_error("Data is not LZ78 compressed");
        }
        if (_compressedData.size() % 2 != 0) {
            throw std::runtime_error("something broke (decompressFrameLZ78)");
        }

        // Phrase i (i >= 1) is phrase prefixOf[i] followed by byte suffixOf[i].
        // Slot 0 stands for the empty phrase.
        std::vector<uint16_t> prefixOf(1, 0);
        std::vector<uint8_t> suffixOf(1, 0);
        std::vector<uint8_t> decoded;
        decoded.reserve(sourceSize);

        for (size_t i = 0; i < _compressedData.size(); i += 2) {
            const uint16_t prefix = _compressedData[i];
            const uint16_t word = _compressedData[i + 1];
            if (prefix >= prefixOf.size() || word > 0xFF) {
                throw std::runtime_error("Corrupt LZ78 stream");
            }

            // Walking the prefix chain yields the phrase back to front, so
            // collect it in place and reverse just that segment.
            const size_t phraseStart = decoded.size();
            for (uint16_t p = prefix; p != 0; p = prefixOf[p]) {
                decoded.push_back(suffixOf[p]);
            }
            std::reverse(decoded.begin() + static_cast<std::ptrdiff_t>(phraseStart), decoded.end());
            decoded.push_back(static_cast<uint8_t>(word));

            // Mirror the encoder: one new phrase per pair until the index
            // space is exhausted.
            if (prefixOf.size() <= kMaxPhraseIndex) {
                prefixOf.push_back(prefix);
                suffixOf.push_back(static_cast<uint8_t>(word));
            }
        }

        _data = std::move(decoded);
        release(_compressedData);
        cformat = compresstype::RAW;
        return *this;
    }

    // Inverse of compressFrameRLE(): expands every (run length, value) pair.
    // Throws std::runtime_error if the stream does not consist of whole pairs.
    frame& decompressFrameRLE() {
        TIME_FUNCTION;
        if (_compressedData.size() % 2 != 0) {
            throw std::runtime_error("something broke (decompressFrameRLE)");
        }

        std::vector<uint8_t> decompressed;
        decompressed.reserve(sourceSize);
        for (size_t i = 0; i < _compressedData.size(); i += 2) {
            const uint16_t run = _compressedData[i];
            const uint8_t value = static_cast<uint8_t>(_compressedData[i + 1]);
            decompressed.insert(decompressed.end(), run, value);
        }

        _data = std::move(decompressed);
        _compressedData.clear();
        cformat = compresstype::RAW;
        return *this;
    }

    // Collects candidate repeated sequences from the raw data.
    //
    // The data is cut into chunks of up to 65535 bytes. Every chunk longer
    // than 4 bytes is added to the result, and the bytes following the chunk
    // are scanned in parallel for places that start with the chunk's first
    // four bytes. Each such place is extended for as long as it keeps matching
    // the chunk; matches of 64 bytes or more are kept. The result lists the
    // chunks first, then matches of >= 128 bytes, then matches of 64..127
    // bytes, and never holds more than 65534 entries.
    std::vector<std::vector<uint8_t>> getRepeats() {
        TIME_FUNCTION;
        const size_t maxEntries = 65534;
        const size_t chunksize = 65535;
        const size_t minKeptLength = 64;
        const size_t dsize = _data.size();
        std::vector<std::vector<uint8_t>> result;

        // Match storage shared by the worker tasks, guarded by a mutex.
        struct ThreadSafeMatches {
            enum { kCap = 65534 };
            std::mutex mutex;
            std::vector<std::vector<uint8_t>> matches128plus;
            std::vector<std::vector<uint8_t>> matches64plus;

            void addMatch(std::vector<uint8_t>&& match, size_t length) {
                std::lock_guard<std::mutex> lock(mutex);
                if (length >= 128) {
                    if (matches128plus.size() < kCap) matches128plus.push_back(std::move(match));
                } else if (length >= 64) {
                    if (matches64plus.size() < kCap) matches64plus.push_back(std::move(match));
                }
            }
        };
        ThreadSafeMatches threadMatches;

        // hardware_concurrency() may report 0 when the value is unknown.
        const size_t workers = std::max<size_t>(1, std::thread::hardware_concurrency());

        size_t pos = 0;
        while (pos < dsize && result.size() < maxEntries) {
            const size_t chunkStart = pos;
            const size_t chunk_end = std::min(chunkStart + chunksize, dsize);
            const size_t chunkLen = chunk_end - chunkStart;
            pos = chunk_end;
            if (chunkLen <= 4) {
                continue;
            }

            result.emplace_back(_data.begin() + chunkStart, _data.begin() + chunk_end);

            // A candidate needs four readable bytes, so the last valid start
            // is dsize - 4. dsize >= 5 here, so the subtraction cannot wrap.
            const size_t searchLimit = dsize - 3;
            if (chunk_end >= searchLimit) {
                continue;
            }
            const size_t searchRange = searchLimit - chunk_end;
            const size_t blockSize = (searchRange + workers - 1) / workers;

            // The tasks only read _data, which is not modified until every
            // future below has been waited on.
            const uint8_t* chunk = _data.data() + chunkStart;
            std::vector<std::future<void>> futures;
            for (size_t t = 0; t < workers; ++t) {
                const size_t blockStart = chunk_end + t * blockSize;
                if (blockStart >= searchLimit) break;
                const size_t blockEnd = std::min(blockStart + blockSize, searchLimit);

                futures.push_back(std::async(std::launch::async,
                    [this, &threadMatches, chunk, chunkLen, dsize, minKeptLength, blockStart, blockEnd]() {
                        size_t searchpos = blockStart;
                        while (searchpos < blockEnd) {
                            // Cheap filter on the first 4 bytes.
                            if (!std::equal(chunk, chunk + 4, _data.data() + searchpos)) {
                                ++searchpos;
                                continue;
                            }

                            // Found a candidate, measure how far it matches.
                            size_t matchlength = 4;
                            while (matchlength < chunkLen &&
                                   searchpos + matchlength < dsize &&
                                   _data[searchpos + matchlength] == chunk[matchlength]) {
                                ++matchlength;
                            }

                            // Shorter matches are never stored, so skip the copy.
                            if (matchlength >= minKeptLength) {
                                threadMatches.addMatch(
                                    std::vector<uint8_t>(_data.begin() + searchpos,
                                                         _data.begin() + searchpos + matchlength),
                                    matchlength);
                            }
                            searchpos += matchlength;
                        }
                    }));
            }

            // Wait for all tasks of this chunk to complete.
            for (auto& future : futures) {
                future.get();
            }
        }

        // Merge results, longest class first, respecting the entry cap.
        for (auto& match : threadMatches.matches128plus) {
            if (result.size() >= maxEntries) break;
            result.push_back(std::move(match));
        }
        for (auto& match : threadMatches.matches64plus) {
            if (result.size() >= maxEntries) break;
            result.push_back(std::move(match));
        }

        return result;
    }

    frame& decompressFrameDiff() {
        // TODO
        throw std::logic_error("Function not yet implemented");
    }
};
|
|
|
|
|
|
std::ostream& operator<<(std::ostream& os, frame& f) {
|
|
os << "Frame[" << f.getWidth() << "x" << f.getHeight() << "] ";
|
|
|
|
// Color format
|
|
os << "Format: ";
|
|
switch (f.colorFormat) {
|
|
case frame::colormap::RGB: os << "RGB"; break;
|
|
case frame::colormap::RGBA: os << "RGBA"; break;
|
|
case frame::colormap::BGR: os << "BGR"; break;
|
|
case frame::colormap::BGRA: os << "BGRA"; break;
|
|
case frame::colormap::B: os << "Grayscale"; break;
|
|
default: os << "Unknown"; break;
|
|
}
|
|
|
|
// Compression info
|
|
os << " | Compression: " << f.getCompressionTypeString();
|
|
|
|
// Size info
|
|
if (f.isCompressed()) {
|
|
os << " | " << f.getSourceSize() << "B -> " << f.getTotalCompressedSize()
|
|
<< "B (ratio: " << std::fixed << std::setprecision(2) << f.getCompressionRatio() << ":1)";
|
|
} else {
|
|
os << " | Size: " << f.getData().size() << "B";
|
|
}
|
|
|
|
// Data status
|
|
os << " | Data: ";
|
|
if (!f.getData().empty()) {
|
|
os << "raw(" << f.getData().size() << " bytes)";
|
|
} else if (f.getCompressedDataSize() > 0) {
|
|
os << "compressed(" << f.getCompressedDataSize() << " words)";
|
|
} else {
|
|
os << "empty";
|
|
}
|
|
|
|
return os;
|
|
}
|
|
|
|
#endif |