#include #include #include #include #include #include #include "../util/grid/grid2.hpp" #include "../util/output/aviwriter.hpp" #include "../util/timing_decorator.cpp" struct AnimationConfig { int width = 512; int height = 512; int totalFrames = 240; float fps = 30.0f; int numSeeds = 1; }; const float PI4 = M_PI / 4.0f; const float PI43 = PI4 * 3.0f; struct SeedDataSoA { // Use alignas to ensure 16-byte alignment for SSE instructions std::vector sx, sy; // Seed X and Y positions std::vector sr, sg, sb, sa; // Seed R, G, B, A colors size_t count = 0; void reserve(size_t n) { sx.reserve(n); sy.reserve(n); sr.reserve(n); sg.reserve(n); sb.reserve(n); sa.reserve(n); } }; SeedDataSoA convertSeedsToSoA(const std::vector& points, const std::vector& colors) { TIME_FUNCTION; SeedDataSoA soaData; size_t numSeeds = points.size(); // Pad to the nearest multiple of 4 for clean SIMD loops size_t paddedSize = (numSeeds + 3) & ~3; soaData.count = paddedSize; soaData.reserve(paddedSize); for (size_t i = 0; i < numSeeds; ++i) { soaData.sx.push_back(points[i].x); soaData.sy.push_back(points[i].y); soaData.sr.push_back(colors[i].r); soaData.sg.push_back(colors[i].g); soaData.sb.push_back(colors[i].b); soaData.sa.push_back(colors[i].a); } // Add padding elements for (size_t i = numSeeds; i < paddedSize; ++i) { soaData.sx.push_back(0); soaData.sy.push_back(0); soaData.sr.push_back(0); soaData.sg.push_back(0); soaData.sb.push_back(0); soaData.sa.push_back(0); } std::cout << "Converted " << numSeeds << " seeds to SoA format (padded to " << paddedSize << ")" << std::endl; return soaData; } // Helper for a horizontal add of a __m128 register inline float horizontal_add(__m128 v) { __m128 shuf = _mm_movehdup_ps(v); // {v.z, v.z, v.w, v.w} __m128 sums = _mm_add_ps(v, shuf); // {v.x+v.z, v.y+v.z, v.z+v.w, v.w+v.w} shuf = _mm_movehl_ps(shuf, sums); // {v.z+v.w, v.w+v.w, ...} sums = _mm_add_ss(sums, shuf); // adds lowest float return _mm_cvtss_f32(sums); } // Non-optimized atan2 for a SIMD vector. For a real high-performance scenario, // you would use a library like SLEEF or a polynomial approximation. inline __m128 _mm_atan2_ps(__m128 y, __m128 x) { float y_lanes[4], x_lanes[4]; _mm_storeu_ps(y_lanes, y); _mm_storeu_ps(x_lanes, x); for(int i = 0; i < 4; ++i) { y_lanes[i] = std::atan2(y_lanes[i], x_lanes[i]); } return _mm_loadu_ps(y_lanes); } bool initializeGrid(Grid2& grid, const AnimationConfig& config) { TIME_FUNCTION; std::cout << "Initializing grayscale grid..." << std::endl; for (int y = 1; y < config.height; ++y) { for (int x = 1; x < config.width; ++x) { float gradient = (x + y) / float(config.width + config.height - 2); Vec2 position(static_cast(x), static_cast(y)); Vec4 color(gradient, gradient, gradient, 1.0f); grid.addObject(position, color, 1.0f); } } std::cout << "Grayscale grid created with " << config.width * config.height << " objects" << std::endl; return true; } std::pair, std::vector> generateSeedPoints(const AnimationConfig& config) { TIME_FUNCTION; std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<> xDist(0, config.width - 1); std::uniform_int_distribution<> yDist(0, config.height - 1); std::uniform_real_distribution<> colorDist(0.2f, 0.8f); std::vector seedPoints; std::vector seedColors; for (int i = 0; i < config.numSeeds; ++i) { seedPoints.emplace_back(xDist(gen), yDist(gen)); seedColors.emplace_back(colorDist(gen), colorDist(gen), colorDist(gen), 1.0f); // Alpha fixed to 1.0 } std::cout << "Generated " << config.numSeeds << " seed points for color propagation" << std::endl; return {seedPoints, seedColors}; } Vec4 calculateInfluencedColor(const Vec2& position, const Vec4& originalColor, float progress, const SeedDataSoA& seeds, const AnimationConfig& config) { const __m128 v_max_dist = _mm_set1_ps(std::max(config.width, config.height) * 0.6f); const __m128 v_progress = _mm_set1_ps(progress); const __m128 v_one = _mm_set1_ps(1.0f); const __m128 v_zero = _mm_setzero_ps(); const __m128 v_pi4 = _mm_set1_ps(PI4); const __m128 v_pi43 = _mm_set1_ps(PI43); const __m128 v_neg_zero = _mm_set1_ps(-0.0f); // For fast absolute value // Broadcast pixel position into SIMD registers const __m128 v_pos_x = _mm_set1_ps(position.x); const __m128 v_pos_y = _mm_set1_ps(position.y); // Accumulators for color channel updates __m128 r_updates = _mm_setzero_ps(); __m128 g_updates = _mm_setzero_ps(); __m128 b_updates = _mm_setzero_ps(); __m128 a_updates = _mm_setzero_ps(); // Process 4 seeds at a time for (size_t s = 0; s < seeds.count; s += 4) { // --- Load data for 4 seeds --- const __m128 seed_x = _mm_load_ps(&seeds.sx[s]); const __m128 seed_y = _mm_load_ps(&seeds.sy[s]); // --- Calculate distance and influence --- const __m128 dir_x = _mm_sub_ps(v_pos_x, seed_x); const __m128 dir_y = _mm_sub_ps(v_pos_y, seed_y); const __m128 dist_sq = _mm_add_ps(_mm_mul_ps(dir_x, dir_x), _mm_mul_ps(dir_y, dir_y)); const __m128 dist = _mm_sqrt_ps(dist_sq); __m128 influence = _mm_sub_ps(v_one, _mm_div_ps(dist, v_max_dist)); influence = _mm_max_ps(v_zero, influence); // clamp to 0 // --- Calculate full potential color contribution --- const __m128 influence_progress = _mm_mul_ps(influence, v_progress); const __m128 contrib_r = _mm_mul_ps(_mm_load_ps(&seeds.sr[s]), influence_progress); const __m128 contrib_g = _mm_mul_ps(_mm_load_ps(&seeds.sg[s]), influence_progress); const __m128 contrib_b = _mm_mul_ps(_mm_load_ps(&seeds.sb[s]), influence_progress); const __m128 contrib_a = _mm_mul_ps(_mm_load_ps(&seeds.sa[s]), influence_progress); // --- Branchless Masking based on Angle --- const __m128 angle = _mm_atan2_ps(dir_y, dir_x); const __m128 abs_angle = _mm_andnot_ps(v_neg_zero, angle); // fast abs // Create masks for each condition const __m128 mask_right = _mm_cmplt_ps(abs_angle, v_pi4); // abs(angle) < PI4 const __m128 mask_left = _mm_cmpgt_ps(abs_angle, v_pi43); // abs(angle) > PI43 // The "else if" logic is tricky. We need to ensure mutual exclusivity. // mask_below is true if (angle > 0) AND NOT (right OR left) const __m128 is_not_rl = _mm_andnot_ps(mask_right, _mm_andnot_ps(mask_left, v_one)); const __m128 mask_below = _mm_and_ps(_mm_cmpgt_ps(angle, v_zero), is_not_rl); // The "else" case (above) is whatever is left over. // mask_above is true if NOT (right OR left OR below) const __m128 mask_above = _mm_andnot_ps(mask_below, is_not_rl); // --- Accumulate updates using masks --- // Add contribution only where mask is active (all 1s) r_updates = _mm_add_ps(r_updates, _mm_and_ps(contrib_r, mask_above)); g_updates = _mm_add_ps(g_updates, _mm_and_ps(contrib_g, mask_below)); b_updates = _mm_add_ps(b_updates, _mm_and_ps(contrib_b, mask_left)); a_updates = _mm_add_ps(a_updates, _mm_and_ps(contrib_a, mask_right)); } // --- Horizontal Reduction: Sum the updates from all lanes --- Vec4 newColor = originalColor; newColor.r += horizontal_add(r_updates); newColor.g += horizontal_add(g_updates); newColor.b += horizontal_add(b_updates); newColor.a += horizontal_add(a_updates); // --- Apply fmod(x, 1.0) which is x - floor(x) for positive numbers --- // Can do this with SIMD as well for the final color vector __m128 final_color_v = _mm_loadu_ps(&newColor.r); final_color_v = _mm_sub_ps(final_color_v, _mm_floor_ps(final_color_v)); _mm_storeu_ps(&newColor.r, final_color_v); return newColor.clampColor(); } void updateColorsForFrame(Grid2& grid, float progress, const SeedDataSoA& seeds, const AnimationConfig& config) { TIME_FUNCTION; grid.bulkUpdateColors([&](size_t id, const Vec2& pos, const Vec4& currentColor) { return calculateInfluencedColor(pos, currentColor, progress, seeds, config); }); } std::vector convertFrameToBGR(Grid2& grid, const AnimationConfig& config) { TIME_FUNCTION; int frameWidth, frameHeight; std::vector bgrData; grid.getGridRegionAsBGR(Vec2(0,0), Vec2(config.width, config.height), frameWidth, frameHeight, bgrData); std::vector bgrFrame(frameWidth * frameHeight * 3); #pragma omp parallel for for (int i = 0; i < frameWidth * frameHeight; ++i) { bgrFrame[i * 3] = static_cast(std::clamp(bgrData[i * 3], 0, 255)); bgrFrame[i * 3 + 1] = static_cast(std::clamp(bgrData[i * 3 + 1], 0, 255)); bgrFrame[i * 3 + 2] = static_cast(std::clamp(bgrData[i * 3 + 2], 0, 255)); } return bgrFrame; } std::vector> createAnimationFrames(Grid2& grid, const SeedDataSoA& seeds, const AnimationConfig& config) { TIME_FUNCTION; std::vector> frames; for (int frame = 0; frame < config.totalFrames; ++frame) { std::cout << "Processing frame " << frame + 1 << "/" << config.totalFrames << std::endl; float progress = static_cast(frame) / (config.totalFrames - 1); updateColorsForFrame(grid, progress, seeds, config); std::vector bgrFrame = convertFrameToBGR(grid, config); frames.push_back(bgrFrame); } return frames; } void printSuccessMessage(const std::string& filename, const std::vector& seedPoints, const std::vector& seedColors, const AnimationConfig& config) { std::cout << "\nSuccessfully saved chromatic transformation animation to: " << filename << std::endl; std::cout << "Video details:" << std::endl; std::cout << " - Dimensions: " << config.width << " x " << config.height << std::endl; std::cout << " - Frames: " << config.totalFrames << " (" << config.totalFrames/config.fps << " seconds at " << config.fps << "fps)" << std::endl; std::cout << " - Seed points: " << config.numSeeds << std::endl; std::cout << "\nSeed points used:" << std::endl; for (int i = 0; i < config.numSeeds; ++i) { std::cout << " Seed " << i + 1 << ": Position " << seedPoints[i] << ", Color " << seedColors[i].toColorString() << std::endl; } FunctionTimer::printStats(FunctionTimer::Mode::ENHANCED); } void printErrorMessage(const std::vector>& frames, const AnimationConfig& config) { std::cerr << "Failed to save AVI file!" << std::endl; std::cerr << "Debug info:" << std::endl; std::cerr << " - Frames count: " << frames.size() << std::endl; if (!frames.empty()) { std::cerr << " - First frame size: " << frames[0].size() << std::endl; std::cerr << " - Expected frame size: " << config.width * config.height * 3 << std::endl; } std::cerr << " - Width: " << config.width << ", Height: " << config.height << std::endl; } bool saveAnimation(const std::vector>& frames, const std::vector& seedPoints, const std::vector& seedColors, const AnimationConfig& config) { TIME_FUNCTION; std::string filename = "output/chromatic_transformation.avi"; std::cout << "Attempting to save AVI file: " << filename << std::endl; bool success = AVIWriter::saveAVI(filename, frames, config.width, config.height, config.fps); if (success) { printSuccessMessage(filename, seedPoints, seedColors, config); return true; } else { printErrorMessage(frames, config); return false; } } int main() { std::cout << "Creating chromatic transformation animation..." << std::endl; AnimationConfig config; Grid2 grid; if (!initializeGrid(grid, config)) return 1; auto [seedPoints, seedColors] = generateSeedPoints(config); auto seeds_SoA = convertSeedsToSoA(seedPoints, seedColors); // Create animation frames using the SIMD-friendly data auto frames = createAnimationFrames(grid, seeds_SoA, config); if (!saveAnimation(frames, seedPoints, seedColors, config)) return 1; return 0; }