diff options
| author | Peter Fors <peter.fors@mindkiller.com> | 2025-04-03 20:02:00 +0200 |
|---|---|---|
| committer | Peter Fors <peter.fors@mindkiller.com> | 2025-04-03 20:02:00 +0200 |
| commit | 6274071e3857c1640cc5aef804cb86509ab312f9 (patch) | |
| tree | 1a4e56b3c3b4bfb4d8f0d2f588487d6e227c3b27 /render.c | |
| parent | 971e51eebbf088f1ac590da1fc57e803eb1ee8cf (diff) | |
Move to glfw
Diffstat (limited to 'render.c')
| -rw-r--r-- | render.c | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/render.c b/render.c new file mode 100644 index 0000000..1dc90aa --- /dev/null +++ b/render.c @@ -0,0 +1,47 @@ + + +/* [=]===^=[ clear_buffer ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void clear_buffer(void) { + // PROFILE_FUNCTION(); + memset(buffer, 0, sizeof(buffer)); +} + +/* [=]===^=[ set_decay ]=================================================================^===[=] */ +static uint16_t _old_weight; +static uint16_t _new_weight; +static void set_decay(uint16_t old_weight) { + _old_weight = old_weight ? (old_weight > 256 ? 256 : old_weight) : 0; + _new_weight = 256 - old_weight; +} + +/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void apply_phosphor_decay(void) { + // PROFILE_FUNCTION(); + __m256i old_weight = _mm256_set1_epi16(_old_weight); + __m256i new_weight = _mm256_set1_epi16(_new_weight); + __m128i alpha_mask = _mm_set1_epi32(0x000000ff); + uint32_t * restrict src = buffer; + uint32_t * restrict dst = display_buffer; + + for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) { + for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) { + _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0); + _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0); + + __m128i new_pixels = _mm_loadu_si128((__m128i*)&src[x]); + __m128i old_pixels = _mm_loadu_si128((__m128i*)&dst[x]); + + __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels); + __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels); + + __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight)); + blended = _mm256_srli_epi16(blended, 8); + + __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1)); + final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask)); + _mm_storeu_si128((__m128i*)&dst[x], final_pixels); + } + } +} |
