diff options
Diffstat (limited to 'base/render.c')
| -rw-r--r-- | base/render.c | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/base/render.c b/base/render.c new file mode 100644 index 0000000..e7118d0 --- /dev/null +++ b/base/render.c @@ -0,0 +1,79 @@ + + + +// Get pointer to where in the buffer to render RENDER_START(0,0) is top left +#define RENDER_START(x, y) (state.display_buffer + ((y) << 11) + (x)) + +// Center X Coordinate for Rendering +#define CENTER_X(w) ((state.render_width - (w)) >> 1) + +/* [=]===^=[ update_render_position ]=================================================================^===[=] */ +static void update_render_position(void) { + state.render_x = (BUFFER_WIDTH - state.render_width) >> 1; + state.render_y = (BUFFER_HEIGHT - state.render_height) >> 1; + state.display_buffer = buffer + (state.render_y * BUFFER_WIDTH) + state.render_x; +} + +/* [=]===^=[ change_resolution ]=================================================================^===[=] */ +static void change_resolution(uint32_t new_width, uint32_t new_height) { + if(new_width != state.render_width || new_height != state.render_height) { + state.render_width = new_width; + state.render_height = new_height; + update_render_position(); + setup_render_target(); + } +} + +/* [=]===^=[ clear_buffer ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void clear_buffer(void) { + PROFILE_FUNCTION(); + uint32_t * restrict dst = RENDER_START(0, 0); + + for(uint32_t i = 0; i < state.render_height; i++) { + memset(dst, 0, state.render_width * 4); + dst += BUFFER_WIDTH; + } +} + +/* [=]===^=[ set_decay ]=================================================================^===[=] */ +static uint16_t _old_weight; +static uint16_t _new_weight; +static void set_decay(uint16_t old_weight) { + _old_weight = old_weight ? (old_weight > 256 ? 256 : old_weight) : 0; + _new_weight = 256 - old_weight; +} + +/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void apply_phosphor_decay(void) { + PROFILE_FUNCTION(); + __m256i old_weight = _mm256_set1_epi16(_old_weight); + __m256i new_weight = _mm256_set1_epi16(_new_weight); + __m128i alpha_mask = _mm_set1_epi32(0x000000ff); + uint32_t render_width = state.render_width; + uint32_t render_height = state.render_height; + uint32_t * restrict src = RENDER_START(0, 0); + uint32_t * restrict dst = display_buffer; + + for(uint32_t y = 0; y < render_height; ++y, src += BUFFER_WIDTH, dst += render_width) { + for(uint32_t x = 0; x < render_width; x += 4) { + _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0); + _mm_prefetch((char*)&dst[x + 2 * render_width], _MM_HINT_T0); + + __m128i new_pixels = _mm_loadu_si128((__m128i*)&src[x]); + __m128i old_pixels = _mm_loadu_si128((__m128i*)&dst[x]); + + __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels); + __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels); + + __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight)); + blended = _mm256_srli_epi16(blended, 8); + + __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1)); + final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask)); + _mm_storeu_si128((__m128i*)&dst[x], final_pixels); + } + } +} + |
