summary | refs | log | tree | commit | diff
path: root/render.c
diff options
context:
space:
mode:
author Peter Fors <peter.fors@mindkiller.com> 2025-04-08 13:42:13 +0200
committer Peter Fors <peter.fors@mindkiller.com> 2025-04-08 13:42:13 +0200
commit c9bd7fecdb5d6c8954cf31efef910ed734386c70 (patch)
tree ab59d3ca3e256ee7a431d82a6412e9c323f418ed /render.c
parent 205dc47111125aa2f7458be948cd9cd434d0344c (diff)
2690fps
Diffstat (limited to 'render.c')
-rw-r--r-- render.c 58
1 files changed, 29 insertions, 29 deletions
diff --git a/render.c b/render.c
index 26b3c19..6e399ef 100644
--- a/render.c
+++ b/render.c
@@ -16,32 +16,32 @@ static void set_decay(uint16_t old_weight) {
}
/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */
-__attribute__((always_inline, hot))
-static inline void apply_phosphor_decay(void) {
- // PROFILE_FUNCTION();
- __m256i old_weight = _mm256_set1_epi16(_old_weight);
- __m256i new_weight = _mm256_set1_epi16(_new_weight);
- __m128i alpha_mask = _mm_set1_epi32(0x000000ff);
- uint32_t * restrict src = buffer;
- uint32_t * restrict dst = display_buffer;
-
- for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) {
- for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) {
- _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
- _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
-
- __m128i new_pixels = _mm_load_si128((__m128i*)&src[x]);
- __m128i old_pixels = _mm_load_si128((__m128i*)&dst[x]);
-
- __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
- __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);
-
- __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
- blended = _mm256_srli_epi16(blended, 8);
-
- __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
- final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
- _mm_store_si128((__m128i*)&dst[x], final_pixels);
- }
- }
-}
+// __attribute__((always_inline, hot))
+// static inline void apply_phosphor_decay(void) {
+// // PROFILE_FUNCTION();
+// __m256i old_weight = _mm256_set1_epi16(_old_weight);
+// __m256i new_weight = _mm256_set1_epi16(_new_weight);
+// __m128i alpha_mask = _mm_set1_epi32(0x000000ff);
+// uint32_t * restrict src = buffer;
+// uint32_t * restrict dst = display_buffer;
+
+// for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) {
+// for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) {
+// _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
+// _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
+
+// __m128i new_pixels = _mm_load_si128((__m128i*)&src[x]);
+// __m128i old_pixels = _mm_load_si128((__m128i*)&dst[x]);
+
+// __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
+// __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);
+
+// __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
+// blended = _mm256_srli_epi16(blended, 8);
+
+// __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
+// final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
+// _mm_store_si128((__m128i*)&dst[x], final_pixels);
+// }
+// }
+// }