1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
/* [=]===^=[ clear_buffer ]=================================================================^===[=] */
/*
 * clear_buffer: overwrite `buffer` with zero bytes.
 *
 * The byte count is sizeof(buffer), so the amount cleared is whatever the
 * declaration of `buffer` (outside this function) makes that expression.
 * NOTE(review): this covers the whole object only if `buffer` is an array
 * at this point, not a pointer -- confirm against its declaration.
 */
__attribute__((always_inline, hot))
static inline void clear_buffer(void)
{
	/* PROFILE_FUNCTION(); */
	(void)memset(buffer, 0, sizeof(buffer));
}
/* [=]===^=[ set_decay ]=================================================================^===[=] */
/* Blend weights written by set_decay(); after a call they always sum to 256. */
static uint16_t _old_weight;
static uint16_t _new_weight;

/*
 * set_decay: store the weight pair used for blending.
 *
 * old_weight  weight for the previous value; anything above 256 is
 *             clamped to 256.
 *
 * On return _old_weight is the clamped input and _new_weight is its
 * complement (256 - _old_weight).
 */
static void set_decay(uint16_t old_weight) {
	_old_weight = old_weight > 256 ? 256 : old_weight;
	/*
	 * Subtract the clamped value, not the raw argument: with the raw
	 * argument an input above 256 yields a negative int that wraps to a
	 * huge uint16_t instead of 0.
	 */
	_new_weight = (uint16_t)(256 - _old_weight);
}
/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */
// __attribute__((always_inline, hot))
// static inline void apply_phosphor_decay(void) {
// // PROFILE_FUNCTION();
// __m256i old_weight = _mm256_set1_epi16(_old_weight);
// __m256i new_weight = _mm256_set1_epi16(_new_weight);
// __m128i alpha_mask = _mm_set1_epi32(0x000000ff);
// uint32_t * restrict src = buffer;
// uint32_t * restrict dst = display_buffer;
// for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) {
// for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) {
// _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
// _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
// __m128i new_pixels = _mm_load_si128((__m128i*)&src[x]);
// __m128i old_pixels = _mm_load_si128((__m128i*)&dst[x]);
// __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
// __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);
// __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
// blended = _mm256_srli_epi16(blended, 8);
// __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
// final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
// _mm_store_si128((__m128i*)&dst[x], final_pixels);
// }
// }
// }
|