summaryrefslogtreecommitdiff
path: root/render.c
blob: 6e399efd59ec9690bcce9f04db3152a82ba47ef1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47


/* [=]===^=[ clear_buffer ]=================================================================^===[=] */
// Zero-fills `buffer` (declared outside this section) with a single memset.
// NOTE(review): sizeof(buffer) covers the whole buffer only if `buffer` is an
// array visible at this point, not a pointer; confirm against its declaration.
__attribute__((always_inline, hot))
static inline void clear_buffer(void) {
	// PROFILE_FUNCTION();
	memset(buffer, 0, sizeof(buffer));
}

/* [=]===^=[ set_decay ]=================================================================^===[=] */
// Blend weights written by set_decay(). After a call they always sum to 256.
// The commented-out apply_phosphor_decay below multiplies channels by these
// and shifts right by 8, so 256 acts as a weight of 1.0 there.
static uint16_t _old_weight;
static uint16_t _new_weight;

// Sets the pair of blend weights.
//   old_weight: weight for the previous frame; values above 256 are clamped
//               to 256.
// Stores the clamped value in _old_weight and its complement to 256 in
// _new_weight.
static void set_decay(uint16_t old_weight) {
	_old_weight = old_weight > 256 ? 256 : old_weight;
	// Derive the complement from the clamped value: using the raw argument
	// would make 256 - old_weight negative for inputs above 256 and wrap to a
	// huge value when stored into uint16_t.
	_new_weight = (uint16_t)(256 - _old_weight);
}

/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */
// __attribute__((always_inline, hot))
// static inline void apply_phosphor_decay(void) {
// 	// PROFILE_FUNCTION();
// 	__m256i old_weight = _mm256_set1_epi16(_old_weight);
// 	__m256i new_weight = _mm256_set1_epi16(_new_weight);
// 	__m128i alpha_mask = _mm_set1_epi32(0x000000ff);
// 	uint32_t * restrict src = buffer;
// 	uint32_t * restrict dst = display_buffer;

// 	for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) {
// 		for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) {
// 			_mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
// 			_mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);

// 			__m128i new_pixels = _mm_load_si128((__m128i*)&src[x]);
// 			__m128i old_pixels = _mm_load_si128((__m128i*)&dst[x]);

// 			__m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
// 			__m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);

// 			__m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
// 			blended = _mm256_srli_epi16(blended, 8);

// 			__m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
// 			final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
// 			_mm_store_si128((__m128i*)&dst[x], final_pixels);
// 		}
// 	}
// }