summaryrefslogtreecommitdiff
path: root/base/render.c
blob: e7118d05bd327eb239f8ebfc0287a7381c4f6a04 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79



// Get a pointer into the render area at pixel (x, y); RENDER_START(0, 0) is the top-left pixel.
// Rows of the backing buffer are BUFFER_WIDTH pixels apart (the same stride update_render_position()
// and clear_buffer() use), so the row offset is derived from BUFFER_WIDTH rather than a hard-coded
// shift by 11 (2048). The casts keep the row offset signed so a negative y still steps backwards.
#define RENDER_START(x, y) (state.display_buffer + ((int32_t)(y) * (int32_t)(BUFFER_WIDTH)) + (x))

// X coordinate (relative to the render area) at which something `w` pixels wide must start
// to be horizontally centered: half of the space left over after subtracting `w` from
// state.render_width.
// NOTE(review): if state.render_width is unsigned and w exceeds it, the subtraction wraps
// instead of going negative - confirm callers never pass w > render_width.
#define CENTER_X(w) ((state.render_width - (w)) >> 1)

/* [=]===^=[ update_render_position ]=================================================================^===[=] */
// Re-center the render rectangle inside the BUFFER_WIDTH x BUFFER_HEIGHT backing buffer and
// point state.display_buffer at the rectangle's top-left pixel.
static void update_render_position(void) {
	// Half of the unused space on each axis becomes the top / left margin.
	state.render_y = (BUFFER_HEIGHT - state.render_height) >> 1;
	state.render_x = (BUFFER_WIDTH - state.render_width) >> 1;

	// Skip render_y full rows of the backing buffer, then render_x pixels into that row.
	state.display_buffer = buffer + (state.render_y * BUFFER_WIDTH) + state.render_x;
}

/* [=]===^=[ change_resolution ]=================================================================^===[=] */
// Switch the render area to new_width x new_height.
// Does nothing when both dimensions already match the current state; otherwise stores the new
// size, re-centers the render area and calls setup_render_target().
static void change_resolution(uint32_t new_width, uint32_t new_height) {
	if(new_width == state.render_width && new_height == state.render_height) {
		return;		// resolution unchanged, nothing to rebuild
	}

	state.render_width = new_width;
	state.render_height = new_height;
	update_render_position();
	setup_render_target();
}

/* [=]===^=[ clear_buffer ]=================================================================^===[=] */
// Zero the render rectangle only (state.render_width x state.render_height pixels), one row at a
// time, starting at RENDER_START(0, 0). Pixels outside the rectangle are not touched.
__attribute__((always_inline, hot))
static inline void clear_buffer(void) {
	PROFILE_FUNCTION();
	uint32_t * restrict row = RENDER_START(0, 0);
	uint32_t line = 0;

	while(line < state.render_height) {
		memset(row, 0, state.render_width * 4);		// 4 bytes per 32-bit pixel
		row += BUFFER_WIDTH;						// next row of the backing buffer
		++line;
	}
}

/* [=]===^=[ set_decay ]=================================================================^===[=] */
// Blend weights read by apply_phosphor_decay(), on a 0..256 fixed-point scale.
// set_decay() keeps the invariant _old_weight + _new_weight == 256.
static uint16_t _old_weight;
static uint16_t _new_weight;

// Set how much of the previous frame persists in the blend.
//   old_weight: weight of the previous frame, 0 (none) .. 256 (all); larger values are clamped to 256.
// The new-frame weight is derived from the *clamped* value. Deriving it from the raw argument
// would make 256 - old_weight negative for old_weight > 256 and wrap to a huge uint16_t.
static void set_decay(uint16_t old_weight) {
	_old_weight = old_weight > 256 ? 256 : old_weight;
	_new_weight = 256 - _old_weight;
}

/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */
__attribute__((always_inline, hot))
static inline void apply_phosphor_decay(void) {
	PROFILE_FUNCTION();
	__m256i old_weight = _mm256_set1_epi16(_old_weight);
	__m256i new_weight = _mm256_set1_epi16(_new_weight);
	__m128i alpha_mask = _mm_set1_epi32(0x000000ff);
	uint32_t render_width = state.render_width;
	uint32_t render_height = state.render_height;
	uint32_t * restrict src = RENDER_START(0, 0);
	uint32_t * restrict dst = display_buffer;

	for(uint32_t y = 0; y < render_height; ++y, src += BUFFER_WIDTH, dst += render_width) {
		for(uint32_t x = 0; x < render_width; x += 4) {
			_mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
			_mm_prefetch((char*)&dst[x + 2 * render_width], _MM_HINT_T0);

			__m128i new_pixels = _mm_loadu_si128((__m128i*)&src[x]);
			__m128i old_pixels = _mm_loadu_si128((__m128i*)&dst[x]);

			__m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
			__m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);

			__m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
			blended = _mm256_srli_epi16(blended, 8);

			__m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
			final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
			_mm_storeu_si128((__m128i*)&dst[x], final_pixels);
		}
	}
}