diff options
| author | Peter Fors <peter.fors@mindkiller.com> | 2025-04-28 22:19:43 +0200 |
|---|---|---|
| committer | Peter Fors <peter.fors@mindkiller.com> | 2025-04-28 22:19:43 +0200 |
| commit | e08b851c79ae9a7fc0a2066e49110dc7fb426bce (patch) | |
| tree | c8458daee7201983903cf04413ff9a6072084028 /base/overlay.c | |
| parent | c40f7421d8c1ccbe008dbd2191c6642625ae4b83 (diff) | |
reverted rewrite of ppu, optimized what functions should be forced inline, gained ~2.5% performance
Diffstat (limited to 'base/overlay.c')
| -rw-r--r-- | base/overlay.c | 375 |
1 files changed, 0 insertions, 375 deletions
diff --git a/base/overlay.c b/base/overlay.c deleted file mode 100644 index 2316b24..0000000 --- a/base/overlay.c +++ /dev/null @@ -1,375 +0,0 @@ -#include "data/font_info.h" -INCBIN(_font_texture, "data/font.ugg"); -struct ugg *font_texture_data = (struct ugg*)_font_texture_data; - -/* Overlay state structure */ -struct overlay { - GLuint vao; - GLuint vbo; - GLuint ebo; - GLuint program; - GLint loc_proj; - GLint loc_tex; - GLint loc_color; - GLint loc_pos_offset; - GLuint font_texture; - GLuint white_texture; -}; - -static struct overlay overlay_state; -static float mat[16]; - -/* ------------------------------------------------------------------------- */ -/* Shader sources for a top-left orthographic approach */ -/* ------------------------------------------------------------------------- */ -static const char* overlay_vertex_shader_src = -"#version 140\n" -"in vec2 in_pos;\n" -"in vec2 in_uv;\n" -"uniform mat4 u_projection;\n" -"uniform vec2 u_pos_offset;\n" -"out vec2 v_uv;\n" -"void main() {\n" -" vec2 pos = in_pos + u_pos_offset;\n" -" gl_Position = u_projection * vec4(pos, 0.0, 1.0);\n" -" v_uv = in_uv;\n" -"}\n"; - -static const char* overlay_fragment_shader_src = -"#version 140\n" -"uniform sampler2D u_font_texture;\n" -"uniform vec4 u_color;\n" -"in vec2 v_uv;\n" -"out vec4 frag_color;\n" -"void main() {\n" -" vec4 tex_sample = texture(u_font_texture, v_uv);\n" -" float alpha = tex_sample.a;\n" -" frag_color = vec4(u_color.rgb * alpha, u_color.a * alpha);\n" -"}\n"; - -/* ------------------------------------------------------------------------- */ -/* Shader helpers */ -/* ------------------------------------------------------------------------- */ -__attribute__((cold, noinline, section(".init_section"))) -static GLuint overlay_compile_shader(const char *source, GLenum type) { - GLuint shader = glCreateShader(type); - if(shader == 0) { - fprintf(stderr, "Error creating shader of type %d.\n", type); - return 0; - } - - glShaderSource(shader, 1, &source, 0); - glCompileShader(shader); - - /* Check for compilation errors */ - GLint success; - glGetShaderiv(shader, GL_COMPILE_STATUS, &success); - if(!success) { - GLint log_size = 0; - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_size); - char log[2048]; - glGetShaderInfoLog(shader, log_size, 0, log); - fprintf(stderr, "Shader compilation failed:\n%s\n", log); - glDeleteShader(shader); - return 0; - } - - return shader; -} - -__attribute__((cold, noinline, section(".init_section"))) -static GLuint overlay_create_program(const char *vs_source, const char *fs_source) { - GLuint vs = overlay_compile_shader(vs_source, GL_VERTEX_SHADER); - GLuint fs = overlay_compile_shader(fs_source, GL_FRAGMENT_SHADER); - GLuint prog = glCreateProgram(); - - glAttachShader(prog, vs); - glAttachShader(prog, fs); - glLinkProgram(prog); - - /* Check for linking errors */ - GLint success; - glGetProgramiv(prog, GL_LINK_STATUS, &success); - if(!success) { - /* Get and print log */ - GLint log_size = 0; - glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &log_size); - char log[2048]; - glGetProgramInfoLog(prog, log_size, 0, log); - fprintf(stderr, "Program linking failed:\n%s\n", log); - glDeleteProgram(prog); - glDeleteShader(vs); - glDeleteShader(fs); - return 0; - } - - /* Shaders can be deleted after linking */ - glDeleteShader(vs); - glDeleteShader(fs); - - return prog; -} - -static void overlay_make_ortho_top_left(float w, float h, float m[16]) { - m[0] = 2.0f / w; - m[5] = -2.0f / h; - m[10] = -1.0f; - m[12] = -1.0f; - m[13] = 1.0f; - m[15] = 1.0f; -} - -#define MAX_GLYPHS 128 // NOTE(peter): max glyphs per ROW -#define MAX_VERTICES (MAX_PROFILING_ENTRIES * MAX_GLYPHS * 4 * 4) -float vertices[MAX_VERTICES] __attribute__((section(".bss"))); - -static void overlay_init(void) { - overlay_state.program = overlay_create_program(overlay_vertex_shader_src, overlay_fragment_shader_src); - overlay_state.loc_proj = glGetUniformLocation(overlay_state.program, "u_projection"); - overlay_state.loc_tex = glGetUniformLocation(overlay_state.program, "u_font_texture"); - overlay_state.loc_color = glGetUniformLocation(overlay_state.program, "u_color"); - overlay_state.loc_pos_offset = glGetUniformLocation(overlay_state.program, "u_pos_offset"); - - glGenVertexArrays(1, &overlay_state.vao); - glBindVertexArray(overlay_state.vao); - - glGenBuffers(1, &overlay_state.vbo); - glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); - glBufferData(GL_ARRAY_BUFFER, MAX_VERTICES * sizeof(float), 0, GL_DYNAMIC_DRAW); - - glGenBuffers(1, &overlay_state.ebo); - - unsigned short indices[MAX_PROFILING_ENTRIES * MAX_GLYPHS * 6]; - size_t index_offset = 0; - size_t vertex_offset = 0; - - for(size_t i = 0; i < MAX_PROFILING_ENTRIES * MAX_GLYPHS; i++) { - indices[index_offset++] = vertex_offset + 0; - indices[index_offset++] = vertex_offset + 1; - indices[index_offset++] = vertex_offset + 2; - indices[index_offset++] = vertex_offset + 0; - indices[index_offset++] = vertex_offset + 2; - indices[index_offset++] = vertex_offset + 3; - vertex_offset += 4; - } - - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, overlay_state.ebo); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); - - GLint in_pos_attrib = glGetAttribLocation(overlay_state.program, "in_pos"); - GLint in_uv_attrib = glGetAttribLocation(overlay_state.program, "in_uv"); - - glEnableVertexAttribArray(in_pos_attrib); - glVertexAttribPointer(in_pos_attrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); - glEnableVertexAttribArray(in_uv_attrib); - glVertexAttribPointer(in_uv_attrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float))); - - glBindVertexArray(0); - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - - - uint8_t *rgba_data = mks_alloc(512 * 512 * 4); - uint8_t *dst = rgba_data; - for(uint32_t i = 0; i < 512 * 512; ++i) { - uint8_t alpha = font_texture_data->data[i]; - if(alpha) { - *dst++ = 255; - *dst++ = 255; - *dst++ = 255; - *dst++ = alpha; - - } else { - *dst++ = 0; - *dst++ = 0; - *dst++ = 0; - *dst++ = 0; - } - } - glGenTextures(1, &overlay_state.font_texture); - glBindTexture(GL_TEXTURE_2D, overlay_state.font_texture); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 512, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgba_data); - glGenerateMipmap(GL_TEXTURE_2D); - mks_free(rgba_data); - - glGenTextures(1, &overlay_state.white_texture); - glBindTexture(GL_TEXTURE_2D, overlay_state.white_texture); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - uint32_t white[] = { 0xffffffff }; - glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, 1, 1, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, white); - - glBindTexture(GL_TEXTURE_2D, 0); -} - -__attribute__((cold, noinline, section(".init_section"))) -static void overlay_shutdown(void) { - glDeleteProgram(overlay_state.program); - glDeleteBuffers(1, &overlay_state.vbo); - glDeleteBuffers(1, &overlay_state.ebo); - glDeleteVertexArrays(1, &overlay_state.vao); - glDeleteTextures(1, &overlay_state.font_texture); -} - -static void overlay_render_rect(float x1, float y1, float x2, float y2, float r, float g, float b, float a) { - float vertices[4 * 4] = { - /* pos.x, pos.y, u, v */ - x1, y1, 0.f, 0.f, // Top-left - x2, y1, 1.f, 0.f, // Top-right - x2, y2, 1.f, 1.f, // Bottom-right - x1, y2, 0.f, 1.f // Bottom-left - }; - - glBindVertexArray(overlay_state.vao); - glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices); - glUseProgram(overlay_state.program); - glUniform4f(overlay_state.loc_color, r, g, b, a); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, overlay_state.white_texture); - glUniform1i(overlay_state.loc_tex, 0); - glUniform2f(overlay_state.loc_pos_offset, 0.0f, 0.0f); - glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); - glBindTexture(GL_TEXTURE_2D, 0); - glBindVertexArray(0); -} - -__attribute__((always_inline)) -static inline void overlay_render_text_line(uint8_t **lines, float x, float y, float r, float g, float b, float a) { - uint32_t vertex_offset = 0; - uint32_t glyph_count = 0; - - glBindVertexArray(overlay_state.vao); - glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); - - float target_width = 24.f * .40f; - float target_height = 48.f * .40f; - - // Convert font units to pixels - float _scale = 48.0f / (1900 - (-480)); - float scaled_ascent = 1900 * _scale; - float scale_x = target_width / 24.0f; - float scale_y = target_height / 48.0f; - - // **Apply baseline correction once (not per character!)** - y += scaled_ascent * scale_y; - - for(int i = 0; i < MAX_PROFILING_ENTRIES; i++) { - uint8_t *text = lines[i]; - if(!text) continue; - - float cx = x; - while(*text) { - uint8_t c = *text++; - struct glyph_info *g = &glyph_data[c]; - - // Normalize texture coordinates - float u0 = g->x / 512.f; - float v0 = g->y / 512.f; - float u1 = (g->x + g->width) / 512.f; - float v1 = (g->y + g->height) / 512.f; - - // Apply width & height scaling - float glyph_width = g->width * scale_x; - float glyph_height = g->height * scale_y; - - // **Fix baseline positioning** - float x0 = roundf(cx + (g->x_offset * scale_x)); - float y0 = roundf(y + (g->y_offset * scale_y)); - float x1 = x0 + glyph_width; - float y1 = y0 + glyph_height; - - // Generate quad for the character - vertices[vertex_offset + 0] = x0; - vertices[vertex_offset + 1] = y0; - vertices[vertex_offset + 2] = u0; - vertices[vertex_offset + 3] = v0; - - vertices[vertex_offset + 4] = x1; - vertices[vertex_offset + 5] = y0; - vertices[vertex_offset + 6] = u1; - vertices[vertex_offset + 7] = v0; - - vertices[vertex_offset + 8] = x1; - vertices[vertex_offset + 9] = y1; - vertices[vertex_offset + 10] = u1; - vertices[vertex_offset + 11] = v1; - - vertices[vertex_offset + 12] = x0; - vertices[vertex_offset + 13] = y1; - vertices[vertex_offset + 14] = u0; - vertices[vertex_offset + 15] = v1; - - // Move cursor forward - cx += g->advance * scale_x; - vertex_offset += 16; - glyph_count++; - } - y += target_height; // Move to next row - } - - - if(glyph_count > 0) { - glBindVertexArray(overlay_state.vao); - glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); - glBufferSubData(GL_ARRAY_BUFFER, 0, vertex_offset * sizeof(float), vertices); - - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, overlay_state.font_texture); - glUniform1i(overlay_state.loc_tex, 0); - glUniform4f(overlay_state.loc_color, r, g, b, a); - glDrawElements(GL_TRIANGLES, glyph_count * 6, GL_UNSIGNED_SHORT, 0); - glBindVertexArray(0); - } -} - -static void overlay_render(float rect_x, float rect_y, float rect_w, float rect_h, uint8_t **lines, int window_w, int window_h) { - overlay_make_ortho_top_left((float)window_w, (float)window_h, mat); - glViewport(0.f, 0.f, window_w, window_h); - glUseProgram(overlay_state.program); - glUniformMatrix4fv(overlay_state.loc_proj, 1, GL_FALSE, mat); - - overlay_render_rect(rect_x, rect_y, rect_x + rect_w, rect_y + rect_h, .01f, .013f, .04f, .6f); - overlay_render_text_line(lines, rect_x + 2.f, rect_y + 2.f, 0.f, 0.f, 0.f, 1.f); - overlay_render_text_line(lines, rect_x, rect_y, 1.f, 1.f, 1.f, 1.f); -} - -/* [=]===^=[ debug_render ]=================================================================^===[=] */ -__attribute__((section(".bss"))) -uint8_t *debug_lines[MAX_PROFILING_ENTRIES]; -static void debug_render(void) { - if(state.overlay) { - size_t offset = 0; - for(uint32_t i = 0; i < MAX_PROFILING_ENTRIES; ++i) { - if(state.debug.timings[i].count) { - debug_lines[i] = &debug_line_buffer[offset]; - offset += stbsp_sprintf((char *)&debug_line_buffer[offset], "%25s: cycles=%7" PRIu64 ", count=%2u, cycles/count=%7" PRIu64, state.debug.timings[i].name, state.debug.timings[i].cycles, state.debug.timings[i].count, state.debug.timings[i].cycles / state.debug.timings[i].count) + 1; - } else { - debug_lines[i] = 0; - } - } - overlay_render(20.f, 20.f, 705.f, 280.f, debug_lines, state.screen_width, state.screen_height); - - offset = 0; - for(uint32_t i = 0; i < MAX_PROFILING_ENTRIES; ++i) { - debug_lines[i] = 0; - } - debug_lines[0] = &debug_line_buffer[offset]; - offset += stbsp_sprintf((char *)&debug_line_buffer[offset], "Total memory allocated: %" PRIu64, state.total_allocated) + 1; - overlay_render(20.f, 340.f, 500.f, 200.f, debug_lines, state.screen_width, state.screen_height); - } -} - -/* [=]===^=[ reset_profiling_data ]=================================================================^===[=] */ -static inline void reset_profiling_data(void) { - memset(&state.debug, 0, sizeof(state.debug)); -} |
