summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--base/.gitignore6
-rw-r--r--base/audio.c458
-rw-r--r--base/base.c122
-rwxr-xr-xbase/build.sh77
-rw-r--r--base/callbacks.c72
-rw-r--r--base/common.h55
-rw-r--r--base/fragment_shader.glsl149
-rw-r--r--base/incbin.h50
-rw-r--r--base/opengl.c133
-rw-r--r--base/opengl_loader.c210
-rw-r--r--base/overlay.c375
-rw-r--r--base/render.c79
-rw-r--r--base/settings.h11
-rw-r--r--base/shader.c44
-rw-r--r--base/shader.h34
-rw-r--r--base/state.c134
-rw-r--r--base/stb_sprintf.h1906
-rw-r--r--base/ugg.h10
-rw-r--r--base/vertex_shader.glsl9
19 files changed, 3934 insertions, 0 deletions
diff --git a/base/.gitignore b/base/.gitignore
new file mode 100644
index 0000000..4b1168a
--- /dev/null
+++ b/base/.gitignore
@@ -0,0 +1,6 @@
+/data
+.base_linux
+.base_windows
+tags
+base
+base.exe
diff --git a/base/audio.c b/base/audio.c
new file mode 100644
index 0000000..1d385d5
--- /dev/null
+++ b/base/audio.c
@@ -0,0 +1,458 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <math.h>
+#include <stdlib.h>
+
+#define SAMPLE_RATE 48000
+#define NUM_CHANNELS 2
+#define FRAME_SIZE (NUM_CHANNELS * sizeof(short))
+
+// static inline float smoothstep(float edge0, float edge1, float x) {
+// x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1]
+// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1]
+// return x * x * (3.0f - 2.0f * x); // Smooth interpolation
+// }
+
+// static inline float smootherstep(float edge0, float edge1, float x) {
+// x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1]
+// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1]
+// return x * x * x * (x * (x * 6 - 15) + 10); // Modified curve
+// }
+
+/*
+ * Polynomial cosine approximation intended for x in [-pi, pi].
+ *
+ * The previous 4th-order Taylor series around 0 was only usable near 0: at
+ * x = pi it returned about +0.12 instead of -1, so cosine_smooth() never
+ * reached 1.0 and even overshot on the way there.
+ *
+ * This version evaluates a 6th-order series at x/2 (so the polynomial argument
+ * stays within [-pi/2, pi/2]) and applies cos(x) = 2*cos(x/2)^2 - 1.
+ * The result is continuous, exactly 1 at x = 0 and within ~1e-4 of cos(x)
+ * over the whole interval.
+ */
+static inline float fast_cos(float x) {
+	float h = 0.5f * x;
+	float h2 = h * h;
+	// 1 - h^2/2! + h^4/4! - h^6/6!
+	float c = 1.0f - h2 * (0.5f - h2 * (0.04166667f - h2 * 0.00138889f));
+	return 2.0f * c * c - 1.0f;
+}
+
+/*
+ * Maps x from the range [edge0, edge1] onto [0, 1], clamps it, and shapes it
+ * with 0.5 * (1 - fast_cos(t * pi)).
+ * edge0 and edge1 must differ (the normalisation divides by edge1 - edge0).
+ */
+static inline float cosine_smooth(float edge0, float edge1, float x) {
+	float t = (x - edge0) / (edge1 - edge0);
+
+	if(t < 0.0f) {
+		t = 0.0f;
+	} else if(t > 1.0f) {
+		t = 1.0f;
+	}
+
+	return 0.5f * (1.0f - fast_cos(t * M_PI));
+}
+
+static float filter_phase = 0.0f; // auto-mode phase accumulator; audio_callback_thread() subtracts 1.0 once it reaches 1.0
+static float prev_output_sample_L = 0.0f; // last low-pass output, left channel (filter state)
+static float prev_output_sample_R = 0.0f; // last low-pass output, right channel (filter state)
+
+/*
+ * Fills audio_buffer with `frames` interleaved stereo int16 frames by calling
+ * audio_callback(), then optionally blends in a one-pole low-pass filtered copy.
+ *
+ * state.filter_override selects the mode: 0 leaves the samples untouched,
+ * -1 fades the filter in and out periodically at state.filter_frequency Hz,
+ * any other value keeps the filter fully applied.
+ */
+static void audio_callback_thread(int16_t *audio_buffer, size_t frames) {
+	// Snapshot the settings before the mix callback runs.
+	int override_mode = state.filter_override;
+	float toggle_hz = state.filter_frequency;
+
+	audio_callback(audio_buffer, frames);
+
+	if(!override_mode) {
+		return;
+	}
+
+	// One-pole low-pass coefficient and per-frame phase advance.
+	float alpha = 1.0f * M_PI * 4000.0f / (SAMPLE_RATE + 1.0f * M_PI * 4000.0f);
+	float phase_step = toggle_hz / SAMPLE_RATE;
+	size_t sample_count = frames * 2;
+
+	for(size_t idx = 0; idx < sample_count; idx += 2) {
+		float wet = 1.0f;	// blend factor: 0 = dry only, 1 = filtered only
+
+		if(override_mode == -1) {
+			filter_phase += phase_step;
+			if(filter_phase >= 1.0f) {
+				filter_phase -= 1.0f;
+			}
+			// Fade in over phase 0.45..0.50, fade out over 0.95..1.00.
+			wet = cosine_smooth(0.45f, 0.50f, filter_phase) - cosine_smooth(0.95f, 1.00f, filter_phase);
+		}
+
+		float dry_L = (float)audio_buffer[idx] / 32767.0f;
+		float dry_R = (float)audio_buffer[idx + 1] / 32767.0f;
+
+		float lp_L = alpha * dry_L + (1.0f - alpha) * prev_output_sample_L;
+		float lp_R = alpha * dry_R + (1.0f - alpha) * prev_output_sample_R;
+
+		prev_output_sample_L = lp_L;
+		prev_output_sample_R = lp_R;
+
+		audio_buffer[idx] = (int16_t)((1.0f - wet) * dry_L * 32767.0f + wet * lp_L * 32767.0f);
+		audio_buffer[idx + 1] = (int16_t)((1.0f - wet) * dry_R * 32767.0f + wet * lp_R * 32767.0f);
+	}
+}
+
+#ifdef __linux__
+
+
+#include <pipewire/pipewire.h>
+#include <spa/param/audio/format-utils.h>
+#include <spa/param/props.h>
+
+#define BUFFER_SIZE (512 * FRAME_SIZE) // bytes in one buffer of 512 frames
+
+// PipeWire handles shared by audio_initialize(), on_process() and audio_shutdown().
+// NOTE(review): the pa_ prefix looks like a PulseAudio holdover; these are PipeWire objects.
+static struct pw_thread_loop *pa_thread_loop;
+static struct pw_context *pa_context;
+static struct pw_core *pa_core;
+static struct pw_stream *pa_stream;
+static struct spa_hook pa_stream_listener;
+static uint64_t audio_clock_frequency; // not referenced by the code visible in this file section
+static uint64_t playback_cursor; // only referenced from commented-out code in on_process()
+
+/*
+ * Stream "process" callback, registered through stream_events in
+ * audio_initialize(). Dequeues one output buffer, lets audio_callback_thread()
+ * fill it with interleaved int16 frames, records the chunk layout and queues
+ * the buffer again.
+ *
+ * The stream is connected with PW_STREAM_FLAG_RT_PROCESS, so this path must
+ * not block.
+ */
+static void on_process(void *userdata) {
+	(void)userdata;
+
+	struct pw_buffer *buffer = pw_stream_dequeue_buffer(pa_stream);
+	if(!buffer) {
+		/* No buffer available, skip this cycle. */
+		return;
+	}
+
+	struct spa_buffer *spa_buf = buffer->buffer;
+	if(!spa_buf->datas || !spa_buf->datas[0].data) {
+		pw_stream_queue_buffer(pa_stream, buffer);
+		return;
+	}
+
+	struct spa_data *out = &spa_buf->datas[0];
+	uint32_t frames = out->maxsize / FRAME_SIZE;
+
+	/*
+	 * Honour the amount the graph actually asked for instead of always filling
+	 * the whole buffer, which adds latency. `requested` is 0 when the server
+	 * gives no hint. NOTE: the field exists in PipeWire >= 0.3.49.
+	 */
+	if(buffer->requested != 0 && buffer->requested < frames) {
+		frames = (uint32_t)buffer->requested;
+	}
+
+	audio_callback_thread(out->data, frames);
+
+	if(out->chunk) {
+		out->chunk->offset = 0;
+		out->chunk->stride = FRAME_SIZE;
+		out->chunk->size = frames * FRAME_SIZE;
+	}
+
+	pw_stream_queue_buffer(pa_stream, buffer);
+}
+
+/*
+ * Creates pa_stream on pa_core, registers on_process() and connects the stream
+ * for S16_LE / SAMPLE_RATE / NUM_CHANNELS playback.
+ * Must be called with the thread loop locked. Returns 0 on success, -1 on
+ * failure (pa_stream is NULL again in that case).
+ */
+static int audio_create_stream(void) {
+	static const struct spa_dict_item items[] = {
+		SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_TYPE, "Audio"),
+		SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_CATEGORY, "Playback"),
+		SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_ROLE, "Game"),
+		SPA_DICT_ITEM_INIT(PW_KEY_NODE_LATENCY, "512/48000")
+	};
+	static const struct pw_stream_events stream_events = { PW_VERSION_STREAM_EVENTS, .process = on_process, };
+
+	// pw_stream_new() takes ownership of props.
+	struct pw_properties *props = pw_properties_new_dict(&SPA_DICT_INIT(items, SPA_N_ELEMENTS(items)));
+
+	pa_stream = pw_stream_new(pa_core, "My Audio Stream", props);
+	if(!pa_stream) {
+		fprintf(stderr, "Failed to create PipeWire stream\n");
+		return -1;
+	}
+	pw_stream_add_listener(pa_stream, &pa_stream_listener, &stream_events, 0);
+
+	/*
+	 * Build two SPA params:
+	 *   1) The audio format: S16_LE, SAMPLE_RATE, NUM_CHANNELS
+	 *   2) The buffer param: 8 buffers preferred (2..16), BUFFER_SIZE bytes each
+	 */
+	uint8_t fmt_buffer[1024];
+	struct spa_pod_builder fmt_builder = SPA_POD_BUILDER_INIT(fmt_buffer, sizeof(fmt_buffer));
+	const struct spa_pod *fmt_param = spa_pod_builder_add_object(
+		&fmt_builder,
+		SPA_TYPE_OBJECT_Format, SPA_PARAM_EnumFormat,
+		SPA_FORMAT_mediaType, SPA_POD_Id(SPA_MEDIA_TYPE_audio),
+		SPA_FORMAT_mediaSubtype, SPA_POD_Id(SPA_MEDIA_SUBTYPE_raw),
+		SPA_FORMAT_AUDIO_format, SPA_POD_Id(SPA_AUDIO_FORMAT_S16_LE),
+		SPA_FORMAT_AUDIO_rate, SPA_POD_Int(SAMPLE_RATE),
+		SPA_FORMAT_AUDIO_channels, SPA_POD_Int(NUM_CHANNELS)
+	);
+
+	uint8_t buf_buffer[1024];
+	struct spa_pod_builder buf_builder = SPA_POD_BUILDER_INIT(buf_buffer, sizeof(buf_buffer));
+	const struct spa_pod *buf_param = spa_pod_builder_add_object(
+		&buf_builder,
+		SPA_TYPE_OBJECT_ParamBuffers, SPA_PARAM_Buffers,
+		SPA_PARAM_BUFFERS_buffers, SPA_POD_CHOICE_RANGE_Int(8, 2, 16),
+		SPA_PARAM_BUFFERS_blocks, SPA_POD_Int(1),
+		SPA_PARAM_BUFFERS_size, SPA_POD_CHOICE_RANGE_Int(BUFFER_SIZE, BUFFER_SIZE, BUFFER_SIZE*8),
+		SPA_PARAM_BUFFERS_stride, SPA_POD_Int(FRAME_SIZE),
+		SPA_PARAM_BUFFERS_align, SPA_POD_Int(16)
+	);
+
+	const struct spa_pod *params[2] = { fmt_param, buf_param };
+
+	int res = pw_stream_connect(pa_stream, PW_DIRECTION_OUTPUT, PW_ID_ANY, PW_STREAM_FLAG_AUTOCONNECT | PW_STREAM_FLAG_RT_PROCESS | PW_STREAM_FLAG_MAP_BUFFERS, params, 2);
+	if(res < 0) {
+		// Previously ignored: a failed connect was reported as success.
+		fprintf(stderr, "Failed to connect PipeWire stream (%d)\n", res);
+		pw_stream_destroy(pa_stream);
+		pa_stream = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Initialize PipeWire, create the stream, and connect for audio playback.
+ * Returns immediately so the main thread can continue; audio is produced from
+ * the PipeWire thread loop through on_process().
+ *
+ * Returns 0 on success, -1 on failure. On failure every partially created
+ * object is destroyed AND its global pointer is reset to NULL, so a later
+ * audio_shutdown() call sees pa_thread_loop == NULL and returns without
+ * touching freed objects.
+ */
+int audio_initialize(void) {
+	pw_init(0, 0);
+
+	pa_thread_loop = pw_thread_loop_new("my-audio-loop", 0);
+	if(!pa_thread_loop) {
+		fprintf(stderr, "Failed to create PipeWire thread loop\n");
+		goto fail_deinit;
+	}
+
+	if(pw_thread_loop_start(pa_thread_loop) != 0) {
+		fprintf(stderr, "Failed to start PipeWire thread loop\n");
+		goto fail_loop;
+	}
+
+	pw_thread_loop_lock(pa_thread_loop);
+
+	pa_context = pw_context_new(pw_thread_loop_get_loop(pa_thread_loop), 0, 0);
+	if(!pa_context) {
+		fprintf(stderr, "Failed to create PipeWire context\n");
+		goto fail_unlock;
+	}
+
+	pa_core = pw_context_connect(pa_context, 0, 0);
+	if(!pa_core) {
+		fprintf(stderr, "Failed to connect context to core\n");
+		goto fail_context;
+	}
+
+	if(audio_create_stream() != 0) {
+		goto fail_core;
+	}
+
+	pw_thread_loop_unlock(pa_thread_loop);
+	return 0;
+
+fail_core:
+	pw_core_disconnect(pa_core);
+	pa_core = NULL;
+fail_context:
+	pw_context_destroy(pa_context);
+	pa_context = NULL;
+fail_unlock:
+	pw_thread_loop_unlock(pa_thread_loop);
+	pw_thread_loop_stop(pa_thread_loop);
+fail_loop:
+	pw_thread_loop_destroy(pa_thread_loop);
+	pa_thread_loop = NULL;
+fail_deinit:
+	pw_deinit();
+	return -1;
+}
+
+/*
+ * Clean up PipeWire objects, stop the thread loop, and deinit.
+ * This should be called before the program exits.
+ *
+ * Does nothing when pa_thread_loop is NULL. Every pointer is reset after its
+ * object is destroyed, so calling this function a second time is harmless.
+ */
+void audio_shutdown(void) {
+	if(!pa_thread_loop) {
+		return;
+	}
+
+	pw_thread_loop_lock(pa_thread_loop);
+
+	if(pa_stream) {
+		pw_stream_disconnect(pa_stream);
+		pw_stream_destroy(pa_stream);
+		pa_stream = NULL;
+	}
+
+	if(pa_core) {
+		pw_core_disconnect(pa_core);
+		pa_core = NULL;
+	}
+
+	if(pa_context) {
+		pw_context_destroy(pa_context);
+		pa_context = NULL;
+	}
+
+	pw_thread_loop_unlock(pa_thread_loop);
+	pw_thread_loop_stop(pa_thread_loop);
+	pw_thread_loop_destroy(pa_thread_loop);
+	pa_thread_loop = NULL;
+
+	pw_deinit();
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#elif _WIN32
+
+#define COBJMACROS
+#include <windows.h>
+#include <initguid.h>
+#include <audioclient.h>
+#include <mmdeviceapi.h>
+#include <avrt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <timeapi.h>
+
+/*
+ * Minimal WASAPI shared-mode audio playback with explicit 48kHz/16-bit/2ch.
+ */
+
+#define NUM_CHANNELS 2
+
+// WASAPI objects shared by audio_initialize(), audio_thread_proc() and audio_shutdown().
+static IMMDeviceEnumerator *enumerator;
+static IMMDevice *device_out;
+static IAudioClient *audio_client_out;
+static IAudioRenderClient *render_client;
+static HANDLE audio_event;	// auto-reset event signalled by WASAPI when buffer space is available
+static HANDLE audio_thread;	// render thread running audio_thread_proc()
+// Written by the main thread, polled by the render thread: must be atomic,
+// a plain int is a data race and the loop test may legally be hoisted.
+static _Atomic int running;
+
+/*
+ * Render thread entry point. Waits on audio_event, asks WASAPI how many frames
+ * are free in the shared buffer, and fills exactly that many through
+ * audio_callback_thread().
+ *
+ * Returns 0 on normal exit (running cleared) and 1 if the buffer size could not
+ * be queried or the wait failed. A failed GetCurrentPadding/GetBuffer skips
+ * that wake-up instead of writing through an uninitialised pointer.
+ */
+static DWORD WINAPI audio_thread_proc(void *arg) {
+	(void)arg;
+
+	UINT32 buffer_size = 0;
+	if(FAILED(IAudioClient_GetBufferSize(audio_client_out, &buffer_size))) {
+		return 1;
+	}
+
+	while(running) {
+		if(WaitForSingleObject(audio_event, INFINITE) != WAIT_OBJECT_0) {
+			return 1;
+		}
+		if(!running) {
+			break;
+		}
+
+		UINT32 padding = 0;
+		if(FAILED(IAudioClient_GetCurrentPadding(audio_client_out, &padding))) {
+			continue;
+		}
+
+		UINT32 available = buffer_size - padding;
+		if(available == 0) {
+			continue;
+		}
+
+		BYTE *data = NULL;
+		if(FAILED(IAudioRenderClient_GetBuffer(render_client, available, &data)) || !data) {
+			continue;
+		}
+
+		audio_callback_thread((int16_t *)data, available);
+		IAudioRenderClient_ReleaseBuffer(render_client, available, 0);
+	}
+	return 0;
+}
+
+void audio_initialize() {
+ WAVEFORMATEX wf;
+ REFERENCE_TIME dur_out;
+
+ CoInitializeEx(0, COINIT_MULTITHREADED);
+ if(SUCCEEDED(CoCreateInstance(&CLSID_MMDeviceEnumerator, 0, CLSCTX_ALL, &IID_IMMDeviceEnumerator, (void**)&enumerator))) {
+ if(SUCCEEDED(IMMDeviceEnumerator_GetDefaultAudioEndpoint(enumerator, eRender, eConsole, &device_out))) {
+ if(SUCCEEDED(IMMDevice_Activate(device_out, &IID_IAudioClient, CLSCTX_ALL, 0, (void**)&audio_client_out))) {
+ wf.wFormatTag = WAVE_FORMAT_PCM;
+ wf.nChannels = NUM_CHANNELS;
+ wf.nSamplesPerSec = 48000;
+ wf.wBitsPerSample = 16;
+ wf.nBlockAlign = (wf.nChannels * wf.wBitsPerSample) / 8;
+ wf.nAvgBytesPerSec = wf.nSamplesPerSec * wf.nBlockAlign;
+ wf.cbSize = 0;
+
+ IAudioClient_GetDevicePeriod(audio_client_out, &dur_out, 0);
+ IAudioClient_Initialize(audio_client_out, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, dur_out, 0, &wf, 0);
+ audio_event = CreateEvent(0, FALSE, FALSE, 0);
+ if(audio_event){
+ IAudioClient_SetEventHandle(audio_client_out, audio_event);
+ IAudioClient_GetService(audio_client_out, &IID_IAudioRenderClient, (void**)&render_client);
+ IAudioClient_Start(audio_client_out);
+
+ running = 1;
+ audio_thread = CreateThread(0, 0, audio_thread_proc, 0, 0, 0);
+ return;
+ } else {
+ printf("Failed to create audio event\n");
+ }
+ audio_client_out->lpVtbl->Release(audio_client_out);
+ } else {
+ printf("Failed to activate audio client\n");
+ }
+ device_out->lpVtbl->Release(device_out);
+ } else {
+ printf("Failed to get default audio endpoint\n");
+ }
+ enumerator->lpVtbl->Release(enumerator);
+ } else {
+ printf("Failed to create MMDeviceEnumerator\n");
+ }
+}
+
+/*
+ * Stops the render thread and the audio client, then releases every WASAPI
+ * object in reverse order of creation (render client before the client that
+ * handed it out). Each pointer/handle is reset after release so stale values
+ * are never reused.
+ *
+ * CoUninitialize() balances the CoInitializeEx() in audio_initialize(); call
+ * this function once per audio_initialize().
+ */
+void audio_shutdown() {
+	running = 0;
+
+	if(audio_thread) {
+		// Wake the thread so it observes running == 0.
+		if(audio_event) {
+			SetEvent(audio_event);
+		}
+		WaitForSingleObject(audio_thread, INFINITE);
+		CloseHandle(audio_thread);
+		audio_thread = NULL;
+	}
+
+	if(audio_client_out) {
+		IAudioClient_Stop(audio_client_out);
+	}
+	if(render_client) {
+		render_client->lpVtbl->Release(render_client);
+		render_client = NULL;
+	}
+	if(audio_client_out) {
+		audio_client_out->lpVtbl->Release(audio_client_out);
+		audio_client_out = NULL;
+	}
+	if(audio_event) {
+		CloseHandle(audio_event);
+		audio_event = NULL;
+	}
+	if(device_out) {
+		device_out->lpVtbl->Release(device_out);
+		device_out = NULL;
+	}
+	if(enumerator) {
+		enumerator->lpVtbl->Release(enumerator);
+		enumerator = NULL;
+	}
+
+	CoUninitialize();
+}
+
+#endif
+
+
+
+
+
+// BELOW IS FOR FUTURE FRAME SYNCHRONIZATION!!!
+
+#if 0
+// NOTE: everything down to the matching #endif is compiled out; it is a sketch, not live code.
+// Audio sync throttling logic (using audio playback clock)
+
+#define AUDIO_SAMPLE_RATE 48000
+#define FRAMETIME (1000000000 / 60) // NES: ~16.67ms per frame (replace as needed for PAL/other)
+
+static uint64_t emulator_start_time_ns = 0;
+static uint64_t audio_start_time_ns = 0;
+
+// Stub: return current audio playback time in nanoseconds
+uint64_t get_audio_playback_time_ns(void);
+
+// Call this once at emulation start
+// Records the caller's current time and the audio clock reading as the two reference points.
+void audio_sync_init(uint64_t current_time_ns) {
+ emulator_start_time_ns = current_time_ns;
+ audio_start_time_ns = get_audio_playback_time_ns();
+}
+
+// Call this at the end of each frame
+// Nudges *frame_duration_ns by 1/8 of the drift between elapsed audio time and
+// frame_number * FRAMETIME, then clamps it to FRAMETIME +/- 50000 ns.
+void audio_throttle_emulator(uint64_t frame_number, int64_t *frame_duration_ns) {
+ uint64_t expected_emulated_time = frame_number * FRAMETIME;
+ uint64_t actual_audio_time = get_audio_playback_time_ns() - audio_start_time_ns;
+
+ int64_t drift = (int64_t)(actual_audio_time) - (int64_t)(expected_emulated_time);
+
+ // Adjust frame duration to correct drift gradually
+ *frame_duration_ns -= drift / 8;
+ // Clamp adjustment to avoid jitter
+ if(*frame_duration_ns > FRAMETIME + 50000) {
+ *frame_duration_ns = FRAMETIME + 50000;
+ } else if(*frame_duration_ns < FRAMETIME - 50000) {
+ *frame_duration_ns = FRAMETIME - 50000;
+ }
+}
+
+#ifdef _WIN32
+#include <windows.h>
+#include <mmdeviceapi.h>
+#include <audioclient.h>
+
+uint64_t get_audio_playback_time_ns(void) {
+ // WASAPI: query IAudioClock interface
+ // This is just a placeholder. You’ll need to cache IAudioClock *audio_clock externally.
+ // NOTE(review): IAudioClock::GetPosition reports in units of IAudioClock::GetFrequency,
+ // which is not necessarily AUDIO_SAMPLE_RATE - confirm the conversion before enabling this.
+ extern IAudioClock *audio_clock;
+ UINT64 pos;
+ audio_clock->lpVtbl->GetPosition(audio_clock, &pos, 0);
+ return (pos * 1000000000ULL) / AUDIO_SAMPLE_RATE;
+}
+
+#else
+// PipeWire backend
+// NOTE(review): relies on an external `audio_clock` that nothing visible here defines - placeholder.
+#include <spa/clock/clock.h>
+extern struct spa_clock *audio_clock;
+
+uint64_t get_audio_playback_time_ns(void) {
+ struct spa_clock_info info;
+ audio_clock->get_time(audio_clock, &info);
+ return info.nsec;
+}
+#endif
+
+#endif
diff --git a/base/base.c b/base/base.c
new file mode 100644
index 0000000..4c6fbe3
--- /dev/null
+++ b/base/base.c
@@ -0,0 +1,122 @@
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include <inttypes.h>
+#include <immintrin.h>
+#include <unistd.h>
+
+#ifdef _WIN32
+ #define WIN32_LEAN_AND_MEAN
+ #define NOMINMAX
+ #undef NOCRYPT
+ #include <windows.h>
+ #include <mmsystem.h>
+#elif defined(__linux__)
+ #include <dlfcn.h>
+ // #include <sched.h>
+ // #include <unistd.h>
+#endif
+
+#include "settings.h"
+#ifdef PROFILER
+#define STB_SPRINTF_IMPLEMENTATION
+#define STB_SPRINTF_NOFLOAT
+#define STB_SPRINTF_STATIC
+#include "stb_sprintf.h"
+// printf-style debug output for PROFILER builds: formats into a 512-byte stack
+// buffer with stbsp_snprintf() and write()s the result to STDOUT_FILENO.
+// The non-PROFILER variant of this macro expands to nothing.
+#define DEBUG_PRINT(format, ...) do { \
+ char buf[512]; \
+ int len = stbsp_snprintf(buf, sizeof(buf), format, ##__VA_ARGS__); \
+ write(STDOUT_FILENO, buf, len); \
+} while(0)
+#else
+#define DEBUG_PRINT(...)
+#endif
+
+#include "opengl_loader.c"
+
+#include "incbin.h"
+#include "ugg.h"
+#include "state.c"
+#include "common.h"
+
+
+#include "opengl.c"
+#include "render.c"
+#include <mkfw/mkfw.c>
+
+#ifdef PROFILER
+#include "overlay.c"
+#endif
+#include "audio.c"
+#include "callbacks.c"
+
+/* [=]===^=[ main ]=================================================================^===[=] */
+// Program entry point: creates the window at 3x SCREEN_WIDTH/SCREEN_HEIGHT, installs the
+// input/framebuffer callbacks, sets up OpenGL and audio, then runs the frame loop until
+// Escape is pressed or mkfw_should_close() reports true. argc/argv are not used.
+int main(int argc, char **argv) {
+ state.toggle_crt_emulation = true;
+ mkfw_init(SCREEN_WIDTH*3, SCREEN_HEIGHT*3);
+ // Swap interval 0: pacing is done manually at the bottom of the loop.
+ mkfw_set_swapinterval(0);
+ mkfw_set_window_min_size_and_aspect(SCREEN_WIDTH*3, SCREEN_HEIGHT*3, 4, 3);
+ mkfw_set_key_callback(key_callback);
+ mkfw_set_mouse_move_delta_callback(mouse_move_callback);
+ mkfw_set_mouse_button_callback(mouse_button_callback);
+ mkfw_set_framebuffer_size_callback(framebuffer_callback);
+ opengl_setup(vertex_shader_start, fragment_shader_start);
+ change_resolution(SCREEN_WIDTH, SCREEN_HEIGHT);
+#ifdef PROFILER
+ overlay_init();
+#endif
+ init_callback();
+ // NOTE(review): the result of audio_initialize() is not checked here.
+ audio_initialize();
+
+ set_decay(20);
+
+ bool running = true;
+ // Deadline of the next frame, in mkfw_gettime() units.
+ uint64_t next_update = mkfw_gettime() + FRAMETIME;
+ while(running && !mkfw_should_close()) {
+ mkfw_pump_messages();
+ if(key_pressed(MKS_KEY_ESCAPE)) { running = false; }
+
+#ifdef PROFILER
+ reset_profiling_data();
+#endif
+ render_callback();
+ apply_phosphor_decay();
+ update_keyboard_state();
+ update_modifier_state();
+ update_mouse_state();
+ state.frame_number++;
+
+// With PERF_TEST defined the loop skips presentation and pacing entirely.
+#ifndef PERF_TEST
+ render_frame();
+#ifdef PROFILER
+ debug_render();
+#endif
+
+ uint64_t now = mkfw_gettime();
+ int64_t remaining = next_update - now;
+ if(remaining > 0) {
+ // Sleep for most of the remaining time, then spin for the last SLEEP_MARGIN_NS.
+ if(remaining > SLEEP_MARGIN_NS) {
+ mkfw_sleep(remaining - SLEEP_MARGIN_NS);
+ }
+ while(mkfw_gettime() < next_update) { /**/ }
+ } else {
+ // Deadline already missed: restart the schedule from the current time.
+ next_update = now;
+ }
+ next_update += FRAMETIME;
+
+ mkfw_swap_buffers();
+#endif
+ }
+
+#ifdef PROFILER
+ overlay_shutdown();
+#endif
+ audio_shutdown();
+ mkfw_cleanup();
+ return 0;
+}
diff --git a/base/build.sh b/base/build.sh
new file mode 100755
index 0000000..b415041
--- /dev/null
+++ b/base/build.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+export PATH="$(git rev-parse --show-toplevel)/bin:$PATH"
+
+# Project name
+PROJECT_NAME="base"
+
+# Base configuration common to all builds
+CFLAGS="-mavx2 -mtune=native -std=gnu11 "
+CFLAGS+="-msse4.1 -mfunction-return=keep -mindirect-branch=keep "
+CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fwhole-program "
+CFLAGS+="-fno-stack-protector -fno-PIE -no-pie -fno-strict-aliasing -ffunction-sections -fdata-sections "
+CFLAGS+="-U_FORTIFY_SOURCE "
+CFLAGS+="-Wall -Wextra "
+CFLAGS+="-Wno-unused-parameter -Wno-sign-compare -Wno-trigraphs -Wno-maybe-uninitialized "
+CFLAGS+="-Wno-unused-variable -Wno-unused-const-variable -Wno-unused-function "
+
+LDFLAGS="-Wl,--gc-sections "
+
+# Base include paths
+INCLUDE_PATHS="-I../include -I../base -I../.."
+
+# Linux-specific includes and libraries
+LINUX_INCLUDE="-I/usr/include/pipewire-0.3 -I/usr/include/spa-0.2"
+LINUX_LIBS="-lpipewire-0.3 -lXi -lX11 -lGL -lm -ldl -pthread"
+
+# Windows-specific includes and libraries
+WINDOWS_INCLUDE=""
+WINDOWS_LIBS="-lwinmm -lksuser -lole32 -lmmdevapi -lavrt -lgdi32 -lopengl32 -luuid"
+
+# Determine build type
+BUILD_TYPE=$1
+if [ -z "$BUILD_TYPE" ]; then
+ BUILD_TYPE="normal"
+fi
+
+case "$BUILD_TYPE" in
+ "normal")
+ CFLAGS+=" -g -O2 -DDEBUG_INTERNAL"
+ ;;
+ "debug")
+ CFLAGS+=" -g -O0"
+ LDFLAGS+=" -fno-pie -no-pie"
+ ;;
+ "release")
+ CFLAGS+=" -s -O2"
+ ;;
+ *)
+ echo "Unknown build type: $BUILD_TYPE"
+ exit 1
+ ;;
+esac
+
+# Make sure shaders are up to date
+shader2h 330 vertex_shader vertex_shader.glsl data/vertex_shader.h
+shader2h 330 fragment_shader shader.h fragment_shader.glsl data/fragment_shader.h
+
+# Stop on first error
+set -e
+
+# Common compile commands
+gcc_cmd="gcc $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME} $INCLUDE_PATHS $LDFLAGS"
+mingw_cmd="x86_64-w64-mingw32-gcc $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME}.exe -mwindows $INCLUDE_PATHS $LDFLAGS"
+
+# Run Linux and Windows builds in parallel
+(
+ ctime -begin .${PROJECT_NAME}_linux
+ $gcc_cmd $LINUX_INCLUDE $LINUX_LIBS
+ ctime -end .${PROJECT_NAME}_linux $?
+) &
+
+(
+ ctime -begin .${PROJECT_NAME}_windows
+ $mingw_cmd $WINDOWS_INCLUDE $WINDOWS_LIBS
+ ctime -end .${PROJECT_NAME}_windows $?
+) &
+wait
+
diff --git a/base/callbacks.c b/base/callbacks.c
new file mode 100644
index 0000000..c5397e9
--- /dev/null
+++ b/base/callbacks.c
@@ -0,0 +1,72 @@
+
+
+/* [=]===^=[ framebuffer_callback ]=================================================================^===[=] */
+// Stores the new framebuffer size in state and computes a centred viewport that keeps
+// aspect_ratio: pillarboxed when the window is too wide, letterboxed when it is too tall.
+static void framebuffer_callback(int32_t width, int32_t height) {
+	float window_aspect = (float)width / (float)height;
+
+	state.screen_width = width;
+	state.screen_height = height;
+
+	// Start with the viewport covering the whole framebuffer.
+	state.viewport.x = 0;
+	state.viewport.y = 0;
+	state.viewport.w = width;
+	state.viewport.h = height;
+
+	if(window_aspect > aspect_ratio) {
+		// Too wide: shrink the width to match the height and centre horizontally.
+		float fitted_w = height * aspect_ratio;
+		state.viewport.x = (width - fitted_w) / 2;
+		state.viewport.w = fitted_w;
+	} else if(window_aspect < aspect_ratio) {
+		// Too tall: shrink the height to match the width and centre vertically.
+		float fitted_h = width / aspect_ratio;
+		state.viewport.y = (height - fitted_h) / 2;
+		state.viewport.h = fitted_h;
+	}
+}
+
+/* [=]===^=[ key_callback ]=================================================================^===[=] */
+// Keyboard handler:
+//   Escape pressed      -> request window close
+//   F12 released        -> toggle CRT emulation
+//   F11 released        -> toggle fullscreen
+//   Shift+F11 released  -> toggle the overlay (PROFILER builds only)
+static void key_callback(uint32_t key, uint32_t action, uint32_t mods) {
+	if(key == MKS_KEY_ESCAPE && action == MKS_PRESSED) {
+		mkfw_set_should_close(true);
+	}
+
+	if(action != MKS_RELEASED) {
+		return;
+	}
+
+	if(key == MKS_KEY_F12) {
+		state.toggle_crt_emulation = !state.toggle_crt_emulation;
+
+	} else if(key == MKS_KEY_F11) {
+		if(keyboard_state[MKS_KEY_SHIFT]) {
+#ifdef PROFILER
+			state.overlay = !state.overlay;
+#endif
+		} else {
+			bool want_fullscreen = !state.fullscreen;
+			mkfw_fullscreen(want_fullscreen);
+			state.fullscreen = want_fullscreen;
+		}
+	}
+}
+
+/* [=]===^=[ mouse_move_callback ]=================================================================^===[=] */
+// Accumulates the relative mouse movement (x, y) into state.mouse_dx / state.mouse_dy.
+static void mouse_move_callback(int32_t x, int32_t y) {
+	state.mouse_dy = state.mouse_dy + y;
+	state.mouse_dx = state.mouse_dx + x;
+}
+
+/* [=]===^=[ mouse_button_callback ]=================================================================^===[=] */
+// Mouse button handler; intentionally does nothing at the moment.
+static void mouse_button_callback(uint8_t button, int action) {
+	(void)button;
+	(void)action;
+}
diff --git a/base/common.h b/base/common.h
new file mode 100644
index 0000000..2138908
--- /dev/null
+++ b/base/common.h
@@ -0,0 +1,55 @@
+
+
+#ifdef _WIN32
+#include <intrin.h>
+#include <windows.h>
+#include <malloc.h> // For _aligned_malloc and _aligned_free on Windows
+#define aligned_alloc(align, size) _aligned_malloc(size, align)
+#define aligned_free _aligned_free
+
+#elif __linux__
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <stdlib.h> // For aligned_alloc on Linux
+#define aligned_free free
+#endif
+
+// Element count of a real array (NOT of a pointer or an array parameter).
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(_Array) (sizeof(_Array) / sizeof((_Array)[0]))
+#endif
+
+// NOTE: both macros evaluate one of their arguments twice; avoid side effects.
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+// ALIGNED(x): request x-byte alignment for a declaration.
+#if defined(__GNUC__) || defined(__clang__) || defined(__TINYC__)
+#define ALIGNED(x) __attribute__((aligned(x)))
+#elif defined(_MSC_VER)
+#define ALIGNED(x) __declspec(align(x))
+#else
+#define ALIGNED(x) /* No alignment support */
+#endif
+
+// ASSUME(cond): promise the optimiser that cond holds.
+// Wrapped in do/while(0) so it is a single statement; the previous bare `if`
+// captured a following `else` when used inside an unbraced if/else.
+#if defined(__GNUC__) || defined(__clang__)
+#define ASSUME(condition) do { if(!(condition)) __builtin_unreachable(); } while(0)
+#elif defined(_MSC_VER)
+#define ASSUME(condition) __assume(condition)
+#else
+#define ASSUME(condition) ((void)0) /* Fallback: No-op */
+#endif
+
+// UNREACHABLE(cond): promise the optimiser that cond is never true.
+// Guarded like ASSUME so non-GNU compilers still build.
+#if defined(__GNUC__) || defined(__clang__)
+#define UNREACHABLE(cond) do { if(cond) __builtin_unreachable(); } while(0)
+#elif defined(_MSC_VER)
+#define UNREACHABLE(cond) __assume(!(cond))
+#else
+#define UNREACHABLE(cond) ((void)0)
+#endif
+
+#define DEFAULT_ALIGNMENT 64
+
+/*
+ * Allocates `size` bytes rounded up to a multiple of DEFAULT_ALIGNMENT,
+ * aligned to DEFAULT_ALIGNMENT and zero-filled. The rounded size is added to
+ * state.total_allocated.
+ *
+ * Returns NULL when the rounded size would overflow or the allocation fails;
+ * in that case nothing is written and the counter is left untouched.
+ * Release the block with mks_free().
+ */
+static void *mks_alloc(size_t size) {
+	if(size > SIZE_MAX - (DEFAULT_ALIGNMENT - 1)) {
+		return NULL;
+	}
+	size = (size + (DEFAULT_ALIGNMENT - 1)) & ~(size_t)(DEFAULT_ALIGNMENT - 1);
+
+	void *ptr = aligned_alloc(DEFAULT_ALIGNMENT, size);
+	if(!ptr) {
+		return NULL;
+	}
+
+	state.total_allocated += size;
+	memset(ptr, 0, size);
+	return ptr;
+}
+
+// Releases a block through aligned_free, which this header maps to
+// _aligned_free on Windows and free on Linux.
+static void mks_free(void *ptr) {
+ aligned_free(ptr);
+}
diff --git a/base/fragment_shader.glsl b/base/fragment_shader.glsl
new file mode 100644
index 0000000..c21b2de
--- /dev/null
+++ b/base/fragment_shader.glsl
@@ -0,0 +1,149 @@
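+// CRT-emulation fragment shader. This file contains no #version line or precision qualifier; presumably the build step (shader2h) prepends them together with shader.h - verify.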
+
+out vec4 outcolor;
+in vec2 frag_texture_coord;
+
+uniform vec2 resolution;
+uniform vec2 src_image_size;
+uniform float brightness;
+uniform vec4 tone_data;
+uniform bool crt_emulation;
+uniform sampler2D iChannel0;
+
+// Samples iChannel0 at uv with an LOD bias of -16.0 and clamps every RGB
+// channel to at least 0.003333333.
+vec3 CrtsFetch(vec2 uv) {
+	const float floor_level = 0.003333333;
+	vec3 texel = texture(iChannel0, uv, -16.0).rgb;
+	return max(texel, vec3(floor_level));
+}
+
+#define CrtsRcpF1(x) (1.0 / (x))
+#define CrtsSatF1(x) clamp((x), 0.0, 1.0)
+const float PI2 = 6.28318530717958;
+const float HALF = 0.5;
+
+// Largest of three scalars.
+float CrtsMax3F1(float a, float b, float c) {
+	float bc = max(b, c);
+	return max(a, bc);
+}
+
+// Per-pixel RGB mask for output pixel `pos`; `dark` is the multiplier applied to
+// the attenuated channels. Exactly one variant is compiled, chosen by the first
+// defined macro among CRTS_MASK_GRILLE, CRTS_MASK_GRILLE_LITE, CRTS_MASK_NONE and
+// CRTS_MASK_SHADOW. With none defined the mask is neutral (vec3(1.0)); previously
+// that configuration left the function without a return statement, and defining
+// two variants redeclared `m`.
+vec3 CrtsMask(vec2 pos, float dark) {
+#if defined(CRTS_MASK_GRILLE)
+	// Aperture grille: one bright channel per column, period 3 pixels.
+	vec3 m = vec3(dark);
+	float x = fract(pos.x * (1.0 / 3.0));
+	m.r = (x < (1.0 / 3.0)) ? 1.0 : dark;
+	m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark;
+	m.b = (x >= (2.0 / 3.0)) ? 1.0 : dark;
+	return m;
+
+#elif defined(CRTS_MASK_GRILLE_LITE)
+	// Lite grille: one darkened channel per column, the other two at full level.
+	vec3 m = vec3(1.0);
+	float x = fract(pos.x * (1.0 / 3.0));
+	m.r = (x < (1.0 / 3.0)) ? dark : 1.0;
+	m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? dark : 1.0;
+	m.b = (x >= (2.0 / 3.0)) ? dark : 1.0;
+	return m;
+
+#elif defined(CRTS_MASK_SHADOW)
+	// Shadow mask: grille pattern with period 6, shifted by 3 pixels per row.
+	pos.x += pos.y * 3.0;
+	vec3 m = vec3(dark);
+	float x = fract(pos.x * (1.0 / 6.0));
+	m.r = (x < (1.0 / 3.0)) ? 1.0 : dark;
+	m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark;
+	m.b = (x >= (2.0 / 3.0)) ? 1.0 : dark;
+	return m;
+
+#else
+	// CRTS_MASK_NONE, or no mask macro at all.
+	return vec3(1.0);
+#endif
+}
+
+// CRT filter for one output pixel. ipos is the output pixel position; the source image is
+// read through CrtsFetch(). Steps: warp the position, take a 4x2 block of source texels,
+// weight the two rows with cosine scanline profiles and the four columns with exp2(blur * d^2),
+// apply CrtsMask(), then optionally tone-map (CRTS_TONE).
+// Note: inputSizeDivOutputSize and rcpOutputSize are not referenced in the body.
+vec3 CrtsFilter(vec2 ipos, vec2 inputSizeDivOutputSize, vec2 halfInputSize, vec2 rcpInputSize, vec2 rcpOutputSize, vec2 twoDivOutputSize, float inputHeight, vec2 warp, float thin, float blur, float mask, vec4 tone) {
+ // Map to [-1, 1], then bend each axis by the square of the other axis (screen curvature).
+ vec2 pos = ipos * twoDivOutputSize - vec2(1.0);
+ pos *= vec2(1.0 + (pos.y * pos.y) * warp.x, 1.0 + (pos.x * pos.x) * warp.y);
+ // Edge fade factor; it is only applied further down when CRTS_WARP is defined.
+ float vin = 1.0 - ((1.0 - CrtsSatF1(pos.x * pos.x)) * (1.0 - CrtsSatF1(pos.y * pos.y)));
+ vin = CrtsSatF1((-vin) * inputHeight + inputHeight);
+ // Back to source-image pixel coordinates.
+ pos = pos * halfInputSize + halfInputSize;
+
+ // Texel centre of the upper row (y0) and of the left-most of the four columns (x0).
+ float y0 = floor(pos.y - 0.5) + 0.5;
+ float x0 = floor(pos.x - 1.5) + 0.5;
+ vec2 p = vec2(x0 * rcpInputSize.x, y0 * rcpInputSize.y);
+
+ // Row A is fetched left to right; row B (one texel further in y) right to left.
+ vec3 colA[4], colB[4];
+ for (int i = 0; i < 4; i++) {
+ colA[i] = CrtsFetch(p);
+ p.x += rcpInputSize.x;
+ }
+ p.y += rcpInputSize.y;
+ for (int i = 3; i >= 0; i--) {
+ p.x -= rcpInputSize.x;
+ colB[i] = CrtsFetch(p);
+ }
+
+ // Vertical (scanline) weights of the two rows; `thin` scales the cosine argument.
+ float off = pos.y - y0;
+ float scanA = cos(min(HALF, off * thin) * PI2) * HALF + HALF;
+ float scanB = cos(min(HALF, (-off) * thin + thin) * PI2) * HALF + HALF;
+
+ // Horizontal weights of the four columns, normalised by their sum (pixT).
+ float off0 = pos.x - x0;
+ float pix[4];
+ for (int i = 0; i < 4; i++) {
+ float diff = off0 - float(i);
+ pix[i] = exp2(blur * diff * diff);
+ }
+ float pixT = CrtsRcpF1(pix[0] + pix[1] + pix[2] + pix[3]);
+
+ #ifdef CRTS_WARP
+ pixT *= vin;
+ #endif
+
+ scanA *= pixT;
+ scanB *= pixT;
+
+ vec3 color = (colA[0] * pix[0] + colA[1] * pix[1] + colA[2] * pix[2] + colA[3] * pix[3]) * scanA + (colB[0] * pix[0] + colB[1] * pix[1] + colB[2] * pix[2] + colB[3] * pix[3]) * scanB;
+ color *= CrtsMask(ipos, mask);
+
+ // Optional tone mapping: split into peak and per-channel ratio, reshape each, recombine.
+ #ifdef CRTS_TONE
+ float peak = max(1.0 / (256.0 * 65536.0), CrtsMax3F1(color.r, color.g, color.b));
+ vec3 ratio = color * CrtsRcpF1(peak);
+ #ifdef CRTS_CONTRAST
+ peak = pow(peak, tone.x);
+ #endif
+ peak = peak * CrtsRcpF1(peak * tone.y + tone.z);
+ #ifdef CRTS_SATURATION
+ ratio = pow(ratio, vec3(tone.w));
+ #endif
+ return ratio * peak;
+ #else
+ return color;
+ #endif
+}
+
+// Gamma-encodes a colour with the power curve pow(color, 1/2.2).
+vec3 linearToSRGB(vec3 color) {
+	vec3 exponent = vec3(1.0 / 2.2);
+	return pow(color, exponent);
+}
+
+// Fragment entry point. Flips the texture coordinate vertically, then either
+// passes the source texel through unchanged (crt_emulation off) or outputs the
+// CRT-filtered colour scaled by `brightness` with alpha forced to 1.0.
+void main() {
+	vec2 fragCoord = vec2(frag_texture_coord.x, 1.0 - frag_texture_coord.y);
+
+	if (!crt_emulation) {
+		outcolor = texture(iChannel0, fragCoord);
+		return;
+	}
+
+	vec3 crt = CrtsFilter(
+		fragCoord.xy * resolution,
+		src_image_size / resolution,
+		src_image_size * vec2(0.5),
+		1.0 / src_image_size,
+		1.0 / resolution,
+		2.0 / resolution,
+		src_image_size.y,
+		vec2(1.0 / 24.0, 1.0 / 16.0), // warp value
+		INPUT_THIN,
+		INPUT_BLUR,
+		INPUT_MASK,
+		tone_data
+	);
+
+	crt *= brightness;
+	outcolor = vec4(crt, 1.0);
+}
diff --git a/base/incbin.h b/base/incbin.h
new file mode 100644
index 0000000..fc1ecd2
--- /dev/null
+++ b/base/incbin.h
@@ -0,0 +1,50 @@
+
+
+// Two-level stringification: STR(x) expands x first, STR2 then turns the result into a string literal.
+#define STR2(x) #x
+#define STR(x) STR2(x)
+
+// Section the embedded data is placed in (writable .data on both platforms).
+#ifdef _WIN32
+#define INCBIN_SECTION ".data, \"aw\""
+#else
+#define INCBIN_SECTION ".data"
+#endif
+
+/*
+#ifdef _WIN32
+#define INCBIN_SECTION ".rdata, \"dr\""
+#else
+#define INCBIN_SECTION ".rodata"
+#endif
+*/
+
+// INCBIN(name, file): embeds `file` into the binary through the assembler's .incbin directive.
+//   name##_data : start of the data, aligned to 64 bytes
+//   name##_end  : label placed after the data AND after 64 trailing zero bytes, so
+//                 name##_end - name##_data is the file size plus 64
+// `file` is passed through STR(), so it reaches the assembler as written; pass it as a quoted string literal.
+// NOTE(review): the asm switches section without restoring the previous one - presumably
+// harmless because the compiler emits its own section directives; verify.
+#define INCBIN(name, file) \
+ __asm__(".section " INCBIN_SECTION "\n" \
+ ".global " STR(name) "_data\n" \
+ ".balign 64\n" \
+ STR(name) "_data:\n" \
+ ".incbin " STR(file) "\n" \
+ ".zero 64\n" \
+ ".global " STR(name) "_end\n" \
+ ".balign 1\n" \
+ STR(name) "_end:\n"); \
+ extern __attribute__((aligned(64))) char name##_data[]; \
+ extern char name##_end[];
+
+/*
+// INCBIN_SHADER(vertexshader, "#version 140", "shader_header.glsl", "vertex_shader.glsl");
+#define INCBIN_SHADER(name, version_str, header_file, shader_file) \
+ __asm__(".section " INCBIN_SECTION "\n" \
+ ".global " STR(name) "_data\n" \
+ ".balign 64\n" \
+ STR(name) "_data:\n" \
+ ".ascii \"" version_str "\\n\"\n" \
+ ".incbin \"" header_file "\"\n" \
+ ".incbin \"" shader_file "\"\n" \
+ ".byte 0\n" \
+ ".global " STR(name) "_end\n" \
+ ".balign 1\n" \
+ STR(name) "_end:\n"); \
+ extern __attribute__((aligned(64))) char name##_data[]; \
+ extern char name##_end[];
+
+*/
diff --git a/base/opengl.c b/base/opengl.c
new file mode 100644
index 0000000..641a2a0
--- /dev/null
+++ b/base/opengl.c
@@ -0,0 +1,133 @@
+
+#include "shader.c"
+#include "shader.h"
+#include "data/fragment_shader.h"
+#include "data/vertex_shader.h"
+
+/* [=]===^=[ setup_render_target ]================================================================^===[=] */
+/*
+ * (Re)create state.texture as a render_width x render_height sRGB texture.
+ *
+ * The previous texture name is deleted first, so this can be called again
+ * whenever the render resolution changes. The texture uses nearest filtering
+ * and clamps at the edges.
+ */
+static void setup_render_target(void) {
+ glDeleteTextures(1, &state.texture);
+
+ glGenTextures(1, &state.texture);
+ glBindTexture(GL_TEXTURE_2D, state.texture);
+ // Initial contents are read from `buffer`; render_frame() re-uploads the texels every frame.
+ // NOTE(review): no unpack row length is set here, so `buffer` is read as tightly packed rows - confirm that is acceptable for the initial contents.
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, state.render_width, state.render_height, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, buffer);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
+}
+
+/* [=]===^=[ compile_shader ]==============================================================^===[=] */
+/*
+ * Create and compile one shader stage from a single NUL-terminated source string.
+ *
+ * shader_type   - GL_VERTEX_SHADER or GL_FRAGMENT_SHADER
+ * shader_source - GLSL source text
+ *
+ * Returns the shader object name. A compile failure is only reported through
+ * DEBUG_PRINT; the (uncompiled) shader name is still returned to the caller.
+ */
+static GLuint compile_shader(GLenum shader_type, const char *shader_source) {
+    GLuint id = glCreateShader(shader_type);
+    glShaderSource(id, 1, &shader_source, 0);
+    glCompileShader(id);
+
+    GLint compiled;
+    GLchar message[512];
+    glGetShaderiv(id, GL_COMPILE_STATUS, &compiled);
+    if(compiled) {
+        return id;
+    }
+
+    glGetShaderInfoLog(id, sizeof(message), 0, message);
+    DEBUG_PRINT("%s shader compilation failed:\n%s\n", (shader_type == GL_VERTEX_SHADER) ? "Vertex" : "Fragment", message);
+    return id;
+}
+
+/* [=]===^=[ opengl_setup ]================================================================^===[=] */
+/*
+ * One-time OpenGL initialisation: loads the GL entry points, sets fixed
+ * pipeline state, builds the main shader program from the two source strings,
+ * caches its uniform locations in `state`, and creates the VAO/VBO/EBO for the
+ * full-screen quad that render_frame() draws.
+ *
+ * vertex_shader_src / fragment_shader_src - NUL-terminated GLSL sources.
+ *
+ * Compile and link failures are reported through DEBUG_PRINT only; setup
+ * continues regardless.
+ */
+static void opengl_setup(const char *vertex_shader_src, const char *fragment_shader_src) {
+ gl_loader();
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ glDisable(GL_DEPTH_TEST);
+ glEnable(GL_BLEND);
+ glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+ glDisable(GL_CULL_FACE);
+
+ // Shader setup
+ GLuint vertex_shader = compile_shader(GL_VERTEX_SHADER, vertex_shader_src);
+ GLuint fragment_shader = compile_shader(GL_FRAGMENT_SHADER, fragment_shader_src);
+
+ state.shader_program = glCreateProgram();
+ glAttachShader(state.shader_program, vertex_shader);
+ glAttachShader(state.shader_program, fragment_shader);
+
+ // Attribute slots are bound before linking so they match the indices passed to glVertexAttribPointer below.
+ glBindAttribLocation(state.shader_program, 0, "position");
+ glBindAttribLocation(state.shader_program, 1, "texture_coord");
+ glLinkProgram(state.shader_program);
+ GLint success;
+ glGetProgramiv(state.shader_program, GL_LINK_STATUS, &success);
+ if(!success) {
+ char log[512];
+ glGetProgramInfoLog(state.shader_program, sizeof(log), NULL, log);
+ DEBUG_PRINT("Shader Linking Failed: %s\n", log);
+ }
+
+ // The shader objects are no longer needed once the program has been linked.
+ glDeleteShader(vertex_shader);
+ glDeleteShader(fragment_shader);
+ glUseProgram(state.shader_program);
+
+ // Calculations for the shader.
+ state.contrast = 1.0f;
+ state.saturation = 0.3f;
+ state.brightness = 1.0f;
+ CrtsTone(state.tone_data, state.contrast, state.saturation, INPUT_THIN, INPUT_MASK); // NOTE(peter): Move this into the mainloop if change of contrast/saturation is added as an interactive thing.
+
+ // Retrieve shader uniforms
+ state.uniform_resolution = glGetUniformLocation(state.shader_program, "resolution");
+ state.uniform_src_image_size = glGetUniformLocation(state.shader_program, "src_image_size");
+ state.uniform_brightness = glGetUniformLocation(state.shader_program, "brightness");
+ state.uniform_tone = glGetUniformLocation(state.shader_program, "tone_data");
+ state.uniform_crt_emulation = glGetUniformLocation(state.shader_program, "crt_emulation");
+ state.uniform_sampler_location = glGetUniformLocation(state.shader_program, "iChannel0");
+
+ glGenVertexArrays(1, &state.vao);
+ glGenBuffers(1, &state.vbo);
+ glGenBuffers(1, &state.ebo);
+ glBindVertexArray(state.vao);
+
+ // Vertex data: Position (x, y) and Texture Coordinates (u, v)
+ const float vertices[] = {
+ -1.0f, -1.0f, 0.0f, 0.0f, // Bottom-left
+ 1.0f, -1.0f, 1.0f, 0.0f, // Bottom-right
+ 1.0f, 1.0f, 1.0f, 1.0f, // Top-right
+ -1.0f, 1.0f, 0.0f, 1.0f // Top-left
+ };
+
+ // Two triangles (0,1,2) and (2,3,0) covering the quad.
+ static const unsigned int indices[] = { 0, 1, 2, 2, 3, 0 };
+
+ glBindBuffer(GL_ARRAY_BUFFER, state.vbo);
+ glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo);
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
+
+ // Interleaved layout: 4 floats per vertex, position first, texture coordinate second.
+ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); // Position
+ glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float))); // Texture Coord
+ glEnableVertexAttribArray(0);
+ glEnableVertexAttribArray(1);
+}
+
+/* [=]===^=[ render_frame ]=================================================================^===[=] */
+/*
+ * Draw one frame: clear to opaque black, upload `display_buffer` into
+ * state.texture, refresh every shader uniform from `state`, and draw the
+ * full-screen quad into state.viewport.
+ */
+__attribute__((always_inline))
+static inline void render_frame(void) {
+ glClearColor(.0f, 0.f, 0.f, 1.f);
+ glClear(GL_COLOR_BUFFER_BIT);
+
+ glUseProgram(state.shader_program);
+ glBindVertexArray(state.vao);
+ glBindBuffer(GL_ARRAY_BUFFER, state.vbo);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo);
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_2D, state.texture);
+ // Replace the whole texture with this frame's pixels (render_width x render_height texels).
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, state.render_width, state.render_height, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, display_buffer);
+ glUniform2f(state.uniform_src_image_size, (float)state.render_width, (float)state.render_height);
+ glUniform2f(state.uniform_resolution, (float)state.viewport.w, (float)state.viewport.h);
+ glUniform1f(state.uniform_brightness, state.brightness);
+ glUniform4f(state.uniform_tone, state.tone_data[0], state.tone_data[1], state.tone_data[2], state.tone_data[3]);
+ glUniform1i(state.uniform_crt_emulation, state.toggle_crt_emulation);
+ // The sampler reads texture unit 0, the unit selected by glActiveTexture above.
+ glUniform1i(state.uniform_sampler_location, 0);
+ glViewport(state.viewport.x, state.viewport.y, state.viewport.w, state.viewport.h);
+ glEnableVertexAttribArray(0);
+ glEnableVertexAttribArray(1);
+ // 6 indices = the two triangles of the quad.
+ glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
+}
diff --git a/base/opengl_loader.c b/base/opengl_loader.c
new file mode 100644
index 0000000..36d2444
--- /dev/null
+++ b/base/opengl_loader.c
@@ -0,0 +1,210 @@
+#ifdef _WIN32
+typedef __int64 GLintptr;
+#else
+typedef intptr_t GLintptr;
+#endif
+typedef void GLvoid;
+typedef unsigned char GLboolean;
+typedef unsigned char GLubyte;
+typedef char GLchar;
+
+typedef int GLint;
+typedef int GLsizei;
+
+typedef unsigned int GLenum;
+typedef unsigned int GLuint;
+typedef unsigned int GLbitfield;
+
+typedef float GLfloat;
+typedef double GLdouble;
+
+typedef unsigned long long GLsizeiptr;
+
+#define GL_NO_ERROR 0
+#define GL_INFO_LOG_LENGTH 0x8b84
+#define GL_ZERO 0x0000
+#define GL_ONE 0x0001
+#define GL_ALPHA 0x1906
+#define GL_BLEND 0x0be2
+#define GL_CLAMP_TO_EDGE 0x812f
+#define GL_COLOR_BUFFER_BIT 0x4000
+#define GL_COMPILE_STATUS 0x8b81
+#define GL_DEPTH_TEST 0x0b71
+#define GL_FRAMEBUFFER_SRGB 0x8db9
+#define GL_FRAGMENT_SHADER 0x8b30
+#define GL_LINK_STATUS 0x8b82
+#define GL_MODELVIEW 0x1700
+#define GL_NEAREST 0x2600
+#define GL_ONE_MINUS_SRC_ALPHA 0x0303
+#define GL_PROJECTION 0x1701
+#define GL_QUADS 0x0007
+#define GL_RGBA 0x1908
+#define GL_RGBA8 0x8058
+#define GL_SCISSOR_TEST 0x0c11
+#define GL_SRGB8_ALPHA8 0x8c43
+#define GL_SRC_ALPHA 0x0302
+#define GL_TEXTURE0 0x84c0
+#define GL_TEXTURE_2D 0x0de1
+#define GL_TEXTURE_COORD_ARRAY 0x8078
+#define GL_TEXTURE_MAG_FILTER 0x2800
+#define GL_TEXTURE_MIN_FILTER 0x2801
+#define GL_TEXTURE_WRAP_S 0x2802
+#define GL_TEXTURE_WRAP_T 0x2803
+#define GL_UNSIGNED_BYTE 0x1401
+#define GL_UNSIGNED_INT_8_8_8_8 0x8035
+#define GL_VERTEX_SHADER 0x8b31
+#define GL_ARRAY_BUFFER 0x8892
+#define GL_ELEMENT_ARRAY_BUFFER 0x8893
+#define GL_STATIC_DRAW 0x88e4
+#define GL_FLOAT 0x1406
+#define GL_FALSE 0
+#define GL_TRUE 1
+#define GL_LINEAR 0x2601
+#define GL_FUNC_ADD 0x8006
+#define GL_CULL_FACE 0x0b44
+#define GL_STREAM_DRAW 0x88e0
+#define GL_WRITE_ONLY 0x88b9
+#define GL_TRIANGLES 0x0004
+#define GL_UNSIGNED_INT 0x1405
+#define GL_UNSIGNED_SHORT 0x1403
+#define GL_MULTISAMPLE 0x809d
+#define GL_DYNAMIC_DRAW 0x88e8
+#define GL_RED 0x1903
+#define GL_FRAMEBUFFER_WIDTH 0x9310
+#define GL_FRAMEBUFFER_HEIGHT 0x9311
+#define GL_VIEWPORT 0x0ba2
+#define GL_PIXEL_UNPACK_BUFFER 0x88ec
+#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88ef
+#define GL_VERTEX_ARRAY_BINDING 0x85b5
+#define GL_ARRAY_BUFFER_BINDING 0x8894
+#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895
+#define GL_CURRENT_PROGRAM 0x8b8d
+#define GL_TEXTURE_BINDING_2D 0x8069
+#define GL_LINEAR_MIPMAP_LINEAR 0x2703
+#define GL_SRGB_ALPHA 0x8c42 /* unsized sRGB+alpha format; 0x8c43 is GL_SRGB8_ALPHA8 */
+#define GL_UNPACK_ROW_LENGTH 0x0cf2
+#define GL_UNPACK_SKIP_PIXELS 0x0cf4
+#define GL_UNPACK_SKIP_ROWS 0x0cf3
+#define GL_TRIANGLE_STRIP 0x0005
+#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 /* pname for glGetVertexAttribiv */
+
+#define DECLARE_GL_FUNCTION(Name, ReturnType, ...) typedef ReturnType (*type_##Name)(__VA_ARGS__);
+#define DECLARE_GLOBAL_FUNCTION(Name, ...) type_##Name Name;
+
+#define GL_FUNCTIONS(X) \
+ X(glActiveTexture, void, GLenum texture) \
+ X(glAttachShader, void, GLuint program, GLuint shader) \
+ X(glBindBuffer, void, GLenum target, GLuint buffer) \
+ X(glBindTexture, void, GLenum target, GLuint texture) \
+ X(glBufferData, void, GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage) \
+ X(glClear, void, GLbitfield mask) \
+ X(glClearColor, void, GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) \
+ X(glCompileShader, void, GLuint shader) \
+ X(glCreateProgram, GLuint) \
+ X(glCreateShader, GLuint, GLenum type) \
+ X(glDeleteShader, void, GLuint shader) \
+ X(glDrawElements, void, GLenum mode, GLsizei count, GLenum type, const GLvoid *indices) \
+ X(glEnableVertexAttribArray, void, GLuint index) \
+ X(glGenBuffers, void, GLsizei n, GLuint *buffers) \
+ X(glGenTextures, void, GLsizei n, GLuint *textures) \
+ X(glGetShaderInfoLog, void, GLuint shader, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \
+ X(glGetShaderiv, void, GLuint shader, GLenum pname, GLint *params) \
+ X(glGetUniformLocation, GLint, GLuint program, const GLchar *name) \
+ X(glLinkProgram, void, GLuint program) \
+ X(glShaderSource, void, GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length) \
+ X(glTexImage2D, void, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels) \
+ X(glTexParameteri, void, GLenum target, GLenum pname, GLint param) \
+ X(glTexSubImage2D, void, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels) \
+ X(glUniform1f, void, GLint location, GLfloat v0) \
+ X(glUniform1i, void, GLint location, GLint v0) \
+ X(glUniform2f, void, GLint location, GLfloat v0, GLfloat v1) \
+ X(glUniform4f, void, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3) \
+ X(glUseProgram, void, GLuint program) \
+ X(glVertexAttribPointer, void, GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid *pointer) \
+ X(glViewport, void, GLint x, GLint y, GLsizei width, GLsizei height) \
+ X(glDeleteProgram, void, GLuint program) \
+ X(glDeleteBuffers, void, GLsizei n, const GLuint *buffers) \
+ X(glDeleteTextures, void, GLsizei n, const GLuint *textures) \
+ X(glEnable, void, GLenum cap) \
+ X(glGenerateMipmap, void, GLenum target) \
+ X(glGetProgramiv, void, GLuint program, GLenum pname, GLint *params) \
+ X(glGetAttribLocation, GLint, GLuint program, const GLchar *name) \
+ X(glDetachShader, void, GLuint program, GLuint shader) \
+ X(glUniformMatrix4fv, void, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value) \
+ X(glMapBuffer, void*, GLenum target, GLenum access) \
+ X(glUnmapBuffer, GLboolean, GLenum target) \
+ X(glBlendEquation, void, GLenum mode) \
+ X(glBlendFunc, void, GLenum sfactor, GLenum dfactor) \
+ X(glDisable, void, GLenum cap) \
+ X(glScissor, void, GLint x, GLint y, GLsizei width, GLsizei height) \
+ X(glTexCoord2f, void, GLfloat s, GLfloat t) \
+ X(glVertex2f, void, GLfloat x, GLfloat y) \
+ X(glGetError, GLenum) \
+ X(glGetProgramInfoLog, void, GLuint program, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \
+ X(glGenVertexArrays, void, GLsizei n, GLuint *arrays) \
+ X(glBindVertexArray, void, GLuint array) \
+ X(glDeleteVertexArrays, void, GLsizei n, const GLuint *arrays) \
+ X(glDrawArrays, void, GLenum mode, GLint first, GLsizei count) \
+ X(glBufferSubData, void, GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid *data) \
+ X(glDisableVertexAttribArray, void, GLuint index) \
+ X(glGetIntegerv, void, GLenum pname, GLint *data) \
+ X(glBindAttribLocation, void, GLuint program, GLuint index, const GLchar *name) \
+ X(glGetUniformfv, void, GLuint program, GLint location, GLfloat *params) \
+ X(glPixelStorei, void, GLenum pname, GLint param) \
+ X(glGetVertexAttribiv, void, GLuint index, GLenum pname, GLint *params) \
+ X(glFinish, void)
+
+GL_FUNCTIONS(DECLARE_GL_FUNCTION)
+GL_FUNCTIONS(DECLARE_GLOBAL_FUNCTION)
+
+
+#if defined(_WIN32)
+/*
+ * Resolve an OpenGL entry point by name on Windows.
+ *
+ * wglGetProcAddress() is tried first. It only resolves extension / post-1.1
+ * functions, and some drivers signal failure with the sentinel values 1, 2, 3
+ * or -1 instead of NULL, so those are treated as "not found" as well. In that
+ * case the symbol is looked up in opengl32.dll, which exports the GL 1.1 core
+ * functions. The module handle is loaded once and reused.
+ *
+ * Returns the function address, or NULL if the name could not be resolved.
+ */
+static void *get_any_gl_address(const char *name) {
+    static HMODULE opengl32;
+
+    void *p = (void *)wglGetProcAddress(name);
+    if(p == 0 || p == (void *)0x1 || p == (void *)0x2 || p == (void *)0x3 || p == (void *)-1) {
+        p = 0;
+        if(!opengl32) {
+            opengl32 = LoadLibraryA("opengl32.dll");
+        }
+        if(opengl32) {
+            p = (void *)GetProcAddress(opengl32, name);
+        }
+    }
+    return p;
+}
+
+#define GetOpenGLFunction(Name, ...) \
+ *(void **)&Name = (void *)get_any_gl_address(#Name); \
+ if(!Name) { \
+ DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \
+ exit(EXIT_FAILURE); \
+ }
+
+#elif defined(__linux__)
+#include <dlfcn.h>
+/*
+ * Resolve an OpenGL entry point by name on Linux without linking against libGL.
+ *
+ * On the first call libGL.so.1 is opened with dlopen() and the library's own
+ * "glXGetProcAddress" symbol is looked up and cached; every call then forwards
+ * procName to that cached resolver. Failure to open the library or to find the
+ * resolver is fatal (message via DEBUG_PRINT, then exit).
+ */
+static void *glXGetProcAddress(const GLubyte *procName) {
+    static void *(*resolver)(const GLubyte *);
+
+    if(resolver) {
+        return resolver(procName);
+    }
+
+    void *gl_library = dlopen("libGL.so.1", RTLD_LAZY | RTLD_GLOBAL);
+    if(!gl_library) {
+        DEBUG_PRINT("Error: Unable to load libGL.so.1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    resolver = dlsym(gl_library, "glXGetProcAddress");
+    if(!resolver) {
+        DEBUG_PRINT("Error: Unable to find glXGetProcAddress\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return resolver(procName);
+}
+
+#define GetOpenGLFunction(Name, ...) \
+ *(void **)&Name = (void *)glXGetProcAddress((const GLubyte *)#Name); \
+ if(!Name) { \
+ DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \
+ exit(EXIT_FAILURE); \
+ }
+#endif
+
+/*
+ * Resolve every entry point listed in GL_FUNCTIONS into its global function
+ * pointer. GetOpenGLFunction terminates the process if any name is missing.
+ */
+__attribute__((cold, noinline, section(".init_section")))
+static void gl_loader(void) {
+    GL_FUNCTIONS(GetOpenGLFunction);
+} \ No newline at end of file
diff --git a/base/overlay.c b/base/overlay.c
new file mode 100644
index 0000000..2316b24
--- /dev/null
+++ b/base/overlay.c
@@ -0,0 +1,375 @@
+#include "data/font_info.h"
+INCBIN(_font_texture, "data/font.ugg");
+struct ugg *font_texture_data = (struct ugg*)_font_texture_data;
+
+/* Overlay state structure: GL object names and uniform locations used by the overlay_* functions. */
+struct overlay {
+ GLuint vao; /* vertex array object recording the overlay vertex layout */
+ GLuint vbo; /* vertex buffer: 4 floats per vertex (pos.x, pos.y, u, v) */
+ GLuint ebo; /* index buffer: 6 unsigned short indices per quad */
+ GLuint program; /* overlay shader program */
+ GLint loc_proj; /* location of uniform "u_projection" */
+ GLint loc_tex; /* location of uniform "u_font_texture" */
+ GLint loc_color; /* location of uniform "u_color" */
+ GLint loc_pos_offset; /* location of uniform "u_pos_offset" */
+ GLuint font_texture; /* 512x512 RGBA glyph atlas */
+ GLuint white_texture; /* 1x1 white texture used for solid rectangles */
+};
+
+static struct overlay overlay_state;
+static float mat[16];
+
+/* ------------------------------------------------------------------------- */
+/* Shader sources for a top-left orthographic approach */
+/* ------------------------------------------------------------------------- */
+static const char* overlay_vertex_shader_src =
+"#version 140\n"
+"in vec2 in_pos;\n"
+"in vec2 in_uv;\n"
+"uniform mat4 u_projection;\n"
+"uniform vec2 u_pos_offset;\n"
+"out vec2 v_uv;\n"
+"void main() {\n"
+" vec2 pos = in_pos + u_pos_offset;\n"
+" gl_Position = u_projection * vec4(pos, 0.0, 1.0);\n"
+" v_uv = in_uv;\n"
+"}\n";
+
+static const char* overlay_fragment_shader_src =
+"#version 140\n"
+"uniform sampler2D u_font_texture;\n"
+"uniform vec4 u_color;\n"
+"in vec2 v_uv;\n"
+"out vec4 frag_color;\n"
+"void main() {\n"
+" vec4 tex_sample = texture(u_font_texture, v_uv);\n"
+" float alpha = tex_sample.a;\n"
+" frag_color = vec4(u_color.rgb * alpha, u_color.a * alpha);\n"
+"}\n";
+
+/* ------------------------------------------------------------------------- */
+/* Shader helpers */
+/* ------------------------------------------------------------------------- */
+/*
+ * Compile one overlay shader stage.
+ *
+ * source - NUL-terminated GLSL source
+ * type   - GL_VERTEX_SHADER or GL_FRAGMENT_SHADER
+ *
+ * Returns the shader object name, or 0 if the shader could not be created or
+ * failed to compile (the info log is written to stderr and the shader object
+ * is deleted).
+ */
+__attribute__((cold, noinline, section(".init_section")))
+static GLuint overlay_compile_shader(const char *source, GLenum type) {
+    GLuint shader = glCreateShader(type);
+    if(shader == 0) {
+        fprintf(stderr, "Error creating shader of type %d.\n", (int)type);
+        return 0;
+    }
+
+    glShaderSource(shader, 1, &source, 0);
+    glCompileShader(shader);
+
+    /* Check for compilation errors */
+    GLint success;
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
+    if(!success) {
+        /*
+         * Bound the copy by the size of the local buffer, not by the length the
+         * driver reports: a log longer than the buffer would otherwise overflow
+         * it. Pre-terminate so an empty log still prints as an empty string.
+         */
+        char log[2048];
+        log[0] = '\0';
+        glGetShaderInfoLog(shader, sizeof(log), 0, log);
+        fprintf(stderr, "Shader compilation failed:\n%s\n", log);
+        glDeleteShader(shader);
+        return 0;
+    }
+
+    return shader;
+}
+
+/*
+ * Build the overlay shader program from a vertex and a fragment source string.
+ *
+ * Returns the linked program name, or 0 if linking failed (the info log is
+ * written to stderr and the program is deleted). The intermediate shader
+ * objects are deleted in both cases.
+ */
+__attribute__((cold, noinline, section(".init_section")))
+static GLuint overlay_create_program(const char *vs_source, const char *fs_source) {
+    GLuint vs = overlay_compile_shader(vs_source, GL_VERTEX_SHADER);
+    GLuint fs = overlay_compile_shader(fs_source, GL_FRAGMENT_SHADER);
+    GLuint prog = glCreateProgram();
+
+    glAttachShader(prog, vs);
+    glAttachShader(prog, fs);
+    glLinkProgram(prog);
+
+    /* Check for linking errors */
+    GLint success;
+    glGetProgramiv(prog, GL_LINK_STATUS, &success);
+    if(!success) {
+        /*
+         * Bound the copy by the size of the local buffer, not by the length the
+         * driver reports: a log longer than the buffer would otherwise overflow
+         * it. Pre-terminate so an empty log still prints as an empty string.
+         */
+        char log[2048];
+        log[0] = '\0';
+        glGetProgramInfoLog(prog, sizeof(log), 0, log);
+        fprintf(stderr, "Program linking failed:\n%s\n", log);
+        glDeleteProgram(prog);
+        prog = 0;
+    }
+
+    /* Shaders can be deleted after linking, whether or not it succeeded */
+    glDeleteShader(vs);
+    glDeleteShader(fs);
+
+    return prog;
+}
+
+/*
+ * Fill m (16 floats, in the layout consumed by glUniformMatrix4fv with
+ * transpose = GL_FALSE) with an orthographic projection that maps pixel
+ * coordinates with the origin in the top-left corner, x to the right and y
+ * downwards, onto clip space: (0,0) -> (-1,+1) and (w,h) -> (+1,-1).
+ *
+ * All 16 elements are written, so the caller's array does not have to be
+ * zero-initialised beforehand.
+ */
+static void overlay_make_ortho_top_left(float w, float h, float m[16]) {
+    for(int i = 0; i < 16; i++) {
+        m[i] = 0.0f;
+    }
+    m[0] = 2.0f / w;
+    m[5] = -2.0f / h;
+    m[10] = -1.0f;
+    m[12] = -1.0f;
+    m[13] = 1.0f;
+    m[15] = 1.0f;
+}
+
+#define MAX_GLYPHS 128 // NOTE(peter): max glyphs per ROW
+#define MAX_VERTICES (MAX_PROFILING_ENTRIES * MAX_GLYPHS * 4 * 4)
+float vertices[MAX_VERTICES] __attribute__((section(".bss")));
+
+/*
+ * Create every GL resource the overlay needs: the shader program and its
+ * uniform locations, a VAO with a dynamic vertex buffer and a static index
+ * buffer holding the quad pattern, the font atlas texture and a 1x1 white
+ * texture. Results are stored in overlay_state.
+ */
+static void overlay_init(void) {
+ overlay_state.program = overlay_create_program(overlay_vertex_shader_src, overlay_fragment_shader_src);
+ overlay_state.loc_proj = glGetUniformLocation(overlay_state.program, "u_projection");
+ overlay_state.loc_tex = glGetUniformLocation(overlay_state.program, "u_font_texture");
+ overlay_state.loc_color = glGetUniformLocation(overlay_state.program, "u_color");
+ overlay_state.loc_pos_offset = glGetUniformLocation(overlay_state.program, "u_pos_offset");
+
+ glGenVertexArrays(1, &overlay_state.vao);
+ glBindVertexArray(overlay_state.vao);
+
+ // Reserve storage for MAX_VERTICES floats; the contents are uploaded per draw with glBufferSubData.
+ glGenBuffers(1, &overlay_state.vbo);
+ glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo);
+ glBufferData(GL_ARRAY_BUFFER, MAX_VERTICES * sizeof(float), 0, GL_DYNAMIC_DRAW);
+
+ glGenBuffers(1, &overlay_state.ebo);
+
+ // Index pattern for one quad per glyph: triangles (0,1,2) and (0,2,3), offset by 4 vertices per quad.
+ // NOTE(review): indices are 16-bit, so MAX_PROFILING_ENTRIES * MAX_GLYPHS * 4 must not exceed 65536 - MAX_PROFILING_ENTRIES is defined elsewhere, confirm.
+ unsigned short indices[MAX_PROFILING_ENTRIES * MAX_GLYPHS * 6];
+ size_t index_offset = 0;
+ size_t vertex_offset = 0;
+
+ for(size_t i = 0; i < MAX_PROFILING_ENTRIES * MAX_GLYPHS; i++) {
+ indices[index_offset++] = vertex_offset + 0;
+ indices[index_offset++] = vertex_offset + 1;
+ indices[index_offset++] = vertex_offset + 2;
+ indices[index_offset++] = vertex_offset + 0;
+ indices[index_offset++] = vertex_offset + 2;
+ indices[index_offset++] = vertex_offset + 3;
+ vertex_offset += 4;
+ }
+
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, overlay_state.ebo);
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
+
+ GLint in_pos_attrib = glGetAttribLocation(overlay_state.program, "in_pos");
+ GLint in_uv_attrib = glGetAttribLocation(overlay_state.program, "in_uv");
+
+ // Interleaved layout: 4 floats per vertex, position (2 floats) followed by uv (2 floats).
+ glEnableVertexAttribArray(in_pos_attrib);
+ glVertexAttribPointer(in_pos_attrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0);
+ glEnableVertexAttribArray(in_uv_attrib);
+ glVertexAttribPointer(in_uv_attrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float)));
+
+ glBindVertexArray(0);
+ glBindBuffer(GL_ARRAY_BUFFER, 0);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
+
+
+ // Expand the 512x512 one-byte-per-texel font data to RGBA: white with the source byte as alpha, or all zeros where the byte is 0.
+ // NOTE(review): the result of mks_alloc is used without a NULL check - confirm its failure behaviour.
+ uint8_t *rgba_data = mks_alloc(512 * 512 * 4);
+ uint8_t *dst = rgba_data;
+ for(uint32_t i = 0; i < 512 * 512; ++i) {
+ uint8_t alpha = font_texture_data->data[i];
+ if(alpha) {
+ *dst++ = 255;
+ *dst++ = 255;
+ *dst++ = 255;
+ *dst++ = alpha;
+
+ } else {
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ }
+ }
+ glGenTextures(1, &overlay_state.font_texture);
+ glBindTexture(GL_TEXTURE_2D, overlay_state.font_texture);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 512, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgba_data);
+ // Mipmaps are generated although the active min filter (GL_LINEAR) does not sample them.
+ glGenerateMipmap(GL_TEXTURE_2D);
+ mks_free(rgba_data);
+
+ // 1x1 opaque white texture, bound by overlay_render_rect() so the same shader can draw solid rectangles.
+ glGenTextures(1, &overlay_state.white_texture);
+ glBindTexture(GL_TEXTURE_2D, overlay_state.white_texture);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ uint32_t white[] = { 0xffffffff };
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, 1, 1, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, white);
+
+ glBindTexture(GL_TEXTURE_2D, 0);
+}
+
+/*
+ * Release every GL object created by overlay_init(): the program, the vertex
+ * and index buffers, the VAO, and both textures (font atlas and the 1x1 white
+ * texture).
+ */
+__attribute__((cold, noinline, section(".init_section")))
+static void overlay_shutdown(void) {
+    glDeleteProgram(overlay_state.program);
+    glDeleteBuffers(1, &overlay_state.vbo);
+    glDeleteBuffers(1, &overlay_state.ebo);
+    glDeleteVertexArrays(1, &overlay_state.vao);
+    glDeleteTextures(1, &overlay_state.font_texture);
+    glDeleteTextures(1, &overlay_state.white_texture);
+}
+
+/*
+ * Draw one solid rectangle with corners (x1, y1) and (x2, y2) in the colour
+ * (r, g, b, a). The quad is written to the start of the overlay vertex buffer
+ * and drawn with the 1x1 white texture bound, using the first 6 indices of the
+ * overlay index buffer.
+ */
+static void overlay_render_rect(float x1, float y1, float x2, float y2, float r, float g, float b, float a) {
+    /* pos.x, pos.y, u, v for each of the four corners */
+    float quad[4 * 4] = {
+        x1, y1, 0.f, 0.f, /* top-left */
+        x2, y1, 1.f, 0.f, /* top-right */
+        x2, y2, 1.f, 1.f, /* bottom-right */
+        x1, y2, 0.f, 1.f  /* bottom-left */
+    };
+
+    /* Upload the quad */
+    glBindVertexArray(overlay_state.vao);
+    glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo);
+    glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(quad), quad);
+
+    /* Colour, texture unit 0 and a zero position offset */
+    glUseProgram(overlay_state.program);
+    glUniform4f(overlay_state.loc_color, r, g, b, a);
+    glActiveTexture(GL_TEXTURE0);
+    glBindTexture(GL_TEXTURE_2D, overlay_state.white_texture);
+    glUniform1i(overlay_state.loc_tex, 0);
+    glUniform2f(overlay_state.loc_pos_offset, 0.0f, 0.0f);
+
+    glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
+
+    glBindTexture(GL_TEXTURE_2D, 0);
+    glBindVertexArray(0);
+}
+
+/*
+ * Draw up to MAX_PROFILING_ENTRIES text rows in one draw call.
+ *
+ * lines      - array of MAX_PROFILING_ENTRIES NUL-terminated strings; NULL
+ *              entries are skipped and do not consume a row
+ * x, y       - top-left position of the first row, in overlay pixel coordinates
+ * r, g, b, a - text colour
+ *
+ * One quad per character is appended to the global vertices[] array, which is
+ * then uploaded and drawn with the font atlas bound. At most
+ * MAX_PROFILING_ENTRIES * MAX_GLYPHS quads are generated - the capacity of both
+ * vertices[] and the index buffer built in overlay_init() - and any characters
+ * beyond that are dropped instead of overrunning the buffers.
+ *
+ * The caller is expected to have the overlay program bound with its projection
+ * set, as overlay_render() does.
+ */
+__attribute__((always_inline))
+static inline void overlay_render_text_line(uint8_t **lines, float x, float y, float r, float g, float b, float a) {
+    const uint32_t max_glyphs = MAX_PROFILING_ENTRIES * MAX_GLYPHS;
+    uint32_t vertex_offset = 0;
+    uint32_t glyph_count = 0;
+
+    glBindVertexArray(overlay_state.vao);
+    glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo);
+
+    float target_width = 24.f * .40f;
+    float target_height = 48.f * .40f;
+
+    // Convert font units to pixels
+    float _scale = 48.0f / (1900 - (-480));
+    float scaled_ascent = 1900 * _scale;
+    float scale_x = target_width / 24.0f;
+    float scale_y = target_height / 48.0f;
+
+    // Apply baseline correction once (not per character)
+    y += scaled_ascent * scale_y;
+
+    for(int i = 0; i < MAX_PROFILING_ENTRIES && glyph_count < max_glyphs; i++) {
+        uint8_t *text = lines[i];
+        if(!text) continue;
+
+        float cx = x;
+        // Stop emitting quads once the vertex/index buffers are full.
+        while(*text && glyph_count < max_glyphs) {
+            uint8_t c = *text++;
+            struct glyph_info *g = &glyph_data[c];
+
+            // Normalize texture coordinates (atlas is 512x512)
+            float u0 = g->x / 512.f;
+            float v0 = g->y / 512.f;
+            float u1 = (g->x + g->width) / 512.f;
+            float v1 = (g->y + g->height) / 512.f;
+
+            // Apply width & height scaling
+            float glyph_width = g->width * scale_x;
+            float glyph_height = g->height * scale_y;
+
+            // Snap the glyph origin to whole pixels
+            float x0 = roundf(cx + (g->x_offset * scale_x));
+            float y0 = roundf(y + (g->y_offset * scale_y));
+            float x1 = x0 + glyph_width;
+            float y1 = y0 + glyph_height;
+
+            // Generate quad for the character
+            vertices[vertex_offset + 0] = x0;
+            vertices[vertex_offset + 1] = y0;
+            vertices[vertex_offset + 2] = u0;
+            vertices[vertex_offset + 3] = v0;
+
+            vertices[vertex_offset + 4] = x1;
+            vertices[vertex_offset + 5] = y0;
+            vertices[vertex_offset + 6] = u1;
+            vertices[vertex_offset + 7] = v0;
+
+            vertices[vertex_offset + 8] = x1;
+            vertices[vertex_offset + 9] = y1;
+            vertices[vertex_offset + 10] = u1;
+            vertices[vertex_offset + 11] = v1;
+
+            vertices[vertex_offset + 12] = x0;
+            vertices[vertex_offset + 13] = y1;
+            vertices[vertex_offset + 14] = u0;
+            vertices[vertex_offset + 15] = v1;
+
+            // Move cursor forward
+            cx += g->advance * scale_x;
+            vertex_offset += 16;
+            glyph_count++;
+        }
+        y += target_height; // Move to next row
+    }
+
+
+    if(glyph_count > 0) {
+        glBindVertexArray(overlay_state.vao);
+        glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo);
+        glBufferSubData(GL_ARRAY_BUFFER, 0, vertex_offset * sizeof(float), vertices);
+
+        glActiveTexture(GL_TEXTURE0);
+        glBindTexture(GL_TEXTURE_2D, overlay_state.font_texture);
+        glUniform1i(overlay_state.loc_tex, 0);
+        glUniform4f(overlay_state.loc_color, r, g, b, a);
+        glDrawElements(GL_TRIANGLES, glyph_count * 6, GL_UNSIGNED_SHORT, 0);
+        glBindVertexArray(0);
+    }
+}
+
+/*
+ * Draw one overlay panel: a translucent dark rectangle at (rect_x, rect_y) of
+ * size rect_w x rect_h, with the given text rows on top. The text is drawn
+ * twice - first in black, offset by 2 pixels, then in white - which gives it a
+ * drop shadow.
+ *
+ * window_w / window_h set both the viewport and the pixel-space projection.
+ */
+static void overlay_render(float rect_x, float rect_y, float rect_w, float rect_h, uint8_t **lines, int window_w, int window_h) {
+    const float rect_right = rect_x + rect_w;
+    const float rect_bottom = rect_y + rect_h;
+
+    /* Pixel-space projection covering the whole window */
+    overlay_make_ortho_top_left((float)window_w, (float)window_h, mat);
+    glViewport(0, 0, window_w, window_h);
+    glUseProgram(overlay_state.program);
+    glUniformMatrix4fv(overlay_state.loc_proj, 1, GL_FALSE, mat);
+
+    /* Background, then shadow, then foreground text */
+    overlay_render_rect(rect_x, rect_y, rect_right, rect_bottom, .01f, .013f, .04f, .6f);
+    overlay_render_text_line(lines, rect_x + 2.f, rect_y + 2.f, 0.f, 0.f, 0.f, 1.f);
+    overlay_render_text_line(lines, rect_x, rect_y, 1.f, 1.f, 1.f, 1.f);
+}
+
+/* [=]===^=[ debug_render ]=================================================================^===[=] */
+__attribute__((section(".bss")))
+uint8_t *debug_lines[MAX_PROFILING_ENTRIES];
+/*
+ * Draw the debug overlay when state.overlay is set: one panel with a row per
+ * profiling slot that has a non-zero count, and a second panel showing the
+ * total allocated memory. Row text is formatted into debug_line_buffer and
+ * referenced through debug_lines[].
+ */
+static void debug_render(void) {
+    if(!state.overlay) {
+        return;
+    }
+
+    /* Panel 1: profiling rows; slots without samples get a NULL line */
+    size_t cursor = 0;
+    for(uint32_t slot = 0; slot < MAX_PROFILING_ENTRIES; ++slot) {
+        if(!state.debug.timings[slot].count) {
+            debug_lines[slot] = 0;
+            continue;
+        }
+        debug_lines[slot] = &debug_line_buffer[cursor];
+        /* +1 keeps the terminating NUL between consecutive rows */
+        cursor += stbsp_sprintf(
+            (char *)&debug_line_buffer[cursor],
+            "%25s: cycles=%7" PRIu64 ", count=%2u, cycles/count=%7" PRIu64,
+            state.debug.timings[slot].name,
+            state.debug.timings[slot].cycles,
+            state.debug.timings[slot].count,
+            state.debug.timings[slot].cycles / state.debug.timings[slot].count) + 1;
+    }
+    overlay_render(20.f, 20.f, 705.f, 280.f, debug_lines, state.screen_width, state.screen_height);
+
+    /* Panel 2: a single row, reusing the line buffer from the start */
+    cursor = 0;
+    for(uint32_t slot = 0; slot < MAX_PROFILING_ENTRIES; ++slot) {
+        debug_lines[slot] = 0;
+    }
+    debug_lines[0] = &debug_line_buffer[cursor];
+    cursor += stbsp_sprintf((char *)&debug_line_buffer[cursor], "Total memory allocated: %" PRIu64, state.total_allocated) + 1;
+    overlay_render(20.f, 340.f, 500.f, 200.f, debug_lines, state.screen_width, state.screen_height);
+}
+
+/* [=]===^=[ reset_profiling_data ]=================================================================^===[=] */
+/* Zero the whole state.debug block, discarding everything it has accumulated. */
+static inline void reset_profiling_data(void) {
+    memset(&state.debug, 0, sizeof state.debug);
+}
diff --git a/base/render.c b/base/render.c
new file mode 100644
index 0000000..e7118d0
--- /dev/null
+++ b/base/render.c
@@ -0,0 +1,79 @@
+
+
+
+// Get pointer to where in the buffer to render; RENDER_START(0,0) is the top left.
+// Rows are BUFFER_WIDTH pixels apart, the same stride used by update_render_position()
+// and clear_buffer(), instead of a hard-coded shift that assumed a 2048-pixel-wide buffer.
+#define RENDER_START(x, y) (state.display_buffer + ((y) * (BUFFER_WIDTH)) + (x))
+
+// Center X Coordinate for Rendering
+#define CENTER_X(w) ((state.render_width - (w)) >> 1)
+
+/* [=]===^=[ update_render_position ]=================================================================^===[=] */
+/*
+ * Recompute where the render area sits inside the BUFFER_WIDTH x BUFFER_HEIGHT
+ * backing buffer: render_x / render_y are half of the unused width / height
+ * (which centres the area), and state.display_buffer is pointed at the pixel
+ * at (render_x, render_y) inside `buffer`.
+ */
+static void update_render_position(void) {
+ state.render_x = (BUFFER_WIDTH - state.render_width) >> 1;
+ state.render_y = (BUFFER_HEIGHT - state.render_height) >> 1;
+ state.display_buffer = buffer + (state.render_y * BUFFER_WIDTH) + state.render_x;
+}
+
+/* [=]===^=[ change_resolution ]=================================================================^===[=] */
+/*
+ * Switch the render area to new_width x new_height. Does nothing when the size
+ * is unchanged; otherwise stores the new size, re-centres the render area in
+ * the backing buffer and recreates the GL texture at the new size.
+ */
+static void change_resolution(uint32_t new_width, uint32_t new_height) {
+    if(new_width == state.render_width && new_height == state.render_height) {
+        return;
+    }
+
+    state.render_width = new_width;
+    state.render_height = new_height;
+    update_render_position();
+    setup_render_target();
+}
+
+/* [=]===^=[ clear_buffer ]=================================================================^===[=] */
+/*
+ * Zero the render area: state.render_height rows of state.render_width 32-bit
+ * pixels, starting at RENDER_START(0, 0), with rows BUFFER_WIDTH pixels apart.
+ */
+__attribute__((always_inline, hot))
+static inline void clear_buffer(void) {
+    PROFILE_FUNCTION();
+    uint32_t * restrict row = RENDER_START(0, 0);
+    uint32_t rows_left = state.render_height;
+
+    while(rows_left--) {
+        memset(row, 0, state.render_width * 4); /* 4 bytes per pixel */
+        row += BUFFER_WIDTH;
+    }
+}
+
+/* [=]===^=[ set_decay ]=================================================================^===[=] */
+/*
+ * Set the blend weights used by apply_phosphor_decay().
+ *
+ * old_weight - weight of the previous frame in 1/256 units; values above 256
+ *              are clamped to 256.
+ *
+ * The new-frame weight is always the complement of the clamped value, so the
+ * two weights sum to exactly 256. (Deriving it from the unclamped argument
+ * would wrap to a huge 16-bit value for any input above 256.)
+ */
+static uint16_t _old_weight;
+static uint16_t _new_weight;
+static void set_decay(uint16_t old_weight) {
+    _old_weight = old_weight > 256 ? 256 : old_weight;
+    _new_weight = 256 - _old_weight;
+}
+
+/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */
+/*
+ * Blend the freshly rendered frame into `display_buffer`, 4 pixels at a time:
+ *
+ *   dst = (dst * _old_weight + src * _new_weight) >> 8      (per 8-bit channel)
+ *
+ * src is the render area inside the backing buffer (rows BUFFER_WIDTH pixels
+ * apart); dst is `display_buffer` (rows render_width pixels apart). The
+ * weights are the ones stored by set_decay().
+ *
+ * NOTE(review): the inner loop always handles 4 pixels, so this presumably
+ * relies on render_width being a multiple of 4 - confirm against callers.
+ */
+__attribute__((always_inline, hot))
+static inline void apply_phosphor_decay(void) {
+ PROFILE_FUNCTION();
+ // Broadcast each weight to all sixteen 16-bit lanes.
+ __m256i old_weight = _mm256_set1_epi16(_old_weight);
+ __m256i new_weight = _mm256_set1_epi16(_new_weight);
+ // Selects the low byte of every 32-bit pixel (presumably the alpha channel given the 8_8_8_8 upload format - confirm).
+ __m128i alpha_mask = _mm_set1_epi32(0x000000ff);
+ uint32_t render_width = state.render_width;
+ uint32_t render_height = state.render_height;
+ uint32_t * restrict src = RENDER_START(0, 0);
+ uint32_t * restrict dst = display_buffer;
+
+ for(uint32_t y = 0; y < render_height; ++y, src += BUFFER_WIDTH, dst += render_width) {
+ for(uint32_t x = 0; x < render_width; x += 4) {
+ // Prefetch the same columns two rows ahead in both buffers.
+ _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
+ _mm_prefetch((char*)&dst[x + 2 * render_width], _MM_HINT_T0);
+
+ __m128i new_pixels = _mm_loadu_si128((__m128i*)&src[x]);
+ __m128i old_pixels = _mm_loadu_si128((__m128i*)&dst[x]);
+
+ // Widen the 16 bytes (4 pixels x 4 channels) to 16-bit lanes so the weighted products fit.
+ __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
+ __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);
+
+ // Weighted sum with a saturating add, then divide by 256.
+ __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
+ blended = _mm256_srli_epi16(blended, 8);
+
+ // Narrow back to 8 bits per channel, then OR the old pixel's low byte into the result.
+ __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
+ final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
+ _mm_storeu_si128((__m128i*)&dst[x], final_pixels);
+ }
+ }
+}
+
diff --git a/base/settings.h b/base/settings.h
new file mode 100644
index 0000000..be448c4
--- /dev/null
+++ b/base/settings.h
@@ -0,0 +1,11 @@
+
+#ifdef DEBUG_INTERNAL
+
+// #define PERF_TEST
+#ifndef PERF_TEST
+#define PROFILER
+#endif
+
+
+
+#endif
diff --git a/base/shader.c b/base/shader.c
new file mode 100644
index 0000000..a6e16f6
--- /dev/null
+++ b/base/shader.c
@@ -0,0 +1,44 @@
+#include "shader.h"
+
+//==============================================================
+//
+// CPU CODE
+//
+//==============================================================
+// TONAL CONTROL CONSTANT GENERATION
+//--------------------------------------------------------------
+// Make sure to use same CRTS_MASK_* defines on CPU and GPU!!!!!
+//==============================================================
+/*
+ * dst - Output 4 float array.
+ *
+ * contrast - Increase contrast, ranges from,
+ * 1.0 = no change
+ * 2.0 = very strong contrast (over 2.0 for even more)
+ *
+ * saturation - Increase saturation, ranges from,
+ * 0.0 = no change
+ * 1.0 = increased saturation (over 1.0 for even more)
+ *
+ * thin, mask - Inputs shared between CrtsTone() and CrtsFilter()
+ *
+ */
+// Fills dst[0..3] with the tone constants derived from contrast, saturation, thin and mask.
+static void CrtsTone(float * restrict dst, float contrast, float saturation, float thin, float mask) {
+#if defined(CRTS_MASK_NONE)
+	// No mask in use: treat it as fully open.
+	mask = 1.0f;
+#endif
+#if defined(CRTS_MASK_GRILLE_LITE)
+	// Normal R mask is {1.0,mask,mask}
+	// LITE R mask is {mask,1.0,1.0}
+	mask = 0.5f + mask * 0.5f;
+#endif
+	const float mid_in_pow = powf(0.18f, contrast);
+	const float mid_out = 0.18f / ((1.5f - thin) * (0.5f * mask + 0.5f));
+
+	// Numerators and denominators are kept as single expressions, exactly as in the
+	// reference formula, so the floating-point evaluation order is unchanged.
+	const float num1 = (-mid_in_pow) + mid_out;
+	const float den1 = (1.0f - mid_in_pow) * mid_out;
+	const float num2 = (-mid_in_pow) * mid_out + mid_in_pow;
+	const float den2 = mid_out * (-mid_in_pow) + mid_out;
+
+	dst[0] = contrast;
+	dst[1] = num1 / den1;
+	dst[2] = num2 / den2;
+	dst[3] = contrast + saturation;
+}
diff --git a/base/shader.h b/base/shader.h
new file mode 100644
index 0000000..a9cb400
--- /dev/null
+++ b/base/shader.h
@@ -0,0 +1,34 @@
+//==============================================================
+// SETUP FOR CRTS
+//==============================================================
+#define CRTS_TONE 1
+#define CRTS_CONTRAST 1
+#define CRTS_SATURATION 1
+//--------------------------------------------------------------
+#define CRTS_WARP 1
+//--------------------------------------------------------------
+// Try different masks
+// #define CRTS_MASK_GRILLE 1
+// #define CRTS_MASK_GRILLE_LITE 1
+// #define CRTS_MASK_NONE 1
+#define CRTS_MASK_SHADOW 1
+// --------------------------------------------------------------
+// Scanline thinness
+// 0.50 = fused scanlines
+// 0.70 = recommended default
+// 1.00 = thinner scanlines (too thin)
+#define INPUT_THIN 0.7
+//--------------------------------------------------------------
+// Horizontal scan blur
+// -3.0 = pixely
+// -2.5 = default
+// -2.0 = smooth
+// -1.0 = too blurry
+#define INPUT_BLUR -2.5
+//--------------------------------------------------------------
+// Shadow mask effect, ranges from,
+// 0.25 = large amount of mask (not recommended, too dark)
+// 0.50 = recommended default
+// 1.00 = no shadow mask
+#define INPUT_MASK 0.5
+
diff --git a/base/state.c b/base/state.c
new file mode 100644
index 0000000..f883d64
--- /dev/null
+++ b/base/state.c
@@ -0,0 +1,134 @@
+
+// Dimensions, in pixels, of the large backing buffer declared below.
+#define BUFFER_WIDTH 2048
+#define BUFFER_HEIGHT 1024
+// Target frame rate; FRAMETIME below is derived from it.
+#define FPS 50
+// NOTE(review): presumably the slack subtracted from sleeps in the frame pacer - the use is not in this file.
+#ifdef _WIN32
+ #define SLEEP_MARGIN_NS 330000 // 0.33ms (Windows timing functionality is utter garbage)
+#else
+ #define SLEEP_MARGIN_NS 100000 // 0.1ms
+#endif
+
+#define ONE_SECOND_NS 1000000000
+// Duration of one frame in nanoseconds (20,000,000 ns at 50 FPS).
+#define FRAMETIME (ONE_SECOND_NS / FPS)
+
+// Dimensions, in pixels, used to size display_buffer.
+#define SCREEN_WIDTH 360
+#define SCREEN_HEIGHT 270
+
+// Backing pixel buffer: BUFFER_WIDTH * BUFFER_HEIGHT 32-bit pixels, placed in .bss and 4096-byte aligned.
+uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+// Output buffer: SCREEN_WIDTH * SCREEN_HEIGHT 32-bit pixels, placed in .bss and 4096-byte aligned.
+// NOTE(review): apply_phosphor_decay() writes state.render_width * state.render_height pixels here,
+// so those must not exceed SCREEN_WIDTH * SCREEN_HEIGHT - confirm where they are set.
+uint32_t display_buffer[SCREEN_WIDTH * SCREEN_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+
+#ifdef PROFILER
+// Number of slots in the timings table; the PROFILE_* macros index it with __COUNTER__ values.
+#define MAX_PROFILING_ENTRIES (64)
+
+// Accumulated timing for one profiled site.
+struct function_cycles {
+ const char *name;   // label given to PROFILE_NAMED, or __func__ for PROFILE_FUNCTION
+ uint64_t cycles;    // running sum of (end TSC - start TSC)
+ uint32_t count;     // number of completed measurements
+};
+
+// All profiling slots, embedded in struct state when PROFILER is defined.
+struct debug_state {
+ struct function_cycles timings[MAX_PROFILING_ENTRIES];
+};
+#endif
+
+// Global runtime state: output buffer pointer, viewport, input deltas, tone parameters,
+// OpenGL handles, render geometry and (when PROFILER is defined) profiling data.
+struct state {
+ // Pointer first (8-byte aligned), followed by 4-byte fields
+ uint32_t *display_buffer;
+ struct { int32_t x, y, w, h; } viewport;
+ int32_t mouse_dx;
+ int32_t mouse_dy;
+ int32_t filter_override; // Manual override: -1 = automatic, 0 = off, 1 = on
+ float filter_frequency; // Frequency in Hz for squarewave toggle
+
+ // Tone controls; tone_data has four floats, the same size CrtsTone() fills (TODO confirm it is the target)
+ float contrast;
+ float saturation;
+ float brightness;
+ float tone_data[4];
+
+ // OpenGL Objects
+ GLuint shader_program;
+ GLuint texture;
+ GLuint vao;
+ GLuint vbo;
+ GLuint ebo;
+
+ // Shader Uniforms
+ GLuint uniform_resolution;
+ GLuint uniform_src_image_size;
+ GLuint uniform_brightness;
+ GLuint uniform_tone;
+ GLuint uniform_crt_emulation;
+ GLuint uniform_sampler_location;
+ GLuint uniform_tex_bounds;
+
+ // Rendering & Dynamic Resolution
+ uint32_t screen_width; // for the debugger
+ uint32_t screen_height; // for the debugger
+ uint32_t render_width; // The actual remake resolution (e.g., 360)
+ uint32_t render_height; // The actual remake resolution (e.g., 270)
+ uint32_t render_x; // X position inside BUFFER_WIDTH (to track centering)
+ uint32_t render_y; // Y position inside BUFFER_HEIGHT (to track centering)
+ uint32_t frame_number;
+ size_t total_allocated;
+ bool freewheeling;
+ bool toggle_crt_emulation;
+ bool fullscreen;
+#ifdef PROFILER
+ // Present only in profiling builds
+ bool overlay;
+ struct debug_state debug;
+#endif
+};
+
+// The single global instance used throughout the program.
+struct state state;
+
+static struct remake_callbacks *current_part = 0;
+static void render_callback();
+static void audio_callback(int16_t *audio_buffer, size_t frames);
+static void init_callback();
+
+#ifndef PROFILER
+#define PROFILE_NAMED(name)
+#define PROFILE_FUNCTION()
+
+#else
+
+struct profiling_context {
+ uint32_t func_id;
+};
+
+__attribute__((section(".bss")))
+uint8_t debug_line_buffer[MAX_PROFILING_ENTRIES * 256]; // NOTE(peter): for storing all performance strings in an array to print to the debug-output
+
+// Returns the current time-stamp counter value as read by __rdtscp.
+// The auxiliary value the instruction also reports is discarded.
+__attribute__((always_inline, hot))
+static inline uint64_t read_tsc() {
+	unsigned int tsc_aux;
+	return __rdtscp(&tsc_aux);
+}
+
+// Forward declaration carrying the `unused` attribute, so a build in which no PROFILE_* marker
+// references the function does not warn.
+__attribute__((always_inline, hot))
+static inline void end_profiling(struct profiling_context *ctx) __attribute__((unused));
+// Cleanup handler attached to the `ctx` variable declared by the PROFILE_* macros; it runs when
+// that variable goes out of scope.
+// The macros subtract the start TSC from `cycles`, so adding the end TSC here leaves the
+// elapsed cycle count accumulated in the slot. Also bumps the slot's measurement count.
+__attribute__((always_inline, hot))
+static inline void end_profiling(struct profiling_context *ctx) {
+ state.debug.timings[ctx->func_id].cycles += read_tsc();
+ state.debug.timings[ctx->func_id].count++;
+}
+
+/*
+ * Scoped profiling markers.
+ *
+ * PROFILE_NAMED(name) claims the next __COUNTER__ value as a slot in state.debug.timings,
+ * labels the slot with `name` while its count is still zero, subtracts the start TSC from the
+ * slot's cycle sum, and declares a `ctx` variable whose cleanup handler (end_profiling) adds
+ * the end TSC and bumps the count when the enclosing scope exits.
+ *
+ * PROFILE_FUNCTION() is PROFILE_NAMED labelled with the enclosing function's name.
+ *
+ * The slot index is checked at compile time: a site whose id does not fit in
+ * MAX_PROFILING_ENTRIES would otherwise write past the end of the timings table.
+ *
+ * The macros declare `func_id` and `ctx` in the current scope, so use at most one marker
+ * per scope.
+ */
+#define PROFILE_NAMED(name) \
+	enum { profile_site_id_ = __COUNTER__ }; \
+	_Static_assert(profile_site_id_ < MAX_PROFILING_ENTRIES, "too many profiling sites: raise MAX_PROFILING_ENTRIES"); \
+	uint32_t func_id = profile_site_id_; \
+	if((state).debug.timings[func_id].count == 0) \
+		(state).debug.timings[func_id] = (struct function_cycles){(name), 0, 0}; \
+	(state).debug.timings[func_id].cycles -= read_tsc(); \
+	struct profiling_context ctx __attribute__((cleanup(end_profiling))) = { func_id };
+
+#define PROFILE_FUNCTION() PROFILE_NAMED(__func__)
+#endif
+
+
+
+
diff --git a/base/stb_sprintf.h b/base/stb_sprintf.h
new file mode 100644
index 0000000..4b4c171
--- /dev/null
+++ b/base/stb_sprintf.h
@@ -0,0 +1,1906 @@
+// stb_sprintf - v1.10 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types: sc uidBboXx p AaGgEef n
+// lengths : hh h ll j z t I64 I32 I
+//
+// Contributors:
+// Fabian "ryg" Giesen (reformatting)
+// github:aganm (attribute format)
+//
+// Contributors (bugfixes):
+// github:d26435
+// github:trex78
+// github:account-login
+// Jari Komppa (SI suffixes)
+// Rohit Nirmal
+// Marcin Wojdyr
+// Leonard Ritter
+// Stefano Zanotti
+// Adam Allison
+// Arvid Gerstmann
+// Markus Kolb
+//
+// LICENSE:
+//
+// See end of file for license information.
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length read-backs, etc.
+
+Why would you need this if sprintf already exists? Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ
+for 64-bit integers (among other small things), so this lets you use
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do its work. It
+does cast doubles to S64s and shifts and divides U64s, which does
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
+ Convert an arg list into a buffer. stbsp_snprintf always returns
+ a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
+ Convert a va_list arg list into a buffer. stbsp_vsnprintf always returns
+ a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+ typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+ Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
+ Your callback can then copy the chars out, print them or whatever.
+ This function is actually the workhorse for everything else.
+ The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
+ // you return the next buffer to use or 0 to stop converting
+
+void stbsp_set_separators( char comma, char period )
+ Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses an internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision). This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than
+with MSVC or GCC (but they don't match each other either). We also
+don't attempt to find the minimum length matching float (pre-MSVC15
+doesn't either).
+
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld). It supports the C99 specifiers
+for size_t and ptrdiff_t (%jd %zd) as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is
+"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn
+2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three
+$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the
+suffix, add "_" specifier: "%_$d" -> "2.53M".
+
+In addition to octal and hexadecimal conversions, you can print
+integers in binary: "%b" for 4 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+#if defined(__clang__)
+ #if defined(__has_feature) && defined(__has_attribute)
+ #if __has_feature(address_sanitizer)
+ #if __has_attribute(__no_sanitize__)
+ #define STBSP__ASAN __attribute__((__no_sanitize__("address")))
+ #elif __has_attribute(__no_sanitize_address__)
+ #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #elif __has_attribute(__no_address_safety_analysis__)
+ #define STBSP__ASAN __attribute__((__no_address_safety_analysis__))
+ #endif
+ #endif
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+ #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #endif
+#endif
+
+#ifndef STBSP__ASAN
+#define STBSP__ASAN
+#endif
+
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static STBSP__ASAN
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C" STBSP__ASAN
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF STBSP__ASAN
+#endif
+#endif
+
+#if defined(__has_attribute)
+ #if __has_attribute(format)
+ #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+ #endif
+#endif
+
+#ifndef STBSP__ATTRIBUTE_FORMAT
+#define STBSP__ATTRIBUTE_FORMAT(fmt,va)
+#endif
+
+#ifdef _MSC_VER
+#define STBSP__NOTUSED(v) (void)(v)
+#else
+#define STBSP__NOTUSED(v) (void)sizeof(v)
+#endif
+
+#include <stdarg.h> // for va_arg(), va_list()
+#include <stddef.h> // size_t, ptrdiff_t
+
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len);
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4);
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period);
+
+#endif // STB_SPRINTF_H_INCLUDE
+
+#ifdef STB_SPRINTF_IMPLEMENTATION
+
+#define stbsp__uint32 unsigned int
+#define stbsp__int32 signed int
+
+#ifdef _MSC_VER
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
+#else
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
+#endif
+#define stbsp__uint16 unsigned short
+
+#ifndef stbsp__uintptr
+#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__)
+#define stbsp__uintptr stbsp__uint64
+#else
+#define stbsp__uintptr stbsp__uint32
+#endif
+#endif
+
+#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define STB_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses
+#define STBSP__UNALIGNED(code)
+#else
+#define STBSP__UNALIGNED(code) code
+#endif
+
+#ifndef STB_SPRINTF_NOFLOAT
+// internal float utility functions
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits);
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value);
+#define STBSP__SPECIAL 0x7000
+#endif
+
+// Separator characters used when formatting numbers; both are replaced by set_separators().
+static char stbsp__period = '.';
+static char stbsp__comma = ',';
+// Lookup table holding the two-character strings "00" through "99" back to back
+// (200 characters plus the terminating NUL). The decimal integer formatter indexes it with
+// (n % 100) * 2 and copies one pair with a single 16-bit load.
+static struct
+{
+ short temp; // force next field to be 2-byte aligned
+ char pair[201];
+} stbsp__digitpair =
+{
+ 0,
+ "00010203040506070809101112131415161718192021222324"
+ "25262728293031323334353637383940414243444546474849"
+ "50515253545556575859606162636465666768697071727374"
+ "75767778798081828384858687888990919293949596979899"
+};
+
+// Replace the thousands separator (pcomma) and the decimal point (pperiod) used by
+// subsequent formatting calls.
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod)
+{
+	stbsp__comma = pcomma;
+	stbsp__period = pperiod;
+}
+
+// Bit flags collected in `fl` while a conversion specification is parsed.
+// The metric-suffix float path additionally uses bits 24 and up of `fl` as a counter of how
+// many times the value was divided, so flag bits must stay below that.
+#define STBSP__LEFTJUST 1          // '-' flag
+#define STBSP__LEADINGPLUS 2       // '+' flag
+#define STBSP__LEADINGSPACE 4      // ' ' flag
+#define STBSP__LEADING_0X 8        // '#' flag
+#define STBSP__LEADINGZERO 16      // '0' flag
+#define STBSP__INTMAX 32           // argument is read as a 64-bit integer
+#define STBSP__TRIPLET_COMMA 64    // '\'' flag: thousands separators
+#define STBSP__NEGATIVE 128        // set by the formatter when the value is negative
+#define STBSP__METRIC_SUFFIX 256   // first '$'
+#define STBSP__HALFWIDTH 512       // 'h' length modifier
+#define STBSP__METRIC_NOSPACE 1024 // '_' flag: no space before the metric suffix
+#define STBSP__METRIC_1024 2048    // second '$': divide by 1024 instead of 1000
+#define STBSP__METRIC_JEDEC 4096   // third '$': JEDEC suffixes (no 'i')
+
+// Write the sign prefix for a number into `sign`, stored as a length byte followed by the
+// characters: sign[0] is 0 (no prefix) or 1, and sign[1] is then '-', ' ' or '+'.
+// Priority: negative value, then the ' ' flag, then the '+' flag.
+static void stbsp__lead_sign(stbsp__uint32 fl, char *sign)
+{
+	char mark;
+
+	if(fl & STBSP__NEGATIVE) {
+		mark = '-';
+	} else if(fl & STBSP__LEADINGSPACE) {
+		mark = ' ';
+	} else if(fl & STBSP__LEADINGPLUS) {
+		mark = '+';
+	} else {
+		// nothing to emit; sign[1] is left untouched
+		sign[0] = 0;
+		return;
+	}
+
+	sign[0] = 1;
+	sign[1] = mark;
+}
+
+// Returns the number of characters in `s` before the terminating NUL, but never more than
+// `limit`. Works in three phases: byte steps up to a 4-byte boundary, aligned 32-bit word
+// steps while at least 4 characters of budget remain, then byte steps for the remainder.
+static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit)
+{
+ char const * sn = s;
+
+ // get up to 4-byte alignment
+ for(;;) {
+ if(((stbsp__uintptr)sn & 3) == 0)
+ break;
+
+ if(!limit || *sn == 0)
+ return (stbsp__uint32)(sn - s);
+
+ ++sn;
+ --limit;
+ }
+
+ // scan over 4 bytes at a time to find terminating 0
+ // this will intentionally scan up to 3 bytes past the end of buffers,
+ // but because it works 4B aligned, it will never cross page boundaries
+ // (hence the STBSP__ASAN markup; the over-read here is intentional
+ // and harmless)
+ while(limit >= 4) {
+ stbsp__uint32 v = *(stbsp__uint32 *)sn;
+ // bit hack to find if there's a 0 byte in there
+ if((v - 0x01010101) & (~v) & 0x80808080UL)
+ break;
+
+ sn += 4;
+ limit -= 4;
+ }
+
+ // handle the last few characters to find actual size
+ // (also pinpoints the NUL inside the word that stopped the loop above)
+ while(limit && *sn) {
+ ++sn;
+ --limit;
+ }
+
+ return (stbsp__uint32)(sn - s);
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va)
+{
+ static char hex[] = "0123456789abcdefxp";
+ static char hexu[] = "0123456789ABCDEFXP";
+ char *bf;
+ char const *f;
+ int tlen = 0;
+
+ bf = buf;
+ f = fmt;
+ for(;;) {
+ stbsp__int32 fw, pr, tz;
+ stbsp__uint32 fl;
+
+ // macros for the callback buffer stuff
+ #define stbsp__chk_cb_bufL(bytes) \
+ { \
+ int len = (int)(bf - buf); \
+ if((len + (bytes)) >= STB_SPRINTF_MIN) { \
+ tlen += len; \
+ if(0 == (bf = buf = callback(buf, user, len))) \
+ goto done; \
+ } \
+ }
+ #define stbsp__chk_cb_buf(bytes) \
+ { \
+ if(callback) { \
+ stbsp__chk_cb_bufL(bytes); \
+ } \
+ }
+ #define stbsp__flush_cb() \
+ { \
+ stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \
+ } // flush if there is even one byte in the buffer
+ #define stbsp__cb_buf_clamp(cl, v) \
+ cl = v; \
+ if(callback) { \
+ int lg = STB_SPRINTF_MIN - (int)(bf - buf); \
+ if(cl > lg) \
+ cl = lg; \
+ }
+
+ // fast copy everything up to the next % (or end of string)
+ for(;;) {
+ while(((stbsp__uintptr)f) & 3) {
+ schk1:
+ if(f[0] == '%')
+ goto scandd;
+ schk2:
+ if(f[0] == 0)
+ goto endfmt;
+ stbsp__chk_cb_buf(1);
+ *bf++ = f[0];
+ ++f;
+ }
+ for(;;) {
+ // Check if the next 4 bytes contain %(0x25) or end of string.
+ // Using the 'hasless' trick:
+ // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+ stbsp__uint32 v, c;
+ v = *(stbsp__uint32 *)f;
+ c = (~v) & 0x80808080;
+ if(((v ^ 0x25252525) - 0x01010101) & c)
+ goto schk1;
+ if((v - 0x01010101) & c)
+ goto schk2;
+ if(callback)
+ if((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
+ goto schk1;
+ #ifdef STB_SPRINTF_NOUNALIGNED
+ if(((stbsp__uintptr)bf) & 3) {
+ bf[0] = f[0];
+ bf[1] = f[1];
+ bf[2] = f[2];
+ bf[3] = f[3];
+ } else
+ #endif
+ {
+ *(stbsp__uint32 *)bf = v;
+ }
+ bf += 4;
+ f += 4;
+ }
+ }
+ scandd:
+
+ ++f;
+
+ // ok, we have a percent, read the modifiers first
+ fw = 0;
+ pr = -1;
+ fl = 0;
+ tz = 0;
+
+ // flags
+ for(;;) {
+ switch (f[0]) {
+ // if we have left justify
+ case '-':
+ fl |= STBSP__LEFTJUST;
+ ++f;
+ continue;
+ // if we have leading plus
+ case '+':
+ fl |= STBSP__LEADINGPLUS;
+ ++f;
+ continue;
+ // if we have leading space
+ case ' ':
+ fl |= STBSP__LEADINGSPACE;
+ ++f;
+ continue;
+ // if we have leading 0x
+ case '#':
+ fl |= STBSP__LEADING_0X;
+ ++f;
+ continue;
+ // if we have thousand commas
+ case '\'':
+ fl |= STBSP__TRIPLET_COMMA;
+ ++f;
+ continue;
+ // if we have kilo marker (none->kilo->kibi->jedec)
+ case '$':
+ if(fl & STBSP__METRIC_SUFFIX) {
+ if(fl & STBSP__METRIC_1024) {
+ fl |= STBSP__METRIC_JEDEC;
+ } else {
+ fl |= STBSP__METRIC_1024;
+ }
+ } else {
+ fl |= STBSP__METRIC_SUFFIX;
+ }
+ ++f;
+ continue;
+ // if we don't want space between metric suffix and number
+ case '_':
+ fl |= STBSP__METRIC_NOSPACE;
+ ++f;
+ continue;
+ // if we have leading zero
+ case '0':
+ fl |= STBSP__LEADINGZERO;
+ ++f;
+ goto flags_done;
+ default: goto flags_done;
+ }
+ }
+ flags_done:
+
+ // get the field width
+ if(f[0] == '*') {
+ fw = va_arg(va, stbsp__uint32);
+ ++f;
+ } else {
+ while((f[0] >= '0') && (f[0] <= '9')) {
+ fw = fw * 10 + f[0] - '0';
+ f++;
+ }
+ }
+ // get the precision
+ if(f[0] == '.') {
+ ++f;
+ if(f[0] == '*') {
+ pr = va_arg(va, stbsp__uint32);
+ ++f;
+ } else {
+ pr = 0;
+ while((f[0] >= '0') && (f[0] <= '9')) {
+ pr = pr * 10 + f[0] - '0';
+ f++;
+ }
+ }
+ }
+
+ // handle integer size overrides
+ switch (f[0]) {
+ // are we halfwidth?
+ case 'h':
+ fl |= STBSP__HALFWIDTH;
+ ++f;
+ if(f[0] == 'h')
+ ++f; // QUARTERWIDTH
+ break;
+ // are we 64-bit (unix style)
+ case 'l':
+ fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0);
+ ++f;
+ if(f[0] == 'l') {
+ fl |= STBSP__INTMAX;
+ ++f;
+ }
+ break;
+ // are we 64-bit on intmax? (c99)
+ case 'j':
+ fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0;
+ ++f;
+ break;
+ // are we 64-bit on size_t or ptrdiff_t? (c99)
+ case 'z':
+ fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+ ++f;
+ break;
+ case 't':
+ fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+ ++f;
+ break;
+ // are we 64-bit (msft style)
+ case 'I':
+ if((f[1] == '6') && (f[2] == '4')) {
+ fl |= STBSP__INTMAX;
+ f += 3;
+ } else if((f[1] == '3') && (f[2] == '2')) {
+ f += 3;
+ } else {
+ fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0);
+ ++f;
+ }
+ break;
+ default: break;
+ }
+
+ // handle each replacement
+ switch (f[0]) {
+ #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307
+ char num[STBSP__NUMSZ];
+ char lead[8];
+ char tail[8];
+ char *s;
+ char const *h;
+ stbsp__uint32 l, n, cs;
+ stbsp__uint64 n64;
+#ifndef STB_SPRINTF_NOFLOAT
+ double fv;
+#endif
+ stbsp__int32 dp;
+ char const *sn;
+
+ case 's':
+ // get the string
+ s = va_arg(va, char *);
+ if(s == 0)
+ s = (char *)"0";
+ // get the length, limited to desired precision
+ // always limit to ~0u chars since our counts are 32b
+ l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u);
+ lead[0] = 0;
+ tail[0] = 0;
+ pr = 0;
+ dp = 0;
+ cs = 0;
+ // copy the string in
+ goto scopy;
+
+ case 'c': // char
+ // get the character
+ s = num + STBSP__NUMSZ - 1;
+ *s = (char)va_arg(va, int);
+ l = 1;
+ lead[0] = 0;
+ tail[0] = 0;
+ pr = 0;
+ dp = 0;
+ cs = 0;
+ goto scopy;
+
+ case 'n': // weird write-bytes specifier
+ {
+ int *d = va_arg(va, int *);
+ *d = tlen + (int)(bf - buf);
+ } break;
+
+#ifdef STB_SPRINTF_NOFLOAT
+ case 'A': // float
+ case 'a': // hex float
+ case 'G': // float
+ case 'g': // float
+ case 'E': // float
+ case 'e': // float
+ case 'f': // float
+ va_arg(va, double); // eat it
+ s = (char *)"No float";
+ l = 8;
+ lead[0] = 0;
+ tail[0] = 0;
+ pr = 0;
+ cs = 0;
+ STBSP__NOTUSED(dp);
+ goto scopy;
+#else
+ case 'A': // hex float
+ case 'a': // hex float
+ h = (f[0] == 'A') ? hexu : hex;
+ fv = va_arg(va, double);
+ if(pr == -1)
+ pr = 6; // default is 6
+ // read the double into a string
+ if(stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
+ fl |= STBSP__NEGATIVE;
+
+ s = num + 64;
+
+ stbsp__lead_sign(fl, lead);
+
+ if(dp == -1023)
+ dp = (n64) ? -1022 : 0;
+ else
+ n64 |= (((stbsp__uint64)1) << 52);
+ n64 <<= (64 - 56);
+ if(pr < 15)
+ n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
+// add leading chars
+
+#ifdef STB_SPRINTF_MSVC_MODE
+ *s++ = '0';
+ *s++ = 'x';
+#else
+ lead[1 + lead[0]] = '0';
+ lead[2 + lead[0]] = 'x';
+ lead[0] += 2;
+#endif
+ *s++ = h[(n64 >> 60) & 15];
+ n64 <<= 4;
+ if(pr)
+ *s++ = stbsp__period;
+ sn = s;
+
+ // print the bits
+ n = pr;
+ if(n > 13)
+ n = 13;
+ if(pr > (stbsp__int32)n)
+ tz = pr - n;
+ pr = 0;
+ while(n--) {
+ *s++ = h[(n64 >> 60) & 15];
+ n64 <<= 4;
+ }
+
+ // print the expo
+ tail[1] = h[17];
+ if(dp < 0) {
+ tail[2] = '-';
+ dp = -dp;
+ } else
+ tail[2] = '+';
+ n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3));
+ tail[0] = (char)n;
+ for(;;) {
+ tail[n] = '0' + dp % 10;
+ if(n <= 3)
+ break;
+ --n;
+ dp /= 10;
+ }
+
+ dp = (int)(s - sn);
+ l = (int)(s - (num + 64));
+ s = num + 64;
+ cs = 1 + (3 << 24);
+ goto scopy;
+
+ case 'G': // float
+ case 'g': // float
+ h = (f[0] == 'G') ? hexu : hex;
+ fv = va_arg(va, double);
+ if(pr == -1)
+ pr = 6;
+ else if(pr == 0)
+ pr = 1; // default is 6
+ // read the double into a string
+ if(stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000))
+ fl |= STBSP__NEGATIVE;
+
+ // clamp the precision and delete extra zeros after clamp
+ n = pr;
+ if(l > (stbsp__uint32)pr)
+ l = pr;
+ while((l > 1) && (pr) && (sn[l - 1] == '0')) {
+ --pr;
+ --l;
+ }
+
+ // should we use %e
+ if((dp <= -4) || (dp > (stbsp__int32)n)) {
+ if(pr > (stbsp__int32)l)
+ pr = l - 1;
+ else if(pr)
+ --pr; // when using %e, there is one digit before the decimal
+ goto doexpfromg;
+ }
+ // this is the insane action to get the pr to match %g semantics for %f
+ if(dp > 0) {
+ pr = (dp < (stbsp__int32)l) ? l - dp : 0;
+ } else {
+ pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr);
+ }
+ goto dofloatfromg;
+
+ case 'E': // float
+ case 'e': // float
+ h = (f[0] == 'E') ? hexu : hex;
+ fv = va_arg(va, double);
+ if(pr == -1)
+ pr = 6; // default is 6
+ // read the double into a string
+ if(stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000))
+ fl |= STBSP__NEGATIVE;
+ doexpfromg:
+ tail[0] = 0;
+ stbsp__lead_sign(fl, lead);
+ if(dp == STBSP__SPECIAL) {
+ s = (char *)sn;
+ cs = 0;
+ pr = 0;
+ goto scopy;
+ }
+ s = num + 64;
+ // handle leading chars
+ *s++ = sn[0];
+
+ if(pr)
+ *s++ = stbsp__period;
+
+ // handle after decimal
+ if((l - 1) > (stbsp__uint32)pr)
+ l = pr + 1;
+ for(n = 1; n < l; n++)
+ *s++ = sn[n];
+ // trailing zeros
+ tz = pr - (l - 1);
+ pr = 0;
+ // dump expo
+ tail[1] = h[0xe];
+ dp -= 1;
+ if(dp < 0) {
+ tail[2] = '-';
+ dp = -dp;
+ } else
+ tail[2] = '+';
+#ifdef STB_SPRINTF_MSVC_MODE
+ n = 5;
+#else
+ n = (dp >= 100) ? 5 : 4;
+#endif
+ tail[0] = (char)n;
+ for(;;) {
+ tail[n] = '0' + dp % 10;
+ if(n <= 3)
+ break;
+ --n;
+ dp /= 10;
+ }
+ cs = 1 + (3 << 24); // how many tens
+ goto flt_lead;
+
+ case 'f': // float
+ fv = va_arg(va, double);
+ doafloat:
+ // do kilos
+ if(fl & STBSP__METRIC_SUFFIX) {
+ double divisor;
+ divisor = 1000.0f;
+ if(fl & STBSP__METRIC_1024)
+ divisor = 1024.0;
+ while(fl < 0x4000000) {
+ if((fv < divisor) && (fv > -divisor))
+ break;
+ fv /= divisor;
+ fl += 0x1000000;
+ }
+ }
+ if(pr == -1)
+ pr = 6; // default is 6
+ // read the double into a string
+ if(stbsp__real_to_str(&sn, &l, num, &dp, fv, pr))
+ fl |= STBSP__NEGATIVE;
+ dofloatfromg:
+ tail[0] = 0;
+ stbsp__lead_sign(fl, lead);
+ if(dp == STBSP__SPECIAL) {
+ s = (char *)sn;
+ cs = 0;
+ pr = 0;
+ goto scopy;
+ }
+ s = num + 64;
+
+ // handle the three decimal varieties
+ if(dp <= 0) {
+ stbsp__int32 i;
+ // handle 0.000*000xxxx
+ *s++ = '0';
+ if(pr)
+ *s++ = stbsp__period;
+ n = -dp;
+ if((stbsp__int32)n > pr)
+ n = pr;
+ i = n;
+ while(i) {
+ if((((stbsp__uintptr)s) & 3) == 0)
+ break;
+ *s++ = '0';
+ --i;
+ }
+ while(i >= 4) {
+ *(stbsp__uint32 *)s = 0x30303030;
+ s += 4;
+ i -= 4;
+ }
+ while(i) {
+ *s++ = '0';
+ --i;
+ }
+ if((stbsp__int32)(l + n) > pr)
+ l = pr - n;
+ i = l;
+ while(i) {
+ *s++ = *sn++;
+ --i;
+ }
+ tz = pr - (n + l);
+ cs = 1 + (3 << 24); // how many tens did we write (for commas below)
+ } else {
+ cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0;
+ if((stbsp__uint32)dp >= l) {
+ // handle xxxx000*000.0
+ n = 0;
+ for(;;) {
+ if((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+ cs = 0;
+ *s++ = stbsp__comma;
+ } else {
+ *s++ = sn[n];
+ ++n;
+ if(n >= l)
+ break;
+ }
+ }
+ if(n < (stbsp__uint32)dp) {
+ n = dp - n;
+ if((fl & STBSP__TRIPLET_COMMA) == 0) {
+ while(n) {
+ if((((stbsp__uintptr)s) & 3) == 0)
+ break;
+ *s++ = '0';
+ --n;
+ }
+ while(n >= 4) {
+ *(stbsp__uint32 *)s = 0x30303030;
+ s += 4;
+ n -= 4;
+ }
+ }
+ while(n) {
+ if((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+ cs = 0;
+ *s++ = stbsp__comma;
+ } else {
+ *s++ = '0';
+ --n;
+ }
+ }
+ }
+ cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+ if(pr) {
+ *s++ = stbsp__period;
+ tz = pr;
+ }
+ } else {
+ // handle xxxxx.xxxx000*000
+ n = 0;
+ for(;;) {
+ if((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+ cs = 0;
+ *s++ = stbsp__comma;
+ } else {
+ *s++ = sn[n];
+ ++n;
+ if(n >= (stbsp__uint32)dp)
+ break;
+ }
+ }
+ cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+ if(pr)
+ *s++ = stbsp__period;
+ if((l - dp) > (stbsp__uint32)pr)
+ l = pr + dp;
+ while(n < l) {
+ *s++ = sn[n];
+ ++n;
+ }
+ tz = pr - (l - dp);
+ }
+ }
+ pr = 0;
+
+ // handle k,m,g,t
+ if(fl & STBSP__METRIC_SUFFIX) {
+ char idx;
+ idx = 1;
+ if(fl & STBSP__METRIC_NOSPACE)
+ idx = 0;
+ tail[0] = idx;
+ tail[1] = ' ';
+ {
+ if(fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'.
+ if(fl & STBSP__METRIC_1024)
+ tail[idx + 1] = "_KMGT"[fl >> 24];
+ else
+ tail[idx + 1] = "_kMGT"[fl >> 24];
+ idx++;
+ // If printing kibits and not in jedec, add the 'i'.
+ if(fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) {
+ tail[idx + 1] = 'i';
+ idx++;
+ }
+ tail[0] = idx;
+ }
+ }
+ };
+
+ flt_lead:
+ // get the length that we copied
+ l = (stbsp__uint32)(s - (num + 64));
+ s = num + 64;
+ goto scopy;
+#endif
+
+ case 'B': // upper binary
+ case 'b': // lower binary
+ h = (f[0] == 'B') ? hexu : hex;
+ lead[0] = 0;
+ if(fl & STBSP__LEADING_0X) {
+ lead[0] = 2;
+ lead[1] = '0';
+ lead[2] = h[0xb];
+ }
+ l = (8 << 4) | (1 << 8);
+ goto radixnum;
+
+ case 'o': // octal
+ h = hexu;
+ lead[0] = 0;
+ if(fl & STBSP__LEADING_0X) {
+ lead[0] = 1;
+ lead[1] = '0';
+ }
+ l = (3 << 4) | (3 << 8);
+ goto radixnum;
+
+ case 'p': // pointer
+ fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0;
+ pr = sizeof(void *) * 2;
+ fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
+ // fall through - to X
+
+ case 'X': // upper hex
+ case 'x': // lower hex
+ h = (f[0] == 'X') ? hexu : hex;
+ l = (4 << 4) | (4 << 8);
+ lead[0] = 0;
+ if(fl & STBSP__LEADING_0X) {
+ lead[0] = 2;
+ lead[1] = '0';
+ lead[2] = h[16];
+ }
+ radixnum:
+ // get the number
+ if(fl & STBSP__INTMAX)
+ n64 = va_arg(va, stbsp__uint64);
+ else
+ n64 = va_arg(va, stbsp__uint32);
+
+ s = num + STBSP__NUMSZ;
+ dp = 0;
+ // clear tail, and clear leading if value is zero
+ tail[0] = 0;
+ if(n64 == 0) {
+ lead[0] = 0;
+ if(pr == 0) {
+ l = 0;
+ cs = 0;
+ goto scopy;
+ }
+ }
+ // convert to string
+ for(;;) {
+ *--s = h[n64 & ((1 << (l >> 8)) - 1)];
+ n64 >>= (l >> 8);
+ if(!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr)))
+ break;
+ if(fl & STBSP__TRIPLET_COMMA) {
+ ++l;
+ if((l & 15) == ((l >> 4) & 15)) {
+ l &= ~15;
+ *--s = stbsp__comma;
+ }
+ }
+ };
+ // get the tens and the comma pos
+ cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24);
+ // get the length that we copied
+ l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+ // copy it
+ goto scopy;
+
+ case 'u': // unsigned
+ case 'i':
+ case 'd': // integer
+ // get the integer and abs it
+ if(fl & STBSP__INTMAX) {
+ stbsp__int64 i64 = va_arg(va, stbsp__int64);
+ n64 = (stbsp__uint64)i64;
+ if((f[0] != 'u') && (i64 < 0)) {
+ n64 = (stbsp__uint64)-i64;
+ fl |= STBSP__NEGATIVE;
+ }
+ } else {
+ stbsp__int32 i = va_arg(va, stbsp__int32);
+ n64 = (stbsp__uint32)i;
+ if((f[0] != 'u') && (i < 0)) {
+ n64 = (stbsp__uint32)-i;
+ fl |= STBSP__NEGATIVE;
+ }
+ }
+
+#ifndef STB_SPRINTF_NOFLOAT
+ if(fl & STBSP__METRIC_SUFFIX) {
+ if(n64 < 1024)
+ pr = 0;
+ else if(pr == -1)
+ pr = 1;
+ fv = (double)(stbsp__int64)n64;
+ goto doafloat;
+ }
+#endif
+
+ // convert to string
+ s = num + STBSP__NUMSZ;
+ l = 0;
+
+ for(;;) {
+ // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+ char *o = s - 8;
+ if(n64 >= 100000000) {
+ n = (stbsp__uint32)(n64 % 100000000);
+ n64 /= 100000000;
+ } else {
+ n = (stbsp__uint32)n64;
+ n64 = 0;
+ }
+ if((fl & STBSP__TRIPLET_COMMA) == 0) {
+ do {
+ s -= 2;
+ *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+ n /= 100;
+ } while(n);
+ }
+ while(n) {
+ if((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+ l = 0;
+ *--s = stbsp__comma;
+ --o;
+ } else {
+ *--s = (char)(n % 10) + '0';
+ n /= 10;
+ }
+ }
+ if(n64 == 0) {
+ if((s[0] == '0') && (s != (num + STBSP__NUMSZ)))
+ ++s;
+ break;
+ }
+ while(s != o)
+ if((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+ l = 0;
+ *--s = stbsp__comma;
+ --o;
+ } else {
+ *--s = '0';
+ }
+ }
+
+ tail[0] = 0;
+ stbsp__lead_sign(fl, lead);
+
+ // get the length that we copied
+ l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+ if(l == 0) {
+ *--s = '0';
+ l = 1;
+ }
+ cs = l + (3 << 24);
+ if(pr < 0)
+ pr = 0;
+
+ scopy:
+ // get fw=leading/trailing space, pr=leading zeros
+ if(pr < (stbsp__int32)l)
+ pr = l;
+ n = pr + lead[0] + tail[0] + tz;
+ if(fw < (stbsp__int32)n)
+ fw = n;
+ fw -= n;
+ pr -= l;
+
+ // handle right justify and leading zeros
+ if((fl & STBSP__LEFTJUST) == 0) {
+ if(fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr
+ {
+ pr = (fw > pr) ? fw : pr;
+ fw = 0;
+ } else {
+ fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
+ }
+ }
+
+ // copy the spaces and/or zeros
+ if(fw + pr) {
+ stbsp__int32 i;
+ stbsp__uint32 c;
+
+ // copy leading spaces (or when doing %8.4d stuff)
+ if((fl & STBSP__LEFTJUST) == 0)
+ while(fw > 0) {
+ stbsp__cb_buf_clamp(i, fw);
+ fw -= i;
+ while(i) {
+ if((((stbsp__uintptr)bf) & 3) == 0)
+ break;
+ *bf++ = ' ';
+ --i;
+ }
+ while(i >= 4) {
+ *(stbsp__uint32 *)bf = 0x20202020;
+ bf += 4;
+ i -= 4;
+ }
+ while(i) {
+ *bf++ = ' ';
+ --i;
+ }
+ stbsp__chk_cb_buf(1);
+ }
+
+ // copy leader
+ sn = lead + 1;
+ while(lead[0]) {
+ stbsp__cb_buf_clamp(i, lead[0]);
+ lead[0] -= (char)i;
+ while(i) {
+ *bf++ = *sn++;
+ --i;
+ }
+ stbsp__chk_cb_buf(1);
+ }
+
+ // copy leading zeros
+ c = cs >> 24;
+ cs &= 0xffffff;
+ cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0;
+ while(pr > 0) {
+ stbsp__cb_buf_clamp(i, pr);
+ pr -= i;
+ if((fl & STBSP__TRIPLET_COMMA) == 0) {
+ while(i) {
+ if((((stbsp__uintptr)bf) & 3) == 0)
+ break;
+ *bf++ = '0';
+ --i;
+ }
+ while(i >= 4) {
+ *(stbsp__uint32 *)bf = 0x30303030;
+ bf += 4;
+ i -= 4;
+ }
+ }
+ while(i) {
+ if((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) {
+ cs = 0;
+ *bf++ = stbsp__comma;
+ } else
+ *bf++ = '0';
+ --i;
+ }
+ stbsp__chk_cb_buf(1);
+ }
+ }
+
+ // copy leader if there is still one
+ sn = lead + 1;
+ while(lead[0]) {
+ stbsp__int32 i;
+ stbsp__cb_buf_clamp(i, lead[0]);
+ lead[0] -= (char)i;
+ while(i) {
+ *bf++ = *sn++;
+ --i;
+ }
+ stbsp__chk_cb_buf(1);
+ }
+
+ // copy the string
+ n = l;
+ while(n) {
+ stbsp__int32 i;
+ stbsp__cb_buf_clamp(i, n);
+ n -= i;
+ STBSP__UNALIGNED(while(i >= 4) {
+ *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s;
+ bf += 4;
+ s += 4;
+ i -= 4;
+ })
+ while(i) {
+ *bf++ = *s++;
+ --i;
+ }
+ stbsp__chk_cb_buf(1);
+ }
+
+ // copy trailing zeros
+ while(tz) {
+ stbsp__int32 i;
+ stbsp__cb_buf_clamp(i, tz);
+ tz -= i;
+ while(i) {
+ if((((stbsp__uintptr)bf) & 3) == 0)
+ break;
+ *bf++ = '0';
+ --i;
+ }
+ while(i >= 4) {
+ *(stbsp__uint32 *)bf = 0x30303030;
+ bf += 4;
+ i -= 4;
+ }
+ while(i) {
+ *bf++ = '0';
+ --i;
+ }
+ stbsp__chk_cb_buf(1);
+ }
+
+ // copy tail if there is one
+ sn = tail + 1;
+ while(tail[0]) {
+ stbsp__int32 i;
+ stbsp__cb_buf_clamp(i, tail[0]);
+ tail[0] -= (char)i;
+ while(i) {
+ *bf++ = *sn++;
+ --i;
+ }
+ stbsp__chk_cb_buf(1);
+ }
+
+ // handle the left justify
+ if(fl & STBSP__LEFTJUST)
+ if(fw > 0) {
+ while(fw) {
+ stbsp__int32 i;
+ stbsp__cb_buf_clamp(i, fw);
+ fw -= i;
+ while(i) {
+ if((((stbsp__uintptr)bf) & 3) == 0)
+ break;
+ *bf++ = ' ';
+ --i;
+ }
+ while(i >= 4) {
+ *(stbsp__uint32 *)bf = 0x20202020;
+ bf += 4;
+ i -= 4;
+ }
+ while(i--)
+ *bf++ = ' ';
+ stbsp__chk_cb_buf(1);
+ }
+ }
+ break;
+
+ default: // unknown, just copy code
+ s = num + STBSP__NUMSZ - 1;
+ *s = f[0];
+ l = 1;
+ fw = fl = 0;
+ lead[0] = 0;
+ tail[0] = 0;
+ pr = 0;
+ dp = 0;
+ cs = 0;
+ goto scopy;
+ }
+ ++f;
+ }
+endfmt:
+
+ if(!callback)
+ *bf = 0;
+ else
+ stbsp__flush_cb();
+
+done:
+ return tlen + (int)(bf - buf);
+}
+
+// cleanup
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
+
+// ============================================================================
+// wrapper functions
+// unbounded variadic front end: buf is handed to vsprintfcb with no callback and no user pointer
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...)
+{
+    va_list args;
+    int written;
+    va_start(args, fmt);
+    written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, args);
+    va_end(args);
+    return written; // pass vsprintfcb's result through unchanged
+}
+
+typedef struct stbsp__context { // state shared between vsnprintf and its two output callbacks
+ char *buf; // next write position in the caller's destination buffer (advanced by stbsp__clamp_callback)
+ int count; // bytes of room still left at buf
+ int length; // running total of bytes produced, including any that did not fit
+ char tmp[STB_SPRINTF_MIN]; // scratch area the callbacks hand back when output is not going straight into buf
+} stbsp__context;
+// vsnprintf output sink: copies as much of each chunk as still fits and counts all of it
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+    stbsp__context *ctx = (stbsp__context *)user;
+    int take = len;
+
+    ctx->length += len; // the total is accumulated before clamping
+    if(ctx->count < take)
+        take = ctx->count;
+    if(take != 0) {
+        // when buf already is the destination there is nothing to move; copy only otherwise
+        if(ctx->buf != buf) {
+            char *dst = ctx->buf;
+            const char *src = buf;
+            const char *stop = buf + take;
+            do {
+                *dst++ = *src++;
+            } while(src < stop);
+        }
+        ctx->buf += take;
+        ctx->count -= take;
+    }
+
+    // hand out the destination itself only while a whole STB_SPRINTF_MIN chunk still fits
+    if((ctx->count > 0) && (ctx->count >= STB_SPRINTF_MIN))
+        return ctx->buf;
+    return ctx->tmp;
+}
+// measuring-only sink: adds each chunk's size to the running total and stores nothing
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len )
+{
+    stbsp__context * ctx = (stbsp__context*)user;
+
+    (void) sizeof(buf); // buf is intentionally ignored
+    ctx->length += len;
+    return ctx->tmp; // always hand the scratch buffer back for the next chunk
+}
+// bounded formatter: keeps at most count-1 characters plus a terminating zero in buf and returns the full untruncated length; when count <= 0 nothing is stored and buf is never touched (it may be NULL)
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+    stbsp__context c;
+
+    if( count <= 0 ) // no room to store anything (covers the NULL/0 size query): measure only, so buf[count - 1] is never written
+    {
+        c.length = 0;
+
+        STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+    }
+    else
+    {
+        int l;
+
+        c.buf = buf;
+        c.count = count;
+        c.length = 0;
+
+        STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
+
+        // zero-terminate; count > 0 on this path, so the clamped index below is never negative
+        l = (int)( c.buf - buf );
+        if( l >= count ) // should never be greater, only equal (or less) than count
+            l = count - 1;
+        buf[l] = 0;
+    }
+
+    return c.length;
+}
+// variadic front end for vsnprintf: same buf/count arguments, same return value
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{
+    va_list args;
+    int total;
+
+    va_start(args, fmt);
+    total = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, args);
+    va_end(args);
+
+    return total;
+}
+// va_list twin of sprintf: no size limit, no callback, no user pointer
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va)
+{
+    int const written = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+    return written;
+}
+// =======================================================================
+// low level float utility functions
+
+#ifndef STB_SPRINTF_NOFLOAT
+
+// copies the 8 bytes of src into dest one char at a time, so the bit pattern is reinterpreted without breaking strict aliasing (this compiles to nothing on /Ox)
+#define STBSP__COPYFP(dest, src) \
+ { \
+ int cn; \
+ for(cn = 0; cn < 8; cn++) \
+ ((char *)&dest)[cn] = ((char *)&src)[cn]; \
+ }
+
+// split a double (IEEE-754 binary64 layout assumed) into raw mantissa bits and unbiased exponent; returns the sign bit
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value)
+{
+    stbsp__int64 raw = 0;
+    double src = value; // STBSP__COPYFP takes its operands' addresses, so work on a local
+    stbsp__uint64 const mantissa_mask = (((stbsp__uint64)1) << 52) - 1;
+
+    STBSP__COPYFP(raw, src);
+
+    // low 52 bits: the stored fraction, with no implicit leading one added
+    *bits = raw & mantissa_mask;
+    // next 11 bits: the biased exponent, with the 1023 bias removed
+    *expo = (stbsp__int32)(((raw >> 52) & 2047) - 1023);
+
+    return (stbsp__int32)((stbsp__uint64) raw >> 63);
+}
+
+static double const stbsp__bot[23] = { // 10^0 .. 10^22, indexed by the exponent
+ 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011,
+ 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022
+};
+static double const stbsp__negbot[22] = { // 10^-1 .. 10^-22 (index 0 holds 10^-1)
+ 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011,
+ 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022
+};
+static double const stbsp__negboterr[22] = { // low-order correction terms applied alongside stbsp__negbot[] in stbsp__raise_to_power10
+ -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023,
+ 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029,
+ -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, -7.1542424054621921e-034, -7.1542424054621926e-035,
+ 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039
+};
+static double const stbsp__top[13] = { // 10^23, 10^46, ... 10^299 (steps of 23 decades)
+ 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299
+};
+static double const stbsp__negtop[13] = { // 10^-23, 10^-46, ... 10^-299
+ 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299
+};
+static double const stbsp__toperr[13] = { // low-order correction terms paired with stbsp__top[] (the yl operand of stbsp__ddmultlo)
+ 8388608,
+ 6.8601809640529717e+028,
+ -7.253143638152921e+052,
+ -4.3377296974619174e+075,
+ -1.5559416129466825e+098,
+ -3.2841562489204913e+121,
+ -3.7745893248228135e+144,
+ -1.7356668416969134e+167,
+ -3.8893577551088374e+190,
+ -9.9566444326005119e+213,
+ 6.3641293062232429e+236,
+ -5.2069140800249813e+259,
+ -5.2504760255204387e+282
+};
+static double const stbsp__negtoperr[13] = { // low-order correction terms paired with stbsp__negtop[] (the yl operand of stbsp__ddmultlo)
+ 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109,
+ -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201,
+ 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293,
+ 8.0970921678014997e-317
+};
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1200) // this branch spells the 64-bit constants without the ULL suffix
+static stbsp__uint64 const stbsp__powten[20] = { // 10^0 .. 10^19, indexed by the exponent
+ 1,
+ 10,
+ 100,
+ 1000,
+ 10000,
+ 100000,
+ 1000000,
+ 10000000,
+ 100000000,
+ 1000000000,
+ 10000000000,
+ 100000000000,
+ 1000000000000,
+ 10000000000000,
+ 100000000000000,
+ 1000000000000000,
+ 10000000000000000,
+ 100000000000000000,
+ 1000000000000000000,
+ 10000000000000000000U
+};
+#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) // 10^18: the smallest integer with 19 decimal digits
+#else
+static stbsp__uint64 const stbsp__powten[20] = { // 10^0 .. 10^19, indexed by the exponent
+ 1,
+ 10,
+ 100,
+ 1000,
+ 10000,
+ 100000,
+ 1000000,
+ 10000000,
+ 100000000,
+ 1000000000,
+ 10000000000ULL,
+ 100000000000ULL,
+ 1000000000000ULL,
+ 10000000000000ULL,
+ 100000000000000ULL,
+ 1000000000000000ULL,
+ 10000000000000000ULL,
+ 100000000000000000ULL,
+ 1000000000000000000ULL,
+ 10000000000000000000ULL
+};
+#define stbsp__tento19th (1000000000000000000ULL) // 10^18: the smallest integer with 19 decimal digits
+#endif
+// oh = xh * yh as a double; ol = the part of the exact product that oh lost, recovered by splitting each operand into a high part (low 27 bits of its bit pattern cleared) and the remainder
+#define stbsp__ddmulthi(oh, ol, xh, yh) \
+ { \
+ double ahi = 0, alo, bhi = 0, blo; \
+ stbsp__int64 bt; \
+ oh = xh * yh; \
+ STBSP__COPYFP(bt, xh); \
+ bt &= ((~(stbsp__uint64)0) << 27); \
+ STBSP__COPYFP(ahi, bt); \
+ alo = xh - ahi; \
+ STBSP__COPYFP(bt, yh); \
+ bt &= ((~(stbsp__uint64)0) << 27); \
+ STBSP__COPYFP(bhi, bt); \
+ blo = yh - bhi; \
+ ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \
+ }
+// ob = integer value of the pair (xh, xl): truncate xh to an integer, then add the truncation of what was left over plus xl
+#define stbsp__ddtoS64(ob, xh, xl) \
+ { \
+ double ahi = 0, alo, vh, t; \
+ ob = (stbsp__int64)xh; \
+ vh = (double)ob; \
+ ahi = (xh - vh); \
+ t = (ahi - xh); \
+ alo = (xh - (ahi - t)) - (vh + t); \
+ ob += (stbsp__int64)(ahi + alo + xl); \
+ }
+// re-split the pair (oh, ol): oh becomes the rounded sum oh + ol, and ol becomes what that sum left out
+#define stbsp__ddrenorm(oh, ol) \
+ { \
+ double s; \
+ s = oh + ol; \
+ ol = ol - (s - oh); \
+ oh = s; \
+ }
+// adds the cross terms xh * yl + xl * yh into the low word ol (oh is not touched)
+#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh);
+// adds the single cross term xh * yl into the low word ol (oh is not touched)
+#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl);
+// computes d * 10^power as a high/low pair of doubles: *ohi receives the high word and *olo the low word
+static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350
+{
+ double ph, pl;
+ if((power >= 0) && (power <= 22)) { // small non-negative powers need a single multiply by the table entry
+ stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]);
+ } else {
+ stbsp__int32 e, et, eb;
+ double p2h, p2l;
+
+ e = power;
+ if(power < 0)
+ e = -e;
+ et = (e * 0x2c9) >> 14; /* multiply-and-shift approximation of e / 23 (0x2c9 / 2^14) */
+ if(et > 13) // stbsp__top[] and stbsp__negtop[] only have 13 entries
+ et = 13;
+ eb = e - (et * 23); // decades not covered by the 10^(23 * et) step
+
+ ph = d;
+ pl = 0.0;
+ if(power < 0) {
+ if(eb) {
+ --eb; // stbsp__negbot[0] is 10^-1, hence the decrement. NOTE(review): for power <= -322 the capped et leaves eb >= 23, so the decremented index would run past the 22-entry stbsp__negbot[]/stbsp__negboterr[]; stbsp__real_to_str passes 18 - tens - confirm that never gets this low
+ stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]);
+ stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]);
+ }
+ if(et) {
+ stbsp__ddrenorm(ph, pl);
+ --et; // stbsp__negtop[0] is 10^-23
+ stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]);
+ stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
+ ph = p2h;
+ pl = p2l;
+ }
+ } else {
+ if(eb) {
+ e = eb;
+ if(eb > 22) // stbsp__bot[] stops at 10^22, so a larger remainder is applied in two multiplies
+ eb = 22;
+ e -= eb;
+ stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]);
+ if(e) {
+ stbsp__ddrenorm(ph, pl);
+ stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]);
+ stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl);
+ ph = p2h;
+ pl = p2l;
+ }
+ }
+ if(et) {
+ stbsp__ddrenorm(ph, pl);
+ --et; // stbsp__top[0] is 10^23
+ stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]);
+ stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
+ ph = p2h;
+ pl = p2l;
+ }
+ }
+ }
+ stbsp__ddrenorm(ph, pl);
+ *ohi = ph;
+ *olo = pl;
+}
+
+// given a float value, returns its significant decimal digits as a string in *start (length in *len), and the position of the
+// decimal point in decimal_pos. +/-INF and NAN are specified by special values
+// returned in the decimal_pos parameter. The function result is the sign bit of value.
+// frac_digits is normally an absolute count of fractional digits; to count from the first significant digit instead (as for %g and %e), OR in 0x80000000
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+ double d;
+ stbsp__int64 bits = 0;
+ stbsp__int32 expo, e, ng, tens;
+
+ d = value;
+ STBSP__COPYFP(bits, d);
+ expo = (stbsp__int32)((bits >> 52) & 2047); // biased exponent field
+ ng = (stbsp__int32)((stbsp__uint64) bits >> 63); // sign bit
+ if(ng)
+ d = -d;
+
+ if(expo == 2047) // is nan or inf?
+ {
+ *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf"; // non-zero mantissa means NaN
+ *decimal_pos = STBSP__SPECIAL;
+ *len = 3;
+ return ng;
+ }
+
+ if(expo == 0) // is zero or denormal
+ {
+ if(((stbsp__uint64) bits << 1) == 0) // do zero
+ {
+ *decimal_pos = 1;
+ *start = out;
+ out[0] = '0';
+ *len = 1;
+ return ng;
+ }
+ // find the right expo for denormals
+ {
+ stbsp__int64 v = ((stbsp__uint64)1) << 51;
+ while((bits & v) == 0) { // one step down for every leading zero bit of the mantissa
+ --expo;
+ v >>= 1;
+ }
+ }
+ }
+
+ // find the decimal exponent as well as the decimal bits of the value
+ {
+ double ph, pl;
+
+ // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+ tens = expo - 1023;
+ tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+ // move the significant bits into position and stick them into an int
+ stbsp__raise_to_power10(&ph, &pl, d, 18 - tens);
+
+ // get as much precision from the double-double as possible
+ stbsp__ddtoS64(bits, ph, pl);
+
+ // check if we undershot
+ if(((stbsp__uint64)bits) >= stbsp__tento19th)
+ ++tens;
+ }
+
+ // now do the rounding in integer land
+ frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); // NOTE(review): the mask 0x7ffffff keeps only the low 27 bits, not all 31 bits below the flag - confirm callers never pass a larger count
+ if((frac_digits < 24)) {
+ stbsp__uint32 dg = 1;
+ if((stbsp__uint64)bits >= stbsp__powten[9])
+ dg = 10;
+ while((stbsp__uint64)bits >= stbsp__powten[dg]) { // dg ends up as the number of decimal digits in bits
+ ++dg;
+ if(dg == 20)
+ goto noround;
+ }
+ if(frac_digits < dg) {
+ stbsp__uint64 r;
+ // add 0.5 at the right position and round
+ e = dg - frac_digits;
+ if((stbsp__uint32)e >= 24)
+ goto noround;
+ r = stbsp__powten[e];
+ bits = bits + (r / 2);
+ if((stbsp__uint64)bits >= stbsp__powten[dg]) // rounding carried into a new leading digit
+ ++tens;
+ bits /= r;
+ }
+ noround:;
+ }
+
+ // kill long trailing runs of zeros
+ if(bits) {
+ stbsp__uint32 n;
+ for(;;) {
+ if(bits <= 0xffffffff)
+ break;
+ if(bits % 1000)
+ goto donez;
+ bits /= 1000;
+ }
+ n = (stbsp__uint32)bits;
+ while((n % 1000) == 0)
+ n /= 1000;
+ bits = n;
+ donez:;
+ }
+
+ // convert to string
+ out += 64; // digits are emitted backwards from out + 64 - assumes the caller's buffer is at least that large; TODO confirm
+ e = 0;
+ for(;;) {
+ stbsp__uint32 n;
+ char *o = out - 8;
+ // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned)
+ if(bits >= 100000000) {
+ n = (stbsp__uint32)(bits % 100000000);
+ bits /= 100000000;
+ } else {
+ n = (stbsp__uint32)bits;
+ bits = 0;
+ }
+ while(n) {
+ out -= 2;
+ *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+ n /= 100;
+ e += 2;
+ }
+ if(bits == 0) {
+ if((e) && (out[0] == '0')) { // drop the padding zero of a leading one-digit pair
+ ++out;
+ --e;
+ }
+ break;
+ }
+ while(out != o) { // pad this 8-digit chunk with zeros before emitting the next one
+ *--out = '0';
+ ++e;
+ }
+ }
+
+ *decimal_pos = tens;
+ *start = out;
+ *len = e;
+ return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
+
+#endif // STB_SPRINTF_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/base/ugg.h b/base/ugg.h
new file mode 100644
index 0000000..ac51678
--- /dev/null
+++ b/base/ugg.h
@@ -0,0 +1,10 @@
+#pragma once
+
+// Uncomplicated Generic Graphics
+struct ugg { // NOTE(review): uses uint32_t/uint8_t but this header includes nothing - the includer must pull in <stdint.h> first
+ uint32_t width;
+ uint32_t height;
+ uint32_t palette[256]; // 256 32-bit entries; presumably looked up by the bytes in data - confirm against the loader
+ uint8_t data[]; // flexible array member: its storage must be allocated beyond sizeof(struct ugg)
+};
+
diff --git a/base/vertex_shader.glsl b/base/vertex_shader.glsl
new file mode 100644
index 0000000..d8f1848
--- /dev/null
+++ b/base/vertex_shader.glsl
@@ -0,0 +1,9 @@
+in vec2 position; // per-vertex position, used directly as the x/y of gl_Position below
+in vec2 texture_coord; // per-vertex texture coordinate
+// NOTE(review): this file has no #version directive - presumably one is prepended when the shader is loaded; confirm
+out vec2 frag_texture_coord; // texture coordinate handed to the next stage
+
+void main() {
+ frag_texture_coord = texture_coord; // forwarded unchanged
+ gl_Position = vec4(position, 0.0, 1.0); // z = 0 and w = 1: the position is used as given, with no transform applied
+}