From d5486a5af100fb37fac08b60d862ac14943853ce Mon Sep 17 00:00:00 2001 From: Peter Fors Date: Sat, 29 Mar 2025 20:11:56 +0100 Subject: add base code for windowing and opengl crt-shader. --- base/.gitignore | 6 + base/audio.c | 458 +++++++++++ base/base.c | 122 +++ base/build.sh | 77 ++ base/callbacks.c | 72 ++ base/common.h | 55 ++ base/fragment_shader.glsl | 149 ++++ base/incbin.h | 50 ++ base/opengl.c | 133 ++++ base/opengl_loader.c | 210 +++++ base/overlay.c | 375 +++++++++ base/render.c | 79 ++ base/settings.h | 11 + base/shader.c | 44 ++ base/shader.h | 34 + base/state.c | 134 ++++ base/stb_sprintf.h | 1906 +++++++++++++++++++++++++++++++++++++++++++++ base/ugg.h | 10 + base/vertex_shader.glsl | 9 + 19 files changed, 3934 insertions(+) create mode 100644 base/.gitignore create mode 100644 base/audio.c create mode 100644 base/base.c create mode 100755 base/build.sh create mode 100644 base/callbacks.c create mode 100644 base/common.h create mode 100644 base/fragment_shader.glsl create mode 100644 base/incbin.h create mode 100644 base/opengl.c create mode 100644 base/opengl_loader.c create mode 100644 base/overlay.c create mode 100644 base/render.c create mode 100644 base/settings.h create mode 100644 base/shader.c create mode 100644 base/shader.h create mode 100644 base/state.c create mode 100644 base/stb_sprintf.h create mode 100644 base/ugg.h create mode 100644 base/vertex_shader.glsl (limited to 'base') diff --git a/base/.gitignore b/base/.gitignore new file mode 100644 index 0000000..4b1168a --- /dev/null +++ b/base/.gitignore @@ -0,0 +1,6 @@ +/data +.base_linux +.base_windows +tags +base +base.exe diff --git a/base/audio.c b/base/audio.c new file mode 100644 index 0000000..1d385d5 --- /dev/null +++ b/base/audio.c @@ -0,0 +1,458 @@ +#include +#include +#include +#include + +#define SAMPLE_RATE 48000 +#define NUM_CHANNELS 2 +#define FRAME_SIZE (NUM_CHANNELS * sizeof(short)) + +// static inline float smoothstep(float edge0, float edge1, float x) { +// x = (x - 
edge0) / (edge1 - edge0); // Scale x to [0, 1] +// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1] +// return x * x * (3.0f - 2.0f * x); // Smooth interpolation +// } + +// static inline float smootherstep(float edge0, float edge1, float x) { +// x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1] +// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1] +// return x * x * x * (x * (x * 6 - 15) + 10); // Modified curve +// } + +static inline float fast_cos(float x) { + float x2 = x * x; + return 1.0f - x2 * (0.5f - x2 * 0.04166667f); // Approximation of cos(x) +} + +static inline float cosine_smooth(float edge0, float edge1, float x) { + x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1] + x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1] + return 0.5f * (1.0f - fast_cos(x * M_PI)); // Cosine smoothing +} + +static float filter_phase = 0.0f; +static float prev_output_sample_L = 0.0f; +static float prev_output_sample_R = 0.0f; + +static void audio_callback_thread(int16_t *audio_buffer, size_t frames) { + int filter_override = state.filter_override; // Manual override: -1 = auto, 0 = off, 1 = on + float filter_frequency = state.filter_frequency; // Frequency in Hz for squarewave toggle + + audio_callback(audio_buffer, frames); + + if(filter_override) { + float a = 1.0f * M_PI * 4000.0f / (SAMPLE_RATE + 1.0f * M_PI * 4000.0f); + float phase_increment = filter_frequency / SAMPLE_RATE; + + for(size_t i = 0; i < frames * 2; i += 2) { + float led_filter_active; + + if(filter_override == -1) { + filter_phase += phase_increment; + if(filter_phase >= 1.0f) filter_phase -= 1.0f; + + led_filter_active = cosine_smooth(0.45f, 0.50f, filter_phase) - cosine_smooth(0.95f, 1.00f, filter_phase); + + } else { + led_filter_active = 1.0f; // Manual override (1 = on) + } + + float input_sample_L = (float)audio_buffer[i] / 32767.0f; + float input_sample_R = (float)audio_buffer[i + 1] / 32767.0f; + + float filtered_sample_L = a * 
input_sample_L + (1.0f - a) * prev_output_sample_L; + float filtered_sample_R = a * input_sample_R + (1.0f - a) * prev_output_sample_R; + + prev_output_sample_L = filtered_sample_L; + prev_output_sample_R = filtered_sample_R; + + audio_buffer[i] = (int16_t)((1.0f - led_filter_active) * input_sample_L * 32767.0f + led_filter_active * filtered_sample_L * 32767.0f); + audio_buffer[i + 1] = (int16_t)((1.0f - led_filter_active) * input_sample_R * 32767.0f + led_filter_active * filtered_sample_R * 32767.0f); + } + } +} + +#ifdef __linux__ + + +#include +#include +#include + +#define BUFFER_SIZE (512 * FRAME_SIZE) + +static struct pw_thread_loop *pa_thread_loop; +static struct pw_context *pa_context; +static struct pw_core *pa_core; +static struct pw_stream *pa_stream; +static struct spa_hook pa_stream_listener; +static uint64_t audio_clock_frequency; +static uint64_t playback_cursor; + +/* + * Called from PipeWire's real-time thread whenever new audio data is needed. + * We dequeue a buffer, call your audio_callback() to fill it, and then re-queue. + */ +static void on_process(void *userdata) { + struct pw_buffer *buffer; + struct spa_buffer *spa_buf; + int16_t *data; + uint32_t size; + uint32_t frames; + struct pw_time time_info; + + buffer = pw_stream_dequeue_buffer(pa_stream); + if(!buffer) { + /* No buffer available, skip. 
*/ + return; + } + + spa_buf = buffer->buffer; + if(!spa_buf->datas || !spa_buf->datas[0].data) { + pw_stream_queue_buffer(pa_stream, buffer); + return; + } + + data = spa_buf->datas[0].data; + size = spa_buf->datas[0].maxsize; + frames = size / FRAME_SIZE; + + // if(pw_stream_get_time_n(pa_stream, &time_info, sizeof(time_info)) == 0) { + // playback_cursor = time_info.now; + // } + // printf("Cursor(ns): %luns\n", playback_cursor); + + audio_callback_thread(data, frames); + + if(spa_buf->datas[0].chunk) { + spa_buf->datas[0].chunk->size = frames * FRAME_SIZE; + spa_buf->datas[0].chunk->stride = FRAME_SIZE; + } + + pw_stream_queue_buffer(pa_stream, buffer); +} + +/* + * Initialize PipeWire, create the stream, and connect for audio playback. + * Returns immediately so your main thread can continue. + */ +int audio_initialize(void) { + pw_init(0, 0); + + pa_thread_loop = pw_thread_loop_new("my-audio-loop", 0); + if(pa_thread_loop) { + if(pw_thread_loop_start(pa_thread_loop) == 0) { + pw_thread_loop_lock(pa_thread_loop); + + pa_context = pw_context_new(pw_thread_loop_get_loop(pa_thread_loop), 0, 0); + if(pa_context) { + pa_core = pw_context_connect(pa_context, 0, 0); + if(pa_core){ + static const struct spa_dict_item items[] = { + SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_TYPE, "Audio"), + SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_CATEGORY, "Playback"), + SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_ROLE, "Game"), + SPA_DICT_ITEM_INIT(PW_KEY_NODE_LATENCY, "512/48000") + }; + struct pw_properties *props = pw_properties_new_dict(&SPA_DICT_INIT(items, 4)); + // pw_properties_free(props); + + pa_stream = pw_stream_new(pa_core, "My Audio Stream", props); + if(pa_stream) { + static struct pw_stream_events stream_events = { PW_VERSION_STREAM_EVENTS, .process = on_process, }; + pw_stream_add_listener(pa_stream, &pa_stream_listener, &stream_events, 0); + + /* + * Build two SPA params: + * 1) The audio format: S16_LE, SAMPLE_RATE, NUM_CHANNELS + * 2) The buffer param: request BUFFER_SIZE bytes per buffer + */ 
+ uint8_t fmt_buffer[1024]; + struct spa_pod_builder fmt_builder = SPA_POD_BUILDER_INIT(fmt_buffer, sizeof(fmt_buffer)); + const struct spa_pod *fmt_param = spa_pod_builder_add_object( + &fmt_builder, + SPA_TYPE_OBJECT_Format, SPA_PARAM_EnumFormat, + SPA_FORMAT_mediaType, SPA_POD_Id(SPA_MEDIA_TYPE_audio), + SPA_FORMAT_mediaSubtype, SPA_POD_Id(SPA_MEDIA_SUBTYPE_raw), + SPA_FORMAT_AUDIO_format, SPA_POD_Id(SPA_AUDIO_FORMAT_S16_LE), + SPA_FORMAT_AUDIO_rate, SPA_POD_Int(SAMPLE_RATE), + SPA_FORMAT_AUDIO_channels, SPA_POD_Int(NUM_CHANNELS) + ); + + uint8_t buf_buffer[1024]; + struct spa_pod_builder buf_builder = SPA_POD_BUILDER_INIT(buf_buffer, sizeof(buf_buffer)); + struct spa_pod *buf_param = spa_pod_builder_add_object( + &buf_builder, + SPA_TYPE_OBJECT_ParamBuffers, SPA_PARAM_Buffers, + SPA_PARAM_BUFFERS_buffers, SPA_POD_CHOICE_RANGE_Int(8, 2, 16), /* We'll request 8 buffers, each of size = BUFFER_SIZE bytes. */ + SPA_PARAM_BUFFERS_blocks, SPA_POD_Int(1), + SPA_PARAM_BUFFERS_size, SPA_POD_CHOICE_RANGE_Int(BUFFER_SIZE, BUFFER_SIZE, BUFFER_SIZE*8), + SPA_PARAM_BUFFERS_stride, SPA_POD_Int(FRAME_SIZE), + SPA_PARAM_BUFFERS_align, SPA_POD_Int(16) + ); + + const struct spa_pod *params[2]; + params[0] = fmt_param; + params[1] = buf_param; + + int res = pw_stream_connect(pa_stream, PW_DIRECTION_OUTPUT, PW_ID_ANY, PW_STREAM_FLAG_AUTOCONNECT | PW_STREAM_FLAG_RT_PROCESS | PW_STREAM_FLAG_MAP_BUFFERS, params, 2); + pw_thread_loop_unlock(pa_thread_loop); + return 0; + + } else { + fprintf(stderr, "Failed to create PipeWire stream\n"); + } + pw_core_disconnect(pa_core); + } else { + fprintf(stderr, "Failed to connect context to core\n"); + } + pw_context_destroy(pa_context); + } else { + fprintf(stderr, "Failed to create PipeWire context\n"); + } + pw_thread_loop_unlock(pa_thread_loop); + pw_thread_loop_stop(pa_thread_loop); + } else { + fprintf(stderr, "Failed to start PipeWire thread loop\n"); + } + pw_thread_loop_destroy(pa_thread_loop); + } else { + fprintf(stderr, "Failed to 
create PipeWire thread loop\n"); + } + pw_deinit(); + return -1; +} + +/* + * Clean up PipeWire objects, stop the thread loop, and deinit. + * This should be called before your program exits. + */ +void audio_shutdown(void) { + if(!pa_thread_loop) { + return; + } + + pw_thread_loop_lock(pa_thread_loop); + + if(pa_stream){ + pw_stream_disconnect(pa_stream); + pw_stream_destroy(pa_stream); + } + + if(pa_core){ + pw_core_disconnect(pa_core); + } + + if(pa_context){ + pw_context_destroy(pa_context); + } + + pw_thread_loop_unlock(pa_thread_loop); + pw_thread_loop_stop(pa_thread_loop); + pw_thread_loop_destroy(pa_thread_loop); + pw_deinit(); +} + + + + + + + + + + + + + + + +#elif _WIN32 + +#define COBJMACROS +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Minimal WASAPI shared-mode audio playback with explicit 48kHz/16-bit/2ch. + */ + +#define NUM_CHANNELS 2 + +static IMMDeviceEnumerator *enumerator; +static IMMDevice *device_out; +static IAudioClient *audio_client_out; +static IAudioRenderClient *render_client; +static HANDLE audio_event; +static HANDLE audio_thread; +static int running; + +static DWORD WINAPI audio_thread_proc(void *arg) { + UINT32 buffer_size; + UINT32 padding; + UINT32 available; + uint8_t *data; + + IAudioClient_GetBufferSize(audio_client_out, &buffer_size); + + while(running) { + WaitForSingleObject(audio_event, INFINITE); + if(!running) { + break; + } + + IAudioClient_GetCurrentPadding(audio_client_out, &padding); + available = buffer_size - padding; + IAudioRenderClient_GetBuffer(render_client, available, &data); + audio_callback_thread((int16_t*)data, available); + IAudioRenderClient_ReleaseBuffer(render_client, available, 0); + } + return 0; +} + +void audio_initialize() { + WAVEFORMATEX wf; + REFERENCE_TIME dur_out; + + CoInitializeEx(0, COINIT_MULTITHREADED); + if(SUCCEEDED(CoCreateInstance(&CLSID_MMDeviceEnumerator, 0, CLSCTX_ALL, &IID_IMMDeviceEnumerator, (void**)&enumerator))) { + 
if(SUCCEEDED(IMMDeviceEnumerator_GetDefaultAudioEndpoint(enumerator, eRender, eConsole, &device_out))) { + if(SUCCEEDED(IMMDevice_Activate(device_out, &IID_IAudioClient, CLSCTX_ALL, 0, (void**)&audio_client_out))) { + wf.wFormatTag = WAVE_FORMAT_PCM; + wf.nChannels = NUM_CHANNELS; + wf.nSamplesPerSec = 48000; + wf.wBitsPerSample = 16; + wf.nBlockAlign = (wf.nChannels * wf.wBitsPerSample) / 8; + wf.nAvgBytesPerSec = wf.nSamplesPerSec * wf.nBlockAlign; + wf.cbSize = 0; + + IAudioClient_GetDevicePeriod(audio_client_out, &dur_out, 0); + IAudioClient_Initialize(audio_client_out, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, dur_out, 0, &wf, 0); + audio_event = CreateEvent(0, FALSE, FALSE, 0); + if(audio_event){ + IAudioClient_SetEventHandle(audio_client_out, audio_event); + IAudioClient_GetService(audio_client_out, &IID_IAudioRenderClient, (void**)&render_client); + IAudioClient_Start(audio_client_out); + + running = 1; + audio_thread = CreateThread(0, 0, audio_thread_proc, 0, 0, 0); + return; + } else { + printf("Failed to create audio event\n"); + } + audio_client_out->lpVtbl->Release(audio_client_out); + } else { + printf("Failed to activate audio client\n"); + } + device_out->lpVtbl->Release(device_out); + } else { + printf("Failed to get default audio endpoint\n"); + } + enumerator->lpVtbl->Release(enumerator); + } else { + printf("Failed to create MMDeviceEnumerator\n"); + } +} + +void audio_shutdown() { + running = 0; + if(audio_thread) { + SetEvent(audio_event); + WaitForSingleObject(audio_thread, INFINITE); + CloseHandle(audio_thread); + } + if(audio_event) { + CloseHandle(audio_event); + } + if(audio_client_out) { + IAudioClient_Stop(audio_client_out); + audio_client_out->lpVtbl->Release(audio_client_out); + } + if(render_client) { + render_client->lpVtbl->Release(render_client); + } + if(device_out) { + device_out->lpVtbl->Release(device_out); + } + if(enumerator) { + enumerator->lpVtbl->Release(enumerator); + } + CoUninitialize(); +} + +#endif 
+ + + + + +// BELOW IS FOR FUTURE FRAME SYNCHRONIZATION!!! + +#if 0 +// Audio sync throttling logic (using audio playback clock) + +#define AUDIO_SAMPLE_RATE 48000 +#define FRAMETIME (1000000000 / 60) // NES: ~16.67ms per frame (replace as needed for PAL/other) + +static uint64_t emulator_start_time_ns = 0; +static uint64_t audio_start_time_ns = 0; + +// Stub: return current audio playback time in nanoseconds +uint64_t get_audio_playback_time_ns(void); + +// Call this once at emulation start +void audio_sync_init(uint64_t current_time_ns) { + emulator_start_time_ns = current_time_ns; + audio_start_time_ns = get_audio_playback_time_ns(); +} + +// Call this at the end of each frame +void audio_throttle_emulator(uint64_t frame_number, int64_t *frame_duration_ns) { + uint64_t expected_emulated_time = frame_number * FRAMETIME; + uint64_t actual_audio_time = get_audio_playback_time_ns() - audio_start_time_ns; + + int64_t drift = (int64_t)(actual_audio_time) - (int64_t)(expected_emulated_time); + + // Adjust frame duration to correct drift gradually + *frame_duration_ns -= drift / 8; + // Clamp adjustment to avoid jitter + if(*frame_duration_ns > FRAMETIME + 50000) { + *frame_duration_ns = FRAMETIME + 50000; + } else if(*frame_duration_ns < FRAMETIME - 50000) { + *frame_duration_ns = FRAMETIME - 50000; + } +} + +#ifdef _WIN32 +#include +#include +#include + +uint64_t get_audio_playback_time_ns(void) { + // WASAPI: query IAudioClock interface + // This is just a placeholder. You’ll need to cache IAudioClock *audio_clock externally. 
+ extern IAudioClock *audio_clock; + UINT64 pos; + audio_clock->lpVtbl->GetPosition(audio_clock, &pos, 0); + return (pos * 1000000000ULL) / AUDIO_SAMPLE_RATE; +} + +#else +// PipeWire backend +#include +extern struct spa_clock *audio_clock; + +uint64_t get_audio_playback_time_ns(void) { + struct spa_clock_info info; + audio_clock->get_time(audio_clock, &info); + return info.nsec; +} +#endif + +#endif diff --git a/base/base.c b/base/base.c new file mode 100644 index 0000000..4c6fbe3 --- /dev/null +++ b/base/base.c @@ -0,0 +1,122 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 + #define WIN32_LEAN_AND_MEAN + #define NOMINMAX + #undef NOCRYPT + #include + #include +#elif defined(__linux__) + #include + // #include + // #include +#endif + +#include "settings.h" +#ifdef PROFILER +#define STB_SPRINTF_IMPLEMENTATION +#define STB_SPRINTF_NOFLOAT +#define STB_SPRINTF_STATIC +#include "stb_sprintf.h" +#define DEBUG_PRINT(format, ...) do { \ + char buf[512]; \ + int len = stbsp_snprintf(buf, sizeof(buf), format, ##__VA_ARGS__); \ + write(STDOUT_FILENO, buf, len); \ +} while(0) +#else +#define DEBUG_PRINT(...) 
+#endif
+
+#include "opengl_loader.c"
+
+#include "incbin.h"
+#include "ugg.h"
+#include "state.c"
+#include "common.h"
+
+
+#include "opengl.c"
+#include "render.c"
+#include
+
+#ifdef PROFILER
+#include "overlay.c"
+#endif
+#include "audio.c"
+#include "callbacks.c"
+
+/* [=]===^=[ main ]=================================================================^===[=]
+ * Program entry point: creates the window, sets up OpenGL and the shader program,
+ * optionally the profiler overlay, then audio, and runs the frame loop until ESC is
+ * pressed or mkfw_should_close() reports true. Afterwards the overlay, audio and
+ * window are shut down.
+ *
+ * argc/argv are not used. Always returns 0.
+ *
+ * Build-time switches visible in this function:
+ *   PROFILER  - adds overlay_init(), reset_profiling_data(), debug_render() and
+ *               overlay_shutdown().
+ *   PERF_TEST - when defined, render_frame(), the frame pacing and mkfw_swap_buffers()
+ *               are compiled out, so the loop runs unthrottled.
+ */
+int main(int argc, char **argv) {
+	state.toggle_crt_emulation = true;	// start with the CRT shader path enabled (F12 toggles it in key_callback)
+	mkfw_init(SCREEN_WIDTH*3, SCREEN_HEIGHT*3);	// presumably the initial window size: 3x the SCREEN_* constants
+	mkfw_set_swapinterval(0);	// presumably disables vsync; pacing is done by hand in the loop below - TODO confirm
+	mkfw_set_window_min_size_and_aspect(SCREEN_WIDTH*3, SCREEN_HEIGHT*3, 4, 3);
+	mkfw_set_key_callback(key_callback);
+	mkfw_set_mouse_move_delta_callback(mouse_move_callback);
+	mkfw_set_mouse_button_callback(mouse_button_callback);
+	mkfw_set_framebuffer_size_callback(framebuffer_callback);
+	opengl_setup(vertex_shader_start, fragment_shader_start);	// GL state + shader compile/link, see opengl.c
+	change_resolution(SCREEN_WIDTH, SCREEN_HEIGHT);
+#ifdef PROFILER
+	overlay_init();
+#endif
+	init_callback();
+	audio_initialize();	// NOTE(review): result is not checked; the PipeWire variant in audio.c returns -1 on failure
+
+	set_decay(20);	// presumably the setting consumed by apply_phosphor_decay() - units not visible here
+
+	bool running = true;
+	uint64_t next_update = mkfw_gettime() + FRAMETIME;	// deadline of the first frame
+	while(running && !mkfw_should_close()) {
+		mkfw_pump_messages();
+		if(key_pressed(MKS_KEY_ESCAPE)) { running = false; }	// still finishes the current iteration
+
+#ifdef PROFILER
+		reset_profiling_data();
+#endif
+		render_callback();
+		apply_phosphor_decay();
+		update_keyboard_state();
+		update_modifier_state();
+		update_mouse_state();
+		state.frame_number++;
+
+#ifndef PERF_TEST
+		render_frame();
+#ifdef PROFILER
+		debug_render();
+#endif
+
+		uint64_t now = mkfw_gettime();
+		int64_t remaining = next_update - now;	// time left until the deadline; <= 0 means this frame is late
+		if(remaining > 0) {
+			if(remaining > SLEEP_MARGIN_NS) {
+				mkfw_sleep(remaining - SLEEP_MARGIN_NS);	// sleep most of the wait, keep SLEEP_MARGIN_NS for the spin below
+			}
+			while(mkfw_gettime() < next_update) { /* busy-wait the last stretch up to the deadline */ }
+		} else {
+			next_update = now;	// deadline missed: restart the schedule from now instead of trying to catch up
+		}
+		next_update += FRAMETIME;
+
+		mkfw_swap_buffers();
+#endif
+	}
+
+#ifdef PROFILER
+	overlay_shutdown();
+#endif
+	audio_shutdown();
+	mkfw_cleanup();
+	return 0;
+}
diff --git a/base/build.sh b/base/build.sh
new file mode 100755
index 0000000..b415041 --- /dev/null +++ b/base/build.sh @@ -0,0 +1,77 @@ +#!/bin/bash +export PATH="$(git rev-parse --show-toplevel)/bin:$PATH" + +# Project name +PROJECT_NAME="base" + +# Base configuration common to all builds +CFLAGS="-mavx2 -mtune=native -std=gnu11 " +CFLAGS+="-msse4.1 -mfunction-return=keep -mindirect-branch=keep " +CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fwhole-program " +CFLAGS+="-fno-stack-protector -fno-PIE -no-pie -fno-strict-aliasing -ffunction-sections -fdata-sections " +CFLAGS+="-U_FORTIFY_SOURCE " +CFLAGS+="-Wall -Wextra " +CFLAGS+="-Wno-unused-parameter -Wno-sign-compare -Wno-trigraphs -Wno-maybe-uninitialized " +CFLAGS+="-Wno-unused-variable -Wno-unused-const-variable -Wno-unused-function " + +LDFLAGS="-Wl,--gc-sections " + +# Base include paths +INCLUDE_PATHS="-I../include -I../base -I../.." + +# Linux-specific includes and libraries +LINUX_INCLUDE="-I/usr/include/pipewire-0.3 -I/usr/include/spa-0.2" +LINUX_LIBS="-lpipewire-0.3 -lXi -lX11 -lGL -lm -ldl -pthread" + +# Windows-specific includes and libraries +WINDOWS_INCLUDE="" +WINDOWS_LIBS="-lwinmm -lksuser -lole32 -lmmdevapi -lavrt -lgdi32 -lopengl32 -luuid" + +# Determine build type +BUILD_TYPE=$1 +if [ -z "$BUILD_TYPE" ]; then + BUILD_TYPE="normal" +fi + +case "$BUILD_TYPE" in + "normal") + CFLAGS+=" -g -O2 -DDEBUG_INTERNAL" + ;; + "debug") + CFLAGS+=" -g -O0" + LDFLAGS+=" -fno-pie -no-pie" + ;; + "release") + CFLAGS+=" -s -O2" + ;; + *) + echo "Unknown build type: $BUILD_TYPE" + exit 1 + ;; +esac + +# Make sure shaders are up to date +shader2h 330 vertex_shader vertex_shader.glsl data/vertex_shader.h +shader2h 330 fragment_shader shader.h fragment_shader.glsl data/fragment_shader.h + +# Stop on first error +set -e + +# Common compile commands +gcc_cmd="gcc $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME} $INCLUDE_PATHS $LDFLAGS" +mingw_cmd="x86_64-w64-mingw32-gcc $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME}.exe -mwindows $INCLUDE_PATHS $LDFLAGS" + +# Run Linux and Windows 
builds in parallel +( + ctime -begin .${PROJECT_NAME}_linux + $gcc_cmd $LINUX_INCLUDE $LINUX_LIBS + ctime -end .${PROJECT_NAME}_linux $? +) & + +( + ctime -begin .${PROJECT_NAME}_windows + $mingw_cmd $WINDOWS_INCLUDE $WINDOWS_LIBS + ctime -end .${PROJECT_NAME}_windows $? +) & +wait + diff --git a/base/callbacks.c b/base/callbacks.c new file mode 100644 index 0000000..c5397e9 --- /dev/null +++ b/base/callbacks.c @@ -0,0 +1,72 @@ + + +/* [=]===^=[ framebuffer_callback ]=================================================================^===[=] */ +static void framebuffer_callback(int32_t width, int32_t height) { + state.screen_width = width; + state.screen_height = height; + state.viewport.x = 0; + state.viewport.y = 0; + state.viewport.w = width; + state.viewport.h = height; + + float current_aspect = (float)width / (float)height; + + if(current_aspect > aspect_ratio) { // Window is wider than the desired aspect ratio + float new_width = height * aspect_ratio; // Compute new width based on the height and the desired aspect ratio + state.viewport.x = (width - new_width) / 2; + state.viewport.w = new_width; + } else if(current_aspect < aspect_ratio) { // Window is taller than the desired aspect ratio + float new_height = width / aspect_ratio; // Compute new height based on the width and the desired aspect ratio + state.viewport.y = (height - new_height) / 2; + state.viewport.h = new_height; + } +} + +/* [=]===^=[ key_callback ]=================================================================^===[=] */ +static void key_callback(uint32_t key, uint32_t action, uint32_t mods) { + + if(key == MKS_KEY_ESCAPE) { + if(action == MKS_PRESSED) { + mkfw_set_should_close(true); + } + } + + if(action == MKS_RELEASED) { + switch(key) { + // Handle shader CRT emulation toggle + case MKS_KEY_F12: { + state.toggle_crt_emulation = !state.toggle_crt_emulation; + } break; + + // Handle fullscreen toggle + case MKS_KEY_F11: { + if(!keyboard_state[MKS_KEY_SHIFT]) { + if(state.fullscreen) { + 
mkfw_fullscreen(false); + state.fullscreen = false; + } else { + mkfw_fullscreen(true); + state.fullscreen = true; + } + } else if(keyboard_state[MKS_KEY_SHIFT]) { +#ifdef PROFILER + state.overlay = !state.overlay; +#endif + } + } break; + + default: break; + } + } +} + +/* [=]===^=[ mouse_move_callback ]=================================================================^===[=] */ +static void mouse_move_callback(int32_t x, int32_t y) { + state.mouse_dx += x; + state.mouse_dy += y; +} + +/* [=]===^=[ mouse_button_callback ]=================================================================^===[=] */ +static void mouse_button_callback(uint8_t button, int action) { + // printf("mouse_button\n"); +} diff --git a/base/common.h b/base/common.h new file mode 100644 index 0000000..2138908 --- /dev/null +++ b/base/common.h @@ -0,0 +1,55 @@ + + +#ifdef _WIN32 +#include +#include +#include // For _aligned_malloc and _aligned_free on Windows +#define aligned_alloc(align, size) _aligned_malloc(size, align) +#define aligned_free _aligned_free + +#elif __linux__ +#include +#include +#include +#include // For aligned_alloc on Linux +#define aligned_free free +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(_Array) (sizeof(_Array) / sizeof(_Array[0])) +#endif + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +#if defined(__GNUC__) || defined(__clang__) || defined(__TINYC__) +#define ALIGNED(x) __attribute__((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGNED(x) __declspec(align(x)) +#else +#define ALIGNED(x) /* No alignment support */ +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define ASSUME(condition) if (!(condition)) __builtin_unreachable() +#elif defined(_MSC_VER) +#define ASSUME(condition) __assume(condition) +#else +#define ASSUME(condition) ((void)0) /* Fallback: No-op */ +#endif + +#define UNREACHABLE(cond) do { if(cond) __builtin_unreachable(); } while(0) + +#define DEFAULT_ALIGNMENT 64 + +static void *mks_alloc(size_t size) { + size = (size + (DEFAULT_ALIGNMENT - 1)) & ~(DEFAULT_ALIGNMENT - 1); + void *ptr = aligned_alloc(DEFAULT_ALIGNMENT, size); + state.total_allocated += size; + memset(ptr, 0, size); + return ptr; +} + +static void mks_free(void *ptr) { + aligned_free(ptr); +} diff --git a/base/fragment_shader.glsl b/base/fragment_shader.glsl new file mode 100644 index 0000000..c21b2de --- /dev/null +++ b/base/fragment_shader.glsl @@ -0,0 +1,149 @@ +// Specify default precision for fragment shaders + +out vec4 outcolor; +in vec2 frag_texture_coord; + +uniform vec2 resolution; +uniform vec2 src_image_size; +uniform float brightness; +uniform vec4 tone_data; +uniform bool crt_emulation; +uniform sampler2D iChannel0; + +vec3 CrtsFetch(vec2 uv) { + const float bias = 0.003333333; + return max(texture(iChannel0, uv, -16.0).rgb, vec3(bias)); +} + +#define CrtsRcpF1(x) (1.0 / (x)) +#define CrtsSatF1(x) clamp((x), 0.0, 1.0) +const float PI2 = 6.28318530717958; +const float HALF = 0.5; + +float CrtsMax3F1(float a, float b, float c) { + return max(a, max(b, c)); +} + +vec3 CrtsMask(vec2 pos, float dark) { + #ifdef CRTS_MASK_GRILLE + vec3 m = vec3(dark); + float x = fract(pos.x * (1.0 / 3.0)); + m.r = (x < (1.0 / 3.0)) ? 1.0 : dark; + m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark; + m.b = (x >= (2.0 / 3.0)) ? 
1.0 : dark; + return m; + #endif + + #ifdef CRTS_MASK_GRILLE_LITE + vec3 m = vec3(1.0); + float x = fract(pos.x * (1.0 / 3.0)); + m.r = (x < (1.0 / 3.0)) ? dark : 1.0; + m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? dark : 1.0; + m.b = (x >= (2.0 / 3.0)) ? dark : 1.0; + return m; + #endif + + #ifdef CRTS_MASK_NONE + return vec3(1.0); + #endif + + #ifdef CRTS_MASK_SHADOW + pos.x += pos.y * 3.0; + vec3 m = vec3(dark); + float x = fract(pos.x * (1.0 / 6.0)); + m.r = (x < (1.0 / 3.0)) ? 1.0 : dark; + m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark; + m.b = (x >= (2.0 / 3.0)) ? 1.0 : dark; + return m; + #endif +} + +vec3 CrtsFilter(vec2 ipos, vec2 inputSizeDivOutputSize, vec2 halfInputSize, vec2 rcpInputSize, vec2 rcpOutputSize, vec2 twoDivOutputSize, float inputHeight, vec2 warp, float thin, float blur, float mask, vec4 tone) { + vec2 pos = ipos * twoDivOutputSize - vec2(1.0); + pos *= vec2(1.0 + (pos.y * pos.y) * warp.x, 1.0 + (pos.x * pos.x) * warp.y); + float vin = 1.0 - ((1.0 - CrtsSatF1(pos.x * pos.x)) * (1.0 - CrtsSatF1(pos.y * pos.y))); + vin = CrtsSatF1((-vin) * inputHeight + inputHeight); + pos = pos * halfInputSize + halfInputSize; + + float y0 = floor(pos.y - 0.5) + 0.5; + float x0 = floor(pos.x - 1.5) + 0.5; + vec2 p = vec2(x0 * rcpInputSize.x, y0 * rcpInputSize.y); + + vec3 colA[4], colB[4]; + for (int i = 0; i < 4; i++) { + colA[i] = CrtsFetch(p); + p.x += rcpInputSize.x; + } + p.y += rcpInputSize.y; + for (int i = 3; i >= 0; i--) { + p.x -= rcpInputSize.x; + colB[i] = CrtsFetch(p); + } + + float off = pos.y - y0; + float scanA = cos(min(HALF, off * thin) * PI2) * HALF + HALF; + float scanB = cos(min(HALF, (-off) * thin + thin) * PI2) * HALF + HALF; + + float off0 = pos.x - x0; + float pix[4]; + for (int i = 0; i < 4; i++) { + float diff = off0 - float(i); + pix[i] = exp2(blur * diff * diff); + } + float pixT = CrtsRcpF1(pix[0] + pix[1] + pix[2] + pix[3]); + + #ifdef CRTS_WARP + pixT *= vin; + #endif + + scanA *= pixT; + scanB *= pixT; + + vec3 
color = (colA[0] * pix[0] + colA[1] * pix[1] + colA[2] * pix[2] + colA[3] * pix[3]) * scanA + (colB[0] * pix[0] + colB[1] * pix[1] + colB[2] * pix[2] + colB[3] * pix[3]) * scanB; + color *= CrtsMask(ipos, mask); + + #ifdef CRTS_TONE + float peak = max(1.0 / (256.0 * 65536.0), CrtsMax3F1(color.r, color.g, color.b)); + vec3 ratio = color * CrtsRcpF1(peak); + #ifdef CRTS_CONTRAST + peak = pow(peak, tone.x); + #endif + peak = peak * CrtsRcpF1(peak * tone.y + tone.z); + #ifdef CRTS_SATURATION + ratio = pow(ratio, vec3(tone.w)); + #endif + return ratio * peak; + #else + return color; + #endif +} + +vec3 linearToSRGB(vec3 color) { + return pow(color, vec3(1.0 / 2.2)); +} + +void main() { + vec2 fragCoord = vec2(frag_texture_coord.x, 1.0 - frag_texture_coord.y); + if (crt_emulation) { + outcolor.rgb = CrtsFilter( + fragCoord.xy * resolution, + src_image_size / resolution, + src_image_size * vec2(0.5), + 1.0 / src_image_size, + 1.0 / resolution, + 2.0 / resolution, + src_image_size.y, + vec2(1.0 / 24.0, 1.0 / 16.0), // warp value + INPUT_THIN, + INPUT_BLUR, + INPUT_MASK, + tone_data + ); + + outcolor.rgb *= brightness; + outcolor = vec4(outcolor.rgb, 1.0); // Keep original color with alpha set to 1.0 + + } else { + outcolor = texture(iChannel0, fragCoord); + } +} diff --git a/base/incbin.h b/base/incbin.h new file mode 100644 index 0000000..fc1ecd2 --- /dev/null +++ b/base/incbin.h @@ -0,0 +1,50 @@ + + +#define STR2(x) #x +#define STR(x) STR2(x) + +#ifdef _WIN32 +#define INCBIN_SECTION ".data, \"aw\"" +#else +#define INCBIN_SECTION ".data" +#endif + +/* +#ifdef _WIN32 +#define INCBIN_SECTION ".rdata, \"dr\"" +#else +#define INCBIN_SECTION ".rodata" +#endif +*/ + +#define INCBIN(name, file) \ + __asm__(".section " INCBIN_SECTION "\n" \ + ".global " STR(name) "_data\n" \ + ".balign 64\n" \ + STR(name) "_data:\n" \ + ".incbin " STR(file) "\n" \ + ".zero 64\n" \ + ".global " STR(name) "_end\n" \ + ".balign 1\n" \ + STR(name) "_end:\n"); \ + extern __attribute__((aligned(64))) 
char name##_data[]; \ + extern char name##_end[]; + +/* +// INCBIN_SHADER(vertexshader, "#version 140", "shader_header.glsl", "vertex_shader.glsl"); +#define INCBIN_SHADER(name, version_str, header_file, shader_file) \ + __asm__(".section " INCBIN_SECTION "\n" \ + ".global " STR(name) "_data\n" \ + ".balign 64\n" \ + STR(name) "_data:\n" \ + ".ascii \"" version_str "\\n\"\n" \ + ".incbin \"" header_file "\"\n" \ + ".incbin \"" shader_file "\"\n" \ + ".byte 0\n" \ + ".global " STR(name) "_end\n" \ + ".balign 1\n" \ + STR(name) "_end:\n"); \ + extern __attribute__((aligned(64))) char name##_data[]; \ + extern char name##_end[]; + +*/ diff --git a/base/opengl.c b/base/opengl.c new file mode 100644 index 0000000..641a2a0 --- /dev/null +++ b/base/opengl.c @@ -0,0 +1,133 @@ + +#include "shader.c" +#include "shader.h" +#include "data/fragment_shader.h" +#include "data/vertex_shader.h" + +/* [=]===^=[ setup_render_target ]================================================================^===[=] */ +static void setup_render_target(void) { + glDeleteTextures(1, &state.texture); + + glGenTextures(1, &state.texture); + glBindTexture(GL_TEXTURE_2D, state.texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, state.render_width, state.render_height, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, buffer); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D, 0); +} + +/* [=]===^=[ compile_shader ]==============================================================^===[=] */ +static GLuint compile_shader(GLenum shader_type, const char *shader_source) { + GLuint shader = glCreateShader(shader_type); + glShaderSource(shader, 1, &shader_source, 0); + glCompileShader(shader); + + GLint success; + GLchar info_log[512]; + glGetShaderiv(shader, 
GL_COMPILE_STATUS, &success); + if(!success) { + glGetShaderInfoLog(shader, sizeof(info_log), 0, info_log); + DEBUG_PRINT("%s shader compilation failed:\n%s\n", (shader_type == GL_VERTEX_SHADER) ? "Vertex" : "Fragment", info_log); + } + return shader; +} + +/* [=]===^=[ setup_opengl ]================================================================^===[=] */ +static void opengl_setup(const char *vertex_shader_src, const char *fragment_shader_src) { + gl_loader(); + glEnable(GL_FRAMEBUFFER_SRGB); + glDisable(GL_DEPTH_TEST); + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glDisable(GL_CULL_FACE); + + // Shader setup + GLuint vertex_shader = compile_shader(GL_VERTEX_SHADER, vertex_shader_src); + GLuint fragment_shader = compile_shader(GL_FRAGMENT_SHADER, fragment_shader_src); + + state.shader_program = glCreateProgram(); + glAttachShader(state.shader_program, vertex_shader); + glAttachShader(state.shader_program, fragment_shader); + + glBindAttribLocation(state.shader_program, 0, "position"); + glBindAttribLocation(state.shader_program, 1, "texture_coord"); + glLinkProgram(state.shader_program); + GLint success; + glGetProgramiv(state.shader_program, GL_LINK_STATUS, &success); + if(!success) { + char log[512]; + glGetProgramInfoLog(state.shader_program, sizeof(log), NULL, log); + DEBUG_PRINT("Shader Linking Failed: %s\n", log); + } + + glDeleteShader(vertex_shader); + glDeleteShader(fragment_shader); + glUseProgram(state.shader_program); + + // Calculations for the shader. + state.contrast = 1.0f; + state.saturation = 0.3f; + state.brightness = 1.0f; + CrtsTone(state.tone_data, state.contrast, state.saturation, INPUT_THIN, INPUT_MASK); // NOTE(peter): Move this into the mainloop if change of contrast/saturation is added as an interactive thing. 
+ + // Retrieve shader uniforms + state.uniform_resolution = glGetUniformLocation(state.shader_program, "resolution"); + state.uniform_src_image_size = glGetUniformLocation(state.shader_program, "src_image_size"); + state.uniform_brightness = glGetUniformLocation(state.shader_program, "brightness"); + state.uniform_tone = glGetUniformLocation(state.shader_program, "tone_data"); + state.uniform_crt_emulation = glGetUniformLocation(state.shader_program, "crt_emulation"); + state.uniform_sampler_location = glGetUniformLocation(state.shader_program, "iChannel0"); + + glGenVertexArrays(1, &state.vao); + glGenBuffers(1, &state.vbo); + glGenBuffers(1, &state.ebo); + glBindVertexArray(state.vao); + + // Vertex data: Position (x, y) and Texture Coordinates (u, v) + const float vertices[] = { + -1.0f, -1.0f, 0.0f, 0.0f, // Bottom-left + 1.0f, -1.0f, 1.0f, 0.0f, // Bottom-right + 1.0f, 1.0f, 1.0f, 1.0f, // Top-right + -1.0f, 1.0f, 0.0f, 1.0f // Top-left + }; + + static const unsigned int indices[] = { 0, 1, 2, 2, 3, 0 }; + + glBindBuffer(GL_ARRAY_BUFFER, state.vbo); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); + + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); // Position + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float))); // Texture Coord + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); +} + +/* [=]===^=[ render_frame ]=================================================================^===[=] */ +__attribute__((always_inline)) +static inline void render_frame(void) { + glClearColor(.0f, 0.f, 0.f, 1.f); + glClear(GL_COLOR_BUFFER_BIT); + + glUseProgram(state.shader_program); + glBindVertexArray(state.vao); + glBindBuffer(GL_ARRAY_BUFFER, state.vbo); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo); + 
glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, state.texture); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, state.render_width, state.render_height, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, display_buffer); + glUniform2f(state.uniform_src_image_size, (float)state.render_width, (float)state.render_height); + glUniform2f(state.uniform_resolution, (float)state.viewport.w, (float)state.viewport.h); + glUniform1f(state.uniform_brightness, state.brightness); + glUniform4f(state.uniform_tone, state.tone_data[0], state.tone_data[1], state.tone_data[2], state.tone_data[3]); + glUniform1i(state.uniform_crt_emulation, state.toggle_crt_emulation); + glUniform1i(state.uniform_sampler_location, 0); + glViewport(state.viewport.x, state.viewport.y, state.viewport.w, state.viewport.h); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0); +} diff --git a/base/opengl_loader.c b/base/opengl_loader.c new file mode 100644 index 0000000..36d2444 --- /dev/null +++ b/base/opengl_loader.c @@ -0,0 +1,210 @@ +#ifdef _WIN32 +typedef __int64 GLintptr; +#else +typedef intptr_t GLintptr; +#endif +typedef void GLvoid; +typedef unsigned char GLboolean; +typedef unsigned char GLubyte; +typedef char GLchar; + +typedef int GLint; +typedef int GLsizei; + +typedef unsigned int GLenum; +typedef unsigned int GLuint; +typedef unsigned int GLbitfield; + +typedef float GLfloat; +typedef double GLdouble; + +typedef unsigned long long GLsizeiptr; + +#define GL_NO_ERROR 0 +#define GL_INFO_LOG_LENGTH 0x8b84 +#define GL_ZERO 0x0000 +#define GL_ONE 0x0001 +#define GL_ALPHA 0x1906 +#define GL_BLEND 0x0be2 +#define GL_CLAMP_TO_EDGE 0x812f +#define GL_COLOR_BUFFER_BIT 0x4000 +#define GL_COMPILE_STATUS 0x8b81 +#define GL_DEPTH_TEST 0x0b71 +#define GL_FRAMEBUFFER_SRGB 0x8db9 +#define GL_FRAGMENT_SHADER 0x8b30 +#define GL_LINK_STATUS 0x8b82 +#define GL_MODELVIEW 0x1700 +#define GL_NEAREST 0x2600 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define 
GL_PROJECTION 0x1701 +#define GL_QUADS 0x0007 +#define GL_RGBA 0x1908 +#define GL_RGBA8 0x8058 +#define GL_SCISSOR_TEST 0x0c11 +#define GL_SRGB8_ALPHA8 0x8c43 +#define GL_SRC_ALPHA 0x0302 +#define GL_TEXTURE0 0x84c0 +#define GL_TEXTURE_2D 0x0de1 +#define GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_UNSIGNED_INT_8_8_8_8 0x8035 +#define GL_VERTEX_SHADER 0x8b31 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_STATIC_DRAW 0x88e4 +#define GL_FLOAT 0x1406 +#define GL_FALSE 0 +#define GL_TRUE 1 +#define GL_LINEAR 0x2601 +#define GL_FUNC_ADD 0x8006 +#define GL_CULL_FACE 0x0b44 +#define GL_STREAM_DRAW 0x88e0 +#define GL_WRITE_ONLY 0x88b9 +#define GL_TRIANGLES 0x0004 +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_MULTISAMPLE 0x809d +#define GL_DYNAMIC_DRAW 0x88e8 +#define GL_RED 0x1903 +#define GL_FRAMEBUFFER_WIDTH 0x9310 +#define GL_FRAMEBUFFER_HEIGHT 0x9311 +#define GL_VIEWPORT 0x0ba2 +#define GL_PIXEL_UNPACK_BUFFER 0x88ec +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88ef +#define GL_VERTEX_ARRAY_BINDING 0x85b5 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_CURRENT_PROGRAM 0x8b8d +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_SRGB_ALPHA 0x8c43 +#define GL_UNPACK_ROW_LENGTH 0x0cf2 +#define GL_UNPACK_SKIP_PIXELS 0x0cf4 +#define GL_UNPACK_SKIP_ROWS 0x0cf3 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8646 + +#define DECLARE_GL_FUNCTION(Name, ReturnType, ...) typedef ReturnType (*type_##Name)(__VA_ARGS__); +#define DECLARE_GLOBAL_FUNCTION(Name, ...) 
type_##Name Name; + +#define GL_FUNCTIONS(X) \ + X(glActiveTexture, void, GLenum texture) \ + X(glAttachShader, void, GLuint program, GLuint shader) \ + X(glBindBuffer, void, GLenum target, GLuint buffer) \ + X(glBindTexture, void, GLenum target, GLuint texture) \ + X(glBufferData, void, GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage) \ + X(glClear, void, GLbitfield mask) \ + X(glClearColor, void, GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) \ + X(glCompileShader, void, GLuint shader) \ + X(glCreateProgram, GLuint) \ + X(glCreateShader, GLuint, GLenum type) \ + X(glDeleteShader, void, GLuint shader) \ + X(glDrawElements, void, GLenum mode, GLsizei count, GLenum type, const GLvoid *indices) \ + X(glEnableVertexAttribArray, void, GLuint index) \ + X(glGenBuffers, void, GLsizei n, GLuint *buffers) \ + X(glGenTextures, void, GLsizei n, GLuint *textures) \ + X(glGetShaderInfoLog, void, GLuint shader, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \ + X(glGetShaderiv, void, GLuint shader, GLenum pname, GLint *params) \ + X(glGetUniformLocation, GLint, GLuint program, const GLchar *name) \ + X(glLinkProgram, void, GLuint program) \ + X(glShaderSource, void, GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length) \ + X(glTexImage2D, void, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels) \ + X(glTexParameteri, void, GLenum target, GLenum pname, GLint param) \ + X(glTexSubImage2D, void, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels) \ + X(glUniform1f, void, GLint location, GLfloat v0) \ + X(glUniform1i, void, GLint location, GLint v0) \ + X(glUniform2f, void, GLint location, GLfloat v0, GLfloat v1) \ + X(glUniform4f, void, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3) \ + X(glUseProgram, void, GLuint program) \ + 
X(glVertexAttribPointer, void, GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid *pointer) \ + X(glViewport, void, GLint x, GLint y, GLsizei width, GLsizei height) \ + X(glDeleteProgram, void, GLuint program) \ + X(glDeleteBuffers, void, GLsizei n, const GLuint *buffers) \ + X(glDeleteTextures, void, GLsizei n, const GLuint *textures) \ + X(glEnable, void, GLenum cap) \ + X(glGenerateMipmap, void, GLenum target) \ + X(glGetProgramiv, void, GLuint program, GLenum pname, GLint *params) \ + X(glGetAttribLocation, GLint, GLuint program, const GLchar *name) \ + X(glDetachShader, void, GLuint program, GLuint shader) \ + X(glUniformMatrix4fv, void, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value) \ + X(glMapBuffer, void*, GLenum target, GLenum access) \ + X(glUnmapBuffer, GLboolean, GLenum target) \ + X(glBlendEquation, void, GLenum mode) \ + X(glBlendFunc, void, GLenum sfactor, GLenum dfactor) \ + X(glDisable, void, GLenum cap) \ + X(glScissor, void, GLint x, GLint y, GLsizei width, GLsizei height) \ + X(glTexCoord2f, void, GLfloat s, GLfloat t) \ + X(glVertex2f, void, GLfloat x, GLfloat y) \ + X(glGetError, GLenum) \ + X(glGetProgramInfoLog, void, GLuint program, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \ + X(glGenVertexArrays, void, GLsizei n, GLuint *arrays) \ + X(glBindVertexArray, void, GLuint array) \ + X(glDeleteVertexArrays, void, GLsizei n, const GLuint *arrays) \ + X(glDrawArrays, void, GLenum mode, GLint first, GLsizei count) \ + X(glBufferSubData, void, GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid *data) \ + X(glDisableVertexAttribArray, void, GLuint index) \ + X(glGetIntegerv, void, GLenum pname, GLint *data) \ + X(glBindAttribLocation, void, GLuint program, GLuint index, const GLchar *name) \ + X(glGetUniformfv, void, GLuint program, GLint location, GLfloat *params) \ + X(glPixelStorei, void, GLenum pname, GLint param) \ + X(glGetVertexAttribiv, void, GLuint index, 
GLenum pname, GLint *params) \ + X(glFinish, void) + +GL_FUNCTIONS(DECLARE_GL_FUNCTION) +GL_FUNCTIONS(DECLARE_GLOBAL_FUNCTION) + + +#if defined(_WIN32) +static void *get_any_gl_address(const char *name) { + void *p = (void *)wglGetProcAddress(name); + if(!p) { + HMODULE module = LoadLibraryA("opengl32.dll"); + if(module) { + p = (void *)GetProcAddress(module, name); + } + } + return p; +} + +#define GetOpenGLFunction(Name, ...) \ + *(void **)&Name = (void *)get_any_gl_address(#Name); \ + if(!Name) { \ + DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \ + exit(EXIT_FAILURE); \ + } + +#elif defined(__linux__) +#include +static void *glXGetProcAddress(const GLubyte *procName) { + static void *(*glxGetProcAddress)(const GLubyte *); + if(!glxGetProcAddress) { + void *libGL = dlopen("libGL.so.1", RTLD_LAZY | RTLD_GLOBAL); + if(!libGL) { + DEBUG_PRINT("Error: Unable to load libGL.so.1\n"); + exit(EXIT_FAILURE); + } + glxGetProcAddress = dlsym(libGL, "glXGetProcAddress"); + if(!glxGetProcAddress) { + DEBUG_PRINT("Error: Unable to find glXGetProcAddress\n"); + exit(EXIT_FAILURE); + } + } + return glxGetProcAddress(procName); +} + +#define GetOpenGLFunction(Name, ...) 
\ + *(void **)&Name = (void *)glXGetProcAddress((const GLubyte *)#Name); \ + if(!Name) { \ + DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \ + exit(EXIT_FAILURE); \ + } +#endif + +__attribute__((cold, noinline, section(".init_section"))) +static void gl_loader() { + GL_FUNCTIONS(GetOpenGLFunction); +}; \ No newline at end of file diff --git a/base/overlay.c b/base/overlay.c new file mode 100644 index 0000000..2316b24 --- /dev/null +++ b/base/overlay.c @@ -0,0 +1,375 @@ +#include "data/font_info.h" +INCBIN(_font_texture, "data/font.ugg"); +struct ugg *font_texture_data = (struct ugg*)_font_texture_data; + +/* Overlay state structure */ +struct overlay { + GLuint vao; + GLuint vbo; + GLuint ebo; + GLuint program; + GLint loc_proj; + GLint loc_tex; + GLint loc_color; + GLint loc_pos_offset; + GLuint font_texture; + GLuint white_texture; +}; + +static struct overlay overlay_state; +static float mat[16]; + +/* ------------------------------------------------------------------------- */ +/* Shader sources for a top-left orthographic approach */ +/* ------------------------------------------------------------------------- */ +static const char* overlay_vertex_shader_src = +"#version 140\n" +"in vec2 in_pos;\n" +"in vec2 in_uv;\n" +"uniform mat4 u_projection;\n" +"uniform vec2 u_pos_offset;\n" +"out vec2 v_uv;\n" +"void main() {\n" +" vec2 pos = in_pos + u_pos_offset;\n" +" gl_Position = u_projection * vec4(pos, 0.0, 1.0);\n" +" v_uv = in_uv;\n" +"}\n"; + +static const char* overlay_fragment_shader_src = +"#version 140\n" +"uniform sampler2D u_font_texture;\n" +"uniform vec4 u_color;\n" +"in vec2 v_uv;\n" +"out vec4 frag_color;\n" +"void main() {\n" +" vec4 tex_sample = texture(u_font_texture, v_uv);\n" +" float alpha = tex_sample.a;\n" +" frag_color = vec4(u_color.rgb * alpha, u_color.a * alpha);\n" +"}\n"; + +/* ------------------------------------------------------------------------- */ +/* Shader helpers */ +/* 
------------------------------------------------------------------------- */ +__attribute__((cold, noinline, section(".init_section"))) +static GLuint overlay_compile_shader(const char *source, GLenum type) { + GLuint shader = glCreateShader(type); + if(shader == 0) { + fprintf(stderr, "Error creating shader of type %d.\n", type); + return 0; + } + + glShaderSource(shader, 1, &source, 0); + glCompileShader(shader); + + /* Check for compilation errors */ + GLint success; + glGetShaderiv(shader, GL_COMPILE_STATUS, &success); + if(!success) { + GLint log_size = 0; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_size); + char log[2048]; + glGetShaderInfoLog(shader, log_size, 0, log); + fprintf(stderr, "Shader compilation failed:\n%s\n", log); + glDeleteShader(shader); + return 0; + } + + return shader; +} + +__attribute__((cold, noinline, section(".init_section"))) +static GLuint overlay_create_program(const char *vs_source, const char *fs_source) { + GLuint vs = overlay_compile_shader(vs_source, GL_VERTEX_SHADER); + GLuint fs = overlay_compile_shader(fs_source, GL_FRAGMENT_SHADER); + GLuint prog = glCreateProgram(); + + glAttachShader(prog, vs); + glAttachShader(prog, fs); + glLinkProgram(prog); + + /* Check for linking errors */ + GLint success; + glGetProgramiv(prog, GL_LINK_STATUS, &success); + if(!success) { + /* Get and print log */ + GLint log_size = 0; + glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &log_size); + char log[2048]; + glGetProgramInfoLog(prog, log_size, 0, log); + fprintf(stderr, "Program linking failed:\n%s\n", log); + glDeleteProgram(prog); + glDeleteShader(vs); + glDeleteShader(fs); + return 0; + } + + /* Shaders can be deleted after linking */ + glDeleteShader(vs); + glDeleteShader(fs); + + return prog; +} + +static void overlay_make_ortho_top_left(float w, float h, float m[16]) { + m[0] = 2.0f / w; + m[5] = -2.0f / h; + m[10] = -1.0f; + m[12] = -1.0f; + m[13] = 1.0f; + m[15] = 1.0f; +} + +#define MAX_GLYPHS 128 // NOTE(peter): max glyphs per ROW 
+#define MAX_VERTICES (MAX_PROFILING_ENTRIES * MAX_GLYPHS * 4 * 4) +float vertices[MAX_VERTICES] __attribute__((section(".bss"))); + +static void overlay_init(void) { + overlay_state.program = overlay_create_program(overlay_vertex_shader_src, overlay_fragment_shader_src); + overlay_state.loc_proj = glGetUniformLocation(overlay_state.program, "u_projection"); + overlay_state.loc_tex = glGetUniformLocation(overlay_state.program, "u_font_texture"); + overlay_state.loc_color = glGetUniformLocation(overlay_state.program, "u_color"); + overlay_state.loc_pos_offset = glGetUniformLocation(overlay_state.program, "u_pos_offset"); + + glGenVertexArrays(1, &overlay_state.vao); + glBindVertexArray(overlay_state.vao); + + glGenBuffers(1, &overlay_state.vbo); + glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); + glBufferData(GL_ARRAY_BUFFER, MAX_VERTICES * sizeof(float), 0, GL_DYNAMIC_DRAW); + + glGenBuffers(1, &overlay_state.ebo); + + unsigned short indices[MAX_PROFILING_ENTRIES * MAX_GLYPHS * 6]; + size_t index_offset = 0; + size_t vertex_offset = 0; + + for(size_t i = 0; i < MAX_PROFILING_ENTRIES * MAX_GLYPHS; i++) { + indices[index_offset++] = vertex_offset + 0; + indices[index_offset++] = vertex_offset + 1; + indices[index_offset++] = vertex_offset + 2; + indices[index_offset++] = vertex_offset + 0; + indices[index_offset++] = vertex_offset + 2; + indices[index_offset++] = vertex_offset + 3; + vertex_offset += 4; + } + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, overlay_state.ebo); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); + + GLint in_pos_attrib = glGetAttribLocation(overlay_state.program, "in_pos"); + GLint in_uv_attrib = glGetAttribLocation(overlay_state.program, "in_uv"); + + glEnableVertexAttribArray(in_pos_attrib); + glVertexAttribPointer(in_pos_attrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); + glEnableVertexAttribArray(in_uv_attrib); + glVertexAttribPointer(in_uv_attrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), 
(void*)(2 * sizeof(float))); + + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + + uint8_t *rgba_data = mks_alloc(512 * 512 * 4); + uint8_t *dst = rgba_data; + for(uint32_t i = 0; i < 512 * 512; ++i) { + uint8_t alpha = font_texture_data->data[i]; + if(alpha) { + *dst++ = 255; + *dst++ = 255; + *dst++ = 255; + *dst++ = alpha; + + } else { + *dst++ = 0; + *dst++ = 0; + *dst++ = 0; + *dst++ = 0; + } + } + glGenTextures(1, &overlay_state.font_texture); + glBindTexture(GL_TEXTURE_2D, overlay_state.font_texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 512, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgba_data); + glGenerateMipmap(GL_TEXTURE_2D); + mks_free(rgba_data); + + glGenTextures(1, &overlay_state.white_texture); + glBindTexture(GL_TEXTURE_2D, overlay_state.white_texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + uint32_t white[] = { 0xffffffff }; + glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, 1, 1, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, white); + + glBindTexture(GL_TEXTURE_2D, 0); +} + +__attribute__((cold, noinline, section(".init_section"))) +static void overlay_shutdown(void) { + glDeleteProgram(overlay_state.program); + glDeleteBuffers(1, &overlay_state.vbo); + 
glDeleteBuffers(1, &overlay_state.ebo); + glDeleteVertexArrays(1, &overlay_state.vao); + glDeleteTextures(1, &overlay_state.font_texture); +} + +static void overlay_render_rect(float x1, float y1, float x2, float y2, float r, float g, float b, float a) { + float vertices[4 * 4] = { + /* pos.x, pos.y, u, v */ + x1, y1, 0.f, 0.f, // Top-left + x2, y1, 1.f, 0.f, // Top-right + x2, y2, 1.f, 1.f, // Bottom-right + x1, y2, 0.f, 1.f // Bottom-left + }; + + glBindVertexArray(overlay_state.vao); + glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices); + glUseProgram(overlay_state.program); + glUniform4f(overlay_state.loc_color, r, g, b, a); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, overlay_state.white_texture); + glUniform1i(overlay_state.loc_tex, 0); + glUniform2f(overlay_state.loc_pos_offset, 0.0f, 0.0f); + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); + glBindTexture(GL_TEXTURE_2D, 0); + glBindVertexArray(0); +} + +__attribute__((always_inline)) +static inline void overlay_render_text_line(uint8_t **lines, float x, float y, float r, float g, float b, float a) { + uint32_t vertex_offset = 0; + uint32_t glyph_count = 0; + + glBindVertexArray(overlay_state.vao); + glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); + + float target_width = 24.f * .40f; + float target_height = 48.f * .40f; + + // Convert font units to pixels + float _scale = 48.0f / (1900 - (-480)); + float scaled_ascent = 1900 * _scale; + float scale_x = target_width / 24.0f; + float scale_y = target_height / 48.0f; + + // **Apply baseline correction once (not per character!)** + y += scaled_ascent * scale_y; + + for(int i = 0; i < MAX_PROFILING_ENTRIES; i++) { + uint8_t *text = lines[i]; + if(!text) continue; + + float cx = x; + while(*text) { + uint8_t c = *text++; + struct glyph_info *g = &glyph_data[c]; + + // Normalize texture coordinates + float u0 = g->x / 512.f; + float v0 = g->y / 512.f; + float u1 = (g->x + 
g->width) / 512.f; + float v1 = (g->y + g->height) / 512.f; + + // Apply width & height scaling + float glyph_width = g->width * scale_x; + float glyph_height = g->height * scale_y; + + // **Fix baseline positioning** + float x0 = roundf(cx + (g->x_offset * scale_x)); + float y0 = roundf(y + (g->y_offset * scale_y)); + float x1 = x0 + glyph_width; + float y1 = y0 + glyph_height; + + // Generate quad for the character + vertices[vertex_offset + 0] = x0; + vertices[vertex_offset + 1] = y0; + vertices[vertex_offset + 2] = u0; + vertices[vertex_offset + 3] = v0; + + vertices[vertex_offset + 4] = x1; + vertices[vertex_offset + 5] = y0; + vertices[vertex_offset + 6] = u1; + vertices[vertex_offset + 7] = v0; + + vertices[vertex_offset + 8] = x1; + vertices[vertex_offset + 9] = y1; + vertices[vertex_offset + 10] = u1; + vertices[vertex_offset + 11] = v1; + + vertices[vertex_offset + 12] = x0; + vertices[vertex_offset + 13] = y1; + vertices[vertex_offset + 14] = u0; + vertices[vertex_offset + 15] = v1; + + // Move cursor forward + cx += g->advance * scale_x; + vertex_offset += 16; + glyph_count++; + } + y += target_height; // Move to next row + } + + + if(glyph_count > 0) { + glBindVertexArray(overlay_state.vao); + glBindBuffer(GL_ARRAY_BUFFER, overlay_state.vbo); + glBufferSubData(GL_ARRAY_BUFFER, 0, vertex_offset * sizeof(float), vertices); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, overlay_state.font_texture); + glUniform1i(overlay_state.loc_tex, 0); + glUniform4f(overlay_state.loc_color, r, g, b, a); + glDrawElements(GL_TRIANGLES, glyph_count * 6, GL_UNSIGNED_SHORT, 0); + glBindVertexArray(0); + } +} + +static void overlay_render(float rect_x, float rect_y, float rect_w, float rect_h, uint8_t **lines, int window_w, int window_h) { + overlay_make_ortho_top_left((float)window_w, (float)window_h, mat); + glViewport(0.f, 0.f, window_w, window_h); + glUseProgram(overlay_state.program); + glUniformMatrix4fv(overlay_state.loc_proj, 1, GL_FALSE, mat); + + 
overlay_render_rect(rect_x, rect_y, rect_x + rect_w, rect_y + rect_h, .01f, .013f, .04f, .6f); + overlay_render_text_line(lines, rect_x + 2.f, rect_y + 2.f, 0.f, 0.f, 0.f, 1.f); + overlay_render_text_line(lines, rect_x, rect_y, 1.f, 1.f, 1.f, 1.f); +} + +/* [=]===^=[ debug_render ]=================================================================^===[=] */ +__attribute__((section(".bss"))) +uint8_t *debug_lines[MAX_PROFILING_ENTRIES]; +static void debug_render(void) { + if(state.overlay) { + size_t offset = 0; + for(uint32_t i = 0; i < MAX_PROFILING_ENTRIES; ++i) { + if(state.debug.timings[i].count) { + debug_lines[i] = &debug_line_buffer[offset]; + offset += stbsp_sprintf((char *)&debug_line_buffer[offset], "%25s: cycles=%7" PRIu64 ", count=%2u, cycles/count=%7" PRIu64, state.debug.timings[i].name, state.debug.timings[i].cycles, state.debug.timings[i].count, state.debug.timings[i].cycles / state.debug.timings[i].count) + 1; + } else { + debug_lines[i] = 0; + } + } + overlay_render(20.f, 20.f, 705.f, 280.f, debug_lines, state.screen_width, state.screen_height); + + offset = 0; + for(uint32_t i = 0; i < MAX_PROFILING_ENTRIES; ++i) { + debug_lines[i] = 0; + } + debug_lines[0] = &debug_line_buffer[offset]; + offset += stbsp_sprintf((char *)&debug_line_buffer[offset], "Total memory allocated: %" PRIu64, state.total_allocated) + 1; + overlay_render(20.f, 340.f, 500.f, 200.f, debug_lines, state.screen_width, state.screen_height); + } +} + +/* [=]===^=[ reset_profiling_data ]=================================================================^===[=] */ +static inline void reset_profiling_data(void) { + memset(&state.debug, 0, sizeof(state.debug)); +} diff --git a/base/render.c b/base/render.c new file mode 100644 index 0000000..e7118d0 --- /dev/null +++ b/base/render.c @@ -0,0 +1,79 @@ + + + +// Get pointer to where in the buffer to render RENDER_START(0,0) is top left +#define RENDER_START(x, y) (state.display_buffer + ((y) << 11) + (x)) + +// Center X Coordinate for 
Rendering +#define CENTER_X(w) ((state.render_width - (w)) >> 1) + +/* [=]===^=[ update_render_position ]=================================================================^===[=] */ +static void update_render_position(void) { + state.render_x = (BUFFER_WIDTH - state.render_width) >> 1; + state.render_y = (BUFFER_HEIGHT - state.render_height) >> 1; + state.display_buffer = buffer + (state.render_y * BUFFER_WIDTH) + state.render_x; +} + +/* [=]===^=[ change_resolution ]=================================================================^===[=] */ +static void change_resolution(uint32_t new_width, uint32_t new_height) { + if(new_width != state.render_width || new_height != state.render_height) { + state.render_width = new_width; + state.render_height = new_height; + update_render_position(); + setup_render_target(); + } +} + +/* [=]===^=[ clear_buffer ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void clear_buffer(void) { + PROFILE_FUNCTION(); + uint32_t * restrict dst = RENDER_START(0, 0); + + for(uint32_t i = 0; i < state.render_height; i++) { + memset(dst, 0, state.render_width * 4); + dst += BUFFER_WIDTH; + } +} + +/* [=]===^=[ set_decay ]=================================================================^===[=] */ +static uint16_t _old_weight; +static uint16_t _new_weight; +static void set_decay(uint16_t old_weight) { + _old_weight = old_weight ? (old_weight > 256 ? 
256 : old_weight) : 0; + _new_weight = 256 - old_weight; +} + +/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void apply_phosphor_decay(void) { + PROFILE_FUNCTION(); + __m256i old_weight = _mm256_set1_epi16(_old_weight); + __m256i new_weight = _mm256_set1_epi16(_new_weight); + __m128i alpha_mask = _mm_set1_epi32(0x000000ff); + uint32_t render_width = state.render_width; + uint32_t render_height = state.render_height; + uint32_t * restrict src = RENDER_START(0, 0); + uint32_t * restrict dst = display_buffer; + + for(uint32_t y = 0; y < render_height; ++y, src += BUFFER_WIDTH, dst += render_width) { + for(uint32_t x = 0; x < render_width; x += 4) { + _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0); + _mm_prefetch((char*)&dst[x + 2 * render_width], _MM_HINT_T0); + + __m128i new_pixels = _mm_loadu_si128((__m128i*)&src[x]); + __m128i old_pixels = _mm_loadu_si128((__m128i*)&dst[x]); + + __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels); + __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels); + + __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight)); + blended = _mm256_srli_epi16(blended, 8); + + __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1)); + final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask)); + _mm_storeu_si128((__m128i*)&dst[x], final_pixels); + } + } +} + diff --git a/base/settings.h b/base/settings.h new file mode 100644 index 0000000..be448c4 --- /dev/null +++ b/base/settings.h @@ -0,0 +1,11 @@ + +#ifdef DEBUG_INTERNAL + +// #define PERF_TEST +#ifndef PERF_TEST +#define PROFILER +#endif + + + +#endif diff --git a/base/shader.c b/base/shader.c new file mode 100644 index 0000000..a6e16f6 --- /dev/null +++ b/base/shader.c @@ -0,0 +1,44 @@ +#include "shader.h" + 
+//==============================================================
+//
+// CPU CODE
+//
+//==============================================================
+// TONAL CONTROL CONSTANT GENERATION
+//--------------------------------------------------------------
+// Make sure to use same CRTS_MASK_* defines on CPU and GPU!!!!!
+//==============================================================
+/*
+ * Fills dst[0..3] with the four tone constants.
+ *
+ * dst        - Output array of 4 floats:
+ *                dst[0] = contrast
+ *                dst[1], dst[2] = terms derived from the 0.18 mid level, contrast, thin and mask
+ *                dst[3] = contrast + saturation
+ *
+ * contrast   - 1.0 = no change
+ *              2.0 = very strong contrast (over 2.0 for even more)
+ *
+ * saturation - 0.0 = no change
+ *              1.0 = increased saturation (over 1.0 for even more)
+ *
+ * thin, mask - Inputs shared between CrtsTone() and CrtsFilter()
+ *
+ * mask is overridden when CRTS_MASK_NONE is defined and remapped when
+ * CRTS_MASK_GRILLE_LITE is defined.
+ */
+static void CrtsTone(float * restrict dst, float contrast, float saturation, float thin, float mask) {
+//--------------------------------------------------------------
+#ifdef CRTS_MASK_NONE
+	mask = 1.0f;
+#endif
+//--------------------------------------------------------------
+#ifdef CRTS_MASK_GRILLE_LITE
+	// Normal R mask is {1.0,mask,mask}
+	// LITE R mask is {mask,1.0,1.0}
+	mask = 0.5f + mask * 0.5f;
+#endif
+//--------------------------------------------------------------
+	const float mid_out = 0.18f / ((1.5f - thin) * (0.5f * mask + 0.5f));
+	const float mid_in_pow = powf(0.18f, contrast);
+
+	dst[0] = contrast;
+	dst[1] = ((-mid_in_pow) + mid_out) / ((1.0f - mid_in_pow) * mid_out);
+	dst[2] = ((-mid_in_pow) * mid_out + mid_in_pow) / (mid_out * (-mid_in_pow) + mid_out);
+	dst[3] = contrast + saturation;
+}
diff --git a/base/shader.h b/base/shader.h
new file mode 100644
index 0000000..a9cb400
--- /dev/null
+++ b/base/shader.h
@@ -0,0 +1,34 @@
+//==============================================================
+// SETUP FOR CRTS
+//==============================================================
+#define CRTS_TONE 1
+#define CRTS_CONTRAST 1
+#define CRTS_SATURATION 1
+//--------------------------------------------------------------
+#define CRTS_WARP 1
+//--------------------------------------------------------------
+// Try different masks
+// #define CRTS_MASK_GRILLE 1
+// #define CRTS_MASK_GRILLE_LITE 1
+// #define CRTS_MASK_NONE 1
+#define CRTS_MASK_SHADOW 1
+// --------------------------------------------------------------
+// Scanline thinness
+// 0.50 = fused scanlines
+// 0.70 = recommended default
+// 1.00 = thinner scanlines (too thin)
+#define INPUT_THIN 0.7
+//--------------------------------------------------------------
+// Horizontal scan blur
+// -3.0 = pixely
+// -2.5 = default
+// -2.0 = smooth
+// -1.0 = too blurry
+#define INPUT_BLUR -2.5
+//--------------------------------------------------------------
+// Shadow mask effect, ranges from,
+// 0.25 = large amount of mask (not recommended, too dark)
+// 0.50 = recommended default
+// 1.00 = no shadow mask
+#define INPUT_MASK 0.5
+
diff --git a/base/state.c b/base/state.c
new file mode 100644
index 0000000..f883d64
--- /dev/null
+++ b/base/state.c
@@ -0,0 +1,134 @@
+
+// Dimensions (in 32-bit pixels) of the backing buffer the render area is centered in.
+#define BUFFER_WIDTH 2048
+#define BUFFER_HEIGHT 1024
+#define FPS 50
+#ifdef _WIN32
+	#define SLEEP_MARGIN_NS 330000 // 0.33ms (Windows timing functionality is utter garbage)
+#else
+	#define SLEEP_MARGIN_NS 100000 // 0.1ms
+#endif
+
+#define ONE_SECOND_NS 1000000000
+// Duration of one frame in nanoseconds.
+#define FRAMETIME (ONE_SECOND_NS / FPS)
+
+#define SCREEN_WIDTH 360
+#define SCREEN_HEIGHT 270
+
+// Page-aligned pixel buffers placed in .bss.
+uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+uint32_t display_buffer[SCREEN_WIDTH * SCREEN_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+
+#ifdef PROFILER
+// Number of slots in debug_state.timings; every PROFILE_* call site owns one slot.
+#define MAX_PROFILING_ENTRIES (64)
+
+// Accumulated cycle count and call count for one profiled call site.
+struct function_cycles {
+	const char *name;
+	uint64_t cycles;
+	uint32_t count;
+};
+
+struct debug_state {
+	struct function_cycles timings[MAX_PROFILING_ENTRIES];
+};
+#endif
+
+// Global application state; a single instance named `state` is defined below.
+struct state {
+	// Pointers (8-byte aligned)
+	uint32_t *display_buffer;
+	struct { int32_t x, y, w, h; } viewport;
+	int32_t mouse_dx;
+	int32_t mouse_dy;
+	int32_t filter_override; // Manual override: -1 = automatic, 0 = off, 1 = on
+	float filter_frequency; // Frequency in Hz for squarewave toggle
+
+	float contrast;
+	float saturation;
+	float brightness;
+	float tone_data[4];
+
+	// OpenGL Objects
+	GLuint shader_program;
+	GLuint texture;
+	GLuint vao;
+	GLuint vbo;
+	GLuint ebo;
+
+	// Shader Uniforms
+	GLuint uniform_resolution;
+	GLuint uniform_src_image_size;
+	GLuint uniform_brightness;
+	GLuint uniform_tone;
+	GLuint uniform_crt_emulation;
+	GLuint uniform_sampler_location;
+	GLuint uniform_tex_bounds;
+
+	// Rendering & Dynamic Resolution
+	uint32_t screen_width; // for the debugger
+	uint32_t screen_height; // for the debugger
+	uint32_t render_width; // The actual remake resolution (e.g., 360)
+	uint32_t render_height; // The actual remake resolution (e.g., 270)
+	uint32_t render_x; // X position inside BUFFER_WIDTH (to track centering)
+	uint32_t render_y; // Y position inside BUFFER_HEIGHT (to track centering)
+	uint32_t frame_number;
+	size_t total_allocated;
+	bool freewheeling;
+	bool toggle_crt_emulation;
+	bool fullscreen;
+#ifdef PROFILER
+	bool overlay;
+	struct debug_state debug;
+#endif
+};
+
+struct state state;
+
+static struct remake_callbacks *current_part = 0;
+static void render_callback();
+static void audio_callback(int16_t *audio_buffer, size_t frames);
+static void init_callback();
+
+#ifndef PROFILER
+// Profiling disabled: both macros expand to nothing.
+#define PROFILE_NAMED(name)
+#define PROFILE_FUNCTION()
+
+#else
+
+// Carries the timing slot index to end_profiling() via the cleanup attribute.
+struct profiling_context {
+	uint32_t func_id;
+};
+
+__attribute__((section(".bss")))
+uint8_t debug_line_buffer[MAX_PROFILING_ENTRIES * 256]; // NOTE(peter): for storing all performance strings in an array to print to the debug-output
+
+// Returns the current time-stamp counter value (RDTSCP); the aux output is discarded.
+__attribute__((always_inline, hot))
+static inline uint64_t read_tsc() {
+	unsigned int aux;
+	uint64_t result = __rdtscp(&aux);
+	return result;
+}
+
+// Cleanup handler: adds the end timestamp to the slot and bumps its call count.
+// Together with the `cycles -= read_tsc()` in PROFILE_NAMED this accumulates elapsed cycles.
+__attribute__((always_inline, hot))
+static inline void end_profiling(struct profiling_context *ctx) __attribute__((unused));
+__attribute__((always_inline, hot))
+static inline void end_profiling(struct profiling_context *ctx) {
+	state.debug.timings[ctx->func_id].cycles += read_tsc();
+	state.debug.timings[ctx->func_id].count++;
+}
+
+// Times the rest of the enclosing scope under `name`. Each expansion takes its slot index from
+// __COUNTER__; the static assertion turns an index outside state.debug.timings[] into a compile
+// error instead of an out-of-bounds write. Declares `func_id` and `ctx` (and the enum constant
+// `profile_slot_`), so it can be used once per scope.
+#define PROFILE_NAMED(name) \
+	enum { profile_slot_ = __COUNTER__ }; \
+	_Static_assert(profile_slot_ < MAX_PROFILING_ENTRIES, "profiling slot exceeds MAX_PROFILING_ENTRIES"); \
+	uint32_t func_id = profile_slot_; \
+	if((state).debug.timings[func_id].count == 0) \
+		(state).debug.timings[func_id] = (struct function_cycles){(name), 0, 0}; \
+	(state).debug.timings[func_id].cycles -= read_tsc(); \
+	struct profiling_context ctx __attribute__((cleanup(end_profiling))) = { func_id };
+
+// Same as PROFILE_NAMED, labelled with the enclosing function's name.
+#define PROFILE_FUNCTION() PROFILE_NAMED(__func__)
+#endif
+
+
+
+
diff --git a/base/stb_sprintf.h b/base/stb_sprintf.h
new file mode 100644
index 0000000..4b4c171
--- /dev/null
+++ b/base/stb_sprintf.h
@@ -0,0 +1,1906 @@
+// stb_sprintf - v1.10 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types: sc uidBboXx p AaGgEef n
+// lengths : hh h ll j z t I64 I32 I
+//
+// Contributors:
+//    Fabian "ryg" Giesen (reformatting)
+//    github:aganm (attribute format)
+//
+// Contributors (bugfixes):
+//    github:d26435
+//    github:trex78
+//    github:account-login
+//    Jari Komppa (SI suffixes)
+//    Rohit Nirmal
+//    Marcin Wojdyr
+//    Leonard Ritter
+//    Stefano Zanotti
+//    Adam Allison
+//    Arvid Gerstmann
+//    Markus Kolb
+//
+// LICENSE:
+//
+//   See end of file for license information.
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+ +This is a full sprintf replacement that supports everything that +the C runtime sprintfs support, including float/double, 64-bit integers, +hex floats, field parameters (%*.*d stuff), length reads backs, etc. + +Why would you need this if sprintf already exists? Well, first off, +it's *much* faster (see below). It's also much smaller than the CRT +versions code-space-wise. We've also added some simple improvements +that are super handy (commas in thousands, callbacks at buffer full, +for example). Finally, the format strings for MSVC and GCC differ +for 64-bit integers (among other small things), so this lets you use +the same format strings in cross platform code. + +It uses the standard single file trick of being both the header file +and the source itself. If you just include it normally, you just get +the header file function definitions. To get the code, you include +it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first. + +It only uses va_args macros from the C runtime to do its work. It +does cast doubles to S64s and shifts and divides U64s, which does +drag in CRT code on most platforms. + +It compiles to roughly 8K with float support, and 4K without. +As a comparison, when using MSVC static libs, calling sprintf drags +in 16K. + +API: +==== +int stbsp_sprintf( char * buf, char const * fmt, ... ) +int stbsp_snprintf( char * buf, int count, char const * fmt, ... ) + Convert an arg list into a buffer. stbsp_snprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintf( char * buf, char const * fmt, va_list va ) +int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va ) + Convert a va_list arg list into a buffer. stbsp_vsnprintf always returns + a zero-terminated string (unlike regular snprintf). 
+ +int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va ) + typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len ); + Convert into a buffer, calling back every STB_SPRINTF_MIN chars. + Your callback can then copy the chars out, print them or whatever. + This function is actually the workhorse for everything else. + The buffer you pass in must hold at least STB_SPRINTF_MIN characters. + // you return the next buffer to use or 0 to stop converting + +void stbsp_set_separators( char comma, char period ) + Set the comma and period characters to use. + +FLOATS/DOUBLES: +=============== +This code uses an internal float->ascii conversion method that uses +doubles with error correction (double-doubles, for ~105 bits of +precision). This conversion is round-trip perfect - that is, an atof +of the values output here will give you the bit-exact double back. + +One difference is that our insignificant digits will be different than +with MSVC or GCC (but they don't match each other either). We also +don't attempt to find the minimum length matching float (pre-MSVC15 +doesn't either). + +If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT +and you'll save 4K of code space. + +64-BIT INTS: +============ +This library also supports 64-bit integers and you can use MSVC style or +GCC style indicators (%I64d or %lld). It supports the C99 specifiers +for intmax_t, size_t and ptrdiff_t (%jd %zd %td) as well. + +EXTRAS: +======= +Like some GCCs, for integers and floats, you can use a ' (single quote) +specifier and commas will be inserted on the thousands: "%'d" on 12345 +would print 12,345. + +For integers and floats, you can use a "$" specifier and the number +will be converted to float and then divided to get kilo, mega, giga or +tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is +"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn +2536000 to "2.42 Mi". 
If you prefer JEDEC suffixes to SI ones, use three +$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the +suffix, add "_" specifier: "%_$d" -> "2.53M". + +In addition to octal and hexadecimal conversions, you can print +integers in binary: "%b" for 4 would print 100. + +PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC): +=================================================================== +"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC) +"%24d" across all 32-bit ints (4.5x/4.2x faster) +"%x" across all 32-bit ints (4.5x/3.8x faster) +"%08x" across all 32-bit ints (4.3x/3.8x faster) +"%f" across e-10 to e+10 floats (7.3x/6.0x faster) +"%e" across e-10 to e+10 floats (8.1x/6.0x faster) +"%g" across e-10 to e+10 floats (10.0x/7.1x faster) +"%f" for values near e-300 (7.9x/6.5x faster) +"%f" for values near e+300 (10.0x/9.1x faster) +"%e" for values near e-300 (10.1x/7.0x faster) +"%e" for values near e+300 (9.2x/6.0x faster) +"%.320f" for values near e-300 (12.6x/11.2x faster) +"%a" for random values (8.6x/4.3x faster) +"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster) +"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster) +"%s%s%s" for 64 char strings (7.1x/7.3x faster) +"...512 char string..." ( 35.0x/32.5x faster!) 
+*/ + +#if defined(__clang__) + #if defined(__has_feature) && defined(__has_attribute) + #if __has_feature(address_sanitizer) + #if __has_attribute(__no_sanitize__) + #define STBSP__ASAN __attribute__((__no_sanitize__("address"))) + #elif __has_attribute(__no_sanitize_address__) + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #elif __has_attribute(__no_address_safety_analysis__) + #define STBSP__ASAN __attribute__((__no_address_safety_analysis__)) + #endif + #endif + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #endif +#endif + +#ifndef STBSP__ASAN +#define STBSP__ASAN +#endif + +#ifdef STB_SPRINTF_STATIC +#define STBSP__PUBLICDEC static +#define STBSP__PUBLICDEF static STBSP__ASAN +#else +#ifdef __cplusplus +#define STBSP__PUBLICDEC extern "C" +#define STBSP__PUBLICDEF extern "C" STBSP__ASAN +#else +#define STBSP__PUBLICDEC extern +#define STBSP__PUBLICDEF STBSP__ASAN +#endif +#endif + +#if defined(__has_attribute) + #if __has_attribute(format) + #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va))) + #endif +#endif + +#ifndef STBSP__ATTRIBUTE_FORMAT +#define STBSP__ATTRIBUTE_FORMAT(fmt,va) +#endif + +#ifdef _MSC_VER +#define STBSP__NOTUSED(v) (void)(v) +#else +#define STBSP__NOTUSED(v) (void)sizeof(v) +#endif + +#include // for va_arg(), va_list() +#include // size_t, ptrdiff_t + +#ifndef STB_SPRINTF_MIN +#define STB_SPRINTF_MIN 512 // how many characters per callback +#endif +typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len); + +#ifndef STB_SPRINTF_DECORATE +#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names +#endif + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, 
int count, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4); + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period); + +#endif // STB_SPRINTF_H_INCLUDE + +#ifdef STB_SPRINTF_IMPLEMENTATION + +#define stbsp__uint32 unsigned int +#define stbsp__int32 signed int + +#ifdef _MSC_VER +#define stbsp__uint64 unsigned __int64 +#define stbsp__int64 signed __int64 +#else +#define stbsp__uint64 unsigned long long +#define stbsp__int64 signed long long +#endif +#define stbsp__uint16 unsigned short + +#ifndef stbsp__uintptr +#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__) +#define stbsp__uintptr stbsp__uint64 +#else +#define stbsp__uintptr stbsp__uint32 +#endif +#endif + +#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC) +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define STB_SPRINTF_MSVC_MODE +#endif +#endif + +#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses +#define STBSP__UNALIGNED(code) +#else +#define STBSP__UNALIGNED(code) code +#endif + +#ifndef STB_SPRINTF_NOFLOAT +// internal float utility functions +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits); +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value); +#define STBSP__SPECIAL 0x7000 +#endif + +static char stbsp__period = '.'; +static char stbsp__comma = ','; +static struct +{ + 
short temp; // force next field to be 2-byte aligned + char pair[201]; +} stbsp__digitpair = +{ + 0, + "00010203040506070809101112131415161718192021222324" + "25262728293031323334353637383940414243444546474849" + "50515253545556575859606162636465666768697071727374" + "75767778798081828384858687888990919293949596979899" +}; + +STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod) +{ + stbsp__period = pperiod; + stbsp__comma = pcomma; +} + +#define STBSP__LEFTJUST 1 +#define STBSP__LEADINGPLUS 2 +#define STBSP__LEADINGSPACE 4 +#define STBSP__LEADING_0X 8 +#define STBSP__LEADINGZERO 16 +#define STBSP__INTMAX 32 +#define STBSP__TRIPLET_COMMA 64 +#define STBSP__NEGATIVE 128 +#define STBSP__METRIC_SUFFIX 256 +#define STBSP__HALFWIDTH 512 +#define STBSP__METRIC_NOSPACE 1024 +#define STBSP__METRIC_1024 2048 +#define STBSP__METRIC_JEDEC 4096 + +static void stbsp__lead_sign(stbsp__uint32 fl, char *sign) +{ + sign[0] = 0; + if(fl & STBSP__NEGATIVE) { + sign[0] = 1; + sign[1] = '-'; + } else if(fl & STBSP__LEADINGSPACE) { + sign[0] = 1; + sign[1] = ' '; + } else if(fl & STBSP__LEADINGPLUS) { + sign[0] = 1; + sign[1] = '+'; + } +} + +static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit) +{ + char const * sn = s; + + // get up to 4-byte alignment + for(;;) { + if(((stbsp__uintptr)sn & 3) == 0) + break; + + if(!limit || *sn == 0) + return (stbsp__uint32)(sn - s); + + ++sn; + --limit; + } + + // scan over 4 bytes at a time to find terminating 0 + // this will intentionally scan up to 3 bytes past the end of buffers, + // but becase it works 4B aligned, it will never cross page boundaries + // (hence the STBSP__ASAN markup; the over-read here is intentional + // and harmless) + while(limit >= 4) { + stbsp__uint32 v = *(stbsp__uint32 *)sn; + // bit hack to find if there's a 0 byte in there + if((v - 0x01010101) & (~v) & 0x80808080UL) + break; + + sn += 4; + limit -= 4; + } + + // handle the last few characters to 
find actual size + while(limit && *sn) { + ++sn; + --limit; + } + + return (stbsp__uint32)(sn - s); +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va) +{ + static char hex[] = "0123456789abcdefxp"; + static char hexu[] = "0123456789ABCDEFXP"; + char *bf; + char const *f; + int tlen = 0; + + bf = buf; + f = fmt; + for(;;) { + stbsp__int32 fw, pr, tz; + stbsp__uint32 fl; + + // macros for the callback buffer stuff + #define stbsp__chk_cb_bufL(bytes) \ + { \ + int len = (int)(bf - buf); \ + if((len + (bytes)) >= STB_SPRINTF_MIN) { \ + tlen += len; \ + if(0 == (bf = buf = callback(buf, user, len))) \ + goto done; \ + } \ + } + #define stbsp__chk_cb_buf(bytes) \ + { \ + if(callback) { \ + stbsp__chk_cb_bufL(bytes); \ + } \ + } + #define stbsp__flush_cb() \ + { \ + stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \ + } // flush if there is even one byte in the buffer + #define stbsp__cb_buf_clamp(cl, v) \ + cl = v; \ + if(callback) { \ + int lg = STB_SPRINTF_MIN - (int)(bf - buf); \ + if(cl > lg) \ + cl = lg; \ + } + + // fast copy everything up to the next % (or end of string) + for(;;) { + while(((stbsp__uintptr)f) & 3) { + schk1: + if(f[0] == '%') + goto scandd; + schk2: + if(f[0] == 0) + goto endfmt; + stbsp__chk_cb_buf(1); + *bf++ = f[0]; + ++f; + } + for(;;) { + // Check if the next 4 bytes contain %(0x25) or end of string. 
+ // Using the 'hasless' trick: + // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + stbsp__uint32 v, c; + v = *(stbsp__uint32 *)f; + c = (~v) & 0x80808080; + if(((v ^ 0x25252525) - 0x01010101) & c) + goto schk1; + if((v - 0x01010101) & c) + goto schk2; + if(callback) + if((STB_SPRINTF_MIN - (int)(bf - buf)) < 4) + goto schk1; + #ifdef STB_SPRINTF_NOUNALIGNED + if(((stbsp__uintptr)bf) & 3) { + bf[0] = f[0]; + bf[1] = f[1]; + bf[2] = f[2]; + bf[3] = f[3]; + } else + #endif + { + *(stbsp__uint32 *)bf = v; + } + bf += 4; + f += 4; + } + } + scandd: + + ++f; + + // ok, we have a percent, read the modifiers first + fw = 0; + pr = -1; + fl = 0; + tz = 0; + + // flags + for(;;) { + switch (f[0]) { + // if we have left justify + case '-': + fl |= STBSP__LEFTJUST; + ++f; + continue; + // if we have leading plus + case '+': + fl |= STBSP__LEADINGPLUS; + ++f; + continue; + // if we have leading space + case ' ': + fl |= STBSP__LEADINGSPACE; + ++f; + continue; + // if we have leading 0x + case '#': + fl |= STBSP__LEADING_0X; + ++f; + continue; + // if we have thousand commas + case '\'': + fl |= STBSP__TRIPLET_COMMA; + ++f; + continue; + // if we have kilo marker (none->kilo->kibi->jedec) + case '$': + if(fl & STBSP__METRIC_SUFFIX) { + if(fl & STBSP__METRIC_1024) { + fl |= STBSP__METRIC_JEDEC; + } else { + fl |= STBSP__METRIC_1024; + } + } else { + fl |= STBSP__METRIC_SUFFIX; + } + ++f; + continue; + // if we don't want space between metric suffix and number + case '_': + fl |= STBSP__METRIC_NOSPACE; + ++f; + continue; + // if we have leading zero + case '0': + fl |= STBSP__LEADINGZERO; + ++f; + goto flags_done; + default: goto flags_done; + } + } + flags_done: + + // get the field width + if(f[0] == '*') { + fw = va_arg(va, stbsp__uint32); + ++f; + } else { + while((f[0] >= '0') && (f[0] <= '9')) { + fw = fw * 10 + f[0] - '0'; + f++; + } + } + // get the precision + if(f[0] == '.') { + ++f; + if(f[0] == '*') { + pr = va_arg(va, stbsp__uint32); + ++f; + } 
else { + pr = 0; + while((f[0] >= '0') && (f[0] <= '9')) { + pr = pr * 10 + f[0] - '0'; + f++; + } + } + } + + // handle integer size overrides + switch (f[0]) { + // are we halfwidth? + case 'h': + fl |= STBSP__HALFWIDTH; + ++f; + if(f[0] == 'h') + ++f; // QUARTERWIDTH + break; + // are we 64-bit (unix style) + case 'l': + fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0); + ++f; + if(f[0] == 'l') { + fl |= STBSP__INTMAX; + ++f; + } + break; + // are we 64-bit on intmax? (c99) + case 'j': + fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit on size_t or ptrdiff_t? (c99) + case 'z': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + case 't': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit (msft style) + case 'I': + if((f[1] == '6') && (f[2] == '4')) { + fl |= STBSP__INTMAX; + f += 3; + } else if((f[1] == '3') && (f[2] == '2')) { + f += 3; + } else { + fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0); + ++f; + } + break; + default: break; + } + + // handle each replacement + switch (f[0]) { + #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307 + char num[STBSP__NUMSZ]; + char lead[8]; + char tail[8]; + char *s; + char const *h; + stbsp__uint32 l, n, cs; + stbsp__uint64 n64; +#ifndef STB_SPRINTF_NOFLOAT + double fv; +#endif + stbsp__int32 dp; + char const *sn; + + case 's': + // get the string + s = va_arg(va, char *); + if(s == 0) + s = (char *)"0"; + // get the length, limited to desired precision + // always limit to ~0u chars since our counts are 32b + l = stbsp__strlen_limited(s, (pr >= 0) ? 
pr : ~0u); + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + // copy the string in + goto scopy; + + case 'c': // char + // get the character + s = num + STBSP__NUMSZ - 1; + *s = (char)va_arg(va, int); + l = 1; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + + case 'n': // weird write-bytes specifier + { + int *d = va_arg(va, int *); + *d = tlen + (int)(bf - buf); + } break; + +#ifdef STB_SPRINTF_NOFLOAT + case 'A': // float + case 'a': // hex float + case 'G': // float + case 'g': // float + case 'E': // float + case 'e': // float + case 'f': // float + va_arg(va, double); // eat it + s = (char *)"No float"; + l = 8; + lead[0] = 0; + tail[0] = 0; + pr = 0; + cs = 0; + STBSP__NOTUSED(dp); + goto scopy; +#else + case 'A': // hex float + case 'a': // hex float + h = (f[0] == 'A') ? hexu : hex; + fv = va_arg(va, double); + if(pr == -1) + pr = 6; // default is 6 + // read the double into a string + if(stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv)) + fl |= STBSP__NEGATIVE; + + s = num + 64; + + stbsp__lead_sign(fl, lead); + + if(dp == -1023) + dp = (n64) ? -1022 : 0; + else + n64 |= (((stbsp__uint64)1) << 52); + n64 <<= (64 - 56); + if(pr < 15) + n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4)); +// add leading chars + +#ifdef STB_SPRINTF_MSVC_MODE + *s++ = '0'; + *s++ = 'x'; +#else + lead[1 + lead[0]] = '0'; + lead[2 + lead[0]] = 'x'; + lead[0] += 2; +#endif + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + if(pr) + *s++ = stbsp__period; + sn = s; + + // print the bits + n = pr; + if(n > 13) + n = 13; + if(pr > (stbsp__int32)n) + tz = pr - n; + pr = 0; + while(n--) { + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + } + + // print the expo + tail[1] = h[17]; + if(dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; + n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 
4 : 3)); + tail[0] = (char)n; + for(;;) { + tail[n] = '0' + dp % 10; + if(n <= 3) + break; + --n; + dp /= 10; + } + + dp = (int)(s - sn); + l = (int)(s - (num + 64)); + s = num + 64; + cs = 1 + (3 << 24); + goto scopy; + + case 'G': // float + case 'g': // float + h = (f[0] == 'G') ? hexu : hex; + fv = va_arg(va, double); + if(pr == -1) + pr = 6; + else if(pr == 0) + pr = 1; // default is 6 + // read the double into a string + if(stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000)) + fl |= STBSP__NEGATIVE; + + // clamp the precision and delete extra zeros after clamp + n = pr; + if(l > (stbsp__uint32)pr) + l = pr; + while((l > 1) && (pr) && (sn[l - 1] == '0')) { + --pr; + --l; + } + + // should we use %e + if((dp <= -4) || (dp > (stbsp__int32)n)) { + if(pr > (stbsp__int32)l) + pr = l - 1; + else if(pr) + --pr; // when using %e, there is one digit before the decimal + goto doexpfromg; + } + // this is the insane action to get the pr to match %g semantics for %f + if(dp > 0) { + pr = (dp < (stbsp__int32)l) ? l - dp : 0; + } else { + pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr); + } + goto dofloatfromg; + + case 'E': // float + case 'e': // float + h = (f[0] == 'E') ? 
hexu : hex; + fv = va_arg(va, double); + if(pr == -1) + pr = 6; // default is 6 + // read the double into a string + if(stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000)) + fl |= STBSP__NEGATIVE; + doexpfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if(dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + // handle leading chars + *s++ = sn[0]; + + if(pr) + *s++ = stbsp__period; + + // handle after decimal + if((l - 1) > (stbsp__uint32)pr) + l = pr + 1; + for(n = 1; n < l; n++) + *s++ = sn[n]; + // trailing zeros + tz = pr - (l - 1); + pr = 0; + // dump expo + tail[1] = h[0xe]; + dp -= 1; + if(dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; +#ifdef STB_SPRINTF_MSVC_MODE + n = 5; +#else + n = (dp >= 100) ? 5 : 4; +#endif + tail[0] = (char)n; + for(;;) { + tail[n] = '0' + dp % 10; + if(n <= 3) + break; + --n; + dp /= 10; + } + cs = 1 + (3 << 24); // how many tens + goto flt_lead; + + case 'f': // float + fv = va_arg(va, double); + doafloat: + // do kilos + if(fl & STBSP__METRIC_SUFFIX) { + double divisor; + divisor = 1000.0f; + if(fl & STBSP__METRIC_1024) + divisor = 1024.0; + while(fl < 0x4000000) { + if((fv < divisor) && (fv > -divisor)) + break; + fv /= divisor; + fl += 0x1000000; + } + } + if(pr == -1) + pr = 6; // default is 6 + // read the double into a string + if(stbsp__real_to_str(&sn, &l, num, &dp, fv, pr)) + fl |= STBSP__NEGATIVE; + dofloatfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if(dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + + // handle the three decimal varieties + if(dp <= 0) { + stbsp__int32 i; + // handle 0.000*000xxxx + *s++ = '0'; + if(pr) + *s++ = stbsp__period; + n = -dp; + if((stbsp__int32)n > pr) + n = pr; + i = n; + while(i) { + if((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --i; + } + while(i >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + i -= 4; + } + while(i) { + *s++ = '0'; + 
--i; + } + if((stbsp__int32)(l + n) > pr) + l = pr - n; + i = l; + while(i) { + *s++ = *sn++; + --i; + } + tz = pr - (n + l); + cs = 1 + (3 << 24); // how many tens did we write (for commas below) + } else { + cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0; + if((stbsp__uint32)dp >= l) { + // handle xxxx000*000.0 + n = 0; + for(;;) { + if((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if(n >= l) + break; + } + } + if(n < (stbsp__uint32)dp) { + n = dp - n; + if((fl & STBSP__TRIPLET_COMMA) == 0) { + while(n) { + if((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --n; + } + while(n >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + n -= 4; + } + } + while(n) { + if((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = '0'; + --n; + } + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if(pr) { + *s++ = stbsp__period; + tz = pr; + } + } else { + // handle xxxxx.xxxx000*000 + n = 0; + for(;;) { + if((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if(n >= (stbsp__uint32)dp) + break; + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if(pr) + *s++ = stbsp__period; + if((l - dp) > (stbsp__uint32)pr) + l = pr + dp; + while(n < l) { + *s++ = sn[n]; + ++n; + } + tz = pr - (l - dp); + } + } + pr = 0; + + // handle k,m,g,t + if(fl & STBSP__METRIC_SUFFIX) { + char idx; + idx = 1; + if(fl & STBSP__METRIC_NOSPACE) + idx = 0; + tail[0] = idx; + tail[1] = ' '; + { + if(fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'. + if(fl & STBSP__METRIC_1024) + tail[idx + 1] = "_KMGT"[fl >> 24]; + else + tail[idx + 1] = "_kMGT"[fl >> 24]; + idx++; + // If printing kibits and not in jedec, add the 'i'. 
+ if(fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) { + tail[idx + 1] = 'i'; + idx++; + } + tail[0] = idx; + } + } + }; + + flt_lead: + // get the length that we copied + l = (stbsp__uint32)(s - (num + 64)); + s = num + 64; + goto scopy; +#endif + + case 'B': // upper binary + case 'b': // lower binary + h = (f[0] == 'B') ? hexu : hex; + lead[0] = 0; + if(fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[0xb]; + } + l = (8 << 4) | (1 << 8); + goto radixnum; + + case 'o': // octal + h = hexu; + lead[0] = 0; + if(fl & STBSP__LEADING_0X) { + lead[0] = 1; + lead[1] = '0'; + } + l = (3 << 4) | (3 << 8); + goto radixnum; + + case 'p': // pointer + fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0; + pr = sizeof(void *) * 2; + fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros + // fall through - to X + + case 'X': // upper hex + case 'x': // lower hex + h = (f[0] == 'X') ? hexu : hex; + l = (4 << 4) | (4 << 8); + lead[0] = 0; + if(fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[16]; + } + radixnum: + // get the number + if(fl & STBSP__INTMAX) + n64 = va_arg(va, stbsp__uint64); + else + n64 = va_arg(va, stbsp__uint32); + + s = num + STBSP__NUMSZ; + dp = 0; + // clear tail, and clear leading if value is zero + tail[0] = 0; + if(n64 == 0) { + lead[0] = 0; + if(pr == 0) { + l = 0; + cs = 0; + goto scopy; + } + } + // convert to string + for(;;) { + *--s = h[n64 & ((1 << (l >> 8)) - 1)]; + n64 >>= (l >> 8); + if(!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr))) + break; + if(fl & STBSP__TRIPLET_COMMA) { + ++l; + if((l & 15) == ((l >> 4) & 15)) { + l &= ~15; + *--s = stbsp__comma; + } + } + }; + // get the tens and the comma pos + cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24); + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + // copy it + goto scopy; + + case 'u': // unsigned + case 'i': + case 'd': // integer + // get the integer 
and abs it + if(fl & STBSP__INTMAX) { + stbsp__int64 i64 = va_arg(va, stbsp__int64); + n64 = (stbsp__uint64)i64; + if((f[0] != 'u') && (i64 < 0)) { + n64 = (stbsp__uint64)-i64; + fl |= STBSP__NEGATIVE; + } + } else { + stbsp__int32 i = va_arg(va, stbsp__int32); + n64 = (stbsp__uint32)i; + if((f[0] != 'u') && (i < 0)) { + n64 = (stbsp__uint32)-i; + fl |= STBSP__NEGATIVE; + } + } + +#ifndef STB_SPRINTF_NOFLOAT + if(fl & STBSP__METRIC_SUFFIX) { + if(n64 < 1024) + pr = 0; + else if(pr == -1) + pr = 1; + fv = (double)(stbsp__int64)n64; + goto doafloat; + } +#endif + + // convert to string + s = num + STBSP__NUMSZ; + l = 0; + + for(;;) { + // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators) + char *o = s - 8; + if(n64 >= 100000000) { + n = (stbsp__uint32)(n64 % 100000000); + n64 /= 100000000; + } else { + n = (stbsp__uint32)n64; + n64 = 0; + } + if((fl & STBSP__TRIPLET_COMMA) == 0) { + do { + s -= 2; + *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + } while(n); + } + while(n) { + if((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = (char)(n % 10) + '0'; + n /= 10; + } + } + if(n64 == 0) { + if((s[0] == '0') && (s != (num + STBSP__NUMSZ))) + ++s; + break; + } + while(s != o) + if((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = '0'; + } + } + + tail[0] = 0; + stbsp__lead_sign(fl, lead); + + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + if(l == 0) { + *--s = '0'; + l = 1; + } + cs = l + (3 << 24); + if(pr < 0) + pr = 0; + + scopy: + // get fw=leading/trailing space, pr=leading zeros + if(pr < (stbsp__int32)l) + pr = l; + n = pr + lead[0] + tail[0] + tz; + if(fw < (stbsp__int32)n) + fw = n; + fw -= n; + pr -= l; + + // handle right justify and leading zeros + if((fl & STBSP__LEFTJUST) == 0) { + if(fl & STBSP__LEADINGZERO) // if leading zeros, 
everything is in pr + { + pr = (fw > pr) ? fw : pr; + fw = 0; + } else { + fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas + } + } + + // copy the spaces and/or zeros + if(fw + pr) { + stbsp__int32 i; + stbsp__uint32 c; + + // copy leading spaces (or when doing %8.4d stuff) + if((fl & STBSP__LEFTJUST) == 0) + while(fw > 0) { + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while(i) { + if((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while(i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while(i) { + *bf++ = ' '; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leader + sn = lead + 1; + while(lead[0]) { + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while(i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leading zeros + c = cs >> 24; + cs &= 0xffffff; + cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0; + while(pr > 0) { + stbsp__cb_buf_clamp(i, pr); + pr -= i; + if((fl & STBSP__TRIPLET_COMMA) == 0) { + while(i) { + if((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while(i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + } + while(i) { + if((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) { + cs = 0; + *bf++ = stbsp__comma; + } else + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + } + + // copy leader if there is still one + sn = lead + 1; + while(lead[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while(i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy the string + n = l; + while(n) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, n); + n -= i; + STBSP__UNALIGNED(while(i >= 4) { + *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s; + bf += 4; + s += 4; + i -= 4; + }) + while(i) { + *bf++ = *s++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy trailing zeros + while(tz) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tz); 
+ tz -= i; + while(i) { + if((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while(i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + while(i) { + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy tail if there is one + sn = tail + 1; + while(tail[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tail[0]); + tail[0] -= (char)i; + while(i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // handle the left justify + if(fl & STBSP__LEFTJUST) + if(fw > 0) { + while(fw) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while(i) { + if((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while(i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while(i--) + *bf++ = ' '; + stbsp__chk_cb_buf(1); + } + } + break; + + default: // unknown, just copy code + s = num + STBSP__NUMSZ - 1; + *s = f[0]; + l = 1; + fw = fl = 0; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + } + ++f; + } +endfmt: + + if(!callback) + *bf = 0; + else + stbsp__flush_cb(); + +done: + return tlen + (int)(bf - buf); +} + +// cleanup +#undef STBSP__LEFTJUST +#undef STBSP__LEADINGPLUS +#undef STBSP__LEADINGSPACE +#undef STBSP__LEADING_0X +#undef STBSP__LEADINGZERO +#undef STBSP__INTMAX +#undef STBSP__TRIPLET_COMMA +#undef STBSP__NEGATIVE +#undef STBSP__METRIC_SUFFIX +#undef STBSP__NUMSZ +#undef stbsp__chk_cb_bufL +#undef stbsp__chk_cb_buf +#undef stbsp__flush_cb +#undef stbsp__cb_buf_clamp + +// ============================================================================ +// wrapper functions + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) 
+{
+    int result;
+    va_list va;
+    va_start(va, fmt);
+    result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+    va_end(va);
+    return result;
+}
+
+// State shared between vsnprintf and the output callbacks it installs.
+typedef struct stbsp__context {
+    char *buf;                 // next unwritten byte of the caller's buffer
+    int count;                 // bytes still free at buf
+    int length;                // sum of every chunk length reported so far, clipped or not
+    char tmp[STB_SPRINTF_MIN]; // scratch the formatter writes into once buf is (almost) full
+} stbsp__context;
+
+// Output callback for the bounded path of vsnprintf.
+//   buf  - chunk the formatter just produced (may be c->buf itself)
+//   user - the stbsp__context
+//   len  - number of bytes in the chunk
+// Adds len to the running length, stores as much of the chunk as still fits
+// in the caller's buffer, and returns the buffer the formatter must fill next.
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+    stbsp__context *c = (stbsp__context *)user;
+    c->length += len;
+
+    // clip the chunk to the space that is left
+    if(len > c->count)
+        len = c->count;
+
+    if(len) {
+        // when the formatter wrote straight into c->buf the bytes are already in place
+        if(buf != c->buf) {
+            const char *s, *se;
+            char *d;
+            d = c->buf;
+            s = buf;
+            se = buf + len;
+            do {
+                *d++ = *s++;
+            } while(s < se);
+        }
+        c->buf += len;
+        c->count -= len;
+    }
+
+    if(c->count <= 0)
+        return c->tmp;
+    return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+// Output callback for the measure-only path of vsnprintf: nothing is stored,
+// only the running length is updated.
+static char *stbsp__count_clamp_callback(const char *buf, void *user, int len)
+{
+    stbsp__context *c = (stbsp__context *)user;
+    (void) sizeof(buf);
+
+    c->length += len;
+    return c->tmp; // always hand the scratch buffer back; its contents are discarded
+}
+
+// Bounded formatting.
+//   buf   - destination; not touched when count <= 0 (may then be NULL)
+//   count - size of buf in bytes, including room for the terminating 0
+//   fmt   - format string
+//   va    - arguments for fmt
+// Returns the length the complete output has, whether or not it fitted.
+// Whenever count > 0 the result in buf is zero-terminated.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va)
+{
+    stbsp__context c;
+
+    // No room for even the terminator: only measure. Testing count alone (not
+    // "count == 0 and buf is NULL") keeps the buf[count - 1] store below from
+    // landing on buf[-1] when a real buffer is passed with count == 0, and
+    // keeps a negative count away from the copy loop in stbsp__clamp_callback.
+    if(count <= 0) {
+        c.length = 0;
+
+        STB_SPRINTF_DECORATE(vsprintfcb)(stbsp__count_clamp_callback, &c, c.tmp, fmt, va);
+    } else {
+        int l;
+
+        c.buf = buf;
+        c.count = count;
+        c.length = 0;
+
+        // the zero-length priming call only selects the first output buffer (buf or c.tmp)
+        STB_SPRINTF_DECORATE(vsprintfcb)(stbsp__clamp_callback, &c, stbsp__clamp_callback(0, &c, 0), fmt, va);
+
+        // zero-terminate
+        l = (int)(c.buf - buf);
+        if(l >= count) // should never be greater, only equal (or less) than count
+            l = count - 1;
+        buf[l] = 0;
+    }
+
+    return c.length;
+}
+
+// Variadic front end: collects the arguments and forwards to vsnprintf.
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{ + int result; + va_list va; + va_start(va, fmt); + + result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va); + va_end(va); + + return result; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va) +{ + return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); +} + +// ======================================================================= +// low level float utility functions + +#ifndef STB_SPRINTF_NOFLOAT + +// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox) +#define STBSP__COPYFP(dest, src) \ + { \ + int cn; \ + for(cn = 0; cn < 8; cn++) \ + ((char *)&dest)[cn] = ((char *)&src)[cn]; \ + } + +// get float info +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value) +{ + double d; + stbsp__int64 b = 0; + + // load value and round at the frac_digits + d = value; + + STBSP__COPYFP(b, d); + + *bits = b & ((((stbsp__uint64)1) << 52) - 1); + *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023); + + return (stbsp__int32)((stbsp__uint64) b >> 63); +} + +static double const stbsp__bot[23] = { + 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, + 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 +}; +static double const stbsp__negbot[22] = { + 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, + 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 +}; +static double const stbsp__negboterr[22] = { + -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, + 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, + -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, 
-7.1542424054621921e-034, -7.1542424054621926e-035, + 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 +}; +static double const stbsp__top[13] = { + 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 +}; +static double const stbsp__negtop[13] = { + 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 +}; +static double const stbsp__toperr[13] = { + 8388608, + 6.8601809640529717e+028, + -7.253143638152921e+052, + -4.3377296974619174e+075, + -1.5559416129466825e+098, + -3.2841562489204913e+121, + -3.7745893248228135e+144, + -1.7356668416969134e+167, + -3.8893577551088374e+190, + -9.9566444326005119e+213, + 6.3641293062232429e+236, + -5.2069140800249813e+259, + -5.2504760255204387e+282 +}; +static double const stbsp__negtoperr[13] = { + 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109, + -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, + 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, + 8.0970921678014997e-317 +}; + +#if defined(_MSC_VER) && (_MSC_VER <= 1200) +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000, + 10000000000000000000U +}; +#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) +#else +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 
100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL +}; +#define stbsp__tento19th (1000000000000000000ULL) +#endif + +#define stbsp__ddmulthi(oh, ol, xh, yh) \ + { \ + double ahi = 0, alo, bhi = 0, blo; \ + stbsp__int64 bt; \ + oh = xh * yh; \ + STBSP__COPYFP(bt, xh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(ahi, bt); \ + alo = xh - ahi; \ + STBSP__COPYFP(bt, yh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(bhi, bt); \ + blo = yh - bhi; \ + ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \ + } + +#define stbsp__ddtoS64(ob, xh, xl) \ + { \ + double ahi = 0, alo, vh, t; \ + ob = (stbsp__int64)xh; \ + vh = (double)ob; \ + ahi = (xh - vh); \ + t = (ahi - xh); \ + alo = (xh - (ahi - t)) - (vh + t); \ + ob += (stbsp__int64)(ahi + alo + xl); \ + } + +#define stbsp__ddrenorm(oh, ol) \ + { \ + double s; \ + s = oh + ol; \ + ol = ol - (s - oh); \ + oh = s; \ + } + +#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); + +#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); + +static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350 +{ + double ph, pl; + if((power >= 0) && (power <= 22)) { + stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]); + } else { + stbsp__int32 e, et, eb; + double p2h, p2l; + + e = power; + if(power < 0) + e = -e; + et = (e * 0x2c9) >> 14; /* %23 */ + if(et > 13) + et = 13; + eb = e - (et * 23); + + ph = d; + pl = 0.0; + if(power < 0) { + if(eb) { + --eb; + stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]); + stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]); + } + if(et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]); + ph = p2h; + pl = p2l; + } + } else { + if(eb) { + e = eb; + if(eb > 22) + eb = 22; + e -= eb; + stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]); + if(e) { + 
stbsp__ddrenorm(ph, pl); + stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]); + stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl); + ph = p2h; + pl = p2l; + } + } + if(et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]); + ph = p2h; + pl = p2l; + } + } + } + stbsp__ddrenorm(ph, pl); + *ohi = ph; + *olo = pl; +} + +// given a float value, returns the significant bits in bits, and the position of the +// decimal point in decimal_pos. +/-INF and NAN are specified by special values +// returned in the decimal_pos parameter. +// frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000 +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits) +{ + double d; + stbsp__int64 bits = 0; + stbsp__int32 expo, e, ng, tens; + + d = value; + STBSP__COPYFP(bits, d); + expo = (stbsp__int32)((bits >> 52) & 2047); + ng = (stbsp__int32)((stbsp__uint64) bits >> 63); + if(ng) + d = -d; + + if(expo == 2047) // is nan or inf? + { + *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf"; + *decimal_pos = STBSP__SPECIAL; + *len = 3; + return ng; + } + + if(expo == 0) // is zero or denormal + { + if(((stbsp__uint64) bits << 1) == 0) // do zero + { + *decimal_pos = 1; + *start = out; + out[0] = '0'; + *len = 1; + return ng; + } + // find the right expo for denormals + { + stbsp__int64 v = ((stbsp__uint64)1) << 51; + while((bits & v) == 0) { + --expo; + v >>= 1; + } + } + } + + // find the decimal exponent as well as the decimal bits of the value + { + double ph, pl; + + // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046 + tens = expo - 1023; + tens = (tens < 0) ? 
((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1); + + // move the significant bits into position and stick them into an int + stbsp__raise_to_power10(&ph, &pl, d, 18 - tens); + + // get full as much precision from double-double as possible + stbsp__ddtoS64(bits, ph, pl); + + // check if we undershot + if(((stbsp__uint64)bits) >= stbsp__tento19th) + ++tens; + } + + // now do the rounding in integer land + frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); + if((frac_digits < 24)) { + stbsp__uint32 dg = 1; + if((stbsp__uint64)bits >= stbsp__powten[9]) + dg = 10; + while((stbsp__uint64)bits >= stbsp__powten[dg]) { + ++dg; + if(dg == 20) + goto noround; + } + if(frac_digits < dg) { + stbsp__uint64 r; + // add 0.5 at the right position and round + e = dg - frac_digits; + if((stbsp__uint32)e >= 24) + goto noround; + r = stbsp__powten[e]; + bits = bits + (r / 2); + if((stbsp__uint64)bits >= stbsp__powten[dg]) + ++tens; + bits /= r; + } + noround:; + } + + // kill long trailing runs of zeros + if(bits) { + stbsp__uint32 n; + for(;;) { + if(bits <= 0xffffffff) + break; + if(bits % 1000) + goto donez; + bits /= 1000; + } + n = (stbsp__uint32)bits; + while((n % 1000) == 0) + n /= 1000; + bits = n; + donez:; + } + + // convert to string + out += 64; + e = 0; + for(;;) { + stbsp__uint32 n; + char *o = out - 8; + // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned) + if(bits >= 100000000) { + n = (stbsp__uint32)(bits % 100000000); + bits /= 100000000; + } else { + n = (stbsp__uint32)bits; + bits = 0; + } + while(n) { + out -= 2; + *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + e += 2; + } + if(bits == 0) { + if((e) && (out[0] == '0')) { + ++out; + --e; + } + break; + } + while(out != o) { + *--out = '0'; + ++e; + } + } + + *decimal_pos = tens; + *start = out; + *len = e; + return ng; +} + +#undef stbsp__ddmulthi +#undef 
stbsp__ddrenorm +#undef stbsp__ddmultlo +#undef stbsp__ddmultlos +#undef STBSP__SPECIAL +#undef STBSP__COPYFP + +#endif // STB_SPRINTF_NOFLOAT + +// clean up +#undef stbsp__uint16 +#undef stbsp__uint32 +#undef stbsp__int32 +#undef stbsp__uint64 +#undef stbsp__int64 +#undef STBSP__UNALIGNED + +#endif // STB_SPRINTF_IMPLEMENTATION + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/base/ugg.h b/base/ugg.h
new file mode 100644
index 0000000..ac51678
--- /dev/null
+++ b/base/ugg.h
@@ -0,0 +1,10 @@
+#pragma once
+
+// Uncomplicated Generic Graphics
+// Image header followed directly by its payload: the dimensions, a 256-entry
+// table of 32-bit palette values, and a flexible array member with the data bytes.
+// NOTE(review): presumably data holds width * height 8-bit palette indices -- confirm against the loader.
+// NOTE(review): uint32_t / uint8_t come from <stdint.h>, which this header does not
+// include itself; it relies on the including file having done so.
+struct ugg {
+    uint32_t width;
+    uint32_t height;
+    uint32_t palette[256];
+    uint8_t data[]; // flexible array member: storage must be allocated past the struct
+};
+
diff --git a/base/vertex_shader.glsl b/base/vertex_shader.glsl
new file mode 100644
index 0000000..d8f1848
--- /dev/null
+++ b/base/vertex_shader.glsl
@@ -0,0 +1,9 @@
+// NOTE(review): there is no #version line here; presumably the code that loads the shader prepends one -- confirm.
+// Per-vertex inputs.
+in vec2 position;
+in vec2 texture_coord;
+
+// Output of this stage.
+out vec2 frag_texture_coord;
+
+// Pass-through: forwards the texture coordinate unchanged and emits the 2D
+// position as-is with z = 0 and w = 1.
+void main() {
+    frag_texture_coord = texture_coord;
+    gl_Position = vec4(position, 0.0, 1.0);
+}
-- cgit v1.2.3