From 6274071e3857c1640cc5aef804cb86509ab312f9 Mon Sep 17 00:00:00 2001 From: Peter Fors Date: Thu, 3 Apr 2025 20:02:00 +0200 Subject: Move to glfw --- audio.c | 462 +++++++++++++++++++++++++++++++++++++++++++++++++++ base/base.c | 4 +- base/shader.h | 4 +- build.sh | 13 +- callbacks.c | 78 +++++++++ cpu.c | 2 +- fragment_shader.glsl | 149 +++++++++++++++++ mapper.c | 28 ++-- mapper.h | 2 +- mapper_001.c | 23 +++ mapper_066.c | 27 +++ mapper_066.h | 5 + mapper_gxrom.c | 27 --- mapper_gxrom.h | 5 - mapper_nrom.c | 23 --- memory.c | 10 +- mknes.c | 290 ++++++++++++++++++-------------- mknes.h | 42 ++--- opengl.c | 129 ++++++++++++++ opengl_loader.c | 210 +++++++++++++++++++++++ ppu.c | 70 +++++--- render.c | 47 ++++++ shader.c | 44 +++++ shader.h | 34 ++++ timer.c | 149 +++++++++++++++++ vertex_shader.glsl | 9 + 26 files changed, 1623 insertions(+), 263 deletions(-) create mode 100644 audio.c create mode 100644 callbacks.c create mode 100644 fragment_shader.glsl create mode 100644 mapper_001.c create mode 100644 mapper_066.c create mode 100644 mapper_066.h delete mode 100644 mapper_gxrom.c delete mode 100644 mapper_gxrom.h delete mode 100644 mapper_nrom.c create mode 100644 opengl.c create mode 100644 opengl_loader.c create mode 100644 render.c create mode 100644 shader.c create mode 100644 shader.h create mode 100644 timer.c create mode 100644 vertex_shader.glsl diff --git a/audio.c b/audio.c new file mode 100644 index 0000000..4311d2f --- /dev/null +++ b/audio.c @@ -0,0 +1,462 @@ +#include +#include +#include +#include + +#define SAMPLE_RATE 48000 +#define NUM_CHANNELS 2 +#define FRAME_SIZE (NUM_CHANNELS * sizeof(short)) + +// static inline float smoothstep(float edge0, float edge1, float x) { +// x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1] +// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1] +// return x * x * (3.0f - 2.0f * x); // Smooth interpolation +// } + +// static inline float smootherstep(float edge0, float edge1, float x) { +// x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1] +// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1] +// return x * x * x * (x * (x * 6 - 15) + 10); // Modified curve +// } + +static inline float fast_cos(float x) { + float x2 = x * x; + return 1.0f - x2 * (0.5f - x2 * 0.04166667f); // Approximation of cos(x) +} + +static inline float cosine_smooth(float edge0, float edge1, float x) { + x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1] + x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1] + return 0.5f * (1.0f - fast_cos(x * M_PI)); // Cosine smoothing +} + +static float filter_phase = 0.0f; +static float prev_output_sample_L = 0.0f; +static float prev_output_sample_R = 0.0f; + +static void audio_callback_thread(int16_t *audio_buffer, size_t frames) { + int filter_override = state.filter_override; // Manual override: -1 = auto, 0 = off, 1 = on + float filter_frequency = state.filter_frequency; // Frequency in Hz for squarewave toggle + + audio_callback(audio_buffer, frames); + + if(filter_override) { + float a = 1.0f * M_PI * 4000.0f / (SAMPLE_RATE + 1.0f * M_PI * 4000.0f); + float phase_increment = filter_frequency / SAMPLE_RATE; + + for(size_t i = 0; i < frames * 2; i += 2) { + float led_filter_active; + + if(filter_override == -1) { + filter_phase += phase_increment; + if(filter_phase >= 1.0f) filter_phase -= 1.0f; + + led_filter_active = cosine_smooth(0.45f, 0.50f, filter_phase) - cosine_smooth(0.95f, 1.00f, filter_phase); + + } else { + led_filter_active = 1.0f; // Manual override (1 = on) + } + + float input_sample_L = (float)audio_buffer[i] / 32767.0f; + float input_sample_R = (float)audio_buffer[i + 1] / 32767.0f; + + float filtered_sample_L = a * input_sample_L + (1.0f - a) * prev_output_sample_L; + float filtered_sample_R = a * input_sample_R + (1.0f - a) * prev_output_sample_R; + + prev_output_sample_L = filtered_sample_L; + prev_output_sample_R = filtered_sample_R; + + audio_buffer[i] = (int16_t)((1.0f - led_filter_active) * input_sample_L * 32767.0f + led_filter_active * filtered_sample_L * 32767.0f); + audio_buffer[i + 1] = (int16_t)((1.0f - led_filter_active) * input_sample_R * 32767.0f + led_filter_active * filtered_sample_R * 32767.0f); + } + } +} + +#ifdef __linux__ + + +#include +#include +#include + +#define BUFFER_SIZE (512 * FRAME_SIZE) + +static struct pw_thread_loop *pa_thread_loop; +static struct pw_context *pa_context; +static struct pw_core *pa_core; +static struct pw_stream *pa_stream; +static struct spa_hook pa_stream_listener; +static uint64_t audio_clock_frequency; +static uint64_t playback_cursor; + +/* + * Called from PipeWire's real-time thread whenever new audio data is needed. + * We dequeue a buffer, call your audio_callback() to fill it, and then re-queue. + */ +static void on_process(void *userdata) { + struct pw_buffer *buffer; + struct spa_buffer *spa_buf; + int16_t *data; + uint32_t size; + uint32_t frames; + struct pw_time time_info; + + buffer = pw_stream_dequeue_buffer(pa_stream); + if(!buffer) { + /* No buffer available, skip. */ + return; + } + + spa_buf = buffer->buffer; + if(!spa_buf->datas || !spa_buf->datas[0].data) { + pw_stream_queue_buffer(pa_stream, buffer); + return; + } + + data = (int16_t*)spa_buf->datas[0].data; + size = spa_buf->datas[0].maxsize; + frames = size / FRAME_SIZE; + + // if(pw_stream_get_time_n(pa_stream, &time_info, sizeof(time_info)) == 0) { + // playback_cursor = time_info.now; + // } + // printf("Cursor(ns): %luns\n", playback_cursor); + + audio_callback_thread(data, frames); + + if(spa_buf->datas[0].chunk) { + spa_buf->datas[0].chunk->size = frames * FRAME_SIZE; + spa_buf->datas[0].chunk->stride = FRAME_SIZE; + } + + pw_stream_queue_buffer(pa_stream, buffer); +} + +/* + * Initialize PipeWire, create the stream, and connect for audio playback. + * Returns immediately so your main thread can continue. + */ +int audio_initialize(void) { + pw_init(0, 0); + + pa_thread_loop = pw_thread_loop_new("my-audio-loop", 0); + if(pa_thread_loop) { + if(pw_thread_loop_start(pa_thread_loop) == 0) { + pw_thread_loop_lock(pa_thread_loop); + + pa_context = pw_context_new(pw_thread_loop_get_loop(pa_thread_loop), 0, 0); + if(pa_context) { + pa_core = pw_context_connect(pa_context, 0, 0); + if(pa_core){ + static const struct spa_dict_item items[] = { + SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_TYPE, "Audio"), + SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_CATEGORY, "Playback"), + SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_ROLE, "Game"), + SPA_DICT_ITEM_INIT(PW_KEY_NODE_LATENCY, "512/48000") + }; + struct spa_dict dict = SPA_DICT_INIT(items, 4); + struct pw_properties *props = pw_properties_new_dict(&dict); + // pw_properties_free(props); + + pa_stream = pw_stream_new(pa_core, "My Audio Stream", props); + if(pa_stream) { + static struct pw_stream_events stream_events = {}; + stream_events.version = PW_VERSION_STREAM_EVENTS; + stream_events.process = on_process; + + pw_stream_add_listener(pa_stream, &pa_stream_listener, &stream_events, 0); + + /* + * Build two SPA params: + * 1) The audio format: S16_LE, SAMPLE_RATE, NUM_CHANNELS + * 2) The buffer param: request BUFFER_SIZE bytes per buffer + */ + uint8_t fmt_buffer[1024]; + struct spa_pod_builder fmt_builder = SPA_POD_BUILDER_INIT(fmt_buffer, sizeof(fmt_buffer)); + const struct spa_pod *fmt_param = (const struct spa_pod*)spa_pod_builder_add_object( + &fmt_builder, + SPA_TYPE_OBJECT_Format, SPA_PARAM_EnumFormat, + SPA_FORMAT_mediaType, SPA_POD_Id(SPA_MEDIA_TYPE_audio), + SPA_FORMAT_mediaSubtype, SPA_POD_Id(SPA_MEDIA_SUBTYPE_raw), + SPA_FORMAT_AUDIO_format, SPA_POD_Id(SPA_AUDIO_FORMAT_S16_LE), + SPA_FORMAT_AUDIO_rate, SPA_POD_Int(SAMPLE_RATE), + SPA_FORMAT_AUDIO_channels, SPA_POD_Int(NUM_CHANNELS) + ); + + uint8_t buf_buffer[1024]; + struct spa_pod_builder buf_builder = SPA_POD_BUILDER_INIT(buf_buffer, sizeof(buf_buffer)); + struct spa_pod *buf_param = (struct spa_pod*)spa_pod_builder_add_object( + &buf_builder, + SPA_TYPE_OBJECT_ParamBuffers, SPA_PARAM_Buffers, + SPA_PARAM_BUFFERS_buffers, SPA_POD_CHOICE_RANGE_Int(8, 2, 16), /* We'll request 8 buffers, each of size = BUFFER_SIZE bytes. */ + SPA_PARAM_BUFFERS_blocks, SPA_POD_Int(1), + SPA_PARAM_BUFFERS_size, SPA_POD_CHOICE_RANGE_Int(BUFFER_SIZE, BUFFER_SIZE, BUFFER_SIZE*8), + SPA_PARAM_BUFFERS_stride, SPA_POD_Int(FRAME_SIZE), + SPA_PARAM_BUFFERS_align, SPA_POD_Int(16) + ); + + const struct spa_pod *params[2]; + params[0] = fmt_param; + params[1] = buf_param; + + int res = pw_stream_connect(pa_stream, PW_DIRECTION_OUTPUT, PW_ID_ANY, (pw_stream_flags)(PW_STREAM_FLAG_AUTOCONNECT | PW_STREAM_FLAG_RT_PROCESS | PW_STREAM_FLAG_MAP_BUFFERS), params, 2); + pw_thread_loop_unlock(pa_thread_loop); + return 0; + + } else { + fprintf(stderr, "Failed to create PipeWire stream\n"); + } + pw_core_disconnect(pa_core); + } else { + fprintf(stderr, "Failed to connect context to core\n"); + } + pw_context_destroy(pa_context); + } else { + fprintf(stderr, "Failed to create PipeWire context\n"); + } + pw_thread_loop_unlock(pa_thread_loop); + pw_thread_loop_stop(pa_thread_loop); + } else { + fprintf(stderr, "Failed to start PipeWire thread loop\n"); + } + pw_thread_loop_destroy(pa_thread_loop); + } else { + fprintf(stderr, "Failed to create PipeWire thread loop\n"); + } + pw_deinit(); + return -1; +} + +/* + * Clean up PipeWire objects, stop the thread loop, and deinit. + * This should be called before your program exits. + */ +void audio_shutdown(void) { + if(!pa_thread_loop) { + return; + } + + pw_thread_loop_lock(pa_thread_loop); + + if(pa_stream){ + pw_stream_disconnect(pa_stream); + pw_stream_destroy(pa_stream); + } + + if(pa_core){ + pw_core_disconnect(pa_core); + } + + if(pa_context){ + pw_context_destroy(pa_context); + } + + pw_thread_loop_unlock(pa_thread_loop); + pw_thread_loop_stop(pa_thread_loop); + pw_thread_loop_destroy(pa_thread_loop); + pw_deinit(); +} + + + + + + + + + + + + + + + +#elif _WIN32 + +#define COBJMACROS +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Minimal WASAPI shared-mode audio playback with explicit 48kHz/16-bit/2ch. + */ + +#define NUM_CHANNELS 2 + +static IMMDeviceEnumerator *enumerator; +static IMMDevice *device_out; +static IAudioClient *audio_client_out; +static IAudioRenderClient *render_client; +static HANDLE audio_event; +static HANDLE audio_thread; +static int running; + +static DWORD WINAPI audio_thread_proc(void *arg) { + UINT32 buffer_size; + UINT32 padding; + UINT32 available; + uint8_t *data; + + IAudioClient_GetBufferSize(audio_client_out, &buffer_size); + + while(running) { + WaitForSingleObject(audio_event, INFINITE); + if(!running) { + break; + } + + IAudioClient_GetCurrentPadding(audio_client_out, &padding); + available = buffer_size - padding; + IAudioRenderClient_GetBuffer(render_client, available, &data); + audio_callback_thread((int16_t*)data, available); + IAudioRenderClient_ReleaseBuffer(render_client, available, 0); + } + return 0; +} + +void audio_initialize() { + WAVEFORMATEX wf; + REFERENCE_TIME dur_out; + + CoInitializeEx(0, COINIT_MULTITHREADED); + if(SUCCEEDED(CoCreateInstance(&CLSID_MMDeviceEnumerator, 0, CLSCTX_ALL, &IID_IMMDeviceEnumerator, (void**)&enumerator))) { + if(SUCCEEDED(IMMDeviceEnumerator_GetDefaultAudioEndpoint(enumerator, eRender, eConsole, &device_out))) { + if(SUCCEEDED(IMMDevice_Activate(device_out, &IID_IAudioClient, CLSCTX_ALL, 0, (void**)&audio_client_out))) { + wf.wFormatTag = WAVE_FORMAT_PCM; + wf.nChannels = NUM_CHANNELS; + wf.nSamplesPerSec = 48000; + wf.wBitsPerSample = 16; + wf.nBlockAlign = (wf.nChannels * wf.wBitsPerSample) / 8; + wf.nAvgBytesPerSec = wf.nSamplesPerSec * wf.nBlockAlign; + wf.cbSize = 0; + + IAudioClient_GetDevicePeriod(audio_client_out, &dur_out, 0); + IAudioClient_Initialize(audio_client_out, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, dur_out, 0, &wf, 0); + audio_event = CreateEvent(0, FALSE, FALSE, 0); + if(audio_event){ + IAudioClient_SetEventHandle(audio_client_out, audio_event); + IAudioClient_GetService(audio_client_out, &IID_IAudioRenderClient, (void**)&render_client); + IAudioClient_Start(audio_client_out); + + running = 1; + audio_thread = CreateThread(0, 0, audio_thread_proc, 0, 0, 0); + return; + } else { + printf("Failed to create audio event\n"); + } + audio_client_out->lpVtbl->Release(audio_client_out); + } else { + printf("Failed to activate audio client\n"); + } + device_out->lpVtbl->Release(device_out); + } else { + printf("Failed to get default audio endpoint\n"); + } + enumerator->lpVtbl->Release(enumerator); + } else { + printf("Failed to create MMDeviceEnumerator\n"); + } +} + +void audio_shutdown() { + running = 0; + if(audio_thread) { + SetEvent(audio_event); + WaitForSingleObject(audio_thread, INFINITE); + CloseHandle(audio_thread); + } + if(audio_event) { + CloseHandle(audio_event); + } + if(audio_client_out) { + IAudioClient_Stop(audio_client_out); + audio_client_out->lpVtbl->Release(audio_client_out); + } + if(render_client) { + render_client->lpVtbl->Release(render_client); + } + if(device_out) { + device_out->lpVtbl->Release(device_out); + } + if(enumerator) { + enumerator->lpVtbl->Release(enumerator); + } + CoUninitialize(); +} + +#endif + + + + + +// BELOW IS FOR FUTURE FRAME SYNCHRONIZATION!!! + +#if 0 +// Audio sync throttling logic (using audio playback clock) + +#define AUDIO_SAMPLE_RATE 48000 +#define FRAMETIME (1000000000 / 60) // NES: ~16.67ms per frame (replace as needed for PAL/other) + +static uint64_t emulator_start_time_ns = 0; +static uint64_t audio_start_time_ns = 0; + +// Stub: return current audio playback time in nanoseconds +uint64_t get_audio_playback_time_ns(void); + +// Call this once at emulation start +void audio_sync_init(uint64_t current_time_ns) { + emulator_start_time_ns = current_time_ns; + audio_start_time_ns = get_audio_playback_time_ns(); +} + +// Call this at the end of each frame +void audio_throttle_emulator(uint64_t frame_number, int64_t *frame_duration_ns) { + uint64_t expected_emulated_time = frame_number * FRAMETIME; + uint64_t actual_audio_time = get_audio_playback_time_ns() - audio_start_time_ns; + + int64_t drift = (int64_t)(actual_audio_time) - (int64_t)(expected_emulated_time); + + // Adjust frame duration to correct drift gradually + *frame_duration_ns -= drift / 8; + // Clamp adjustment to avoid jitter + if(*frame_duration_ns > FRAMETIME + 50000) { + *frame_duration_ns = FRAMETIME + 50000; + } else if(*frame_duration_ns < FRAMETIME - 50000) { + *frame_duration_ns = FRAMETIME - 50000; + } +} + +#ifdef _WIN32 +#include +#include +#include + +uint64_t get_audio_playback_time_ns(void) { + // WASAPI: query IAudioClock interface + // This is just a placeholder. You’ll need to cache IAudioClock *audio_clock externally. + extern IAudioClock *audio_clock; + UINT64 pos; + audio_clock->lpVtbl->GetPosition(audio_clock, &pos, 0); + return (pos * 1000000000ULL) / AUDIO_SAMPLE_RATE; +} + +#else +// PipeWire backend +#include +extern struct spa_clock *audio_clock; + +uint64_t get_audio_playback_time_ns(void) { + struct spa_clock_info info; + audio_clock->get_time(audio_clock, &info); + return info.nsec; +} +#endif + +#endif diff --git a/base/base.c b/base/base.c index 00b73ae..c17ab47 100644 --- a/base/base.c +++ b/base/base.c @@ -60,7 +60,7 @@ int main(int argc, char **argv) { state.toggle_crt_emulation = true; mkfw_init(320*5, 240*5); mkfw_set_swapinterval(0); - mkfw_set_window_min_size_and_aspect(320*5, 240*5, 320, 240); + mkfw_set_window_min_size_and_aspect(320*3, 240*3, 320, 240); mkfw_set_key_callback(key_callback); mkfw_set_mouse_move_delta_callback(mouse_move_callback); mkfw_set_mouse_button_callback(mouse_button_callback); @@ -74,7 +74,7 @@ int main(int argc, char **argv) { init_callback(); audio_initialize(); - set_decay(20); + set_decay(30); bool running = true; uint64_t next_update = mkfw_gettime() + FRAMETIME; diff --git a/base/shader.h b/base/shader.h index a9cb400..df838df 100644 --- a/base/shader.h +++ b/base/shader.h @@ -8,10 +8,10 @@ #define CRTS_WARP 1 //-------------------------------------------------------------- // Try different masks -// #define CRTS_MASK_GRILLE 1 +#define CRTS_MASK_GRILLE 1 // #define CRTS_MASK_GRILLE_LITE 1 // #define CRTS_MASK_NONE 1 -#define CRTS_MASK_SHADOW 1 +// #define CRTS_MASK_SHADOW 1 // -------------------------------------------------------------- // Scanline thinness // 0.50 = fused scanlines diff --git a/build.sh b/build.sh index 60de1d1..348700d 100755 --- a/build.sh +++ b/build.sh @@ -4,13 +4,14 @@ PROJECT_NAME="mknes" # Change this for each new project # Base configuration common to all builds -CFLAGS="-std=gnu11 " +CFLAGS="-std=gnu++23 " CFLAGS+="-mavx2 -mbmi2 -mtune=native -mfunction-return=keep -mindirect-branch=keep " CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fwhole-program " CFLAGS+="-fno-stack-protector -fno-PIE -no-pie -fno-strict-aliasing -ffunction-sections -fdata-sections " +CFLAGS+="-fno-exceptions -fno-rtti -fno-use-cxa-atexit -fno-non-call-exceptions " CFLAGS+="-Wall -Wextra " CFLAGS+="-Wno-unused-parameter -Wno-sign-compare -Wno-trigraphs -Wno-maybe-uninitialized " -CFLAGS+="-Wno-unused-variable -Wno-unused-const-variable -Wno-unused-function " +CFLAGS+="-Wno-unused-variable -Wno-unused-const-variable -Wno-unused-function -Wno-write-strings " LDFLAGS="-Wl,--gc-sections " @@ -19,7 +20,7 @@ INCLUDE_PATHS="-Ibase -I.." # Linux-specific includes and libraries LINUX_INCLUDE="-I/usr/include/pipewire-0.3 -I/usr/include/spa-0.2" -LINUX_LIBS="-lpipewire-0.3 -lXi -lX11 -lGL -lm -ldl -pthread" +LINUX_LIBS="-lpipewire-0.3 -lXi -lX11 -lGL -lm -ldl -pthread -lglfw" # Windows-specific includes and libraries # WINDOWS_INCLUDE="" @@ -56,8 +57,8 @@ esac #env -C org_assets ../../bin/mks_time ./process.sh # Make sure the shaders are up to date if you are experimenting with them. -#env -C ../base ../bin/shader2h 140 vertex_shader vertex_shader.glsl -#env -C ../base ../bin/shader2h 140 fragment_shader shader.h fragment_shader.glsl +shader2h 330 vertex_shader vertex_shader.glsl +shader2h 330 fragment_shader shader.h fragment_shader.glsl # Make sure the compilation stops if any error happens. set -e @@ -65,7 +66,7 @@ set -e # Build Linux version ( # ../bin/ctime -begin .${PROJECT_NAME}_linux - gcc $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME} $INCLUDE_PATHS $LINUX_INCLUDE $LDFLAGS $LINUX_LIBS + g++ $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME} $INCLUDE_PATHS $LINUX_INCLUDE $LDFLAGS $LINUX_LIBS # ../bin/ctime -end .${PROJECT_NAME}_linux $? ) & diff --git a/callbacks.c b/callbacks.c new file mode 100644 index 0000000..806fe86 --- /dev/null +++ b/callbacks.c @@ -0,0 +1,78 @@ + + +static void framebuffer_callback(GLFWwindow *window, int width, int height) { + state.screen_width = width; + state.screen_height = height; + state.viewport.x = 0; + state.viewport.y = 0; + state.viewport.w = width; + state.viewport.h = height; + + float current_aspect = (float)width / (float)height; + float aspect_ratio = 4.f/3.f; + + if(current_aspect > aspect_ratio) { + float new_width = height * aspect_ratio; + state.viewport.x = (width - new_width) / 2; + state.viewport.w = new_width; + } else if(current_aspect < aspect_ratio) { + float new_height = width / aspect_ratio; + state.viewport.y = (height - new_height) / 2; + state.viewport.h = new_height; + } +} + +static void toggle_fullscreen(bool enable) { + static int windowed_x; + static int windowed_y; + static int windowed_width; + static int windowed_height; + + if(enable) { // Save current windowed size and position + glfwGetWindowPos(window, &windowed_x, &windowed_y); + glfwGetWindowSize(window, &windowed_width, &windowed_height); + + GLFWmonitor *monitor = glfwGetPrimaryMonitor(); + const GLFWvidmode *mode = glfwGetVideoMode(monitor); + + glfwSetWindowMonitor(window, monitor, 0, 0, mode->width, mode->height, mode->refreshRate); + + } else { // Restore to saved windowed position/size + glfwSetWindowMonitor(window, 0, windowed_x, windowed_y, windowed_width, windowed_height, 0); // 0 = don't change refresh rate + } +} + +static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) { + + if(key == GLFW_KEY_ESCAPE) { + if(action == GLFW_PRESS) { + glfwSetWindowShouldClose(window, 1); + } + } + + if(action == GLFW_RELEASE) { + switch(key) { + case GLFW_KEY_F12: { + state.toggle_crt_emulation = !state.toggle_crt_emulation; + } break; + + case GLFW_KEY_F11: { + if(!(mods & GLFW_MOD_SHIFT)) { + if(state.fullscreen) { + toggle_fullscreen(false); + state.fullscreen = false; + } else { + toggle_fullscreen(true); + state.fullscreen = true; + } + } else { +#ifdef PROFILER + state.overlay = !state.overlay; +#endif + } + } break; + + default: break; + } + } +} diff --git a/cpu.c b/cpu.c index b773e57..eb7c837 100644 --- a/cpu.c +++ b/cpu.c @@ -95,7 +95,7 @@ static void cpu_tick(struct nes_state *state) { // if(cpu->pc <= 0x90cc || cpu->pc >= 0x90e6) { // printf("%5.5d %4.4x: ", line++, cpu->pc); // opcode = memory_read(state, cpu->pc++); - // printf("%2.2x a:%2.2x x:%2.2x y:%2.2x p:%2.2x sp:%2.2x cycle: %ld\n", opcode, cpu->a, cpu->x, cpu->y, pack_flags(cpu), cpu->sp, state->cycle); + // printf("%2.2x a:%2.2x x:%2.2x y:%2.2x p:%2.2x sp:%2.2x cycle: %ld\n", opcode, cpu->a, cpu->x, cpu->y, pack_flags(cpu), cpu->sp, state->cycles); // } else { opcode = memory_read(state, cpu->pc++); // } diff --git a/fragment_shader.glsl b/fragment_shader.glsl new file mode 100644 index 0000000..c21b2de --- /dev/null +++ b/fragment_shader.glsl @@ -0,0 +1,149 @@ +// Specify default precision for fragment shaders + +out vec4 outcolor; +in vec2 frag_texture_coord; + +uniform vec2 resolution; +uniform vec2 src_image_size; +uniform float brightness; +uniform vec4 tone_data; +uniform bool crt_emulation; +uniform sampler2D iChannel0; + +vec3 CrtsFetch(vec2 uv) { + const float bias = 0.003333333; + return max(texture(iChannel0, uv, -16.0).rgb, vec3(bias)); +} + +#define CrtsRcpF1(x) (1.0 / (x)) +#define CrtsSatF1(x) clamp((x), 0.0, 1.0) +const float PI2 = 6.28318530717958; +const float HALF = 0.5; + +float CrtsMax3F1(float a, float b, float c) { + return max(a, max(b, c)); +} + +vec3 CrtsMask(vec2 pos, float dark) { + #ifdef CRTS_MASK_GRILLE + vec3 m = vec3(dark); + float x = fract(pos.x * (1.0 / 3.0)); + m.r = (x < (1.0 / 3.0)) ? 1.0 : dark; + m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark; + m.b = (x >= (2.0 / 3.0)) ? 1.0 : dark; + return m; + #endif + + #ifdef CRTS_MASK_GRILLE_LITE + vec3 m = vec3(1.0); + float x = fract(pos.x * (1.0 / 3.0)); + m.r = (x < (1.0 / 3.0)) ? dark : 1.0; + m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? dark : 1.0; + m.b = (x >= (2.0 / 3.0)) ? dark : 1.0; + return m; + #endif + + #ifdef CRTS_MASK_NONE + return vec3(1.0); + #endif + + #ifdef CRTS_MASK_SHADOW + pos.x += pos.y * 3.0; + vec3 m = vec3(dark); + float x = fract(pos.x * (1.0 / 6.0)); + m.r = (x < (1.0 / 3.0)) ? 1.0 : dark; + m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark; + m.b = (x >= (2.0 / 3.0)) ? 1.0 : dark; + return m; + #endif +} + +vec3 CrtsFilter(vec2 ipos, vec2 inputSizeDivOutputSize, vec2 halfInputSize, vec2 rcpInputSize, vec2 rcpOutputSize, vec2 twoDivOutputSize, float inputHeight, vec2 warp, float thin, float blur, float mask, vec4 tone) { + vec2 pos = ipos * twoDivOutputSize - vec2(1.0); + pos *= vec2(1.0 + (pos.y * pos.y) * warp.x, 1.0 + (pos.x * pos.x) * warp.y); + float vin = 1.0 - ((1.0 - CrtsSatF1(pos.x * pos.x)) * (1.0 - CrtsSatF1(pos.y * pos.y))); + vin = CrtsSatF1((-vin) * inputHeight + inputHeight); + pos = pos * halfInputSize + halfInputSize; + + float y0 = floor(pos.y - 0.5) + 0.5; + float x0 = floor(pos.x - 1.5) + 0.5; + vec2 p = vec2(x0 * rcpInputSize.x, y0 * rcpInputSize.y); + + vec3 colA[4], colB[4]; + for (int i = 0; i < 4; i++) { + colA[i] = CrtsFetch(p); + p.x += rcpInputSize.x; + } + p.y += rcpInputSize.y; + for (int i = 3; i >= 0; i--) { + p.x -= rcpInputSize.x; + colB[i] = CrtsFetch(p); + } + + float off = pos.y - y0; + float scanA = cos(min(HALF, off * thin) * PI2) * HALF + HALF; + float scanB = cos(min(HALF, (-off) * thin + thin) * PI2) * HALF + HALF; + + float off0 = pos.x - x0; + float pix[4]; + for (int i = 0; i < 4; i++) { + float diff = off0 - float(i); + pix[i] = exp2(blur * diff * diff); + } + float pixT = CrtsRcpF1(pix[0] + pix[1] + pix[2] + pix[3]); + + #ifdef CRTS_WARP + pixT *= vin; + #endif + + scanA *= pixT; + scanB *= pixT; + + vec3 color = (colA[0] * pix[0] + colA[1] * pix[1] + colA[2] * pix[2] + colA[3] * pix[3]) * scanA + (colB[0] * pix[0] + colB[1] * pix[1] + colB[2] * pix[2] + colB[3] * pix[3]) * scanB; + color *= CrtsMask(ipos, mask); + + #ifdef CRTS_TONE + float peak = max(1.0 / (256.0 * 65536.0), CrtsMax3F1(color.r, color.g, color.b)); + vec3 ratio = color * CrtsRcpF1(peak); + #ifdef CRTS_CONTRAST + peak = pow(peak, tone.x); + #endif + peak = peak * CrtsRcpF1(peak * tone.y + tone.z); + #ifdef CRTS_SATURATION + ratio = pow(ratio, vec3(tone.w)); + #endif + return ratio * peak; + #else + return color; + #endif +} + +vec3 linearToSRGB(vec3 color) { + return pow(color, vec3(1.0 / 2.2)); +} + +void main() { + vec2 fragCoord = vec2(frag_texture_coord.x, 1.0 - frag_texture_coord.y); + if (crt_emulation) { + outcolor.rgb = CrtsFilter( + fragCoord.xy * resolution, + src_image_size / resolution, + src_image_size * vec2(0.5), + 1.0 / src_image_size, + 1.0 / resolution, + 2.0 / resolution, + src_image_size.y, + vec2(1.0 / 24.0, 1.0 / 16.0), // warp value + INPUT_THIN, + INPUT_BLUR, + INPUT_MASK, + tone_data + ); + + outcolor.rgb *= brightness; + outcolor = vec4(outcolor.rgb, 1.0); // Keep original color with alpha set to 1.0 + + } else { + outcolor = texture(iChannel0, fragCoord); + } +} diff --git a/mapper.c b/mapper.c index 4fb8346..2e27304 100644 --- a/mapper.c +++ b/mapper.c @@ -1,21 +1,21 @@ -#include "mapper_nrom.c" +#include "mapper_001.c" // #include "mapper_mmc1.c" // #include "mapper_uxrom.c" -#include "mapper_gxrom.c" +#include "mapper_066.c" struct mapper_entry { - int id; - uint8_t (*read)(struct nes_state *state, uint32_t addr); - void (*write)(struct nes_state *state, uint32_t addr, uint8_t value); - void (*tick)(struct nes_state *state); - void (*init)(struct nes_state *state); + int id; + uint8_t (*read)(struct nes_state *state, uint32_t addr); + void (*write)(struct nes_state *state, uint32_t addr, uint8_t value); + void (*tick)(struct nes_state *state); + void (*init)(struct nes_state *state); }; static struct mapper_entry mapper_table[] = { - { 0, mapper_nrom_read, mapper_nrom_write, mapper_nrom_tick, mapper_nrom_init }, - { 66, mapper_66_read, mapper_66_write, mapper_66_tick, mapper_66_init }, + { 0, mapper_001_read, mapper_001_write, mapper_001_tick, mapper_001_init }, + { 66, mapper_066_read, mapper_066_write, mapper_066_tick, mapper_066_init }, // { 1, mapper_mmc1_read, ... }, etc }; @@ -23,7 +23,6 @@ static void mapper_setup(struct nes_state *state) { uint32_t mapper = state->ines.mapper; for(uint32_t i = 0; i < sizeof(mapper_table)/sizeof(mapper_table[0]); i++) { if(mapper_table[i].id == mapper) { -printf("%d\n", mapper); state->mapper.read = mapper_table[i].read; state->mapper.write = mapper_table[i].write; state->mapper.tick = mapper_table[i].tick; @@ -33,10 +32,11 @@ printf("%d\n", mapper); } } + // NOTE(peter): Not sure how safe this is, but it sure is funny... printf("Unsupported mapper %d, falling back to NROM\n", mapper); - state->mapper.read = mapper_nrom_read; - state->mapper.write = mapper_nrom_write; - state->mapper.tick = mapper_nrom_tick; - state->mapper.init = mapper_nrom_init; + state->mapper.read = mapper_001_read; + state->mapper.write = mapper_001_write; + state->mapper.tick = mapper_001_tick; + state->mapper.init = mapper_001_init; state->mapper.init(state); } diff --git a/mapper.h b/mapper.h index eb6c158..f38b000 100644 --- a/mapper.h +++ b/mapper.h @@ -1,2 +1,2 @@ -#include "mapper_gxrom.h" +#include "mapper_066.h" diff --git a/mapper_001.c b/mapper_001.c new file mode 100644 index 0000000..8c65367 --- /dev/null +++ b/mapper_001.c @@ -0,0 +1,23 @@ + + +static void mapper_001_init(struct nes_state *state) { + // Nothing to initialize for 001 +} + +static uint8_t mapper_001_read(struct nes_state *state, uint32_t addr) { + uint32_t prg_size = state->ines.prg_size; + + uint32_t mask = (state->ines.prg_size == 16384) ? 0x3fff : 0x7fff; + return state->rom[addr & mask]; +} + +static void mapper_001_write(struct nes_state *state, uint32_t addr, uint8_t value) { + (void)state; + (void)addr; + (void)value; +} + +static void mapper_001_tick(struct nes_state *state) { + (void)state; +} + diff --git a/mapper_066.c b/mapper_066.c new file mode 100644 index 0000000..aabf471 --- /dev/null +++ b/mapper_066.c @@ -0,0 +1,27 @@ + +static void mapper_066_init(struct nes_state *state) { + state->map.m066.prg_offset = 0; + state->map.m066.chr_offset = 0; +} + +static uint8_t mapper_066_read(struct nes_state *state, uint32_t addr) { + if(addr >= 0x8000) { + uint32_t base = state->map.m066.prg_offset; + return state->rom[base + (addr - 0x8000)]; + } + return 0; +} + +static void mapper_066_write(struct nes_state *state, uint32_t addr, uint8_t value) { + if(addr >= 0x8000) { + uint32_t prg_bank = (value >> 4) & 3; + uint32_t chr_bank = (value >> 0) & 3; + + state->map.m066.prg_offset = prg_bank * 0x8000; + state->map.m066.chr_offset = chr_bank * 0x2000; + } +} + +static void mapper_066_tick(struct nes_state *state) { + // No IRQ or timing logic needed +} diff --git a/mapper_066.h b/mapper_066.h new file mode 100644 index 0000000..d78f8b9 --- /dev/null +++ b/mapper_066.h @@ -0,0 +1,5 @@ + +struct mapper_066 { + uint32_t prg_offset; + uint32_t chr_offset; +}; diff --git a/mapper_gxrom.c b/mapper_gxrom.c deleted file mode 100644 index e03f9d2..0000000 --- a/mapper_gxrom.c +++ /dev/null @@ -1,27 +0,0 @@ - -static void mapper_66_init(struct nes_state *state) { - state->map.gxrom.prg_offset = 0; - state->map.gxrom.chr_offset = 0; -} - -static uint8_t mapper_66_read(struct nes_state *state, uint32_t addr) { - if(addr >= 0x8000) { - uint32_t base = state->map.gxrom.prg_offset; - return state->rom[base + (addr - 0x8000)]; - } - return 0; -} - -static void mapper_66_write(struct nes_state *state, uint32_t addr, uint8_t value) { - if(addr >= 0x8000) { - uint32_t prg_bank = (value >> 4) & 3; - uint32_t chr_bank = (value >> 0) & 3; - - state->map.gxrom.prg_offset = prg_bank * 0x8000; - state->map.gxrom.chr_offset = chr_bank * 0x2000; - } -} - -static void mapper_66_tick(struct nes_state *state) { - // No IRQ or timing logic needed -} diff --git a/mapper_gxrom.h b/mapper_gxrom.h deleted file mode 100644 index 757ec16..0000000 --- a/mapper_gxrom.h +++ /dev/null @@ -1,5 +0,0 @@ - -struct gxrom_mapper { - uint32_t prg_offset; - uint32_t chr_offset; -}; diff --git a/mapper_nrom.c b/mapper_nrom.c deleted file mode 100644 index 7abb89c..0000000 --- a/mapper_nrom.c +++ /dev/null @@ -1,23 +0,0 @@ - - -static void mapper_nrom_init(struct nes_state *state) { - // Nothing to initialize for NROM -} - -static uint8_t mapper_nrom_read(struct nes_state *state, uint32_t addr) { - uint32_t prg_size = state->ines.prg_size; - - uint32_t mask = (state->ines.prg_size == 16384) ? 0x3fff : 0x7fff; - return state->rom[addr & mask]; -} - -static void mapper_nrom_write(struct nes_state *state, uint32_t addr, uint8_t value) { - (void)state; - (void)addr; - (void)value; -} - -static void mapper_nrom_tick(struct nes_state *state) { - (void)state; -} - diff --git a/memory.c b/memory.c index baf10e9..35c1d41 100644 --- a/memory.c +++ b/memory.c @@ -2,7 +2,7 @@ static uint8_t memory_read(struct nes_state *restrict state, uint32_t offset) { - state->cycle++; + state->cycles++; ppu_tick(state); ppu_tick(state); ppu_tick(state); if(offset > 0xffff) { @@ -30,17 +30,13 @@ static uint8_t memory_read(struct nes_state *restrict state, uint32_t offset) { } static void memory_write(struct nes_state *restrict state, uint32_t offset, uint8_t value) { - state->cycle++; + state->cycles++; ppu_tick(state); ppu_tick(state); ppu_tick(state); if(offset > 0xffff) { printf("%x\n", offset); } -// if(offset == 0x0300) { -// printf("WRITE $0300 = %02x @ PC=%04x\n", value, state->cpu.pc); -// } - if(offset < 0x2000) { state->ram[offset & 0x07ff] = value; } else if(offset < 0x4000) { @@ -81,7 +77,7 @@ static uint8_t memory_read_dma(struct nes_state *restrict state, uint32_t offset } static uint8_t memory_read_dummy(struct nes_state *restrict state, uint32_t offset) { - state->cycle++; + state->cycles++; ppu_tick(state); ppu_tick(state); ppu_tick(state); if(offset < 0x2000) { diff --git a/mknes.c b/mknes.c index d94c868..54da663 100644 --- a/mknes.c +++ b/mknes.c @@ -1,6 +1,85 @@ -#include +#define GL_SILENCE_DEPRECATION +#define GLFW_INCLUDE_NONE +#include +#include +#include +#include +#include +#include +#include + +#define BUFFER_WIDTH 256 +#define BUFFER_HEIGHT 240 +#define WINDOW_WIDTH 320 * 3 +#define WINDOW_HEIGHT 240 * 3 + +#ifndef restrict +# if defined(__cplusplus) +# define restrict __restrict +# endif +#endif + + +struct main_state { + int32_t filter_override; + float filter_frequency; + + uint32_t screen_width; + uint32_t screen_height; + + uint32_t texture; + uint32_t render_width; + uint32_t render_height; + uint32_t shader_program; + + float contrast; + float saturation; + float brightness; + float tone_data[4]; + + int32_t uniform_resolution; + int32_t uniform_src_image_size; + int32_t uniform_brightness; + int32_t uniform_tone; + int32_t uniform_crt_emulation; + int32_t uniform_sampler_location; + + uint32_t vao; + uint32_t vbo; + uint32_t ebo; + + struct { + int32_t x; + int32_t y; + int32_t w; + int32_t h; + } viewport; + + uint8_t fullscreen; + uint8_t toggle_crt_emulation; +}; + +struct main_state state; + +uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); +uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); + +void audio_callback(int16_t *data, size_t frames) { -#include "base.c" +} + + +#define FRAME_INTERVAL_NS (1000000000ULL / 60.0988) + +static GLFWwindow *window; + +#define DEBUG_PRINT printf +#include "timer.c" +#include "opengl_loader.c" +#include "opengl.c" +#include "render.c" +#include "audio.c" +#include "callbacks.c" #include "mapper.h" @@ -12,65 +91,25 @@ #include "ines2.c" #include "mapper.c" - - struct nes_state nstate; static uint32_t frames; -static void render_callback(void) { - clear_buffer(); - while(!nstate.ppu.frame_ready) { - PROFILE_NAMED("nes emulator"); - cpu_tick(&nstate); - } - nstate.ppu.frame_ready = 0; - frames++; - - uint32_t *dst = RENDER_START(0,0); - uint8_t *src = nstate.ppu.pixels; - for(uint32_t y = 0; y < 240; ++y) { - for(uint32_t x = 0; x < 256; ++x) { - uint8_t val = *src++; - if(val >= 64) val = 0; - dst[x] = nes_palette[val]; - } - dst += BUFFER_WIDTH; - } -} - -static void shutdown_callback() { - printf("%d\n", frames); -} - -static void audio_callback(int16_t *buffer, size_t frames) { - -} - -#include -#include - -void protect_opcode_lut(void) { - uintptr_t addr = (uintptr_t)opcode_lut; - size_t page_size = getpagesize(); - uintptr_t page = addr & ~(page_size - 1); - - if(mprotect((void*)page, page_size, PROT_READ) != 0) { - perror("mprotect"); - abort(); - } -} +int main(int argc, char **argv) { +#ifdef _WIN32 + timeBeginPeriod(1); +#endif -static void init_callback(void) { + state.toggle_crt_emulation = 1; setbuf(stdout, 0); init_opcode_lut(); init_opcode_ud_lut(); // protect_opcode_lut(); ppu_reset(&nstate); // ines2_load(&nstate, "data/nrom/10-Yard Fight (USA, Europe).nes"); - // ines2_load(&nstate, "data/nrom/Balloon Fight (USA).nes"); - ines2_load(&nstate, "data/nrom/Excitebike (Japan, USA).nes"); + ines2_load(&nstate, "data/nrom/Balloon Fight (USA).nes"); + // ines2_load(&nstate, "data/nrom/Excitebike (Japan, USA).nes"); // ines2_load(&nstate, "data/nrom/Ice Climber (USA, Europe, Korea).nes"); // ines2_load(&nstate, "data/nrom/Kung Fu (Japan, USA).nes"); // ines2_load(&nstate, "data/nrom/Super Mario Bros. (World) (HVC-SM).nes"); @@ -93,82 +132,85 @@ static void init_callback(void) { uint32_t lo = nstate.mapper.read(&nstate, 0xfffc); uint32_t hi = nstate.mapper.read(&nstate, 0xfffd); nstate.cpu.pc = (hi << 8) | lo; -} - -// int main(void) { -// struct nes_state state = {0}; - -// init_opcode_lut(); -// init_opcode_ud_lut(); -// state.cpu.sp = 0xfd; - -// // FILE *f = fopen("nestest.nes", "rb"); -// // fseek(f, 16, SEEK_SET); -// // fread(&state.memory[0xc000], 1, 0x4000, f); -// // fclose(f); -// // state.cpu.pc = 0xc000; - -// FILE *f = fopen("6502_functional_test.bin", "rb"); -// fread(state.memory, 1, 0x10000, f); -// fclose(f); -// state.cpu.pc = 0x0400; - -// size_t i; -// for(i = 0; i < 70000000; ++i) { -// cpu_tick(&state); -// if(state.cpu.pc == 0x3469) break; -// } -// // printf("%lld", i); -// return 0; -// } - - - - -// bool running = true; -// uint64_t next_update = mkfw_gettime(); -// int64_t frame_duration_ns = 16666667; // NTSC ~60.0988 Hz - -// audio_sync_init(next_update); // Initialize sync base time -// while(running && !mkfw_should_close()) { -// mkfw_pump_messages(); -// if(key_pressed(MKS_KEY_ESCAPE)) { running = false; } - -// #ifdef PROFILER -// reset_profiling_data(); -// #endif - -// render_callback(); -// apply_phosphor_decay(); -// update_keyboard_state(); -// update_modifier_state(); -// update_mouse_state(); -// state.frame_number++; - -// #ifndef PERF_TEST -// render_frame(); -// #ifdef PROFILER -// debug_render(); -// #endif - -// audio_throttle_emulator(state.frame_number, &frame_duration_ns); - -// uint64_t now = mkfw_gettime(); -// int64_t remaining = next_update - now; - -// if(remaining > 0) { -// if(remaining > ONE_MILLISECOND_NS) { -// mkfw_sleep(remaining - ONE_MILLISECOND_NS); -// } -// while(mkfw_gettime() < next_update) { /**/ } -// } else { -// next_update = now; -// } + struct timer_handle *timer = timer_new(FRAME_INTERVAL_NS); + if(!timer) { + fprintf(stderr, "Failed to create timer\n"); + return 1; + } -// next_update += frame_duration_ns; + if(glfwInit()) { + + glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); + glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3); + glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_COMPAT_PROFILE); + + window = glfwCreateWindow(WINDOW_WIDTH, WINDOW_HEIGHT, "NES Emulator", 0, 0); + if(window) { + glfwSetWindowAspectRatio(window, 320, 240); + glfwSetWindowSizeLimits(window, 320*3, 240*3, GLFW_DONT_CARE, GLFW_DONT_CARE); + + glfwMakeContextCurrent(window); + opengl_setup(); + glfwSetKeyCallback(window, key_callback); + glfwSetFramebufferSizeCallback(window, framebuffer_callback); + glfwSwapInterval(0); + + framebuffer_callback(window, WINDOW_WIDTH, WINDOW_HEIGHT); + + for(int jid = GLFW_JOYSTICK_1; jid <= GLFW_JOYSTICK_LAST; jid++) { + if(glfwJoystickPresent(jid)) { + const char *name = glfwGetJoystickName(jid); + printf("Joystick %d detected: %s\n", jid, name); + break; + } + } + + set_decay(20); + + timer_start(timer); + + while(!glfwWindowShouldClose(window)) { + timer_wait(timer); + glfwPollEvents(); + +// // + while(!nstate.ppu.frame_ready) { + // PROFILE_NAMED("nes emulator"); + cpu_tick(&nstate); + } + nstate.ppu.frame_ready = 0; + frames++; + + uint32_t *dst = buffer; + uint8_t *src = nstate.ppu.pixels; + for(uint32_t y = 0; y < 240; ++y) { + for(uint32_t x = 0; x < 256; ++x) { + uint8_t val = *src++; + if(val >= 64) val = 0; + dst[x] = nes_palette[val]; + } + dst += BUFFER_WIDTH; + } +// + apply_phosphor_decay(); + render_frame(); + glfwSwapBuffers(window); + } + + glfwDestroyWindow(window); + } else { + fprintf(stderr, "Failed to create window\n"); + } + glfwTerminate(); -// mkfw_swap_buffers(); -// #endif -// } + } else { + fprintf(stderr, "Failed to initialize GLFW\n"); + } + timer_destroy(timer); +#ifdef _WIN32 + timeEndPeriod(1); +#endif + return 0; +} diff --git a/mknes.h b/mknes.h index 90bb58c..be2d946 100644 --- a/mknes.h +++ b/mknes.h @@ -64,7 +64,6 @@ struct ppu_state { uint8_t vram_read_buffer; uint8_t write_latch; - uint8_t ciram[0x800]; uint8_t palette[0x20]; uint32_t scanline; @@ -127,9 +126,7 @@ struct mapper { }; union mapper_data { - struct gxrom_mapper gxrom; - // struct mmc1_mapper mmc1; - // ... others + struct mapper_066 m066; }; struct nes_state { @@ -138,32 +135,23 @@ struct nes_state { struct ppu_state ppu; struct mapper mapper; union mapper_data map; - size_t cycle; - uint8_t ram[2048]; - uint8_t rom[4 * 1024 * 1024]; - uint8_t chrrom[4 * 1024 * 1024]; + size_t cycles; uint8_t irq_pending; uint8_t nmi_pending; + uint8_t ram[0x800]; + uint8_t sram[0x2000]; + uint8_t ciram[0x1000]; // NOTE(peter): Originally 0x800 bytes, but extended as it should work for up to fourway, this is optimization, reality is 2kb, but there is no side-effects, so this is fine! + uint8_t rom[4 * 1024 * 1024]; + uint8_t chrrom[4 * 1024 * 1024]; }; static uint32_t nes_palette[64] = { -// 0x757575ff, 0x271a75ff, 0x3b0072ff, 0x4c0f64ff, 0x400048ff, 0x600027ff, 0x600000ff, 0x500f00ff, -// 0x783a00ff, 0x755c00ff, 0x406c00ff, 0x504764ff, 0x005468ff, 0x000000ff, 0x000000ff, 0x000000ff, -// 0xbfbfbfff, 0x273aa7ff, 0x5c14a7ff, 0x7514a7ff, 0x751468ff, 0x982727ff, 0xa03a00ff, 0x986c00ff, -// 0x888800ff, 0x689800ff, 0x3aa700ff, 0x6c6c6cff, 0x007878ff, 0x000000ff, 0x000000ff, 0x000000ff, -// 0xffffffff, 0x3ab5ffff, 0x5cb5ffff, 0x9888ffff, 0xa778ffff, 0xc87878ff, 0xf05c00ff, 0xf08800ff, -// 0xe0a700ff, 0xb8b800ff, 0x88c800ff, 0xcccc68ff, 0x00e0d8ff, 0x000000ff, 0x000000ff, 0x000000ff, -// 0xffffffff, 0xa7e0ffff, 0xb8d8ffff, 0xc8c8ffff, 0xd8b8ffff, 0xd8a7a7ff, 0xf0d0b8ff, 0xf0d898ff, -// 0xf0c878ff, 0xd8d878ff, 0xb8e078ff, 0xd0e0b8ff, 0xb8f0f0ff, 0x000000ff, 0x000000ff, 0x000000ff -// }; - -0x585858ff, 0x00237cff, 0x0d1099ff, 0x300092ff, 0x4f006cff, 0x600035ff, 0x5c0500ff, 0x461800ff, -0x272d00ff, 0x093e00ff, 0x004500ff, 0x004106ff, 0x003545ff, 0x000000ff, 0x000000ff, 0x000000ff, -0xa1a1a1ff, 0x0b53d7ff, 0x3337feff, 0x6621f7ff, 0x9515beff, 0xac166eff, 0xa62721ff, 0x864300ff, -0x596200ff, 0x2d7a00ff, 0x0c8500ff, 0x007f2aff, 0x006d85ff, 0x000000ff, 0x000000ff, 0x000000ff, -0xffffffff, 0x51a5feff, 0x8084feff, 0xbc6afeff, 0xf15bfeff, 0xfe5ec4ff, 0xfe7269ff, 0xe19321ff, -0xadb600ff, 0x79d300ff, 0x51df21ff, 0x3ad974ff, 0x39c3dfff, 0x424242ff, 0x000000ff, 0x000000ff, -0xffffffff, 0xb5d9feff, 0xcacafeff, 0xe3befeff, 0xf9b8feff, 0xfebae7ff, 0xfec3bcff, 0xf4d199ff, -0xdee086ff, 0xc6ec87ff, 0xb2f29dff, 0xa7f0c3ff, 0xa8e7f0ff, 0xacacacff, 0x000000ff, 0x000000ff + 0x585858ff, 0x00237cff, 0x0d1099ff, 0x300092ff, 0x4f006cff, 0x600035ff, 0x5c0500ff, 0x461800ff, + 0x272d00ff, 0x093e00ff, 0x004500ff, 0x004106ff, 0x003545ff, 0x000000ff, 0x000000ff, 0x000000ff, + 0xa1a1a1ff, 0x0b53d7ff, 0x3337feff, 0x6621f7ff, 0x9515beff, 0xac166eff, 0xa62721ff, 0x864300ff, + 0x596200ff, 0x2d7a00ff, 0x0c8500ff, 0x007f2aff, 0x006d85ff, 0x000000ff, 0x000000ff, 0x000000ff, + 0xffffffff, 0x51a5feff, 0x8084feff, 0xbc6afeff, 0xf15bfeff, 0xfe5ec4ff, 0xfe7269ff, 0xe19321ff, + 0xadb600ff, 0x79d300ff, 0x51df21ff, 0x3ad974ff, 0x39c3dfff, 0x424242ff, 0x000000ff, 0x000000ff, + 0xffffffff, 0xb5d9feff, 0xcacafeff, 0xe3befeff, 0xf9b8feff, 0xfebae7ff, 0xfec3bcff, 0xf4d199ff, + 0xdee086ff, 0xc6ec87ff, 0xb2f29dff, 0xa7f0c3ff, 0xa8e7f0ff, 0xacacacff, 0x000000ff, 0x000000ff }; - diff --git a/opengl.c b/opengl.c new file mode 100644 index 0000000..847fc12 --- /dev/null +++ b/opengl.c @@ -0,0 +1,129 @@ + +#include "shader.c" +#include "shader.h" +#include "fragment_shader.h" +#include "vertex_shader.h" + +/* [=]===^=[ compile_shader ]==============================================================^===[=] */ +static GLuint compile_shader(GLenum shader_type, const char *shader_source) { + GLuint shader = glCreateShader(shader_type); + glShaderSource(shader, 1, &shader_source, 0); + glCompileShader(shader); + + GLint success; + GLchar info_log[512]; + glGetShaderiv(shader, GL_COMPILE_STATUS, &success); + if(!success) { + glGetShaderInfoLog(shader, sizeof(info_log), 0, info_log); + printf("%s shader compilation failed:\n%s\n", (shader_type == GL_VERTEX_SHADER) ? "Vertex" : "Fragment", info_log); + } + return shader; +} + +/* [=]===^=[ opengl_setup ]================================================================^===[=] */ +static void opengl_setup(void) { + gl_loader(); + + glEnable(GL_FRAMEBUFFER_SRGB); + glDisable(GL_DEPTH_TEST); + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glDisable(GL_CULL_FACE); + + // Compile shaders + GLuint vertex_shader = compile_shader(GL_VERTEX_SHADER, vertex_shader_start); + GLuint fragment_shader = compile_shader(GL_FRAGMENT_SHADER, fragment_shader_start); + +printf("%d %d\n", vertex_shader, fragment_shader); + + state.shader_program = glCreateProgram(); + glAttachShader(state.shader_program, vertex_shader); + glAttachShader(state.shader_program, fragment_shader); + glBindAttribLocation(state.shader_program, 0, "position"); + glBindAttribLocation(state.shader_program, 1, "texture_coord"); + glLinkProgram(state.shader_program); + + GLint success; + glGetProgramiv(state.shader_program, GL_LINK_STATUS, &success); + if(!success) { + GLchar log[512]; + glGetProgramInfoLog(state.shader_program, sizeof(log), 0, log); + printf("Shader Linking Failed:\n%s\n", log); + } + + glDeleteShader(vertex_shader); + glDeleteShader(fragment_shader); + glUseProgram(state.shader_program); + + state.contrast = 1.0f; + state.saturation = 0.0f; + state.brightness = 1.0f; + CrtsTone(state.tone_data, state.contrast, state.saturation, INPUT_THIN, INPUT_MASK); + + state.uniform_resolution = glGetUniformLocation(state.shader_program, "resolution"); + state.uniform_src_image_size = glGetUniformLocation(state.shader_program, "src_image_size"); + state.uniform_brightness = glGetUniformLocation(state.shader_program, "brightness"); + state.uniform_tone = glGetUniformLocation(state.shader_program, "tone_data"); + state.uniform_crt_emulation = glGetUniformLocation(state.shader_program, "crt_emulation"); + state.uniform_sampler_location = glGetUniformLocation(state.shader_program, "iChannel0"); + + glGenVertexArrays(1, &state.vao); + glBindVertexArray(state.vao); + + glGenBuffers(1, &state.vbo); + glGenBuffers(1, &state.ebo); + + glBindBuffer(GL_ARRAY_BUFFER, state.vbo); + const float vertices[] = { + -1.0f, -1.0f, 0.0f, 0.0f, + 1.0f, -1.0f, 1.0f, 0.0f, + 1.0f, 1.0f, 1.0f, 1.0f, + -1.0f, 1.0f, 0.0f, 1.0f + }; + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo); + static const unsigned int indices[] = { 0, 1, 2, 2, 3, 0 }; + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); + + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float))); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + + // Setup texture + glDeleteTextures(1, &state.texture); + glGenTextures(1, &state.texture); + glBindTexture(GL_TEXTURE_2D, state.texture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, BUFFER_WIDTH, BUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, buffer); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D, 0); +} + +/* [=]===^=[ render_frame ]=================================================================^===[=] */ +__attribute__((always_inline)) +static inline void render_frame(void) { + glClearColor(.0f, 0.f, 0.f, 1.f); + glClear(GL_COLOR_BUFFER_BIT); + + glUseProgram(state.shader_program); + glBindVertexArray(state.vao); + glBindBuffer(GL_ARRAY_BUFFER, state.vbo); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, state.texture); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, BUFFER_WIDTH, BUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, display_buffer); + glUniform2f(state.uniform_src_image_size, (float)BUFFER_WIDTH, (float)BUFFER_HEIGHT); + glUniform2f(state.uniform_resolution, (float)state.viewport.w, (float)state.viewport.h); + glUniform1f(state.uniform_brightness, state.brightness); + glUniform4f(state.uniform_tone, state.tone_data[0], state.tone_data[1], state.tone_data[2], state.tone_data[3]); + glUniform1i(state.uniform_crt_emulation, state.toggle_crt_emulation); + glUniform1i(state.uniform_sampler_location, 0); + glViewport(state.viewport.x, state.viewport.y, state.viewport.w, state.viewport.h); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0); +} diff --git a/opengl_loader.c b/opengl_loader.c new file mode 100644 index 0000000..9914e00 --- /dev/null +++ b/opengl_loader.c @@ -0,0 +1,210 @@ +#ifdef _WIN32 +typedef __int64 GLintptr; +#else +typedef intptr_t GLintptr; +#endif +typedef void GLvoid; +typedef unsigned char GLboolean; +typedef unsigned char GLubyte; +typedef char GLchar; + +typedef int GLint; +typedef int GLsizei; + +typedef unsigned int GLenum; +typedef unsigned int GLuint; +typedef unsigned int GLbitfield; + +typedef float GLfloat; +typedef double GLdouble; + +typedef unsigned long long GLsizeiptr; + +#define GL_NO_ERROR 0 +#define GL_INFO_LOG_LENGTH 0x8b84 +#define GL_ZERO 0x0000 +#define GL_ONE 0x0001 +#define GL_ALPHA 0x1906 +#define GL_BLEND 0x0be2 +#define GL_CLAMP_TO_EDGE 0x812f +#define GL_COLOR_BUFFER_BIT 0x4000 +#define GL_COMPILE_STATUS 0x8b81 +#define GL_DEPTH_TEST 0x0b71 +#define GL_FRAMEBUFFER_SRGB 0x8db9 +#define GL_FRAGMENT_SHADER 0x8b30 +#define GL_LINK_STATUS 0x8b82 +#define GL_MODELVIEW 0x1700 +#define GL_NEAREST 0x2600 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_PROJECTION 0x1701 +#define GL_QUADS 0x0007 +#define GL_RGBA 0x1908 +#define GL_RGBA8 0x8058 +#define GL_SCISSOR_TEST 0x0c11 +#define GL_SRGB8_ALPHA8 0x8c43 +#define GL_SRC_ALPHA 0x0302 +#define GL_TEXTURE0 0x84c0 +#define GL_TEXTURE_2D 0x0de1 +#define GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_UNSIGNED_INT_8_8_8_8 0x8035 +#define GL_VERTEX_SHADER 0x8b31 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_STATIC_DRAW 0x88e4 +#define GL_FLOAT 0x1406 +#define GL_FALSE 0 +#define GL_TRUE 1 +#define GL_LINEAR 0x2601 +#define GL_FUNC_ADD 0x8006 +#define GL_CULL_FACE 0x0b44 +#define GL_STREAM_DRAW 0x88e0 +#define GL_WRITE_ONLY 0x88b9 +#define GL_TRIANGLES 0x0004 +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_MULTISAMPLE 0x809d +#define GL_DYNAMIC_DRAW 0x88e8 +#define GL_RED 0x1903 +#define GL_FRAMEBUFFER_WIDTH 0x9310 +#define GL_FRAMEBUFFER_HEIGHT 0x9311 +#define GL_VIEWPORT 0x0ba2 +#define GL_PIXEL_UNPACK_BUFFER 0x88ec +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88ef +#define GL_VERTEX_ARRAY_BINDING 0x85b5 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_CURRENT_PROGRAM 0x8b8d +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_SRGB_ALPHA 0x8c43 +#define GL_UNPACK_ROW_LENGTH 0x0cf2 +#define GL_UNPACK_SKIP_PIXELS 0x0cf4 +#define GL_UNPACK_SKIP_ROWS 0x0cf3 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8646 + +#define DECLARE_GL_FUNCTION(Name, ReturnType, ...) typedef ReturnType (*type_##Name)(__VA_ARGS__); +#define DECLARE_GLOBAL_FUNCTION(Name, ...) type_##Name Name; + +#define GL_FUNCTIONS(X) \ + X(glActiveTexture, void, GLenum texture) \ + X(glAttachShader, void, GLuint program, GLuint shader) \ + X(glBindBuffer, void, GLenum target, GLuint buffer) \ + X(glBindTexture, void, GLenum target, GLuint texture) \ + X(glBufferData, void, GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage) \ + X(glClear, void, GLbitfield mask) \ + X(glClearColor, void, GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) \ + X(glCompileShader, void, GLuint shader) \ + X(glCreateProgram, GLuint) \ + X(glCreateShader, GLuint, GLenum type) \ + X(glDeleteShader, void, GLuint shader) \ + X(glDrawElements, void, GLenum mode, GLsizei count, GLenum type, const GLvoid *indices) \ + X(glEnableVertexAttribArray, void, GLuint index) \ + X(glGenBuffers, void, GLsizei n, GLuint *buffers) \ + X(glGenTextures, void, GLsizei n, GLuint *textures) \ + X(glGetShaderInfoLog, void, GLuint shader, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \ + X(glGetShaderiv, void, GLuint shader, GLenum pname, GLint *params) \ + X(glGetUniformLocation, GLint, GLuint program, const GLchar *name) \ + X(glLinkProgram, void, GLuint program) \ + X(glShaderSource, void, GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length) \ + X(glTexImage2D, void, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels) \ + X(glTexParameteri, void, GLenum target, GLenum pname, GLint param) \ + X(glTexSubImage2D, void, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels) \ + X(glUniform1f, void, GLint location, GLfloat v0) \ + X(glUniform1i, void, GLint location, GLint v0) \ + X(glUniform2f, void, GLint location, GLfloat v0, GLfloat v1) \ + X(glUniform4f, void, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3) \ + X(glUseProgram, void, GLuint program) \ + X(glVertexAttribPointer, void, GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid *pointer) \ + X(glViewport, void, GLint x, GLint y, GLsizei width, GLsizei height) \ + X(glDeleteProgram, void, GLuint program) \ + X(glDeleteBuffers, void, GLsizei n, const GLuint *buffers) \ + X(glDeleteTextures, void, GLsizei n, const GLuint *textures) \ + X(glEnable, void, GLenum cap) \ + X(glGenerateMipmap, void, GLenum target) \ + X(glGetProgramiv, void, GLuint program, GLenum pname, GLint *params) \ + X(glGetAttribLocation, GLint, GLuint program, const GLchar *name) \ + X(glDetachShader, void, GLuint program, GLuint shader) \ + X(glUniformMatrix4fv, void, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value) \ + X(glMapBuffer, void*, GLenum target, GLenum access) \ + X(glUnmapBuffer, GLboolean, GLenum target) \ + X(glBlendEquation, void, GLenum mode) \ + X(glBlendFunc, void, GLenum sfactor, GLenum dfactor) \ + X(glDisable, void, GLenum cap) \ + X(glScissor, void, GLint x, GLint y, GLsizei width, GLsizei height) \ + X(glTexCoord2f, void, GLfloat s, GLfloat t) \ + X(glVertex2f, void, GLfloat x, GLfloat y) \ + X(glGetError, GLenum) \ + X(glGetProgramInfoLog, void, GLuint program, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \ + X(glGenVertexArrays, void, GLsizei n, GLuint *arrays) \ + X(glBindVertexArray, void, GLuint array) \ + X(glDeleteVertexArrays, void, GLsizei n, const GLuint *arrays) \ + X(glDrawArrays, void, GLenum mode, GLint first, GLsizei count) \ + X(glBufferSubData, void, GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid *data) \ + X(glDisableVertexAttribArray, void, GLuint index) \ + X(glGetIntegerv, void, GLenum pname, GLint *data) \ + X(glBindAttribLocation, void, GLuint program, GLuint index, const GLchar *name) \ + X(glGetUniformfv, void, GLuint program, GLint location, GLfloat *params) \ + X(glPixelStorei, void, GLenum pname, GLint param) \ + X(glGetVertexAttribiv, void, GLuint index, GLenum pname, GLint *params) \ + X(glFinish, void) + +GL_FUNCTIONS(DECLARE_GL_FUNCTION) +GL_FUNCTIONS(DECLARE_GLOBAL_FUNCTION) + + +#if defined(_WIN32) +static void *get_any_gl_address(const char *name) { + void *p = (void *)wglGetProcAddress(name); + if(!p) { + HMODULE module = LoadLibraryA("opengl32.dll"); + if(module) { + p = (void *)GetProcAddress(module, name); + } + } + return p; +} + +#define GetOpenGLFunction(Name, ...) \ + *(void **)&Name = (void *)get_any_gl_address(#Name); \ + if(!Name) { \ + DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \ + exit(EXIT_FAILURE); \ + } + +#elif defined(__linux__) +#include +static void *glXGetProcAddress(const GLubyte *procName) { + static void *(*glxGetProcAddress)(const GLubyte *); + if(!glxGetProcAddress) { + void *libGL = dlopen("libGL.so.1", RTLD_LAZY | RTLD_GLOBAL); + if(!libGL) { + DEBUG_PRINT("Error: Unable to load libGL.so.1\n"); + exit(EXIT_FAILURE); + } + glxGetProcAddress = (void *(*)(const GLubyte *))dlsym(libGL, "glXGetProcAddress"); + if(!glxGetProcAddress) { + DEBUG_PRINT("Error: Unable to find glXGetProcAddress\n"); + exit(EXIT_FAILURE); + } + } + return glxGetProcAddress(procName); +} + +#define GetOpenGLFunction(Name, ...) \ + *(void **)&Name = (void *)glXGetProcAddress((const GLubyte *)#Name); \ + if(!Name) { \ + DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \ + exit(EXIT_FAILURE); \ + } +#endif + +__attribute__((cold, noinline, section(".init_section"))) +static void gl_loader() { + GL_FUNCTIONS(GetOpenGLFunction); +}; \ No newline at end of file diff --git a/ppu.c b/ppu.c index 4246e20..2c993c9 100644 --- a/ppu.c +++ b/ppu.c @@ -18,7 +18,6 @@ static void ppu_sprite_shift(struct nes_state *state) { } } - static void ppu_reset(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; memset(ppu, 0, sizeof(struct ppu_state)); @@ -44,11 +43,11 @@ static uint32_t ppu_resolve_ciram(struct nes_state *state, uint32_t addr) { static uint8_t ppu_ciram_read(struct nes_state *state, uint32_t addr) { - return state->ppu.ciram[ppu_resolve_ciram(state, addr)]; + return state->ciram[ppu_resolve_ciram(state, addr)]; } static void ppu_ciram_write(struct nes_state *state, uint32_t addr, uint8_t value) { - state->ppu.ciram[ppu_resolve_ciram(state, addr)] = value; + state->ciram[ppu_resolve_ciram(state, addr)] = value; } static void ppu_write_2000(struct nes_state *state, uint8_t value) { @@ -150,7 +149,41 @@ static uint8_t ppu_read_2007(struct nes_state *state) { return result; } +#if 1 +static void ppu_evaluate_sprites(struct nes_state *state) { + struct ppu_state *ppu = &state->ppu; + uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8; + uint8_t n = 0; + + uint8_t *src = ppu->oam; + uint8_t *dst = ppu->secondary_oam; + for(uint8_t i = 0; i < 64; i++) { + uint8_t y = src[0]; + int32_t row = (int32_t)ppu->scanline - y; + + if(row >= 0 && row < sprite_height) { + if(n < 8) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + ppu->sprite_indexes[n] = i; + ppu->sprite_zero_hit_possible |= (i == 0) ? 1 : 0; + + dst += 4; + n++; + } else { + ppu->reg_status |= 0x20; + break; + } + } + src += 4; + } + + ppu->sprite_count = n; +} +#else static void ppu_evaluate_sprites(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8; @@ -182,6 +215,7 @@ static void ppu_evaluate_sprites(struct nes_state *state) { ppu->sprite_count = n; } +#endif static void ppu_fetch_sprite_patterns(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; @@ -238,9 +272,9 @@ static void ppu_render_pixel(struct nes_state *state) { uint32_t x = ppu->dot - 1; uint32_t y = ppu->scanline; - if(x >= 256 || y >= 240) { - return; - } + // if(x >= 256 || y >= 240) { + // return; + // } uint32_t bit = 0x8000 >> ppu->fine_x; @@ -287,11 +321,10 @@ static void ppu_render_pixel(struct nes_state *state) { final_color = ppu->palette[(bg_palette << 2) | bg_pixel]; } } - assert(y*256+x <= 256*240); ppu->pixels[y * 256 + x] = final_color; } -__attribute__((flatten)) +__attribute__((hot, flatten)) static void ppu_tick(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; @@ -390,12 +423,6 @@ static void ppu_tick(struct nes_state *state) { ppu->vram_addr = (ppu->vram_addr & ~0x7be0) | (ppu->temp_addr & 0x7be0); } -// if(scanline == 261 && dot >= 280 && dot <= 304) { -// printf("Scroll Copy (V): SL:%d DOT:%d vram_addr=%04x temp_addr=%04x\n", scanline, dot, ppu->vram_addr, ppu->temp_addr); -// ppu->vram_addr = (ppu->vram_addr & 0x041f) | (ppu->temp_addr & 0x7be0); -// } - - if(dot == 257 && scanline < 240) { ppu_evaluate_sprites(state); } @@ -437,26 +464,21 @@ static void ppu_dma_4014(struct nes_state *state, uint8_t page) { uint32_t base = page << 8; // Add 1 or 2 idle cycles depending on current CPU cycle - uint8_t idle_cycles = (state->cycle & 1) ? 1 : 2; + uint8_t idle_cycles = (state->cycles & 1) ? 1 : 2; for(uint8_t i = 0; i < idle_cycles; i++) { - state->cycle++; + state->cycles++; ppu_tick(state); ppu_tick(state); ppu_tick(state); } for(uint32_t i = 0; i < 256; i++) { uint32_t addr = base + i; - // First CPU cycle (read, ticks only) - state->cycle++; + state->cycles++; ppu_tick(state); ppu_tick(state); ppu_tick(state); - - // Perform read uint8_t value = memory_read_dma(state, addr); - // Second CPU cycle (write) - ppu_write_2004(state, value); - - state->cycle++; + state->cycles++; ppu_tick(state); ppu_tick(state); ppu_tick(state); + ppu_write_2004(state, value); } } \ No newline at end of file diff --git a/render.c b/render.c new file mode 100644 index 0000000..1dc90aa --- /dev/null +++ b/render.c @@ -0,0 +1,47 @@ + + +/* [=]===^=[ clear_buffer ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void clear_buffer(void) { + // PROFILE_FUNCTION(); + memset(buffer, 0, sizeof(buffer)); +} + +/* [=]===^=[ set_decay ]=================================================================^===[=] */ +static uint16_t _old_weight; +static uint16_t _new_weight; +static void set_decay(uint16_t old_weight) { + _old_weight = old_weight ? (old_weight > 256 ? 256 : old_weight) : 0; + _new_weight = 256 - old_weight; +} + +/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */ +__attribute__((always_inline, hot)) +static inline void apply_phosphor_decay(void) { + // PROFILE_FUNCTION(); + __m256i old_weight = _mm256_set1_epi16(_old_weight); + __m256i new_weight = _mm256_set1_epi16(_new_weight); + __m128i alpha_mask = _mm_set1_epi32(0x000000ff); + uint32_t * restrict src = buffer; + uint32_t * restrict dst = display_buffer; + + for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) { + for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) { + _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0); + _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0); + + __m128i new_pixels = _mm_loadu_si128((__m128i*)&src[x]); + __m128i old_pixels = _mm_loadu_si128((__m128i*)&dst[x]); + + __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels); + __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels); + + __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight)); + blended = _mm256_srli_epi16(blended, 8); + + __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1)); + final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask)); + _mm_storeu_si128((__m128i*)&dst[x], final_pixels); + } + } +} diff --git a/shader.c b/shader.c new file mode 100644 index 0000000..a6e16f6 --- /dev/null +++ b/shader.c @@ -0,0 +1,44 @@ +#include "shader.h" + +//============================================================== +// +// CPU CODE +// +//============================================================== +// TONAL CONTROL CONSTANT GENERATION +//-------------------------------------------------------------- +// Make sure to use same CRTS_MASK_* defines on CPU and GPU!!!!! +//============================================================== +/* + * dst - Output 4 float array. + * + * contrast - Increase contrast, ranges from, + * 1.0 = no change + * 2.0 = very strong contrast (over 2.0 for even more) + * + * saturation - Increase saturation, ranges from, + * 0.0 = no change + * 1.0 = increased saturation (over 1.0 for even more) + * + * thin, mask - Inputs shared between CrtsTone() and CrtsFilter() + * + */ +static void CrtsTone(float * restrict dst, float contrast, float saturation, float thin, float mask) { +//-------------------------------------------------------------- +#ifdef CRTS_MASK_NONE + mask = 1.0f; +#endif +//-------------------------------------------------------------- +#ifdef CRTS_MASK_GRILLE_LITE + // Normal R mask is {1.0,mask,mask} + // LITE R mask is {mask,1.0,1.0} + mask = 0.5f + mask * 0.5f; +#endif +//-------------------------------------------------------------- + float midOut = 0.18f / ((1.5f - thin) * (0.5f * mask + 0.5f)); + float pMidIn = powf(0.18f, contrast); + dst[0] = contrast; + dst[1] = ((-pMidIn) + midOut) / ((1.0f - pMidIn) * midOut); + dst[2] = ((-pMidIn) * midOut + pMidIn) / (midOut * (-pMidIn) + midOut); + dst[3] = contrast + saturation; +} diff --git a/shader.h b/shader.h new file mode 100644 index 0000000..df838df --- /dev/null +++ b/shader.h @@ -0,0 +1,34 @@ +//============================================================== +// SETUP FOR CRTS +//============================================================== +#define CRTS_TONE 1 +#define CRTS_CONTRAST 1 +#define CRTS_SATURATION 1 +//-------------------------------------------------------------- +#define CRTS_WARP 1 +//-------------------------------------------------------------- +// Try different masks +#define CRTS_MASK_GRILLE 1 +// #define CRTS_MASK_GRILLE_LITE 1 +// #define CRTS_MASK_NONE 1 +// #define CRTS_MASK_SHADOW 1 +// -------------------------------------------------------------- +// Scanline thinness +// 0.50 = fused scanlines +// 0.70 = recommended default +// 1.00 = thinner scanlines (too thin) +#define INPUT_THIN 0.7 +//-------------------------------------------------------------- +// Horizonal scan blur +// -3.0 = pixely +// -2.5 = default +// -2.0 = smooth +// -1.0 = too blurry +#define INPUT_BLUR -2.5 +//-------------------------------------------------------------- +// Shadow mask effect, ranges from, +// 0.25 = large amount of mask (not recommended, too dark) +// 0.50 = recommended default +// 1.00 = no shadow mask +#define INPUT_MASK 0.5 + diff --git a/timer.c b/timer.c new file mode 100644 index 0000000..96eb453 --- /dev/null +++ b/timer.c @@ -0,0 +1,149 @@ + + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#include + +struct timer_handle { + HANDLE htimer; + uint64_t interval_ns; + uint64_t qpc_frequency; + uint64_t next_deadline; + bool started; +}; + +static inline uint64_t qpc_now_ns(uint64_t freq) { + LARGE_INTEGER qpc; + QueryPerformanceCounter(&qpc); + return (uint64_t)((qpc.QuadPart * 1000000000ULL) / freq); +} + +struct timer_handle *timer_new(uint64_t interval_ns) { + struct timer_handle *t = (struct timer_handle *)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct timer_handle)); + if(!t) return 0; + + t->htimer = CreateWaitableTimerW(0, TRUE, 0); + if(!t->htimer) { + HeapFree(GetProcessHeap(), 0, t); + return 0; + } + + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + t->qpc_frequency = freq.QuadPart; + t->interval_ns = interval_ns; + t->started = false; + + return t; +} + +bool timer_start(struct timer_handle *t) { + t->next_deadline = qpc_now_ns(t->qpc_frequency) + t->interval_ns; + t->started = true; + return true; +} + +bool timer_wait(struct timer_handle *t) { + if(!t->started) return false; + + uint64_t now = qpc_now_ns(t->qpc_frequency); + if(t->next_deadline <= now) { + t->next_deadline += t->interval_ns; + return true; + } + + uint64_t sleep_ns = t->next_deadline - now; + if(sleep_ns > 500000) { // > 0.5ms + LARGE_INTEGER due; + due.QuadPart = -(int64_t)((sleep_ns - 500000) / 100); // 100ns units, negative = relative + SetWaitableTimer(t->htimer, &due, 0, 0, 0, 0); + WaitForSingleObject(t->htimer, INFINITE); + } + + while(qpc_now_ns(t->qpc_frequency) < t->next_deadline) { + YieldProcessor(); // pause instruction + } + + t->next_deadline += t->interval_ns; + return true; +} + +void timer_destroy(struct timer_handle *t) { + if(t) { + if(t->htimer) CloseHandle(t->htimer); + HeapFree(GetProcessHeap(), 0, t); + } +} + + +#else // Linux +#include +#include +#include +#include +#include +#include + +struct timer_handle { + int tfd; + int epfd; + struct itimerspec spec; + int started; +}; + +static struct timer_handle *timer_new(uint64_t interval_ns) { + struct timer_handle *t = (struct timer_handle *)malloc(sizeof(struct timer_handle)); + if(!t) return 0; + + t->tfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC); + if(t->tfd < 0) { free(t); return 0; } + + t->epfd = epoll_create1(EPOLL_CLOEXEC); + if(t->epfd < 0) { + close(t->tfd); free(t); return 0; + } + + struct epoll_event ev = { .events = EPOLLIN, .data = { .fd = t->tfd } }; + epoll_ctl(t->epfd, EPOLL_CTL_ADD, t->tfd, &ev); + + t->spec.it_interval.tv_sec = interval_ns / 1000000000ULL; + t->spec.it_interval.tv_nsec = interval_ns % 1000000000ULL; + t->spec.it_value.tv_sec = 0; + t->spec.it_value.tv_nsec = 0; + t->started = 0; + + return t; +} + +static bool timer_start(struct timer_handle *t) { + if(t->started) return true; + + t->spec.it_value = t->spec.it_interval; + if(timerfd_settime(t->tfd, 0, &t->spec, 0)) return false; + + t->started = 1; + return true; +} + +static bool timer_wait(struct timer_handle *t) { + if(!t->started) return false; + + struct epoll_event ev; + int r = epoll_wait(t->epfd, &ev, 1, -1); + if(r < 0) return false; + uint64_t expirations; + read(t->tfd, &expirations, sizeof(expirations)); + return true; +} + +static void timer_destroy(struct timer_handle *t) { + if(t) { + if(t->tfd >= 0) close(t->tfd); + if(t->epfd >= 0) close(t->epfd); + free(t); + } +} + +#endif diff --git a/vertex_shader.glsl b/vertex_shader.glsl new file mode 100644 index 0000000..d8f1848 --- /dev/null +++ b/vertex_shader.glsl @@ -0,0 +1,9 @@ +in vec2 position; +in vec2 texture_coord; + +out vec2 frag_texture_coord; + +void main() { + frag_texture_coord = texture_coord; + gl_Position = vec4(position, 0.0, 1.0); +} -- cgit v1.2.3