summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--audio.c462
-rw-r--r--base/base.c4
-rw-r--r--base/shader.h4
-rwxr-xr-xbuild.sh13
-rw-r--r--callbacks.c78
-rw-r--r--cpu.c2
-rw-r--r--fragment_shader.glsl149
-rw-r--r--mapper.c28
-rw-r--r--mapper.h2
-rw-r--r--mapper_001.c23
-rw-r--r--mapper_066.c27
-rw-r--r--mapper_066.h (renamed from mapper_gxrom.h)2
-rw-r--r--mapper_gxrom.c27
-rw-r--r--mapper_nrom.c23
-rw-r--r--memory.c10
-rw-r--r--mknes.c290
-rw-r--r--mknes.h42
-rw-r--r--opengl.c129
-rw-r--r--opengl_loader.c210
-rw-r--r--ppu.c70
-rw-r--r--render.c47
-rw-r--r--shader.c44
-rw-r--r--shader.h34
-rw-r--r--timer.c149
-rw-r--r--vertex_shader.glsl9
25 files changed, 1619 insertions, 259 deletions
diff --git a/audio.c b/audio.c
new file mode 100644
index 0000000..4311d2f
--- /dev/null
+++ b/audio.c
@@ -0,0 +1,462 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <math.h>
+#include <stdlib.h>
+
+#define SAMPLE_RATE 48000
+#define NUM_CHANNELS 2
+#define FRAME_SIZE (NUM_CHANNELS * sizeof(short))
+
+// static inline float smoothstep(float edge0, float edge1, float x) {
+// x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1]
+// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1]
+// return x * x * (3.0f - 2.0f * x); // Smooth interpolation
+// }
+
+// static inline float smootherstep(float edge0, float edge1, float x) {
+// x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1]
+// x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1]
+// return x * x * x * (x * (x * 6 - 15) + 10); // Modified curve
+// }
+
+static inline float fast_cos(float x) {
+ float x2 = x * x;
+ return 1.0f - x2 * (0.5f - x2 * 0.04166667f); // Approximation of cos(x)
+}
+
+static inline float cosine_smooth(float edge0, float edge1, float x) {
+ x = (x - edge0) / (edge1 - edge0); // Scale x to [0, 1]
+ x = x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x); // Clamp to [0, 1]
+ return 0.5f * (1.0f - fast_cos(x * M_PI)); // Cosine smoothing
+}
+
+static float filter_phase = 0.0f;
+static float prev_output_sample_L = 0.0f;
+static float prev_output_sample_R = 0.0f;
+
+static void audio_callback_thread(int16_t *audio_buffer, size_t frames) {
+ int filter_override = state.filter_override; // Manual override: -1 = auto, 0 = off, 1 = on
+ float filter_frequency = state.filter_frequency; // Frequency in Hz for squarewave toggle
+
+ audio_callback(audio_buffer, frames);
+
+ if(filter_override) {
+ float a = 1.0f * M_PI * 4000.0f / (SAMPLE_RATE + 1.0f * M_PI * 4000.0f);
+ float phase_increment = filter_frequency / SAMPLE_RATE;
+
+ for(size_t i = 0; i < frames * 2; i += 2) {
+ float led_filter_active;
+
+ if(filter_override == -1) {
+ filter_phase += phase_increment;
+ if(filter_phase >= 1.0f) filter_phase -= 1.0f;
+
+ led_filter_active = cosine_smooth(0.45f, 0.50f, filter_phase) - cosine_smooth(0.95f, 1.00f, filter_phase);
+
+ } else {
+ led_filter_active = 1.0f; // Manual override (1 = on)
+ }
+
+ float input_sample_L = (float)audio_buffer[i] / 32767.0f;
+ float input_sample_R = (float)audio_buffer[i + 1] / 32767.0f;
+
+ float filtered_sample_L = a * input_sample_L + (1.0f - a) * prev_output_sample_L;
+ float filtered_sample_R = a * input_sample_R + (1.0f - a) * prev_output_sample_R;
+
+ prev_output_sample_L = filtered_sample_L;
+ prev_output_sample_R = filtered_sample_R;
+
+ audio_buffer[i] = (int16_t)((1.0f - led_filter_active) * input_sample_L * 32767.0f + led_filter_active * filtered_sample_L * 32767.0f);
+ audio_buffer[i + 1] = (int16_t)((1.0f - led_filter_active) * input_sample_R * 32767.0f + led_filter_active * filtered_sample_R * 32767.0f);
+ }
+ }
+}
+
+#ifdef __linux__
+
+
+#include <pipewire/pipewire.h>
+#include <spa/param/audio/format-utils.h>
+#include <spa/param/props.h>
+
+#define BUFFER_SIZE (512 * FRAME_SIZE)
+
+static struct pw_thread_loop *pa_thread_loop;
+static struct pw_context *pa_context;
+static struct pw_core *pa_core;
+static struct pw_stream *pa_stream;
+static struct spa_hook pa_stream_listener;
+static uint64_t audio_clock_frequency;
+static uint64_t playback_cursor;
+
+/*
+ * Called from PipeWire's real-time thread whenever new audio data is needed.
+ * We dequeue a buffer, call your audio_callback() to fill it, and then re-queue.
+ */
+static void on_process(void *userdata) {
+ struct pw_buffer *buffer;
+ struct spa_buffer *spa_buf;
+ int16_t *data;
+ uint32_t size;
+ uint32_t frames;
+ struct pw_time time_info;
+
+ buffer = pw_stream_dequeue_buffer(pa_stream);
+ if(!buffer) {
+ /* No buffer available, skip. */
+ return;
+ }
+
+ spa_buf = buffer->buffer;
+ if(!spa_buf->datas || !spa_buf->datas[0].data) {
+ pw_stream_queue_buffer(pa_stream, buffer);
+ return;
+ }
+
+ data = (int16_t*)spa_buf->datas[0].data;
+ size = spa_buf->datas[0].maxsize;
+ frames = size / FRAME_SIZE;
+
+ // if(pw_stream_get_time_n(pa_stream, &time_info, sizeof(time_info)) == 0) {
+ // playback_cursor = time_info.now;
+ // }
+ // printf("Cursor(ns): %luns\n", playback_cursor);
+
+ audio_callback_thread(data, frames);
+
+ if(spa_buf->datas[0].chunk) {
+ spa_buf->datas[0].chunk->size = frames * FRAME_SIZE;
+ spa_buf->datas[0].chunk->stride = FRAME_SIZE;
+ }
+
+ pw_stream_queue_buffer(pa_stream, buffer);
+}
+
+/*
+ * Initialize PipeWire, create the stream, and connect for audio playback.
+ * Returns immediately so your main thread can continue.
+ */
+int audio_initialize(void) {
+ pw_init(0, 0);
+
+ pa_thread_loop = pw_thread_loop_new("my-audio-loop", 0);
+ if(pa_thread_loop) {
+ if(pw_thread_loop_start(pa_thread_loop) == 0) {
+ pw_thread_loop_lock(pa_thread_loop);
+
+ pa_context = pw_context_new(pw_thread_loop_get_loop(pa_thread_loop), 0, 0);
+ if(pa_context) {
+ pa_core = pw_context_connect(pa_context, 0, 0);
+ if(pa_core){
+ static const struct spa_dict_item items[] = {
+ SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_TYPE, "Audio"),
+ SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_CATEGORY, "Playback"),
+ SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_ROLE, "Game"),
+ SPA_DICT_ITEM_INIT(PW_KEY_NODE_LATENCY, "512/48000")
+ };
+ struct spa_dict dict = SPA_DICT_INIT(items, 4);
+ struct pw_properties *props = pw_properties_new_dict(&dict);
+ // pw_properties_free(props);
+
+ pa_stream = pw_stream_new(pa_core, "My Audio Stream", props);
+ if(pa_stream) {
+ static struct pw_stream_events stream_events = {};
+ stream_events.version = PW_VERSION_STREAM_EVENTS;
+ stream_events.process = on_process;
+
+ pw_stream_add_listener(pa_stream, &pa_stream_listener, &stream_events, 0);
+
+ /*
+ * Build two SPA params:
+ * 1) The audio format: S16_LE, SAMPLE_RATE, NUM_CHANNELS
+ * 2) The buffer param: request BUFFER_SIZE bytes per buffer
+ */
+ uint8_t fmt_buffer[1024];
+ struct spa_pod_builder fmt_builder = SPA_POD_BUILDER_INIT(fmt_buffer, sizeof(fmt_buffer));
+ const struct spa_pod *fmt_param = (const struct spa_pod*)spa_pod_builder_add_object(
+ &fmt_builder,
+ SPA_TYPE_OBJECT_Format, SPA_PARAM_EnumFormat,
+ SPA_FORMAT_mediaType, SPA_POD_Id(SPA_MEDIA_TYPE_audio),
+ SPA_FORMAT_mediaSubtype, SPA_POD_Id(SPA_MEDIA_SUBTYPE_raw),
+ SPA_FORMAT_AUDIO_format, SPA_POD_Id(SPA_AUDIO_FORMAT_S16_LE),
+ SPA_FORMAT_AUDIO_rate, SPA_POD_Int(SAMPLE_RATE),
+ SPA_FORMAT_AUDIO_channels, SPA_POD_Int(NUM_CHANNELS)
+ );
+
+ uint8_t buf_buffer[1024];
+ struct spa_pod_builder buf_builder = SPA_POD_BUILDER_INIT(buf_buffer, sizeof(buf_buffer));
+ struct spa_pod *buf_param = (struct spa_pod*)spa_pod_builder_add_object(
+ &buf_builder,
+ SPA_TYPE_OBJECT_ParamBuffers, SPA_PARAM_Buffers,
+ SPA_PARAM_BUFFERS_buffers, SPA_POD_CHOICE_RANGE_Int(8, 2, 16), /* We'll request 8 buffers, each of size = BUFFER_SIZE bytes. */
+ SPA_PARAM_BUFFERS_blocks, SPA_POD_Int(1),
+ SPA_PARAM_BUFFERS_size, SPA_POD_CHOICE_RANGE_Int(BUFFER_SIZE, BUFFER_SIZE, BUFFER_SIZE*8),
+ SPA_PARAM_BUFFERS_stride, SPA_POD_Int(FRAME_SIZE),
+ SPA_PARAM_BUFFERS_align, SPA_POD_Int(16)
+ );
+
+ const struct spa_pod *params[2];
+ params[0] = fmt_param;
+ params[1] = buf_param;
+
+ int res = pw_stream_connect(pa_stream, PW_DIRECTION_OUTPUT, PW_ID_ANY, (pw_stream_flags)(PW_STREAM_FLAG_AUTOCONNECT | PW_STREAM_FLAG_RT_PROCESS | PW_STREAM_FLAG_MAP_BUFFERS), params, 2);
+ pw_thread_loop_unlock(pa_thread_loop);
+ return 0;
+
+ } else {
+ fprintf(stderr, "Failed to create PipeWire stream\n");
+ }
+ pw_core_disconnect(pa_core);
+ } else {
+ fprintf(stderr, "Failed to connect context to core\n");
+ }
+ pw_context_destroy(pa_context);
+ } else {
+ fprintf(stderr, "Failed to create PipeWire context\n");
+ }
+ pw_thread_loop_unlock(pa_thread_loop);
+ pw_thread_loop_stop(pa_thread_loop);
+ } else {
+ fprintf(stderr, "Failed to start PipeWire thread loop\n");
+ }
+ pw_thread_loop_destroy(pa_thread_loop);
+ } else {
+ fprintf(stderr, "Failed to create PipeWire thread loop\n");
+ }
+ pw_deinit();
+ return -1;
+}
+
+/*
+ * Clean up PipeWire objects, stop the thread loop, and deinit.
+ * This should be called before your program exits.
+ */
+void audio_shutdown(void) {
+ if(!pa_thread_loop) {
+ return;
+ }
+
+ pw_thread_loop_lock(pa_thread_loop);
+
+ if(pa_stream){
+ pw_stream_disconnect(pa_stream);
+ pw_stream_destroy(pa_stream);
+ }
+
+ if(pa_core){
+ pw_core_disconnect(pa_core);
+ }
+
+ if(pa_context){
+ pw_context_destroy(pa_context);
+ }
+
+ pw_thread_loop_unlock(pa_thread_loop);
+ pw_thread_loop_stop(pa_thread_loop);
+ pw_thread_loop_destroy(pa_thread_loop);
+ pw_deinit();
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#elif _WIN32
+
+#define COBJMACROS
+#include <windows.h>
+#include <initguid.h>
+#include <audioclient.h>
+#include <mmdeviceapi.h>
+#include <avrt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <timeapi.h>
+
+/*
+ * Minimal WASAPI shared-mode audio playback with explicit 48kHz/16-bit/2ch.
+ */
+
+#define NUM_CHANNELS 2
+
+static IMMDeviceEnumerator *enumerator;
+static IMMDevice *device_out;
+static IAudioClient *audio_client_out;
+static IAudioRenderClient *render_client;
+static HANDLE audio_event;
+static HANDLE audio_thread;
+static int running;
+
+static DWORD WINAPI audio_thread_proc(void *arg) {
+ UINT32 buffer_size;
+ UINT32 padding;
+ UINT32 available;
+ uint8_t *data;
+
+ IAudioClient_GetBufferSize(audio_client_out, &buffer_size);
+
+ while(running) {
+ WaitForSingleObject(audio_event, INFINITE);
+ if(!running) {
+ break;
+ }
+
+ IAudioClient_GetCurrentPadding(audio_client_out, &padding);
+ available = buffer_size - padding;
+ IAudioRenderClient_GetBuffer(render_client, available, &data);
+ audio_callback_thread((int16_t*)data, available);
+ IAudioRenderClient_ReleaseBuffer(render_client, available, 0);
+ }
+ return 0;
+}
+
+void audio_initialize() {
+ WAVEFORMATEX wf;
+ REFERENCE_TIME dur_out;
+
+ CoInitializeEx(0, COINIT_MULTITHREADED);
+ if(SUCCEEDED(CoCreateInstance(&CLSID_MMDeviceEnumerator, 0, CLSCTX_ALL, &IID_IMMDeviceEnumerator, (void**)&enumerator))) {
+ if(SUCCEEDED(IMMDeviceEnumerator_GetDefaultAudioEndpoint(enumerator, eRender, eConsole, &device_out))) {
+ if(SUCCEEDED(IMMDevice_Activate(device_out, &IID_IAudioClient, CLSCTX_ALL, 0, (void**)&audio_client_out))) {
+ wf.wFormatTag = WAVE_FORMAT_PCM;
+ wf.nChannels = NUM_CHANNELS;
+ wf.nSamplesPerSec = 48000;
+ wf.wBitsPerSample = 16;
+ wf.nBlockAlign = (wf.nChannels * wf.wBitsPerSample) / 8;
+ wf.nAvgBytesPerSec = wf.nSamplesPerSec * wf.nBlockAlign;
+ wf.cbSize = 0;
+
+ IAudioClient_GetDevicePeriod(audio_client_out, &dur_out, 0);
+ IAudioClient_Initialize(audio_client_out, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, dur_out, 0, &wf, 0);
+ audio_event = CreateEvent(0, FALSE, FALSE, 0);
+ if(audio_event){
+ IAudioClient_SetEventHandle(audio_client_out, audio_event);
+ IAudioClient_GetService(audio_client_out, &IID_IAudioRenderClient, (void**)&render_client);
+ IAudioClient_Start(audio_client_out);
+
+ running = 1;
+ audio_thread = CreateThread(0, 0, audio_thread_proc, 0, 0, 0);
+ return;
+ } else {
+ printf("Failed to create audio event\n");
+ }
+ audio_client_out->lpVtbl->Release(audio_client_out);
+ } else {
+ printf("Failed to activate audio client\n");
+ }
+ device_out->lpVtbl->Release(device_out);
+ } else {
+ printf("Failed to get default audio endpoint\n");
+ }
+ enumerator->lpVtbl->Release(enumerator);
+ } else {
+ printf("Failed to create MMDeviceEnumerator\n");
+ }
+}
+
+void audio_shutdown() {
+ running = 0;
+ if(audio_thread) {
+ SetEvent(audio_event);
+ WaitForSingleObject(audio_thread, INFINITE);
+ CloseHandle(audio_thread);
+ }
+ if(audio_event) {
+ CloseHandle(audio_event);
+ }
+ if(audio_client_out) {
+ IAudioClient_Stop(audio_client_out);
+ audio_client_out->lpVtbl->Release(audio_client_out);
+ }
+ if(render_client) {
+ render_client->lpVtbl->Release(render_client);
+ }
+ if(device_out) {
+ device_out->lpVtbl->Release(device_out);
+ }
+ if(enumerator) {
+ enumerator->lpVtbl->Release(enumerator);
+ }
+ CoUninitialize();
+}
+
+#endif
+
+
+
+
+
+// BELOW IS FOR FUTURE FRAME SYNCHRONIZATION!!!
+
+#if 0
+// Audio sync throttling logic (using audio playback clock)
+
+#define AUDIO_SAMPLE_RATE 48000
+#define FRAMETIME (1000000000 / 60) // NES: ~16.67ms per frame (replace as needed for PAL/other)
+
+static uint64_t emulator_start_time_ns = 0;
+static uint64_t audio_start_time_ns = 0;
+
+// Stub: return current audio playback time in nanoseconds
+uint64_t get_audio_playback_time_ns(void);
+
+// Call this once at emulation start
+void audio_sync_init(uint64_t current_time_ns) {
+ emulator_start_time_ns = current_time_ns;
+ audio_start_time_ns = get_audio_playback_time_ns();
+}
+
+// Call this at the end of each frame
+void audio_throttle_emulator(uint64_t frame_number, int64_t *frame_duration_ns) {
+ uint64_t expected_emulated_time = frame_number * FRAMETIME;
+ uint64_t actual_audio_time = get_audio_playback_time_ns() - audio_start_time_ns;
+
+ int64_t drift = (int64_t)(actual_audio_time) - (int64_t)(expected_emulated_time);
+
+ // Adjust frame duration to correct drift gradually
+ *frame_duration_ns -= drift / 8;
+ // Clamp adjustment to avoid jitter
+ if(*frame_duration_ns > FRAMETIME + 50000) {
+ *frame_duration_ns = FRAMETIME + 50000;
+ } else if(*frame_duration_ns < FRAMETIME - 50000) {
+ *frame_duration_ns = FRAMETIME - 50000;
+ }
+}
+
+#ifdef _WIN32
+#include <windows.h>
+#include <mmdeviceapi.h>
+#include <audioclient.h>
+
+uint64_t get_audio_playback_time_ns(void) {
+ // WASAPI: query IAudioClock interface
+ // This is just a placeholder. You’ll need to cache IAudioClock *audio_clock externally.
+ extern IAudioClock *audio_clock;
+ UINT64 pos;
+ audio_clock->lpVtbl->GetPosition(audio_clock, &pos, 0);
+ return (pos * 1000000000ULL) / AUDIO_SAMPLE_RATE;
+}
+
+#else
+// PipeWire backend
+#include <spa/clock/clock.h>
+extern struct spa_clock *audio_clock;
+
+uint64_t get_audio_playback_time_ns(void) {
+ struct spa_clock_info info;
+ audio_clock->get_time(audio_clock, &info);
+ return info.nsec;
+}
+#endif
+
+#endif
diff --git a/base/base.c b/base/base.c
index 00b73ae..c17ab47 100644
--- a/base/base.c
+++ b/base/base.c
@@ -60,7 +60,7 @@ int main(int argc, char **argv) {
state.toggle_crt_emulation = true;
mkfw_init(320*5, 240*5);
mkfw_set_swapinterval(0);
- mkfw_set_window_min_size_and_aspect(320*5, 240*5, 320, 240);
+ mkfw_set_window_min_size_and_aspect(320*3, 240*3, 320, 240);
mkfw_set_key_callback(key_callback);
mkfw_set_mouse_move_delta_callback(mouse_move_callback);
mkfw_set_mouse_button_callback(mouse_button_callback);
@@ -74,7 +74,7 @@ int main(int argc, char **argv) {
init_callback();
audio_initialize();
- set_decay(20);
+ set_decay(30);
bool running = true;
uint64_t next_update = mkfw_gettime() + FRAMETIME;
diff --git a/base/shader.h b/base/shader.h
index a9cb400..df838df 100644
--- a/base/shader.h
+++ b/base/shader.h
@@ -8,10 +8,10 @@
#define CRTS_WARP 1
//--------------------------------------------------------------
// Try different masks
-// #define CRTS_MASK_GRILLE 1
+#define CRTS_MASK_GRILLE 1
// #define CRTS_MASK_GRILLE_LITE 1
// #define CRTS_MASK_NONE 1
-#define CRTS_MASK_SHADOW 1
+// #define CRTS_MASK_SHADOW 1
// --------------------------------------------------------------
// Scanline thinness
// 0.50 = fused scanlines
diff --git a/build.sh b/build.sh
index 60de1d1..348700d 100755
--- a/build.sh
+++ b/build.sh
@@ -4,13 +4,14 @@
PROJECT_NAME="mknes" # Change this for each new project
# Base configuration common to all builds
-CFLAGS="-std=gnu11 "
+CFLAGS="-std=gnu++23 "
CFLAGS+="-mavx2 -mbmi2 -mtune=native -mfunction-return=keep -mindirect-branch=keep "
CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fwhole-program "
CFLAGS+="-fno-stack-protector -fno-PIE -no-pie -fno-strict-aliasing -ffunction-sections -fdata-sections "
+CFLAGS+="-fno-exceptions -fno-rtti -fno-use-cxa-atexit -fno-non-call-exceptions "
CFLAGS+="-Wall -Wextra "
CFLAGS+="-Wno-unused-parameter -Wno-sign-compare -Wno-trigraphs -Wno-maybe-uninitialized "
-CFLAGS+="-Wno-unused-variable -Wno-unused-const-variable -Wno-unused-function "
+CFLAGS+="-Wno-unused-variable -Wno-unused-const-variable -Wno-unused-function -Wno-write-strings "
LDFLAGS="-Wl,--gc-sections "
@@ -19,7 +20,7 @@ INCLUDE_PATHS="-Ibase -I.."
# Linux-specific includes and libraries
LINUX_INCLUDE="-I/usr/include/pipewire-0.3 -I/usr/include/spa-0.2"
-LINUX_LIBS="-lpipewire-0.3 -lXi -lX11 -lGL -lm -ldl -pthread"
+LINUX_LIBS="-lpipewire-0.3 -lXi -lX11 -lGL -lm -ldl -pthread -lglfw"
# Windows-specific includes and libraries
# WINDOWS_INCLUDE=""
@@ -56,8 +57,8 @@ esac
#env -C org_assets ../../bin/mks_time ./process.sh
# Make sure the <basecode> shaders are up to date if you are experimenting with them.
-#env -C ../base ../bin/shader2h 140 vertex_shader vertex_shader.glsl
-#env -C ../base ../bin/shader2h 140 fragment_shader shader.h fragment_shader.glsl
+shader2h 330 vertex_shader vertex_shader.glsl
+shader2h 330 fragment_shader shader.h fragment_shader.glsl
# Make sure the compilation stops if any error happens.
set -e
@@ -65,7 +66,7 @@ set -e
# Build Linux version
(
# ../bin/ctime -begin .${PROJECT_NAME}_linux
- gcc $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME} $INCLUDE_PATHS $LINUX_INCLUDE $LDFLAGS $LINUX_LIBS
+ g++ $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME} $INCLUDE_PATHS $LINUX_INCLUDE $LDFLAGS $LINUX_LIBS
# ../bin/ctime -end .${PROJECT_NAME}_linux $?
) &
diff --git a/callbacks.c b/callbacks.c
new file mode 100644
index 0000000..806fe86
--- /dev/null
+++ b/callbacks.c
@@ -0,0 +1,78 @@
+
+
+static void framebuffer_callback(GLFWwindow *window, int width, int height) {
+ state.screen_width = width;
+ state.screen_height = height;
+ state.viewport.x = 0;
+ state.viewport.y = 0;
+ state.viewport.w = width;
+ state.viewport.h = height;
+
+ float current_aspect = (float)width / (float)height;
+ float aspect_ratio = 4.f/3.f;
+
+ if(current_aspect > aspect_ratio) {
+ float new_width = height * aspect_ratio;
+ state.viewport.x = (width - new_width) / 2;
+ state.viewport.w = new_width;
+ } else if(current_aspect < aspect_ratio) {
+ float new_height = width / aspect_ratio;
+ state.viewport.y = (height - new_height) / 2;
+ state.viewport.h = new_height;
+ }
+}
+
+static void toggle_fullscreen(bool enable) {
+ static int windowed_x;
+ static int windowed_y;
+ static int windowed_width;
+ static int windowed_height;
+
+ if(enable) { // Save current windowed size and position
+ glfwGetWindowPos(window, &windowed_x, &windowed_y);
+ glfwGetWindowSize(window, &windowed_width, &windowed_height);
+
+ GLFWmonitor *monitor = glfwGetPrimaryMonitor();
+ const GLFWvidmode *mode = glfwGetVideoMode(monitor);
+
+ glfwSetWindowMonitor(window, monitor, 0, 0, mode->width, mode->height, mode->refreshRate);
+
+ } else { // Restore to saved windowed position/size
+ glfwSetWindowMonitor(window, 0, windowed_x, windowed_y, windowed_width, windowed_height, 0); // 0 = don't change refresh rate
+ }
+}
+
+static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) {
+
+ if(key == GLFW_KEY_ESCAPE) {
+ if(action == GLFW_PRESS) {
+ glfwSetWindowShouldClose(window, 1);
+ }
+ }
+
+ if(action == GLFW_RELEASE) {
+ switch(key) {
+ case GLFW_KEY_F12: {
+ state.toggle_crt_emulation = !state.toggle_crt_emulation;
+ } break;
+
+ case GLFW_KEY_F11: {
+ if(!(mods & GLFW_MOD_SHIFT)) {
+ if(state.fullscreen) {
+ toggle_fullscreen(false);
+ state.fullscreen = false;
+ } else {
+ toggle_fullscreen(true);
+ state.fullscreen = true;
+ }
+ } else {
+#ifdef PROFILER
+ state.overlay = !state.overlay;
+#endif
+ }
+ } break;
+
+ default: break;
+ }
+ }
+}
diff --git a/cpu.c b/cpu.c
index b773e57..eb7c837 100644
--- a/cpu.c
+++ b/cpu.c
@@ -95,7 +95,7 @@ static void cpu_tick(struct nes_state *state) {
// if(cpu->pc <= 0x90cc || cpu->pc >= 0x90e6) {
// printf("%5.5d %4.4x: ", line++, cpu->pc);
// opcode = memory_read(state, cpu->pc++);
- // printf("%2.2x a:%2.2x x:%2.2x y:%2.2x p:%2.2x sp:%2.2x cycle: %ld\n", opcode, cpu->a, cpu->x, cpu->y, pack_flags(cpu), cpu->sp, state->cycle);
+ // printf("%2.2x a:%2.2x x:%2.2x y:%2.2x p:%2.2x sp:%2.2x cycle: %ld\n", opcode, cpu->a, cpu->x, cpu->y, pack_flags(cpu), cpu->sp, state->cycles);
// } else {
opcode = memory_read(state, cpu->pc++);
// }
diff --git a/fragment_shader.glsl b/fragment_shader.glsl
new file mode 100644
index 0000000..c21b2de
--- /dev/null
+++ b/fragment_shader.glsl
@@ -0,0 +1,149 @@
+// Specify default precision for fragment shaders
+
+out vec4 outcolor;
+in vec2 frag_texture_coord;
+
+uniform vec2 resolution;
+uniform vec2 src_image_size;
+uniform float brightness;
+uniform vec4 tone_data;
+uniform bool crt_emulation;
+uniform sampler2D iChannel0;
+
+vec3 CrtsFetch(vec2 uv) {
+ const float bias = 0.003333333;
+ return max(texture(iChannel0, uv, -16.0).rgb, vec3(bias));
+}
+
+#define CrtsRcpF1(x) (1.0 / (x))
+#define CrtsSatF1(x) clamp((x), 0.0, 1.0)
+const float PI2 = 6.28318530717958;
+const float HALF = 0.5;
+
+float CrtsMax3F1(float a, float b, float c) {
+ return max(a, max(b, c));
+}
+
+vec3 CrtsMask(vec2 pos, float dark) {
+ #ifdef CRTS_MASK_GRILLE
+ vec3 m = vec3(dark);
+ float x = fract(pos.x * (1.0 / 3.0));
+ m.r = (x < (1.0 / 3.0)) ? 1.0 : dark;
+ m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark;
+ m.b = (x >= (2.0 / 3.0)) ? 1.0 : dark;
+ return m;
+ #endif
+
+ #ifdef CRTS_MASK_GRILLE_LITE
+ vec3 m = vec3(1.0);
+ float x = fract(pos.x * (1.0 / 3.0));
+ m.r = (x < (1.0 / 3.0)) ? dark : 1.0;
+ m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? dark : 1.0;
+ m.b = (x >= (2.0 / 3.0)) ? dark : 1.0;
+ return m;
+ #endif
+
+ #ifdef CRTS_MASK_NONE
+ return vec3(1.0);
+ #endif
+
+ #ifdef CRTS_MASK_SHADOW
+ pos.x += pos.y * 3.0;
+ vec3 m = vec3(dark);
+ float x = fract(pos.x * (1.0 / 6.0));
+ m.r = (x < (1.0 / 3.0)) ? 1.0 : dark;
+ m.g = (x >= (1.0 / 3.0) && x < (2.0 / 3.0)) ? 1.0 : dark;
+ m.b = (x >= (2.0 / 3.0)) ? 1.0 : dark;
+ return m;
+ #endif
+}
+
+vec3 CrtsFilter(vec2 ipos, vec2 inputSizeDivOutputSize, vec2 halfInputSize, vec2 rcpInputSize, vec2 rcpOutputSize, vec2 twoDivOutputSize, float inputHeight, vec2 warp, float thin, float blur, float mask, vec4 tone) {
+ vec2 pos = ipos * twoDivOutputSize - vec2(1.0);
+ pos *= vec2(1.0 + (pos.y * pos.y) * warp.x, 1.0 + (pos.x * pos.x) * warp.y);
+ float vin = 1.0 - ((1.0 - CrtsSatF1(pos.x * pos.x)) * (1.0 - CrtsSatF1(pos.y * pos.y)));
+ vin = CrtsSatF1((-vin) * inputHeight + inputHeight);
+ pos = pos * halfInputSize + halfInputSize;
+
+ float y0 = floor(pos.y - 0.5) + 0.5;
+ float x0 = floor(pos.x - 1.5) + 0.5;
+ vec2 p = vec2(x0 * rcpInputSize.x, y0 * rcpInputSize.y);
+
+ vec3 colA[4], colB[4];
+ for (int i = 0; i < 4; i++) {
+ colA[i] = CrtsFetch(p);
+ p.x += rcpInputSize.x;
+ }
+ p.y += rcpInputSize.y;
+ for (int i = 3; i >= 0; i--) {
+ p.x -= rcpInputSize.x;
+ colB[i] = CrtsFetch(p);
+ }
+
+ float off = pos.y - y0;
+ float scanA = cos(min(HALF, off * thin) * PI2) * HALF + HALF;
+ float scanB = cos(min(HALF, (-off) * thin + thin) * PI2) * HALF + HALF;
+
+ float off0 = pos.x - x0;
+ float pix[4];
+ for (int i = 0; i < 4; i++) {
+ float diff = off0 - float(i);
+ pix[i] = exp2(blur * diff * diff);
+ }
+ float pixT = CrtsRcpF1(pix[0] + pix[1] + pix[2] + pix[3]);
+
+ #ifdef CRTS_WARP
+ pixT *= vin;
+ #endif
+
+ scanA *= pixT;
+ scanB *= pixT;
+
+ vec3 color = (colA[0] * pix[0] + colA[1] * pix[1] + colA[2] * pix[2] + colA[3] * pix[3]) * scanA + (colB[0] * pix[0] + colB[1] * pix[1] + colB[2] * pix[2] + colB[3] * pix[3]) * scanB;
+ color *= CrtsMask(ipos, mask);
+
+ #ifdef CRTS_TONE
+ float peak = max(1.0 / (256.0 * 65536.0), CrtsMax3F1(color.r, color.g, color.b));
+ vec3 ratio = color * CrtsRcpF1(peak);
+ #ifdef CRTS_CONTRAST
+ peak = pow(peak, tone.x);
+ #endif
+ peak = peak * CrtsRcpF1(peak * tone.y + tone.z);
+ #ifdef CRTS_SATURATION
+ ratio = pow(ratio, vec3(tone.w));
+ #endif
+ return ratio * peak;
+ #else
+ return color;
+ #endif
+}
+
+vec3 linearToSRGB(vec3 color) {
+ return pow(color, vec3(1.0 / 2.2));
+}
+
+void main() {
+ vec2 fragCoord = vec2(frag_texture_coord.x, 1.0 - frag_texture_coord.y);
+ if (crt_emulation) {
+ outcolor.rgb = CrtsFilter(
+ fragCoord.xy * resolution,
+ src_image_size / resolution,
+ src_image_size * vec2(0.5),
+ 1.0 / src_image_size,
+ 1.0 / resolution,
+ 2.0 / resolution,
+ src_image_size.y,
+ vec2(1.0 / 24.0, 1.0 / 16.0), // warp value
+ INPUT_THIN,
+ INPUT_BLUR,
+ INPUT_MASK,
+ tone_data
+ );
+
+ outcolor.rgb *= brightness;
+ outcolor = vec4(outcolor.rgb, 1.0); // Keep original color with alpha set to 1.0
+
+ } else {
+ outcolor = texture(iChannel0, fragCoord);
+ }
+}
diff --git a/mapper.c b/mapper.c
index 4fb8346..2e27304 100644
--- a/mapper.c
+++ b/mapper.c
@@ -1,21 +1,21 @@
-#include "mapper_nrom.c"
+#include "mapper_001.c"
// #include "mapper_mmc1.c"
// #include "mapper_uxrom.c"
-#include "mapper_gxrom.c"
+#include "mapper_066.c"
struct mapper_entry {
- int id;
- uint8_t (*read)(struct nes_state *state, uint32_t addr);
- void (*write)(struct nes_state *state, uint32_t addr, uint8_t value);
- void (*tick)(struct nes_state *state);
- void (*init)(struct nes_state *state);
+ int id;
+ uint8_t (*read)(struct nes_state *state, uint32_t addr);
+ void (*write)(struct nes_state *state, uint32_t addr, uint8_t value);
+ void (*tick)(struct nes_state *state);
+ void (*init)(struct nes_state *state);
};
static struct mapper_entry mapper_table[] = {
- { 0, mapper_nrom_read, mapper_nrom_write, mapper_nrom_tick, mapper_nrom_init },
- { 66, mapper_66_read, mapper_66_write, mapper_66_tick, mapper_66_init },
+ { 0, mapper_001_read, mapper_001_write, mapper_001_tick, mapper_001_init },
+ { 66, mapper_066_read, mapper_066_write, mapper_066_tick, mapper_066_init },
// { 1, mapper_mmc1_read, ... }, etc
};
@@ -23,7 +23,6 @@ static void mapper_setup(struct nes_state *state) {
uint32_t mapper = state->ines.mapper;
for(uint32_t i = 0; i < sizeof(mapper_table)/sizeof(mapper_table[0]); i++) {
if(mapper_table[i].id == mapper) {
-printf("%d\n", mapper);
state->mapper.read = mapper_table[i].read;
state->mapper.write = mapper_table[i].write;
state->mapper.tick = mapper_table[i].tick;
@@ -33,10 +32,11 @@ printf("%d\n", mapper);
}
}
+ // NOTE(peter): Not sure how safe this is, but it sure is funny...
printf("Unsupported mapper %d, falling back to NROM\n", mapper);
- state->mapper.read = mapper_nrom_read;
- state->mapper.write = mapper_nrom_write;
- state->mapper.tick = mapper_nrom_tick;
- state->mapper.init = mapper_nrom_init;
+ state->mapper.read = mapper_001_read;
+ state->mapper.write = mapper_001_write;
+ state->mapper.tick = mapper_001_tick;
+ state->mapper.init = mapper_001_init;
state->mapper.init(state);
}
diff --git a/mapper.h b/mapper.h
index eb6c158..f38b000 100644
--- a/mapper.h
+++ b/mapper.h
@@ -1,2 +1,2 @@
-#include "mapper_gxrom.h"
+#include "mapper_066.h"
diff --git a/mapper_001.c b/mapper_001.c
new file mode 100644
index 0000000..8c65367
--- /dev/null
+++ b/mapper_001.c
@@ -0,0 +1,23 @@
+
+
+static void mapper_001_init(struct nes_state *state) {
+ // Nothing to initialize for 001
+}
+
+static uint8_t mapper_001_read(struct nes_state *state, uint32_t addr) {
+ uint32_t prg_size = state->ines.prg_size;
+
+ uint32_t mask = (state->ines.prg_size == 16384) ? 0x3fff : 0x7fff;
+ return state->rom[addr & mask];
+}
+
+static void mapper_001_write(struct nes_state *state, uint32_t addr, uint8_t value) {
+ (void)state;
+ (void)addr;
+ (void)value;
+}
+
+static void mapper_001_tick(struct nes_state *state) {
+ (void)state;
+}
+
diff --git a/mapper_066.c b/mapper_066.c
new file mode 100644
index 0000000..aabf471
--- /dev/null
+++ b/mapper_066.c
@@ -0,0 +1,27 @@
+
+static void mapper_066_init(struct nes_state *state) {
+ state->map.m066.prg_offset = 0;
+ state->map.m066.chr_offset = 0;
+}
+
+static uint8_t mapper_066_read(struct nes_state *state, uint32_t addr) {
+ if(addr >= 0x8000) {
+ uint32_t base = state->map.m066.prg_offset;
+ return state->rom[base + (addr - 0x8000)];
+ }
+ return 0;
+}
+
+static void mapper_066_write(struct nes_state *state, uint32_t addr, uint8_t value) {
+ if(addr >= 0x8000) {
+ uint32_t prg_bank = (value >> 4) & 3;
+ uint32_t chr_bank = (value >> 0) & 3;
+
+ state->map.m066.prg_offset = prg_bank * 0x8000;
+ state->map.m066.chr_offset = chr_bank * 0x2000;
+ }
+}
+
+static void mapper_066_tick(struct nes_state *state) {
+ // No IRQ or timing logic needed
+}
diff --git a/mapper_gxrom.h b/mapper_066.h
index 757ec16..d78f8b9 100644
--- a/mapper_gxrom.h
+++ b/mapper_066.h
@@ -1,5 +1,5 @@
-struct gxrom_mapper {
+struct mapper_066 {
uint32_t prg_offset;
uint32_t chr_offset;
};
diff --git a/mapper_gxrom.c b/mapper_gxrom.c
deleted file mode 100644
index e03f9d2..0000000
--- a/mapper_gxrom.c
+++ /dev/null
@@ -1,27 +0,0 @@
-
-static void mapper_66_init(struct nes_state *state) {
- state->map.gxrom.prg_offset = 0;
- state->map.gxrom.chr_offset = 0;
-}
-
-static uint8_t mapper_66_read(struct nes_state *state, uint32_t addr) {
- if(addr >= 0x8000) {
- uint32_t base = state->map.gxrom.prg_offset;
- return state->rom[base + (addr - 0x8000)];
- }
- return 0;
-}
-
-static void mapper_66_write(struct nes_state *state, uint32_t addr, uint8_t value) {
- if(addr >= 0x8000) {
- uint32_t prg_bank = (value >> 4) & 3;
- uint32_t chr_bank = (value >> 0) & 3;
-
- state->map.gxrom.prg_offset = prg_bank * 0x8000;
- state->map.gxrom.chr_offset = chr_bank * 0x2000;
- }
-}
-
-static void mapper_66_tick(struct nes_state *state) {
- // No IRQ or timing logic needed
-}
diff --git a/mapper_nrom.c b/mapper_nrom.c
deleted file mode 100644
index 7abb89c..0000000
--- a/mapper_nrom.c
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-static void mapper_nrom_init(struct nes_state *state) {
- // Nothing to initialize for NROM
-}
-
-static uint8_t mapper_nrom_read(struct nes_state *state, uint32_t addr) {
- uint32_t prg_size = state->ines.prg_size;
-
- uint32_t mask = (state->ines.prg_size == 16384) ? 0x3fff : 0x7fff;
- return state->rom[addr & mask];
-}
-
-static void mapper_nrom_write(struct nes_state *state, uint32_t addr, uint8_t value) {
- (void)state;
- (void)addr;
- (void)value;
-}
-
-static void mapper_nrom_tick(struct nes_state *state) {
- (void)state;
-}
-
diff --git a/memory.c b/memory.c
index baf10e9..35c1d41 100644
--- a/memory.c
+++ b/memory.c
@@ -2,7 +2,7 @@
static uint8_t memory_read(struct nes_state *restrict state, uint32_t offset) {
- state->cycle++;
+ state->cycles++;
ppu_tick(state); ppu_tick(state); ppu_tick(state);
if(offset > 0xffff) {
@@ -30,17 +30,13 @@ static uint8_t memory_read(struct nes_state *restrict state, uint32_t offset) {
}
static void memory_write(struct nes_state *restrict state, uint32_t offset, uint8_t value) {
- state->cycle++;
+ state->cycles++;
ppu_tick(state); ppu_tick(state); ppu_tick(state);
if(offset > 0xffff) {
printf("%x\n", offset);
}
-// if(offset == 0x0300) {
-// printf("WRITE $0300 = %02x @ PC=%04x\n", value, state->cpu.pc);
-// }
-
if(offset < 0x2000) {
state->ram[offset & 0x07ff] = value;
} else if(offset < 0x4000) {
@@ -81,7 +77,7 @@ static uint8_t memory_read_dma(struct nes_state *restrict state, uint32_t offset
}
static uint8_t memory_read_dummy(struct nes_state *restrict state, uint32_t offset) {
- state->cycle++;
+ state->cycles++;
ppu_tick(state); ppu_tick(state); ppu_tick(state);
if(offset < 0x2000) {
diff --git a/mknes.c b/mknes.c
index d94c868..54da663 100644
--- a/mknes.c
+++ b/mknes.c
@@ -1,6 +1,85 @@
-#include <assert.h>
+#define GL_SILENCE_DEPRECATION
+#define GLFW_INCLUDE_NONE
+#include <GLFW/glfw3.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+#include <string.h>
+#include <immintrin.h>
+
+#define BUFFER_WIDTH 256
+#define BUFFER_HEIGHT 240
+#define WINDOW_WIDTH 320 * 3
+#define WINDOW_HEIGHT 240 * 3
+
+#ifndef restrict
+# if defined(__cplusplus)
+# define restrict __restrict
+# endif
+#endif
+
+
+struct main_state {
+ int32_t filter_override;
+ float filter_frequency;
+
+ uint32_t screen_width;
+ uint32_t screen_height;
+
+ uint32_t texture;
+ uint32_t render_width;
+ uint32_t render_height;
+ uint32_t shader_program;
+
+ float contrast;
+ float saturation;
+ float brightness;
+ float tone_data[4];
+
+ int32_t uniform_resolution;
+ int32_t uniform_src_image_size;
+ int32_t uniform_brightness;
+ int32_t uniform_tone;
+ int32_t uniform_crt_emulation;
+ int32_t uniform_sampler_location;
+
+ uint32_t vao;
+ uint32_t vbo;
+ uint32_t ebo;
+
+ struct {
+ int32_t x;
+ int32_t y;
+ int32_t w;
+ int32_t h;
+ } viewport;
+
+ uint8_t fullscreen;
+ uint8_t toggle_crt_emulation;
+};
+
+struct main_state state;
+
+uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+
+void audio_callback(int16_t *data, size_t frames) {
-#include "base.c"
+}
+
+
+#define FRAME_INTERVAL_NS (1000000000ULL / 60.0988)
+
+static GLFWwindow *window;
+
+#define DEBUG_PRINT printf
+#include "timer.c"
+#include "opengl_loader.c"
+#include "opengl.c"
+#include "render.c"
+#include "audio.c"
+#include "callbacks.c"
#include "mapper.h"
@@ -12,65 +91,25 @@
#include "ines2.c"
#include "mapper.c"
-
-
struct nes_state nstate;
static uint32_t frames;
-static void render_callback(void) {
- clear_buffer();
- while(!nstate.ppu.frame_ready) {
- PROFILE_NAMED("nes emulator");
- cpu_tick(&nstate);
- }
- nstate.ppu.frame_ready = 0;
- frames++;
-
- uint32_t *dst = RENDER_START(0,0);
- uint8_t *src = nstate.ppu.pixels;
- for(uint32_t y = 0; y < 240; ++y) {
- for(uint32_t x = 0; x < 256; ++x) {
- uint8_t val = *src++;
- if(val >= 64) val = 0;
- dst[x] = nes_palette[val];
- }
- dst += BUFFER_WIDTH;
- }
-}
-
-static void shutdown_callback() {
- printf("%d\n", frames);
-}
-
-static void audio_callback(int16_t *buffer, size_t frames) {
-
-}
-
-#include <sys/mman.h>
-#include <unistd.h>
-
-void protect_opcode_lut(void) {
- uintptr_t addr = (uintptr_t)opcode_lut;
- size_t page_size = getpagesize();
- uintptr_t page = addr & ~(page_size - 1);
-
- if(mprotect((void*)page, page_size, PROT_READ) != 0) {
- perror("mprotect");
- abort();
- }
-}
+int main(int argc, char **argv) {
+#ifdef _WIN32
+ timeBeginPeriod(1);
+#endif
-static void init_callback(void) {
+ state.toggle_crt_emulation = 1;
setbuf(stdout, 0);
init_opcode_lut();
init_opcode_ud_lut();
// protect_opcode_lut();
ppu_reset(&nstate);
// ines2_load(&nstate, "data/nrom/10-Yard Fight (USA, Europe).nes");
- // ines2_load(&nstate, "data/nrom/Balloon Fight (USA).nes");
- ines2_load(&nstate, "data/nrom/Excitebike (Japan, USA).nes");
+ ines2_load(&nstate, "data/nrom/Balloon Fight (USA).nes");
+ // ines2_load(&nstate, "data/nrom/Excitebike (Japan, USA).nes");
// ines2_load(&nstate, "data/nrom/Ice Climber (USA, Europe, Korea).nes");
// ines2_load(&nstate, "data/nrom/Kung Fu (Japan, USA).nes");
// ines2_load(&nstate, "data/nrom/Super Mario Bros. (World) (HVC-SM).nes");
@@ -93,82 +132,85 @@ static void init_callback(void) {
uint32_t lo = nstate.mapper.read(&nstate, 0xfffc);
uint32_t hi = nstate.mapper.read(&nstate, 0xfffd);
nstate.cpu.pc = (hi << 8) | lo;
-}
-
-// int main(void) {
-// struct nes_state state = {0};
-
-// init_opcode_lut();
-// init_opcode_ud_lut();
-// state.cpu.sp = 0xfd;
-
-// // FILE *f = fopen("nestest.nes", "rb");
-// // fseek(f, 16, SEEK_SET);
-// // fread(&state.memory[0xc000], 1, 0x4000, f);
-// // fclose(f);
-// // state.cpu.pc = 0xc000;
-
-// FILE *f = fopen("6502_functional_test.bin", "rb");
-// fread(state.memory, 1, 0x10000, f);
-// fclose(f);
-// state.cpu.pc = 0x0400;
-
-// size_t i;
-// for(i = 0; i < 70000000; ++i) {
-// cpu_tick(&state);
-// if(state.cpu.pc == 0x3469) break;
-// }
-// // printf("%lld", i);
-// return 0;
-// }
-
-
-
-
-// bool running = true;
-// uint64_t next_update = mkfw_gettime();
-// int64_t frame_duration_ns = 16666667; // NTSC ~60.0988 Hz
-
-// audio_sync_init(next_update); // Initialize sync base time
-// while(running && !mkfw_should_close()) {
-// mkfw_pump_messages();
-// if(key_pressed(MKS_KEY_ESCAPE)) { running = false; }
-
-// #ifdef PROFILER
-// reset_profiling_data();
-// #endif
-
-// render_callback();
-// apply_phosphor_decay();
-// update_keyboard_state();
-// update_modifier_state();
-// update_mouse_state();
-// state.frame_number++;
-
-// #ifndef PERF_TEST
-// render_frame();
-// #ifdef PROFILER
-// debug_render();
-// #endif
-
-// audio_throttle_emulator(state.frame_number, &frame_duration_ns);
-
-// uint64_t now = mkfw_gettime();
-// int64_t remaining = next_update - now;
-
-// if(remaining > 0) {
-// if(remaining > ONE_MILLISECOND_NS) {
-// mkfw_sleep(remaining - ONE_MILLISECOND_NS);
-// }
-// while(mkfw_gettime() < next_update) { /**/ }
-// } else {
-// next_update = now;
-// }
+ struct timer_handle *timer = timer_new(FRAME_INTERVAL_NS);
+ if(!timer) {
+ fprintf(stderr, "Failed to create timer\n");
+ return 1;
+ }
-// next_update += frame_duration_ns;
+ if(glfwInit()) {
+
+ glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
+ glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
+ glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_COMPAT_PROFILE);
+
+ window = glfwCreateWindow(WINDOW_WIDTH, WINDOW_HEIGHT, "NES Emulator", 0, 0);
+ if(window) {
+ glfwSetWindowAspectRatio(window, 320, 240);
+ glfwSetWindowSizeLimits(window, 320*3, 240*3, GLFW_DONT_CARE, GLFW_DONT_CARE);
+
+ glfwMakeContextCurrent(window);
+ opengl_setup();
+ glfwSetKeyCallback(window, key_callback);
+ glfwSetFramebufferSizeCallback(window, framebuffer_callback);
+ glfwSwapInterval(0);
+
+ framebuffer_callback(window, WINDOW_WIDTH, WINDOW_HEIGHT);
+
+ for(int jid = GLFW_JOYSTICK_1; jid <= GLFW_JOYSTICK_LAST; jid++) {
+ if(glfwJoystickPresent(jid)) {
+ const char *name = glfwGetJoystickName(jid);
+ printf("Joystick %d detected: %s\n", jid, name);
+ break;
+ }
+ }
+
+ set_decay(20);
+
+ timer_start(timer);
+
+ while(!glfwWindowShouldClose(window)) {
+ timer_wait(timer);
+ glfwPollEvents();
+
+// //
+ while(!nstate.ppu.frame_ready) {
+ // PROFILE_NAMED("nes emulator");
+ cpu_tick(&nstate);
+ }
+ nstate.ppu.frame_ready = 0;
+ frames++;
+
+ uint32_t *dst = buffer;
+ uint8_t *src = nstate.ppu.pixels;
+ for(uint32_t y = 0; y < 240; ++y) {
+ for(uint32_t x = 0; x < 256; ++x) {
+ uint8_t val = *src++;
+ if(val >= 64) val = 0;
+ dst[x] = nes_palette[val];
+ }
+ dst += BUFFER_WIDTH;
+ }
+//
+ apply_phosphor_decay();
+ render_frame();
+ glfwSwapBuffers(window);
+ }
+
+ glfwDestroyWindow(window);
+ } else {
+ fprintf(stderr, "Failed to create window\n");
+ }
+ glfwTerminate();
-// mkfw_swap_buffers();
-// #endif
-// }
+ } else {
+ fprintf(stderr, "Failed to initialize GLFW\n");
+ }
+ timer_destroy(timer);
+#ifdef _WIN32
+ timeEndPeriod(1);
+#endif
+ return 0;
+}
diff --git a/mknes.h b/mknes.h
index 90bb58c..be2d946 100644
--- a/mknes.h
+++ b/mknes.h
@@ -64,7 +64,6 @@ struct ppu_state {
uint8_t vram_read_buffer;
uint8_t write_latch;
- uint8_t ciram[0x800];
uint8_t palette[0x20];
uint32_t scanline;
@@ -127,9 +126,7 @@ struct mapper {
};
union mapper_data {
- struct gxrom_mapper gxrom;
- // struct mmc1_mapper mmc1;
- // ... others
+ struct mapper_066 m066;
};
struct nes_state {
@@ -138,32 +135,23 @@ struct nes_state {
struct ppu_state ppu;
struct mapper mapper;
union mapper_data map;
- size_t cycle;
- uint8_t ram[2048];
- uint8_t rom[4 * 1024 * 1024];
- uint8_t chrrom[4 * 1024 * 1024];
+ size_t cycles;
uint8_t irq_pending;
uint8_t nmi_pending;
+ uint8_t ram[0x800];
+ uint8_t sram[0x2000];
+ uint8_t ciram[0x1000]; // NOTE(peter): Originally 0x800 bytes, but extended as it should work for up to fourway, this is optimization, reality is 2kb, but there is no side-effects, so this is fine!
+ uint8_t rom[4 * 1024 * 1024];
+ uint8_t chrrom[4 * 1024 * 1024];
};
static uint32_t nes_palette[64] = {
-// 0x757575ff, 0x271a75ff, 0x3b0072ff, 0x4c0f64ff, 0x400048ff, 0x600027ff, 0x600000ff, 0x500f00ff,
-// 0x783a00ff, 0x755c00ff, 0x406c00ff, 0x504764ff, 0x005468ff, 0x000000ff, 0x000000ff, 0x000000ff,
-// 0xbfbfbfff, 0x273aa7ff, 0x5c14a7ff, 0x7514a7ff, 0x751468ff, 0x982727ff, 0xa03a00ff, 0x986c00ff,
-// 0x888800ff, 0x689800ff, 0x3aa700ff, 0x6c6c6cff, 0x007878ff, 0x000000ff, 0x000000ff, 0x000000ff,
-// 0xffffffff, 0x3ab5ffff, 0x5cb5ffff, 0x9888ffff, 0xa778ffff, 0xc87878ff, 0xf05c00ff, 0xf08800ff,
-// 0xe0a700ff, 0xb8b800ff, 0x88c800ff, 0xcccc68ff, 0x00e0d8ff, 0x000000ff, 0x000000ff, 0x000000ff,
-// 0xffffffff, 0xa7e0ffff, 0xb8d8ffff, 0xc8c8ffff, 0xd8b8ffff, 0xd8a7a7ff, 0xf0d0b8ff, 0xf0d898ff,
-// 0xf0c878ff, 0xd8d878ff, 0xb8e078ff, 0xd0e0b8ff, 0xb8f0f0ff, 0x000000ff, 0x000000ff, 0x000000ff
-// };
-
-0x585858ff, 0x00237cff, 0x0d1099ff, 0x300092ff, 0x4f006cff, 0x600035ff, 0x5c0500ff, 0x461800ff,
-0x272d00ff, 0x093e00ff, 0x004500ff, 0x004106ff, 0x003545ff, 0x000000ff, 0x000000ff, 0x000000ff,
-0xa1a1a1ff, 0x0b53d7ff, 0x3337feff, 0x6621f7ff, 0x9515beff, 0xac166eff, 0xa62721ff, 0x864300ff,
-0x596200ff, 0x2d7a00ff, 0x0c8500ff, 0x007f2aff, 0x006d85ff, 0x000000ff, 0x000000ff, 0x000000ff,
-0xffffffff, 0x51a5feff, 0x8084feff, 0xbc6afeff, 0xf15bfeff, 0xfe5ec4ff, 0xfe7269ff, 0xe19321ff,
-0xadb600ff, 0x79d300ff, 0x51df21ff, 0x3ad974ff, 0x39c3dfff, 0x424242ff, 0x000000ff, 0x000000ff,
-0xffffffff, 0xb5d9feff, 0xcacafeff, 0xe3befeff, 0xf9b8feff, 0xfebae7ff, 0xfec3bcff, 0xf4d199ff,
-0xdee086ff, 0xc6ec87ff, 0xb2f29dff, 0xa7f0c3ff, 0xa8e7f0ff, 0xacacacff, 0x000000ff, 0x000000ff
+ 0x585858ff, 0x00237cff, 0x0d1099ff, 0x300092ff, 0x4f006cff, 0x600035ff, 0x5c0500ff, 0x461800ff,
+ 0x272d00ff, 0x093e00ff, 0x004500ff, 0x004106ff, 0x003545ff, 0x000000ff, 0x000000ff, 0x000000ff,
+ 0xa1a1a1ff, 0x0b53d7ff, 0x3337feff, 0x6621f7ff, 0x9515beff, 0xac166eff, 0xa62721ff, 0x864300ff,
+ 0x596200ff, 0x2d7a00ff, 0x0c8500ff, 0x007f2aff, 0x006d85ff, 0x000000ff, 0x000000ff, 0x000000ff,
+ 0xffffffff, 0x51a5feff, 0x8084feff, 0xbc6afeff, 0xf15bfeff, 0xfe5ec4ff, 0xfe7269ff, 0xe19321ff,
+ 0xadb600ff, 0x79d300ff, 0x51df21ff, 0x3ad974ff, 0x39c3dfff, 0x424242ff, 0x000000ff, 0x000000ff,
+ 0xffffffff, 0xb5d9feff, 0xcacafeff, 0xe3befeff, 0xf9b8feff, 0xfebae7ff, 0xfec3bcff, 0xf4d199ff,
+ 0xdee086ff, 0xc6ec87ff, 0xb2f29dff, 0xa7f0c3ff, 0xa8e7f0ff, 0xacacacff, 0x000000ff, 0x000000ff
};
-
diff --git a/opengl.c b/opengl.c
new file mode 100644
index 0000000..847fc12
--- /dev/null
+++ b/opengl.c
@@ -0,0 +1,129 @@
+
+#include "shader.c"
+#include "shader.h"
+#include "fragment_shader.h"
+#include "vertex_shader.h"
+
+/* [=]===^=[ compile_shader ]==============================================================^===[=] */
+static GLuint compile_shader(GLenum shader_type, const char *shader_source) {
+ GLuint shader = glCreateShader(shader_type);
+ glShaderSource(shader, 1, &shader_source, 0);
+ glCompileShader(shader);
+
+ GLint success;
+ GLchar info_log[512];
+ glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
+ if(!success) {
+ glGetShaderInfoLog(shader, sizeof(info_log), 0, info_log);
+ printf("%s shader compilation failed:\n%s\n", (shader_type == GL_VERTEX_SHADER) ? "Vertex" : "Fragment", info_log);
+ }
+ return shader;
+}
+
+/* [=]===^=[ opengl_setup ]================================================================^===[=] */
+static void opengl_setup(void) {
+ gl_loader();
+
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ glDisable(GL_DEPTH_TEST);
+ glEnable(GL_BLEND);
+ glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+ glDisable(GL_CULL_FACE);
+
+ // Compile shaders
+ GLuint vertex_shader = compile_shader(GL_VERTEX_SHADER, vertex_shader_start);
+ GLuint fragment_shader = compile_shader(GL_FRAGMENT_SHADER, fragment_shader_start);
+
+printf("%d %d\n", vertex_shader, fragment_shader);
+
+ state.shader_program = glCreateProgram();
+ glAttachShader(state.shader_program, vertex_shader);
+ glAttachShader(state.shader_program, fragment_shader);
+ glBindAttribLocation(state.shader_program, 0, "position");
+ glBindAttribLocation(state.shader_program, 1, "texture_coord");
+ glLinkProgram(state.shader_program);
+
+ GLint success;
+ glGetProgramiv(state.shader_program, GL_LINK_STATUS, &success);
+ if(!success) {
+ GLchar log[512];
+ glGetProgramInfoLog(state.shader_program, sizeof(log), 0, log);
+ printf("Shader Linking Failed:\n%s\n", log);
+ }
+
+ glDeleteShader(vertex_shader);
+ glDeleteShader(fragment_shader);
+ glUseProgram(state.shader_program);
+
+ state.contrast = 1.0f;
+ state.saturation = 0.0f;
+ state.brightness = 1.0f;
+ CrtsTone(state.tone_data, state.contrast, state.saturation, INPUT_THIN, INPUT_MASK);
+
+ state.uniform_resolution = glGetUniformLocation(state.shader_program, "resolution");
+ state.uniform_src_image_size = glGetUniformLocation(state.shader_program, "src_image_size");
+ state.uniform_brightness = glGetUniformLocation(state.shader_program, "brightness");
+ state.uniform_tone = glGetUniformLocation(state.shader_program, "tone_data");
+ state.uniform_crt_emulation = glGetUniformLocation(state.shader_program, "crt_emulation");
+ state.uniform_sampler_location = glGetUniformLocation(state.shader_program, "iChannel0");
+
+ glGenVertexArrays(1, &state.vao);
+ glBindVertexArray(state.vao);
+
+ glGenBuffers(1, &state.vbo);
+ glGenBuffers(1, &state.ebo);
+
+ glBindBuffer(GL_ARRAY_BUFFER, state.vbo);
+ const float vertices[] = {
+ -1.0f, -1.0f, 0.0f, 0.0f,
+ 1.0f, -1.0f, 1.0f, 0.0f,
+ 1.0f, 1.0f, 1.0f, 1.0f,
+ -1.0f, 1.0f, 0.0f, 1.0f
+ };
+ glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo);
+ static const unsigned int indices[] = { 0, 1, 2, 2, 3, 0 };
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
+
+ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0);
+ glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float)));
+ glEnableVertexAttribArray(0);
+ glEnableVertexAttribArray(1);
+
+ // Setup texture
+ glDeleteTextures(1, &state.texture);
+ glGenTextures(1, &state.texture);
+ glBindTexture(GL_TEXTURE_2D, state.texture);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, BUFFER_WIDTH, BUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, buffer);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
+}
+
+/* [=]===^=[ render_frame ]=================================================================^===[=] */
+__attribute__((always_inline))
+static inline void render_frame(void) {
+ glClearColor(.0f, 0.f, 0.f, 1.f);
+ glClear(GL_COLOR_BUFFER_BIT);
+
+ glUseProgram(state.shader_program);
+ glBindVertexArray(state.vao);
+ glBindBuffer(GL_ARRAY_BUFFER, state.vbo);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.ebo);
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_2D, state.texture);
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, BUFFER_WIDTH, BUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, display_buffer);
+ glUniform2f(state.uniform_src_image_size, (float)BUFFER_WIDTH, (float)BUFFER_HEIGHT);
+ glUniform2f(state.uniform_resolution, (float)state.viewport.w, (float)state.viewport.h);
+ glUniform1f(state.uniform_brightness, state.brightness);
+ glUniform4f(state.uniform_tone, state.tone_data[0], state.tone_data[1], state.tone_data[2], state.tone_data[3]);
+ glUniform1i(state.uniform_crt_emulation, state.toggle_crt_emulation);
+ glUniform1i(state.uniform_sampler_location, 0);
+ glViewport(state.viewport.x, state.viewport.y, state.viewport.w, state.viewport.h);
+ glEnableVertexAttribArray(0);
+ glEnableVertexAttribArray(1);
+ glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
+}
diff --git a/opengl_loader.c b/opengl_loader.c
new file mode 100644
index 0000000..9914e00
--- /dev/null
+++ b/opengl_loader.c
@@ -0,0 +1,210 @@
+#ifdef _WIN32
+typedef __int64 GLintptr;
+#else
+typedef intptr_t GLintptr;
+#endif
+typedef void GLvoid;
+typedef unsigned char GLboolean;
+typedef unsigned char GLubyte;
+typedef char GLchar;
+
+typedef int GLint;
+typedef int GLsizei;
+
+typedef unsigned int GLenum;
+typedef unsigned int GLuint;
+typedef unsigned int GLbitfield;
+
+typedef float GLfloat;
+typedef double GLdouble;
+
+typedef unsigned long long GLsizeiptr;
+
+#define GL_NO_ERROR 0
+#define GL_INFO_LOG_LENGTH 0x8b84
+#define GL_ZERO 0x0000
+#define GL_ONE 0x0001
+#define GL_ALPHA 0x1906
+#define GL_BLEND 0x0be2
+#define GL_CLAMP_TO_EDGE 0x812f
+#define GL_COLOR_BUFFER_BIT 0x4000
+#define GL_COMPILE_STATUS 0x8b81
+#define GL_DEPTH_TEST 0x0b71
+#define GL_FRAMEBUFFER_SRGB 0x8db9
+#define GL_FRAGMENT_SHADER 0x8b30
+#define GL_LINK_STATUS 0x8b82
+#define GL_MODELVIEW 0x1700
+#define GL_NEAREST 0x2600
+#define GL_ONE_MINUS_SRC_ALPHA 0x0303
+#define GL_PROJECTION 0x1701
+#define GL_QUADS 0x0007
+#define GL_RGBA 0x1908
+#define GL_RGBA8 0x8058
+#define GL_SCISSOR_TEST 0x0c11
+#define GL_SRGB8_ALPHA8 0x8c43
+#define GL_SRC_ALPHA 0x0302
+#define GL_TEXTURE0 0x84c0
+#define GL_TEXTURE_2D 0x0de1
+#define GL_TEXTURE_COORD_ARRAY 0x8078
+#define GL_TEXTURE_MAG_FILTER 0x2800
+#define GL_TEXTURE_MIN_FILTER 0x2801
+#define GL_TEXTURE_WRAP_S 0x2802
+#define GL_TEXTURE_WRAP_T 0x2803
+#define GL_UNSIGNED_BYTE 0x1401
+#define GL_UNSIGNED_INT_8_8_8_8 0x8035
+#define GL_VERTEX_SHADER 0x8b31
+#define GL_ARRAY_BUFFER 0x8892
+#define GL_ELEMENT_ARRAY_BUFFER 0x8893
+#define GL_STATIC_DRAW 0x88e4
+#define GL_FLOAT 0x1406
+#define GL_FALSE 0
+#define GL_TRUE 1
+#define GL_LINEAR 0x2601
+#define GL_FUNC_ADD 0x8006
+#define GL_CULL_FACE 0x0b44
+#define GL_STREAM_DRAW 0x88e0
+#define GL_WRITE_ONLY 0x88b9
+#define GL_TRIANGLES 0x0004
+#define GL_UNSIGNED_INT 0x1405
+#define GL_UNSIGNED_SHORT 0x1403
+#define GL_MULTISAMPLE 0x809d
+#define GL_DYNAMIC_DRAW 0x88e8
+#define GL_RED 0x1903
+#define GL_FRAMEBUFFER_WIDTH 0x9310
+#define GL_FRAMEBUFFER_HEIGHT 0x9311
+#define GL_VIEWPORT 0x0ba2
+#define GL_PIXEL_UNPACK_BUFFER 0x88ec
+#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88ef
+#define GL_VERTEX_ARRAY_BINDING 0x85b5
+#define GL_ARRAY_BUFFER_BINDING 0x8894
+#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895
+#define GL_CURRENT_PROGRAM 0x8b8d
+#define GL_TEXTURE_BINDING_2D 0x8069
+#define GL_LINEAR_MIPMAP_LINEAR 0x2703
+#define GL_SRGB_ALPHA 0x8c43
+#define GL_UNPACK_ROW_LENGTH 0x0cf2
+#define GL_UNPACK_SKIP_PIXELS 0x0cf4
+#define GL_UNPACK_SKIP_ROWS 0x0cf3
+#define GL_TRIANGLE_STRIP 0x0005
+#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8646
+
+#define DECLARE_GL_FUNCTION(Name, ReturnType, ...) typedef ReturnType (*type_##Name)(__VA_ARGS__);
+#define DECLARE_GLOBAL_FUNCTION(Name, ...) type_##Name Name;
+
+#define GL_FUNCTIONS(X) \
+ X(glActiveTexture, void, GLenum texture) \
+ X(glAttachShader, void, GLuint program, GLuint shader) \
+ X(glBindBuffer, void, GLenum target, GLuint buffer) \
+ X(glBindTexture, void, GLenum target, GLuint texture) \
+ X(glBufferData, void, GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage) \
+ X(glClear, void, GLbitfield mask) \
+ X(glClearColor, void, GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) \
+ X(glCompileShader, void, GLuint shader) \
+ X(glCreateProgram, GLuint) \
+ X(glCreateShader, GLuint, GLenum type) \
+ X(glDeleteShader, void, GLuint shader) \
+ X(glDrawElements, void, GLenum mode, GLsizei count, GLenum type, const GLvoid *indices) \
+ X(glEnableVertexAttribArray, void, GLuint index) \
+ X(glGenBuffers, void, GLsizei n, GLuint *buffers) \
+ X(glGenTextures, void, GLsizei n, GLuint *textures) \
+ X(glGetShaderInfoLog, void, GLuint shader, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \
+ X(glGetShaderiv, void, GLuint shader, GLenum pname, GLint *params) \
+ X(glGetUniformLocation, GLint, GLuint program, const GLchar *name) \
+ X(glLinkProgram, void, GLuint program) \
+ X(glShaderSource, void, GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length) \
+ X(glTexImage2D, void, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels) \
+ X(glTexParameteri, void, GLenum target, GLenum pname, GLint param) \
+ X(glTexSubImage2D, void, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels) \
+ X(glUniform1f, void, GLint location, GLfloat v0) \
+ X(glUniform1i, void, GLint location, GLint v0) \
+ X(glUniform2f, void, GLint location, GLfloat v0, GLfloat v1) \
+ X(glUniform4f, void, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3) \
+ X(glUseProgram, void, GLuint program) \
+ X(glVertexAttribPointer, void, GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid *pointer) \
+ X(glViewport, void, GLint x, GLint y, GLsizei width, GLsizei height) \
+ X(glDeleteProgram, void, GLuint program) \
+ X(glDeleteBuffers, void, GLsizei n, const GLuint *buffers) \
+ X(glDeleteTextures, void, GLsizei n, const GLuint *textures) \
+ X(glEnable, void, GLenum cap) \
+ X(glGenerateMipmap, void, GLenum target) \
+ X(glGetProgramiv, void, GLuint program, GLenum pname, GLint *params) \
+ X(glGetAttribLocation, GLint, GLuint program, const GLchar *name) \
+ X(glDetachShader, void, GLuint program, GLuint shader) \
+ X(glUniformMatrix4fv, void, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value) \
+ X(glMapBuffer, void*, GLenum target, GLenum access) \
+ X(glUnmapBuffer, GLboolean, GLenum target) \
+ X(glBlendEquation, void, GLenum mode) \
+ X(glBlendFunc, void, GLenum sfactor, GLenum dfactor) \
+ X(glDisable, void, GLenum cap) \
+ X(glScissor, void, GLint x, GLint y, GLsizei width, GLsizei height) \
+ X(glTexCoord2f, void, GLfloat s, GLfloat t) \
+ X(glVertex2f, void, GLfloat x, GLfloat y) \
+ X(glGetError, GLenum) \
+ X(glGetProgramInfoLog, void, GLuint program, GLsizei maxLength, GLsizei *length, GLchar *infoLog) \
+ X(glGenVertexArrays, void, GLsizei n, GLuint *arrays) \
+ X(glBindVertexArray, void, GLuint array) \
+ X(glDeleteVertexArrays, void, GLsizei n, const GLuint *arrays) \
+ X(glDrawArrays, void, GLenum mode, GLint first, GLsizei count) \
+ X(glBufferSubData, void, GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid *data) \
+ X(glDisableVertexAttribArray, void, GLuint index) \
+ X(glGetIntegerv, void, GLenum pname, GLint *data) \
+ X(glBindAttribLocation, void, GLuint program, GLuint index, const GLchar *name) \
+ X(glGetUniformfv, void, GLuint program, GLint location, GLfloat *params) \
+ X(glPixelStorei, void, GLenum pname, GLint param) \
+ X(glGetVertexAttribiv, void, GLuint index, GLenum pname, GLint *params) \
+ X(glFinish, void)
+
+GL_FUNCTIONS(DECLARE_GL_FUNCTION)
+GL_FUNCTIONS(DECLARE_GLOBAL_FUNCTION)
+
+
+#if defined(_WIN32)
+static void *get_any_gl_address(const char *name) {
+ void *p = (void *)wglGetProcAddress(name);
+ if(!p) {
+ HMODULE module = LoadLibraryA("opengl32.dll");
+ if(module) {
+ p = (void *)GetProcAddress(module, name);
+ }
+ }
+ return p;
+}
+
+#define GetOpenGLFunction(Name, ...) \
+ *(void **)&Name = (void *)get_any_gl_address(#Name); \
+ if(!Name) { \
+ DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \
+ exit(EXIT_FAILURE); \
+ }
+
+#elif defined(__linux__)
+#include <dlfcn.h>
+static void *glXGetProcAddress(const GLubyte *procName) {
+ static void *(*glxGetProcAddress)(const GLubyte *);
+ if(!glxGetProcAddress) {
+ void *libGL = dlopen("libGL.so.1", RTLD_LAZY | RTLD_GLOBAL);
+ if(!libGL) {
+ DEBUG_PRINT("Error: Unable to load libGL.so.1\n");
+ exit(EXIT_FAILURE);
+ }
+ glxGetProcAddress = (void *(*)(const GLubyte *))dlsym(libGL, "glXGetProcAddress");
+ if(!glxGetProcAddress) {
+ DEBUG_PRINT("Error: Unable to find glXGetProcAddress\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+ return glxGetProcAddress(procName);
+}
+
+#define GetOpenGLFunction(Name, ...) \
+ *(void **)&Name = (void *)glXGetProcAddress((const GLubyte *)#Name); \
+ if(!Name) { \
+ DEBUG_PRINT("Failed to load OpenGL function: %s\n", #Name); \
+ exit(EXIT_FAILURE); \
+ }
+#endif
+
+__attribute__((cold, noinline, section(".init_section")))
+static void gl_loader() {
+ GL_FUNCTIONS(GetOpenGLFunction);
+}; \ No newline at end of file
diff --git a/ppu.c b/ppu.c
index 4246e20..2c993c9 100644
--- a/ppu.c
+++ b/ppu.c
@@ -18,7 +18,6 @@ static void ppu_sprite_shift(struct nes_state *state) {
}
}
-
static void ppu_reset(struct nes_state *state) {
struct ppu_state *ppu = &state->ppu;
memset(ppu, 0, sizeof(struct ppu_state));
@@ -44,11 +43,11 @@ static uint32_t ppu_resolve_ciram(struct nes_state *state, uint32_t addr) {
static uint8_t ppu_ciram_read(struct nes_state *state, uint32_t addr) {
- return state->ppu.ciram[ppu_resolve_ciram(state, addr)];
+ return state->ciram[ppu_resolve_ciram(state, addr)];
}
static void ppu_ciram_write(struct nes_state *state, uint32_t addr, uint8_t value) {
- state->ppu.ciram[ppu_resolve_ciram(state, addr)] = value;
+ state->ciram[ppu_resolve_ciram(state, addr)] = value;
}
static void ppu_write_2000(struct nes_state *state, uint8_t value) {
@@ -150,7 +149,41 @@ static uint8_t ppu_read_2007(struct nes_state *state) {
return result;
}
+#if 1
+static void ppu_evaluate_sprites(struct nes_state *state) {
+ struct ppu_state *ppu = &state->ppu;
+ uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
+ uint8_t n = 0;
+
+ uint8_t *src = ppu->oam;
+ uint8_t *dst = ppu->secondary_oam;
+ for(uint8_t i = 0; i < 64; i++) {
+ uint8_t y = src[0];
+ int32_t row = (int32_t)ppu->scanline - y;
+
+ if(row >= 0 && row < sprite_height) {
+ if(n < 8) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst[3] = src[3];
+ ppu->sprite_indexes[n] = i;
+ ppu->sprite_zero_hit_possible |= (i == 0) ? 1 : 0;
+
+ dst += 4;
+ n++;
+ } else {
+ ppu->reg_status |= 0x20;
+ break;
+ }
+ }
+ src += 4;
+ }
+
+ ppu->sprite_count = n;
+}
+#else
static void ppu_evaluate_sprites(struct nes_state *state) {
struct ppu_state *ppu = &state->ppu;
uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
@@ -182,6 +215,7 @@ static void ppu_evaluate_sprites(struct nes_state *state) {
ppu->sprite_count = n;
}
+#endif
static void ppu_fetch_sprite_patterns(struct nes_state *state) {
struct ppu_state *ppu = &state->ppu;
@@ -238,9 +272,9 @@ static void ppu_render_pixel(struct nes_state *state) {
uint32_t x = ppu->dot - 1;
uint32_t y = ppu->scanline;
- if(x >= 256 || y >= 240) {
- return;
- }
+ // if(x >= 256 || y >= 240) {
+ // return;
+ // }
uint32_t bit = 0x8000 >> ppu->fine_x;
@@ -287,11 +321,10 @@ static void ppu_render_pixel(struct nes_state *state) {
final_color = ppu->palette[(bg_palette << 2) | bg_pixel];
}
}
- assert(y*256+x <= 256*240);
ppu->pixels[y * 256 + x] = final_color;
}
-__attribute__((flatten))
+__attribute__((hot, flatten))
static void ppu_tick(struct nes_state *state) {
struct ppu_state *ppu = &state->ppu;
@@ -390,12 +423,6 @@ static void ppu_tick(struct nes_state *state) {
ppu->vram_addr = (ppu->vram_addr & ~0x7be0) | (ppu->temp_addr & 0x7be0);
}
-// if(scanline == 261 && dot >= 280 && dot <= 304) {
-// printf("Scroll Copy (V): SL:%d DOT:%d vram_addr=%04x temp_addr=%04x\n", scanline, dot, ppu->vram_addr, ppu->temp_addr);
-// ppu->vram_addr = (ppu->vram_addr & 0x041f) | (ppu->temp_addr & 0x7be0);
-// }
-
-
if(dot == 257 && scanline < 240) {
ppu_evaluate_sprites(state);
}
@@ -437,26 +464,21 @@ static void ppu_dma_4014(struct nes_state *state, uint8_t page) {
uint32_t base = page << 8;
// Add 1 or 2 idle cycles depending on current CPU cycle
- uint8_t idle_cycles = (state->cycle & 1) ? 1 : 2;
+ uint8_t idle_cycles = (state->cycles & 1) ? 1 : 2;
for(uint8_t i = 0; i < idle_cycles; i++) {
- state->cycle++;
+ state->cycles++;
ppu_tick(state); ppu_tick(state); ppu_tick(state);
}
for(uint32_t i = 0; i < 256; i++) {
uint32_t addr = base + i;
- // First CPU cycle (read, ticks only)
- state->cycle++;
+ state->cycles++;
ppu_tick(state); ppu_tick(state); ppu_tick(state);
-
- // Perform read
uint8_t value = memory_read_dma(state, addr);
- // Second CPU cycle (write)
- ppu_write_2004(state, value);
-
- state->cycle++;
+ state->cycles++;
ppu_tick(state); ppu_tick(state); ppu_tick(state);
+ ppu_write_2004(state, value);
}
} \ No newline at end of file
diff --git a/render.c b/render.c
new file mode 100644
index 0000000..1dc90aa
--- /dev/null
+++ b/render.c
@@ -0,0 +1,47 @@
+
+
+/* [=]===^=[ clear_buffer ]=================================================================^===[=] */
+__attribute__((always_inline, hot))
+static inline void clear_buffer(void) {
+ // PROFILE_FUNCTION();
+ memset(buffer, 0, sizeof(buffer));
+}
+
+/* [=]===^=[ set_decay ]=================================================================^===[=] */
+static uint16_t _old_weight;
+static uint16_t _new_weight;
+static void set_decay(uint16_t old_weight) {
+ _old_weight = old_weight ? (old_weight > 256 ? 256 : old_weight) : 0;
+ _new_weight = 256 - old_weight;
+}
+
+/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */
+__attribute__((always_inline, hot))
+static inline void apply_phosphor_decay(void) {
+ // PROFILE_FUNCTION();
+ __m256i old_weight = _mm256_set1_epi16(_old_weight);
+ __m256i new_weight = _mm256_set1_epi16(_new_weight);
+ __m128i alpha_mask = _mm_set1_epi32(0x000000ff);
+ uint32_t * restrict src = buffer;
+ uint32_t * restrict dst = display_buffer;
+
+ for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) {
+ for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) {
+ _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
+ _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
+
+ __m128i new_pixels = _mm_loadu_si128((__m128i*)&src[x]);
+ __m128i old_pixels = _mm_loadu_si128((__m128i*)&dst[x]);
+
+ __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
+ __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);
+
+ __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
+ blended = _mm256_srli_epi16(blended, 8);
+
+ __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
+ final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
+ _mm_storeu_si128((__m128i*)&dst[x], final_pixels);
+ }
+ }
+}
diff --git a/shader.c b/shader.c
new file mode 100644
index 0000000..a6e16f6
--- /dev/null
+++ b/shader.c
@@ -0,0 +1,44 @@
+#include "shader.h"
+
+//==============================================================
+//
+// CPU CODE
+//
+//==============================================================
+// TONAL CONTROL CONSTANT GENERATION
+//--------------------------------------------------------------
+// Make sure to use same CRTS_MASK_* defines on CPU and GPU!!!!!
+//==============================================================
+/*
+ * dst - Output 4 float array.
+ *
+ * contrast - Increase contrast, ranges from,
+ * 1.0 = no change
+ * 2.0 = very strong contrast (over 2.0 for even more)
+ *
+ * saturation - Increase saturation, ranges from,
+ * 0.0 = no change
+ * 1.0 = increased saturation (over 1.0 for even more)
+ *
+ * thin, mask - Inputs shared between CrtsTone() and CrtsFilter()
+ *
+ */
+static void CrtsTone(float * restrict dst, float contrast, float saturation, float thin, float mask) {
+//--------------------------------------------------------------
+#ifdef CRTS_MASK_NONE
+ mask = 1.0f;
+#endif
+//--------------------------------------------------------------
+#ifdef CRTS_MASK_GRILLE_LITE
+ // Normal R mask is {1.0,mask,mask}
+ // LITE R mask is {mask,1.0,1.0}
+ mask = 0.5f + mask * 0.5f;
+#endif
+//--------------------------------------------------------------
+ float midOut = 0.18f / ((1.5f - thin) * (0.5f * mask + 0.5f));
+ float pMidIn = powf(0.18f, contrast);
+ dst[0] = contrast;
+ dst[1] = ((-pMidIn) + midOut) / ((1.0f - pMidIn) * midOut);
+ dst[2] = ((-pMidIn) * midOut + pMidIn) / (midOut * (-pMidIn) + midOut);
+ dst[3] = contrast + saturation;
+}
diff --git a/shader.h b/shader.h
new file mode 100644
index 0000000..df838df
--- /dev/null
+++ b/shader.h
@@ -0,0 +1,34 @@
+//==============================================================
+// SETUP FOR CRTS
+//==============================================================
+#define CRTS_TONE 1
+#define CRTS_CONTRAST 1
+#define CRTS_SATURATION 1
+//--------------------------------------------------------------
+#define CRTS_WARP 1
+//--------------------------------------------------------------
+// Try different masks
+#define CRTS_MASK_GRILLE 1
+// #define CRTS_MASK_GRILLE_LITE 1
+// #define CRTS_MASK_NONE 1
+// #define CRTS_MASK_SHADOW 1
+// --------------------------------------------------------------
+// Scanline thinness
+// 0.50 = fused scanlines
+// 0.70 = recommended default
+// 1.00 = thinner scanlines (too thin)
+#define INPUT_THIN 0.7
+//--------------------------------------------------------------
+// Horizonal scan blur
+// -3.0 = pixely
+// -2.5 = default
+// -2.0 = smooth
+// -1.0 = too blurry
+#define INPUT_BLUR -2.5
+//--------------------------------------------------------------
+// Shadow mask effect, ranges from,
+// 0.25 = large amount of mask (not recommended, too dark)
+// 0.50 = recommended default
+// 1.00 = no shadow mask
+#define INPUT_MASK 0.5
+
diff --git a/timer.c b/timer.c
new file mode 100644
index 0000000..96eb453
--- /dev/null
+++ b/timer.c
@@ -0,0 +1,149 @@
+
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+struct timer_handle {
+ HANDLE htimer;
+ uint64_t interval_ns;
+ uint64_t qpc_frequency;
+ uint64_t next_deadline;
+ bool started;
+};
+
+static inline uint64_t qpc_now_ns(uint64_t freq) {
+ LARGE_INTEGER qpc;
+ QueryPerformanceCounter(&qpc);
+ return (uint64_t)((qpc.QuadPart * 1000000000ULL) / freq);
+}
+
+struct timer_handle *timer_new(uint64_t interval_ns) {
+ struct timer_handle *t = (struct timer_handle *)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct timer_handle));
+ if(!t) return 0;
+
+ t->htimer = CreateWaitableTimerW(0, TRUE, 0);
+ if(!t->htimer) {
+ HeapFree(GetProcessHeap(), 0, t);
+ return 0;
+ }
+
+ LARGE_INTEGER freq;
+ QueryPerformanceFrequency(&freq);
+ t->qpc_frequency = freq.QuadPart;
+ t->interval_ns = interval_ns;
+ t->started = false;
+
+ return t;
+}
+
+bool timer_start(struct timer_handle *t) {
+ t->next_deadline = qpc_now_ns(t->qpc_frequency) + t->interval_ns;
+ t->started = true;
+ return true;
+}
+
+bool timer_wait(struct timer_handle *t) {
+ if(!t->started) return false;
+
+ uint64_t now = qpc_now_ns(t->qpc_frequency);
+ if(t->next_deadline <= now) {
+ t->next_deadline += t->interval_ns;
+ return true;
+ }
+
+ uint64_t sleep_ns = t->next_deadline - now;
+ if(sleep_ns > 500000) { // > 0.5ms
+ LARGE_INTEGER due;
+ due.QuadPart = -(int64_t)((sleep_ns - 500000) / 100); // 100ns units, negative = relative
+ SetWaitableTimer(t->htimer, &due, 0, 0, 0, 0);
+ WaitForSingleObject(t->htimer, INFINITE);
+ }
+
+ while(qpc_now_ns(t->qpc_frequency) < t->next_deadline) {
+ YieldProcessor(); // pause instruction
+ }
+
+ t->next_deadline += t->interval_ns;
+ return true;
+}
+
+void timer_destroy(struct timer_handle *t) {
+ if(t) {
+ if(t->htimer) CloseHandle(t->htimer);
+ HeapFree(GetProcessHeap(), 0, t);
+ }
+}
+
+
+#else // Linux
+#include <sys/timerfd.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+struct timer_handle {
+ int tfd;
+ int epfd;
+ struct itimerspec spec;
+ int started;
+};
+
+static struct timer_handle *timer_new(uint64_t interval_ns) {
+ struct timer_handle *t = (struct timer_handle *)malloc(sizeof(struct timer_handle));
+ if(!t) return 0;
+
+ t->tfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
+ if(t->tfd < 0) { free(t); return 0; }
+
+ t->epfd = epoll_create1(EPOLL_CLOEXEC);
+ if(t->epfd < 0) {
+ close(t->tfd); free(t); return 0;
+ }
+
+ struct epoll_event ev = { .events = EPOLLIN, .data = { .fd = t->tfd } };
+ epoll_ctl(t->epfd, EPOLL_CTL_ADD, t->tfd, &ev);
+
+ t->spec.it_interval.tv_sec = interval_ns / 1000000000ULL;
+ t->spec.it_interval.tv_nsec = interval_ns % 1000000000ULL;
+ t->spec.it_value.tv_sec = 0;
+ t->spec.it_value.tv_nsec = 0;
+ t->started = 0;
+
+ return t;
+}
+
+static bool timer_start(struct timer_handle *t) {
+ if(t->started) return true;
+
+ t->spec.it_value = t->spec.it_interval;
+ if(timerfd_settime(t->tfd, 0, &t->spec, 0)) return false;
+
+ t->started = 1;
+ return true;
+}
+
+static bool timer_wait(struct timer_handle *t) {
+ if(!t->started) return false;
+
+ struct epoll_event ev;
+ int r = epoll_wait(t->epfd, &ev, 1, -1);
+ if(r < 0) return false;
+ uint64_t expirations;
+ read(t->tfd, &expirations, sizeof(expirations));
+ return true;
+}
+
+static void timer_destroy(struct timer_handle *t) {
+ if(t) {
+ if(t->tfd >= 0) close(t->tfd);
+ if(t->epfd >= 0) close(t->epfd);
+ free(t);
+ }
+}
+
+#endif
diff --git a/vertex_shader.glsl b/vertex_shader.glsl
new file mode 100644
index 0000000..d8f1848
--- /dev/null
+++ b/vertex_shader.glsl
@@ -0,0 +1,9 @@
+in vec2 position;
+in vec2 texture_coord;
+
+out vec2 frag_texture_coord;
+
+void main() {
+ frag_texture_coord = texture_coord;
+ gl_Position = vec4(position, 0.0, 1.0);
+}