summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Fors <peter.fors@mindkiller.com> 2025-10-16 04:19:32 +0200
committer Peter Fors <peter.fors@mindkiller.com> 2025-10-16 04:19:32 +0200
commit a4c261c6ee3940099e653a6f448dc952dfd5899f (patch)
tree 7b14cfde56d735259f6e852a6d337228e00db0f5
parent dcaf169691cfbb865241e96a4786af0862424701 (diff)
optimized, but bug with rasterdemos
-rwxr-xr-x Bench.sh 9
-rwxr-xr-x build.sh 13
-rw-r--r-- linux_timer.c 1
-rw-r--r-- mknes.c 55
-rw-r--r-- mknes.h 82
-rw-r--r-- mknes_cpu.c 16
-rw-r--r-- mknes_cpu_opcodes.c 1
-rw-r--r-- mknes_memory.c 96
-rw-r--r-- mknes_ppu.c 149
-rw-r--r-- win32_timer.c 35
10 files changed, 174 insertions, 283 deletions
diff --git a/Bench.sh b/Bench.sh
index 14208cd..4a26ab5 100755
--- a/Bench.sh
+++ b/Bench.sh
@@ -3,6 +3,9 @@
./build.sh profile
./mknes
./build.sh profile_release
-taskset -c 1 chrt -f 99 -- perf stat -C 1 ./mknes
-taskset -c 1 chrt -f 99 -- perf stat -C 1 ./mknes
-taskset -c 1 chrt -f 99 -- perf stat -C 1 ./mknes
+taskset -c 1 chrt -f 99 -- perf stat -- ./mknes
+taskset -c 1 chrt -f 99 -- perf stat -- ./mknes
+taskset -c 1 chrt -f 99 -- perf stat -- ./mknes
+taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes
+taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes
+taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes
diff --git a/build.sh b/build.sh
index b854c3e..b8e4fa9 100755
--- a/build.sh
+++ b/build.sh
@@ -52,17 +52,18 @@ case "$BUILD_TYPE" in
# -pg # for gprof
;;
"release")
- CFLAGS+="-s -Wl,--strip-all -O2 "
+ # CFLAGS+="-s -Wl,--strip-all -O2 "
+ CFLAGS+="-g -O2 "
;;
"profile")
CFLAGS+="-O2 -fprofile-generate -ftest-coverage "
;;
"profile_release")
# CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use "
- CFLAGS+="-O2 -fprofile-use "
+ CFLAGS+="-g -O2 -fprofile-use "
;;
"debug")
- CFLAGS+="-g -O0 "
+ CFLAGS+="-g -O0 -DTIMER_DEBUG "
LDFLAGS+="-fno-pie -no-pie "
;;
"coverage")
@@ -90,8 +91,8 @@ set -e
) &
# Build Windows version
-# (
-# $WIN_CC $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME}.exe $INCLUDE_PATHS $WINDOWS_INCLUDE $LDFLAGS $WINDOWS_LIBS
-# ) &
+#(
+# $WIN_CC $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME}.exe $INCLUDE_PATHS $WINDOWS_INCLUDE $LDFLAGS $WINDOWS_LIBS
+#) &
wait
diff --git a/linux_timer.c b/linux_timer.c
index dce0a5b..9a9d296 100644
--- a/linux_timer.c
+++ b/linux_timer.c
@@ -81,7 +81,6 @@ static void *timer_thread_func(void *arg) {
#ifdef TIMER_DEBUG
if(t->last_wait_start.tv_sec) {
- int64_t total_frame_time_ns = timespec_diff_ns(&now, &t->last_wait_start);
int64_t overshoot_ns = timespec_diff_ns(&now, &t->next_deadline);
if(overshoot_ns < 0) overshoot_ns = 0;
diff --git a/mknes.c b/mknes.c
index 37225ff..a6d224d 100644
--- a/mknes.c
+++ b/mknes.c
@@ -44,7 +44,7 @@ static void audio_callback(int16_t *data, size_t frames) { }
#define FRAME_INTERVAL_NS (1000000000ULL / 60.0988)
-#define DEBUG_PRINT printf
+#define DEBUG_PRINT(fmt, ...) printf(fmt, ##__VA_ARGS__)
#ifdef _WIN32
#include "win32_timer.c"
#else
@@ -158,7 +158,6 @@ static uint32_t frames; // debug information
// #include "smb_tas.h" // REMOVE ME
-
// NES core
#include "mknes_mapper.h"
#include "mknes.h"
@@ -172,7 +171,7 @@ static uint32_t frames; // debug information
// struct nes_state nstate;
-static void framebuffer_callback(int32_t width, int32_t height, float aspect_ratio) {
+static void framebuffer_callback(struct mkfw_state *mkfw_window, int32_t width, int32_t height, float aspect_ratio) {
// state.screen_width = width;
// state.screen_height = height;
// state.viewport.x = 0;
@@ -254,7 +253,7 @@ int main(int argc, char **argv) {
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3a.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3b.NES");
- // ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES");
+ ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3d.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3e.NES");
// ines2_load(nstate, "data/0000/NEStress.NES");
@@ -291,7 +290,7 @@ int main(int argc, char **argv) {
mapper_setup(nstate);
cpu_reset(nstate);
-#if 1
+#if 0
for(uint32_t i = 0; i < 0x5000; ++i) {
while(!nstate->ppu.frame_ready) {
// PROFILE_NAMED("nes emulator");
@@ -300,6 +299,8 @@ int main(int argc, char **argv) {
nstate->ppu.frame_ready = 0;
frames++;
}
+
+
// for(size_t i = 0; i < 256; ++i) {
// printf("instr %2.2x: %lld\n", i, instr_count[i]);
// }
@@ -307,18 +308,18 @@ int main(int argc, char **argv) {
#else
// WINDOW SETUP
- mkfw_init(WINDOW_WIDTH, WINDOW_HEIGHT);
- mkfw_set_window_title("mknes");
- mkfw_set_framebuffer_size_callback(framebuffer_callback);
- mkfw_set_swapinterval(0);
+ struct mkfw_state *window = mkfw_init(WINDOW_WIDTH, WINDOW_HEIGHT);
+ mkfw_set_window_title(window, "mknes");
+ mkfw_set_framebuffer_size_callback(window, framebuffer_callback);
+ mkfw_set_swapinterval(window, 0);
gl_loader();
opengl_setup();
// setup_render_targets();
change_resolution(BUFFER_WIDTH, BUFFER_HEIGHT);
- mkfw_show_window();
+ mkfw_show_window(window);
- mkfw_set_window_min_size_and_aspect(WINDOW_WIDTH, WINDOW_HEIGHT, 4.f, 3.f);
+ mkfw_set_window_min_size_and_aspect(window, WINDOW_WIDTH, WINDOW_HEIGHT, 4.f, 3.f);
timer_init();
@@ -326,22 +327,22 @@ int main(int argc, char **argv) {
uint8_t running = true;
- while(running && !mkfw_should_close()) {
- mkfw_pump_messages();
+ while(running && !mkfw_should_close(window)) {
+ mkfw_pump_messages(window);
// // Check for ESC key
- if(mkfw_is_key_pressed(MKS_KEY_ESCAPE)) running = false;
+ if(mkfw_is_key_pressed(window, MKS_KEY_ESCAPE)) running = false;
// Joypad input
uint8_t input = 0;
- if(keyboard_state[MKS_KEY_X]) { input |= (1 << 0); }
- if(keyboard_state[MKS_KEY_Z]) { input |= (1 << 1); }
- if(keyboard_state[MKS_KEY_SPACE]) { input |= (1 << 2); }
- if(keyboard_state[MKS_KEY_RETURN]) { input |= (1 << 3); }
- if(keyboard_state[MKS_KEY_UP]) { input |= (1 << 4); }
- if(keyboard_state[MKS_KEY_DOWN]) { input |= (1 << 5); }
- if(keyboard_state[MKS_KEY_LEFT]) { input |= (1 << 6); }
- if(keyboard_state[MKS_KEY_RIGHT]) { input |= (1 << 7); }
+ if(window->keyboard_state[MKS_KEY_X]) { input |= (1 << 0); }
+ if(window->keyboard_state[MKS_KEY_Z]) { input |= (1 << 1); }
+ if(window->keyboard_state[MKS_KEY_SPACE]) { input |= (1 << 2); }
+ if(window->keyboard_state[MKS_KEY_RETURN]) { input |= (1 << 3); }
+ if(window->keyboard_state[MKS_KEY_UP]) { input |= (1 << 4); }
+ if(window->keyboard_state[MKS_KEY_DOWN]) { input |= (1 << 5); }
+ if(window->keyboard_state[MKS_KEY_LEFT]) { input |= (1 << 6); }
+ if(window->keyboard_state[MKS_KEY_RIGHT]) { input |= (1 << 7); }
nstate->ppu.input[0] = input;
// Run NES emulation for one frame
@@ -373,13 +374,13 @@ int main(int argc, char **argv) {
dst += BUFFER_WIDTH;
}
- mkfw_update_keyboard_state();
- mkfw_update_modifier_state();
- mkfw_update_mouse_state();
+ mkfw_update_keyboard_state(window);
+ mkfw_update_modifier_state(window);
+ mkfw_update_mouse_state(window);
// Render and swap buffers
render_frame();
- mkfw_swap_buffers();
+ mkfw_swap_buffers(window);
timer_wait(timer);
}
@@ -388,7 +389,7 @@ int main(int argc, char **argv) {
timer_destroy(timer);
// free_nes_state(&nstate);
timer_shutdown();
- mkfw_cleanup();
+ mkfw_cleanup(window);
#endif
return 0;
diff --git a/mknes.h b/mknes.h
index c41d7af..d21a9ad 100644
--- a/mknes.h
+++ b/mknes.h
@@ -60,7 +60,7 @@ struct ppu_state {
uint8_t position;
uint8_t priority;
uint8_t palette;
- } __attribute__((packed, aligned(64))) sprites[8];
+ } __attribute__((packed)) sprites[8] __attribute__((aligned(64)));
uint8_t input[2]; // 40 - Controller 1 & 2
uint8_t input_latch[2]; // 42 - Latched inputs after strobe
@@ -151,83 +151,3 @@ __attribute__((aligned(4096))) static uint32_t nes_palette[65] = {
0xdee086ff, 0xc6ec87ff, 0xb2f29dff, 0xa7f0c3ff, 0xa8e7f0ff, 0xacacacff, 0x000000ff, 0x000000ff,
0xffffffff // one extra for debug-coloring...
};
-
-struct remake_state {
- struct { int32_t x, y, w, h; } viewport;
- int32_t mouse_dx;
- int32_t mouse_dy;
-
- float contrast;
- float saturation;
- float brightness;
- float tone_data[4];
-
- // OpenGL Objects
- GLuint shader_program;
- GLuint persistence_program;
- // GLuint upscale_program;
- GLuint upscale_warp_program;
- GLuint bloom_extract_program;
- GLuint bloom_blur_program;
- GLuint bloom_warp_program;
- GLuint bloom_composite_program;
-
- GLuint texture;
- GLuint persistence_texture;
- GLuint persistence_output_texture;
- GLuint crt_output_texture;
- GLuint bloom_texture;
- GLuint bloom_temp_texture;
- GLuint bloom_warped_texture;
- GLuint upscaled_source_texture;
-
- GLuint persistence_fbo;
- GLuint upscaled_source_fbo;
- GLuint crt_fbo;
- GLuint bloom_fbo;
- GLuint bloom_temp_fbo;
- GLuint bloom_warp_fbo;
-
- GLuint vao;
- GLuint vbo;
- GLuint ebo;
-
- // CRT Shader Uniforms
- GLuint uniform_resolution;
- GLuint uniform_src_image_size;
- GLuint uniform_brightness;
- GLuint uniform_tone;
- GLuint uniform_crt_emulation;
- GLuint uniform_apply_mask;
- GLuint uniform_sampler_location;
-
- // Bloom Shader Uniforms
- GLuint bloom_uniform_threshold;
- GLuint bloom_uniform_sampler;
- GLuint blur_uniform_horizontal;
- GLuint blur_uniform_sampler;
- GLuint composite_uniform_bloom_strength;
- GLuint composite_uniform_crt_sampler;
- GLuint composite_uniform_bloom_sampler;
-
- // Bloom settings
- float bloom_threshold;
- float bloom_strength;
- uint32_t bloom_width;
- uint32_t bloom_height;
-
- // Phosphor persistence
- float persistence_decay;
-
- // Rendering & Dynamic Resolution
- uint32_t render_width; // The actual remake resolution (e.g., 360)
- uint32_t render_height; // The actual remake resolution (e.g., 270)
- uint32_t frame_number;
- uint8_t running;
- uint8_t toggle_crt_emulation;
- uint8_t toggle_bloom;
- uint8_t fullscreen;
- uint8_t viewport_changed; // Flag to signal render thread to recreate FBOs
-};
-
-// static struct remake_state state;
diff --git a/mknes_cpu.c b/mknes_cpu.c
index 3c0a3f2..0eccf03 100644
--- a/mknes_cpu.c
+++ b/mknes_cpu.c
@@ -6,6 +6,7 @@ static inline uint8_t pack_flags(struct cpu_state *cpu) {
return (cpu->n << 7) | (cpu->v << 6) | (1 << 5) | (cpu->d << 3) | (cpu->i << 2) | (cpu->z << 1) | cpu->c;
}
+__attribute__((always_inline))
static inline void unpack_flags(struct cpu_state *cpu, uint8_t value) {
cpu->n = (value >> 7) & 1;
cpu->v = (value >> 6) & 1;
@@ -15,6 +16,8 @@ static inline void unpack_flags(struct cpu_state *cpu, uint8_t value) {
cpu->c = value & 1;
}
+
+__attribute__((always_inline))
static inline void update_zn(struct cpu_state *cpu, uint8_t result) {
cpu->z = (result == 0);
cpu->n = (result & 0x80) != 0;
@@ -63,6 +66,7 @@ static inline void do_irq(struct nes_state *state) {
cpu->i = 1;
}
+__attribute__((always_inline))
static inline void check_interrupts(struct nes_state *state) {
struct cpu_state * restrict cpu = &state->cpu;
@@ -86,7 +90,17 @@ static inline void cpu_reset(struct nes_state *state) {
static inline void cpu_tick(struct nes_state *state) {
struct cpu_state * restrict cpu = &state->cpu;
- check_interrupts(state);
+ // check_interrupts(state);
+ if(state->cpu.nmi_pending) {
+ state->cpu.nmi_pending = 0;
+ do_nmi(state);
+ }
+ if(state->cpu.irq_pending && cpu->i == 0) {
+ state->cpu.irq_pending = 0;
+ do_irq(state);
+ }
+
+
// printf("%4.4x: ", cpu->pc);
uint8_t opcode = memory_read(state, cpu->pc++);
diff --git a/mknes_cpu_opcodes.c b/mknes_cpu_opcodes.c
index 0254b5c..b2fafd5 100644
--- a/mknes_cpu_opcodes.c
+++ b/mknes_cpu_opcodes.c
@@ -1,6 +1,7 @@
// ADC
+__attribute__((hot, always_inline))
static inline void adc(struct cpu_state *cpu, uint8_t value) {
#ifdef ENABLE_DECIMAL_MODE
if(cpu->d) {
diff --git a/mknes_memory.c b/mknes_memory.c
index a27fa04..7b74424 100644
--- a/mknes_memory.c
+++ b/mknes_memory.c
@@ -1,8 +1,9 @@
+
__attribute__((hot))
static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) {
state->cpu.cycles++;
- ppu_tick(state);
apu_tick(state);
+ ppu_tick(state);
if(offset <= 0x1fff) {
return state->ram[offset & 0x07ff];
@@ -20,11 +21,11 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) {
return apu_read4015(state);
}
- if(offset == 0x4016 || offset == 0x4017) {
+ if(offset >= 0x4016 && offset <= 0x4017) {
uint32_t index = offset & 1;
uint8_t value = (state->ppu.input_latch[index] >> state->ppu.input_bit[index]) & 1;
state->ppu.input_bit[index]++;
- return value | 0x40; // Bit 6 open bus high, bit 7 low
+ return value | 0x40; // Bit 6 open bus high, bit 7 low
}
if(offset >= 0x6000 && offset <= 0x7fff) {
@@ -34,65 +35,76 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) {
return 0;
}
-
-__attribute__((hot))
+__attribute__((always_inline, hot))
static inline uint8_t memory_read_dummy(struct nes_state *state, uint32_t offset) {
state->cpu.cycles++;
- ppu_tick(state);
apu_tick(state);
+ ppu_tick(state);
+
+ uint8_t result = 0;
if(offset >= 0x2000 && offset < 0x4000) {
- return ppu_read(state, offset);
+ result = ppu_read(state, offset);
}
- return 0;
+ return result;
}
-
__attribute__((hot, optimize("no-jump-tables")))
static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_t value) {
state->cpu.cycles++;
- ppu_tick(state);
apu_tick(state);
+ ppu_tick(state);
- if(offset <= 0x1fff) {
- state->ram[offset & 0x07ff] = value;
- }
+ switch(offset) {
+ case 0x0000 ... 0x1fff: {
+ state->ram[offset & 0x07ff] = value;
+ } break;
- if(offset >= 0x2000 && offset <= 0x3fff) {
- ppu_write(state, offset, value);
- }
- if(offset >= 0x4000 && offset <= 0x4017) {
- if(offset == 0x4014) {
- ppu_dma_4014(state, value);
- }
-
- if(offset == 0x4016) {
- uint8_t s = value & 1;
- uint8_t prev = state->ppu.input_strobe;
- state->ppu.input_strobe = s;
-
- if(prev == 1 && s == 0) {
- state->ppu.input_latch[0] = state->ppu.input[0];
- state->ppu.input_latch[1] = state->ppu.input[1];
- state->ppu.input_bit[0] = 0;
- state->ppu.input_bit[1] = 0;
+ case 0x2000 ... 0x3fff: {
+ ppu_write(state, offset, value);
+ } break;
+
+ case 0x4000 ... 0x4017: {
+ switch(offset) {
+ case 0x4014: {
+ ppu_dma_4014(state, value);
+ } break;
+
+ case 0x4016: {
+ // joypad strobe
+ uint8_t s = value & 1;
+
+ // if(s) {
+ uint8_t prev = state->ppu.input_strobe;
+ state->ppu.input_strobe = s;
+
+ if(prev == 1 && (s) == 0) {
+ // state->ppu.input[0] = tas_input[tas_frame_count];
+
+ state->ppu.input_latch[0] = state->ppu.input[0];
+ state->ppu.input_latch[1] = state->ppu.input[1];
+ state->ppu.input_bit[0] = 0;
+ state->ppu.input_bit[1] = 0;
+ }
+ // }
+ } break;
+
+ default: {
+ apu_write(state, offset, value);
+ } break;
}
- }
+ } break;
- if(offset != 0x4014 && offset != 0x4016) {
- apu_write(state, offset, value);
- }
- }
+ case 0x6000 ... 0x7fff: {
+ state->mapper_function.prg_ram_write(state, offset, value);
+ } break;
- if(offset >= 0x6000 && offset <= 0x7fff) {
- state->mapper_function.prg_ram_write(state, offset, value);
- }
+ case 0x8000 ... 0xffff: {
+ state->mapper_function.prg_rom_write(state, offset, value);
+ } break;
- if(offset >= 0x8000) {
- state->mapper_function.prg_rom_write(state, offset, value);
}
-
}
__attribute__((hot, flatten))
diff --git a/mknes_ppu.c b/mknes_ppu.c
index 64d6821..92b22fc 100644
--- a/mknes_ppu.c
+++ b/mknes_ppu.c
@@ -25,7 +25,7 @@ static void ppu_reset(struct nes_state *state) {
}
__attribute__((hot, flatten))
-static inline void ppu_evaluate_sprites(struct nes_state *state) {
+static inline void ppu_evaluate_sprites(struct nes_state *state, uint32_t scanline) {
struct ppu_state *restrict ppu = &state->ppu;
uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
uint8_t n = 0;
@@ -36,7 +36,7 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) {
for(uint8_t i = 0; i < 64; i++, src += 4) {
uint8_t y = src[0];
- int32_t row = (int32_t)ppu->scanline - y;
+ int32_t row = (int32_t)scanline - y;
if(row >= 0 && row < sprite_height) {
if(n < 8) {
@@ -63,7 +63,7 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) {
}
__attribute__((hot))
-static inline void ppu_fetch_sprite_patterns(struct nes_state *state, uint32_t scanline) {
+static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state, uint32_t scanline) {
struct ppu_state *restrict ppu = &state->ppu;
uint8_t * restrict sec_oam = ppu->secondary_oam;
@@ -110,8 +110,8 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state, uint32_t s
}
-__attribute__((always_inline, hot))
-static inline void ppu_render_pixel(struct nes_state *state, uint32_t x, uint32_t y) {
+__attribute__((always_inline, hot, optimize("no-jump-tables")))
+static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t x, uint32_t y) {
struct ppu_state *restrict ppu = &state->ppu;
uint16_t bit = 0x8000 >> ppu->fine_x;
@@ -138,100 +138,42 @@ static inline void ppu_render_pixel(struct nes_state *state, uint32_t x, uint32_
uint8_t bg_pixel = ((p1 << 1) | p0) & bg_mask;
uint8_t bg_palette = ((a1 << 1) | a0) & bg_mask;
- // Sprite
-
- if(sp_mask && ppu->sprite_count) {
- switch(ppu->sprite_count) {
- case 8: {
- if(!ppu->sprites[7].position) {
- sp_pixel = (((ppu->sprites[7].shift_hi & 0x80) >> 6) | ((ppu->sprites[7].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[7].priority;
- sp_palette = ppu->sprites[7].palette;
- break;
- }
- }
- } __attribute__((fallthrough));
-
- case 7: {
- if(!ppu->sprites[6].position) {
- sp_pixel = (((ppu->sprites[6].shift_hi & 0x80) >> 6) | ((ppu->sprites[6].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[6].priority;
- sp_palette = ppu->sprites[6].palette;
- break;
- }
- }
- } __attribute__((fallthrough));
-
- case 6: {
- if(!ppu->sprites[5].position) {
- sp_pixel = (((ppu->sprites[5].shift_hi & 0x80) >> 6) | ((ppu->sprites[5].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[5].priority;
- sp_palette = ppu->sprites[5].palette;
- break;
- }
- }
- } __attribute__((fallthrough));
-
- case 5: {
- if(!ppu->sprites[4].position) {
- sp_pixel = (((ppu->sprites[4].shift_hi & 0x80) >> 6) | ((ppu->sprites[4].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[4].priority;
- sp_palette = ppu->sprites[4].palette;
- break;
- }
- }
- } __attribute__((fallthrough));
-
- case 4: {
- if(!ppu->sprites[3].position) {
- sp_pixel = (((ppu->sprites[3].shift_hi & 0x80) >> 6) | ((ppu->sprites[3].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[3].priority;
- sp_palette = ppu->sprites[3].palette;
- break;
- }
- }
- } __attribute__((fallthrough));
-
- case 3: {
- if(!ppu->sprites[2].position) {
- sp_pixel = (((ppu->sprites[2].shift_hi & 0x80) >> 6) | ((ppu->sprites[2].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[2].priority;
- sp_palette = ppu->sprites[2].palette;
- break;
- }
- }
- } __attribute__((fallthrough));
-
- case 2: {
- if(!ppu->sprites[1].position) {
- sp_pixel = (((ppu->sprites[1].shift_hi & 0x80) >> 6) | ((ppu->sprites[1].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[1].priority;
- sp_palette = ppu->sprites[1].palette;
- break;
- }
- }
- } __attribute__((fallthrough));
-
- case 1: {
- if(!ppu->sprites[0].position) {
- sp_pixel = (((ppu->sprites[0].shift_hi & 0x80) >> 6) | ((ppu->sprites[0].shift_lo & 0x80) >> 7));
- if(sp_pixel) {
- sp_prio = ppu->sprites[0].priority;
- sp_palette = ppu->sprites[0].palette;
- sp_zero = ppu->sprite_zero_in_range; // Only slot 0 can be sprite 0
- break;
- }
- }
- }
- }
+ // Sprites
+#define SPRITE_STEP(N) do { \
+ if(!ppu->sprites[(N)].position) { \
+ sp_pixel = (((ppu->sprites[(N)].shift_hi & 0x80) >> 6) | ((ppu->sprites[(N)].shift_lo & 0x80) >> 7)); \
+ if(sp_pixel) { \
+ sp_prio = ppu->sprites[(N)].priority; \
+ sp_palette = ppu->sprites[(N)].palette; \
+ if((N) == 0) { \
+ sp_zero = ppu->sprite_zero_in_range; \
+ } \
+ goto sprite_done; \
+ } \
+ } \
+} while (0)
+
+ if(sp_mask && ppu->sprite_count > 0) {
+ if(ppu->sprite_count == 1) goto sprite_1;
+ if(ppu->sprite_count == 2) goto sprite_2;
+ if(ppu->sprite_count == 3) goto sprite_3;
+ if(ppu->sprite_count == 4) goto sprite_4;
+ if(ppu->sprite_count == 5) goto sprite_5;
+ if(ppu->sprite_count == 6) goto sprite_6;
+ if(ppu->sprite_count == 7) goto sprite_7;
+ if(ppu->sprite_count == 8) goto sprite_8;
+
+sprite_8: SPRITE_STEP(7);
+sprite_7: SPRITE_STEP(6);
+sprite_6: SPRITE_STEP(5);
+sprite_5: SPRITE_STEP(4);
+sprite_4: SPRITE_STEP(3);
+sprite_3: SPRITE_STEP(2);
+sprite_2: SPRITE_STEP(1);
+sprite_1: SPRITE_STEP(0);
}
+sprite_done:;
+
// Final pixel composition
uint8_t bg_index = (bg_palette << 2) + bg_pixel;
@@ -266,7 +208,7 @@ static void ppu_tick(struct nes_state *state) {
if(scanline <= 239) {
if(dot == 65) {
- ppu_evaluate_sprites(state); // Evaluate sprites early, sets overflow immediately
+ ppu_evaluate_sprites(state, scanline); // Evaluate sprites early, sets overflow immediately
}
if(dot >= 1 && dot <= 256) {
@@ -371,7 +313,7 @@ stupid: if(ppu->reg_mask & 0x10) {
if(scanline == 261) {
if(dot == 65) {
- ppu_evaluate_sprites(state); // Evaluate sprites early
+ ppu_evaluate_sprites(state, scanline); // Evaluate sprites early
}
if(dot >= 1 && dot <= 256) {
@@ -424,6 +366,7 @@ stupid2: if(ppu->reg_mask & 0x10) {
ppu->bg_shift_attrib_low <<= 1;
ppu->bg_shift_attrib_high <<= 1;
+
switch(dot % 8) {
case 1: {
uint32_t nt_addr = 0x2000 | (ppu->vram_addr & 0x0fff);
@@ -512,9 +455,7 @@ stupid2: if(ppu->reg_mask & 0x10) {
if(state->mapper_function.tick) {
state->mapper_function.tick(state);
}
-
- ppu->dot = dot;
- ppu->scanline = scanline;
-
}
+ ppu->dot = dot;
+ ppu->scanline = scanline;
}
diff --git a/win32_timer.c b/win32_timer.c
index c89000f..250861b 100644
--- a/win32_timer.c
+++ b/win32_timer.c
@@ -18,8 +18,6 @@ struct timer_handle {
#ifdef TIMER_DEBUG
uint64_t last_wait_start_ns;
- uint32_t overshoot_log[1000000];
- uint32_t overshoot_index;
#endif
};
@@ -75,12 +73,19 @@ static DWORD WINAPI timer_thread_func(LPVOID arg) {
set_realtime_priority(&t->mmcss_handle);
while(t->running) {
+#ifdef TIMER_DEBUG
+ int64_t remaining_after_sleep_ns = -1;
+#endif
uint64_t now = qpc_now_ns(t->qpc_frequency);
if(now < t->next_deadline) {
uint64_t diff = t->next_deadline - now;
if(diff > SPIN_THRESHOLD_NS) {
timer_sleep(diff - SPIN_THRESHOLD_NS);
+#ifdef TIMER_DEBUG
+ now = qpc_now_ns(t->qpc_frequency);
+ remaining_after_sleep_ns = (int64_t)(t->next_deadline - now);
+#endif
}
while(qpc_now_ns(t->qpc_frequency) < t->next_deadline) {
_mm_pause();
@@ -92,9 +97,14 @@ static DWORD WINAPI timer_thread_func(LPVOID arg) {
#ifdef TIMER_DEBUG
if(t->last_wait_start_ns > 0) {
- uint64_t overshoot_ns = (now > t->next_deadline) ? (now - t->next_deadline) : 0;
- t->overshoot_log[t->overshoot_index % 1000000] = (uint32_t)overshoot_ns;
- t->overshoot_index++;
+ int64_t overshoot_ns = (int64_t)(now - t->next_deadline);
+ if(overshoot_ns < 0) overshoot_ns = 0;
+
+ if(remaining_after_sleep_ns >= 0) {
+ DEBUG_PRINT("[DEBUG] Woke up with %lld ns left. Overshoot: %5lld ns\n", remaining_after_sleep_ns, overshoot_ns);
+ } else {
+ DEBUG_PRINT("[DEBUG] No sleep. Overshoot: %lld ns\n", overshoot_ns);
+ }
}
t->last_wait_start_ns = now;
#endif
@@ -131,7 +141,6 @@ static struct timer_handle *timer_new(uint64_t interval_ns) {
#ifdef TIMER_DEBUG
t->last_wait_start_ns = 0;
- t->overshoot_index = 0;
#endif
t->event = CreateEvent(0, FALSE, FALSE, 0);
@@ -147,6 +156,8 @@ static uint32_t timer_wait(struct timer_handle *t) {
static void timer_destroy(struct timer_handle *t) {
t->running = 0;
+
+ SetEvent(t->event);
WaitForSingleObject(t->timer_thread, INFINITE);
CloseHandle(t->timer_thread);
CloseHandle(t->event);
@@ -155,17 +166,5 @@ static void timer_destroy(struct timer_handle *t) {
AvRevertMmThreadCharacteristics(t->mmcss_handle);
}
-#ifdef TIMER_DEBUG
- uint32_t threshold = 10000; // 10µs
- uint32_t overshoot_count = 0;
- for(uint32_t i = 0; i < t->overshoot_index && i < 1000000; i++) {
- if(t->overshoot_log[i] >= threshold) {
- DEBUG_PRINT("Frame %u: overshoot %u ns\n", i, t->overshoot_log[i]);
- overshoot_count++;
- }
- }
- DEBUG_PRINT("Total frames: %u, Overshoots >= %u ns: %u\n", t->overshoot_index, threshold, overshoot_count);
-#endif
-
free(t);
}