diff options
| -rwxr-xr-x | Bench.sh | 9 | ||||
| -rwxr-xr-x | build.sh | 13 | ||||
| -rw-r--r-- | linux_timer.c | 1 | ||||
| -rw-r--r-- | mknes.c | 55 | ||||
| -rw-r--r-- | mknes.h | 82 | ||||
| -rw-r--r-- | mknes_cpu.c | 16 | ||||
| -rw-r--r-- | mknes_cpu_opcodes.c | 1 | ||||
| -rw-r--r-- | mknes_memory.c | 96 | ||||
| -rw-r--r-- | mknes_ppu.c | 149 | ||||
| -rw-r--r-- | win32_timer.c | 35 |
10 files changed, 174 insertions, 283 deletions
@@ -3,6 +3,9 @@ ./build.sh profile ./mknes ./build.sh profile_release -taskset -c 1 chrt -f 99 -- perf stat -C 1 ./mknes -taskset -c 1 chrt -f 99 -- perf stat -C 1 ./mknes -taskset -c 1 chrt -f 99 -- perf stat -C 1 ./mknes +taskset -c 1 chrt -f 99 -- perf stat -- ./mknes +taskset -c 1 chrt -f 99 -- perf stat -- ./mknes +taskset -c 1 chrt -f 99 -- perf stat -- ./mknes +taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes +taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes +taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes @@ -52,17 +52,18 @@ case "$BUILD_TYPE" in # -pg # for gprof ;; "release") - CFLAGS+="-s -Wl,--strip-all -O2 " + # CFLAGS+="-s -Wl,--strip-all -O2 " + CFLAGS+="-g -O2 " ;; "profile") CFLAGS+="-O2 -fprofile-generate -ftest-coverage " ;; "profile_release") # CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use " - CFLAGS+="-O2 -fprofile-use " + CFLAGS+="-g -O2 -fprofile-use " ;; "debug") - CFLAGS+="-g -O0 " + CFLAGS+="-g -O0 -DTIMER_DEBUG " LDFLAGS+="-fno-pie -no-pie " ;; "coverage") @@ -90,8 +91,8 @@ set -e ) & # Build Windows version -# ( -# $WIN_CC $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME}.exe $INCLUDE_PATHS $WINDOWS_INCLUDE $LDFLAGS $WINDOWS_LIBS -# ) & +#( +# $WIN_CC $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME}.exe $INCLUDE_PATHS $WINDOWS_INCLUDE $LDFLAGS $WINDOWS_LIBS +#) & wait diff --git a/linux_timer.c b/linux_timer.c index dce0a5b..9a9d296 100644 --- a/linux_timer.c +++ b/linux_timer.c @@ -81,7 +81,6 @@ static void *timer_thread_func(void *arg) { #ifdef TIMER_DEBUG if(t->last_wait_start.tv_sec) { - int64_t total_frame_time_ns = timespec_diff_ns(&now, &t->last_wait_start); int64_t overshoot_ns = timespec_diff_ns(&now, &t->next_deadline); if(overshoot_ns < 0) overshoot_ns = 0; @@ -44,7 +44,7 @@ static void audio_callback(int16_t *data, size_t frames) { } #define FRAME_INTERVAL_NS (1000000000ULL / 60.0988) -#define DEBUG_PRINT printf +#define DEBUG_PRINT(fmt, ...) printf(fmt, ##__VA_ARGS__) #ifdef _WIN32 #include "win32_timer.c" #else @@ -158,7 +158,6 @@ static uint32_t frames; // debug information // #include "smb_tas.h" // REMOVE ME - // NES core #include "mknes_mapper.h" #include "mknes.h" @@ -172,7 +171,7 @@ static uint32_t frames; // debug information // struct nes_state nstate; -static void framebuffer_callback(int32_t width, int32_t height, float aspect_ratio) { +static void framebuffer_callback(struct mkfw_state *mkfw_window, int32_t width, int32_t height, float aspect_ratio) { // state.screen_width = width; // state.screen_height = height; // state.viewport.x = 0; @@ -254,7 +253,7 @@ int main(int argc, char **argv) { // ines2_load(nstate, "data/0000/raster_demos/RasterTest3.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3a.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3b.NES"); - // ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES"); + ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3d.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3e.NES"); // ines2_load(nstate, "data/0000/NEStress.NES"); @@ -291,7 +290,7 @@ int main(int argc, char **argv) { mapper_setup(nstate); cpu_reset(nstate); -#if 1 +#if 0 for(uint32_t i = 0; i < 0x5000; ++i) { while(!nstate->ppu.frame_ready) { // PROFILE_NAMED("nes emulator"); @@ -300,6 +299,8 @@ int main(int argc, char **argv) { nstate->ppu.frame_ready = 0; frames++; } + + // for(size_t i = 0; i < 256; ++i) { // printf("instr %2.2x: %lld\n", i, instr_count[i]); // } @@ -307,18 +308,18 @@ int main(int argc, char **argv) { #else // WINDOW SETUP - mkfw_init(WINDOW_WIDTH, WINDOW_HEIGHT); - mkfw_set_window_title("mknes"); - mkfw_set_framebuffer_size_callback(framebuffer_callback); - mkfw_set_swapinterval(0); + struct mkfw_state *window = mkfw_init(WINDOW_WIDTH, WINDOW_HEIGHT); + mkfw_set_window_title(window, "mknes"); + mkfw_set_framebuffer_size_callback(window, framebuffer_callback); + mkfw_set_swapinterval(window, 0); gl_loader(); opengl_setup(); // setup_render_targets(); change_resolution(BUFFER_WIDTH, BUFFER_HEIGHT); - mkfw_show_window(); + mkfw_show_window(window); - mkfw_set_window_min_size_and_aspect(WINDOW_WIDTH, WINDOW_HEIGHT, 4.f, 3.f); + mkfw_set_window_min_size_and_aspect(window, WINDOW_WIDTH, WINDOW_HEIGHT, 4.f, 3.f); timer_init(); @@ -326,22 +327,22 @@ int main(int argc, char **argv) { uint8_t running = true; - while(running && !mkfw_should_close()) { - mkfw_pump_messages(); + while(running && !mkfw_should_close(window)) { + mkfw_pump_messages(window); // // Check for ESC key - if(mkfw_is_key_pressed(MKS_KEY_ESCAPE)) running = false; + if(mkfw_is_key_pressed(window, MKS_KEY_ESCAPE)) running = false; // Joypad input uint8_t input = 0; - if(keyboard_state[MKS_KEY_X]) { input |= (1 << 0); } - if(keyboard_state[MKS_KEY_Z]) { input |= (1 << 1); } - if(keyboard_state[MKS_KEY_SPACE]) { input |= (1 << 2); } - if(keyboard_state[MKS_KEY_RETURN]) { input |= (1 << 3); } - if(keyboard_state[MKS_KEY_UP]) { input |= (1 << 4); } - if(keyboard_state[MKS_KEY_DOWN]) { input |= (1 << 5); } - if(keyboard_state[MKS_KEY_LEFT]) { input |= (1 << 6); } - if(keyboard_state[MKS_KEY_RIGHT]) { input |= (1 << 7); } + if(window->keyboard_state[MKS_KEY_X]) { input |= (1 << 0); } + if(window->keyboard_state[MKS_KEY_Z]) { input |= (1 << 1); } + if(window->keyboard_state[MKS_KEY_SPACE]) { input |= (1 << 2); } + if(window->keyboard_state[MKS_KEY_RETURN]) { input |= (1 << 3); } + if(window->keyboard_state[MKS_KEY_UP]) { input |= (1 << 4); } + if(window->keyboard_state[MKS_KEY_DOWN]) { input |= (1 << 5); } + if(window->keyboard_state[MKS_KEY_LEFT]) { input |= (1 << 6); } + if(window->keyboard_state[MKS_KEY_RIGHT]) { input |= (1 << 7); } nstate->ppu.input[0] = input; // Run NES emulation for one frame @@ -373,13 +374,13 @@ int main(int argc, char **argv) { dst += BUFFER_WIDTH; } - mkfw_update_keyboard_state(); - mkfw_update_modifier_state(); - mkfw_update_mouse_state(); + mkfw_update_keyboard_state(window); + mkfw_update_modifier_state(window); + mkfw_update_mouse_state(window); // Render and swap buffers render_frame(); - mkfw_swap_buffers(); + mkfw_swap_buffers(window); timer_wait(timer); } @@ -388,7 +389,7 @@ int main(int argc, char **argv) { timer_destroy(timer); // free_nes_state(&nstate); timer_shutdown(); - mkfw_cleanup(); + mkfw_cleanup(window); #endif return 0; @@ -60,7 +60,7 @@ struct ppu_state { uint8_t position; uint8_t priority; uint8_t palette; - } __attribute__((packed, aligned(64))) sprites[8]; + } __attribute__((packed)) sprites[8] __attribute__((aligned(64))); uint8_t input[2]; // 40 - Controller 1 & 2 uint8_t input_latch[2]; // 42 - Latched inputs after strobe @@ -151,83 +151,3 @@ __attribute__((aligned(4096))) static uint32_t nes_palette[65] = { 0xdee086ff, 0xc6ec87ff, 0xb2f29dff, 0xa7f0c3ff, 0xa8e7f0ff, 0xacacacff, 0x000000ff, 0x000000ff, 0xffffffff // one extra for debug-coloring... }; - -struct remake_state { - struct { int32_t x, y, w, h; } viewport; - int32_t mouse_dx; - int32_t mouse_dy; - - float contrast; - float saturation; - float brightness; - float tone_data[4]; - - // OpenGL Objects - GLuint shader_program; - GLuint persistence_program; - // GLuint upscale_program; - GLuint upscale_warp_program; - GLuint bloom_extract_program; - GLuint bloom_blur_program; - GLuint bloom_warp_program; - GLuint bloom_composite_program; - - GLuint texture; - GLuint persistence_texture; - GLuint persistence_output_texture; - GLuint crt_output_texture; - GLuint bloom_texture; - GLuint bloom_temp_texture; - GLuint bloom_warped_texture; - GLuint upscaled_source_texture; - - GLuint persistence_fbo; - GLuint upscaled_source_fbo; - GLuint crt_fbo; - GLuint bloom_fbo; - GLuint bloom_temp_fbo; - GLuint bloom_warp_fbo; - - GLuint vao; - GLuint vbo; - GLuint ebo; - - // CRT Shader Uniforms - GLuint uniform_resolution; - GLuint uniform_src_image_size; - GLuint uniform_brightness; - GLuint uniform_tone; - GLuint uniform_crt_emulation; - GLuint uniform_apply_mask; - GLuint uniform_sampler_location; - - // Bloom Shader Uniforms - GLuint bloom_uniform_threshold; - GLuint bloom_uniform_sampler; - GLuint blur_uniform_horizontal; - GLuint blur_uniform_sampler; - GLuint composite_uniform_bloom_strength; - GLuint composite_uniform_crt_sampler; - GLuint composite_uniform_bloom_sampler; - - // Bloom settings - float bloom_threshold; - float bloom_strength; - uint32_t bloom_width; - uint32_t bloom_height; - - // Phosphor persistence - float persistence_decay; - - // Rendering & Dynamic Resolution - uint32_t render_width; // The actual remake resolution (e.g., 360) - uint32_t render_height; // The actual remake resolution (e.g., 270) - uint32_t frame_number; - uint8_t running; - uint8_t toggle_crt_emulation; - uint8_t toggle_bloom; - uint8_t fullscreen; - uint8_t viewport_changed; // Flag to signal render thread to recreate FBOs -}; - -// static struct remake_state state; diff --git a/mknes_cpu.c b/mknes_cpu.c index 3c0a3f2..0eccf03 100644 --- a/mknes_cpu.c +++ b/mknes_cpu.c @@ -6,6 +6,7 @@ static inline uint8_t pack_flags(struct cpu_state *cpu) { return (cpu->n << 7) | (cpu->v << 6) | (1 << 5) | (cpu->d << 3) | (cpu->i << 2) | (cpu->z << 1) | cpu->c; } +__attribute__((always_inline)) static inline void unpack_flags(struct cpu_state *cpu, uint8_t value) { cpu->n = (value >> 7) & 1; cpu->v = (value >> 6) & 1; @@ -15,6 +16,8 @@ static inline void unpack_flags(struct cpu_state *cpu, uint8_t value) { cpu->c = value & 1; } + +__attribute__((always_inline)) static inline void update_zn(struct cpu_state *cpu, uint8_t result) { cpu->z = (result == 0); cpu->n = (result & 0x80) != 0; @@ -63,6 +66,7 @@ static inline void do_irq(struct nes_state *state) { cpu->i = 1; } +__attribute__((always_inline)) static inline void check_interrupts(struct nes_state *state) { struct cpu_state * restrict cpu = &state->cpu; @@ -86,7 +90,17 @@ static inline void cpu_reset(struct nes_state *state) { static inline void cpu_tick(struct nes_state *state) { struct cpu_state * restrict cpu = &state->cpu; - check_interrupts(state); + // check_interrupts(state); + if(state->cpu.nmi_pending) { + state->cpu.nmi_pending = 0; + do_nmi(state); + } + if(state->cpu.irq_pending && cpu->i == 0) { + state->cpu.irq_pending = 0; + do_irq(state); + } + + // printf("%4.4x: ", cpu->pc); uint8_t opcode = memory_read(state, cpu->pc++); diff --git a/mknes_cpu_opcodes.c b/mknes_cpu_opcodes.c index 0254b5c..b2fafd5 100644 --- a/mknes_cpu_opcodes.c +++ b/mknes_cpu_opcodes.c @@ -1,6 +1,7 @@ // ADC +__attribute__((hot, always_inline)) static inline void adc(struct cpu_state *cpu, uint8_t value) { #ifdef ENABLE_DECIMAL_MODE if(cpu->d) { diff --git a/mknes_memory.c b/mknes_memory.c index a27fa04..7b74424 100644 --- a/mknes_memory.c +++ b/mknes_memory.c @@ -1,8 +1,9 @@ + __attribute__((hot)) static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) { state->cpu.cycles++; - ppu_tick(state); apu_tick(state); + ppu_tick(state); if(offset <= 0x1fff) { return state->ram[offset & 0x07ff]; @@ -20,11 +21,11 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) { return apu_read4015(state); } - if(offset == 0x4016 || offset == 0x4017) { + if(offset >= 0x4016 && offset <= 0x4017) { uint32_t index = offset & 1; uint8_t value = (state->ppu.input_latch[index] >> state->ppu.input_bit[index]) & 1; state->ppu.input_bit[index]++; - return value | 0x40; // Bit 6 open bus high, bit 7 low + return value | 0x40; // Bit 6 open bus high, bit 7 low } if(offset >= 0x6000 && offset <= 0x7fff) { @@ -34,65 +35,76 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) { return 0; } - -__attribute__((hot)) +__attribute__((always_inline, hot)) static inline uint8_t memory_read_dummy(struct nes_state *state, uint32_t offset) { state->cpu.cycles++; - ppu_tick(state); apu_tick(state); + ppu_tick(state); + + uint8_t result = 0; if(offset >= 0x2000 && offset < 0x4000) { - return ppu_read(state, offset); + result = ppu_read(state, offset); } - return 0; + return result; } - __attribute__((hot, optimize("no-jump-tables"))) static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_t value) { state->cpu.cycles++; - ppu_tick(state); apu_tick(state); + ppu_tick(state); - if(offset <= 0x1fff) { - state->ram[offset & 0x07ff] = value; - } + switch(offset) { + case 0x0000 ... 0x1fff: { + state->ram[offset & 0x07ff] = value; + } break; - if(offset >= 0x2000 && offset <= 0x3fff) { - ppu_write(state, offset, value); - } - if(offset >= 0x4000 && offset <= 0x4017) { - if(offset == 0x4014) { - ppu_dma_4014(state, value); - } - - if(offset == 0x4016) { - uint8_t s = value & 1; - uint8_t prev = state->ppu.input_strobe; - state->ppu.input_strobe = s; - - if(prev == 1 && s == 0) { - state->ppu.input_latch[0] = state->ppu.input[0]; - state->ppu.input_latch[1] = state->ppu.input[1]; - state->ppu.input_bit[0] = 0; - state->ppu.input_bit[1] = 0; + case 0x2000 ... 0x3fff: { + ppu_write(state, offset, value); + } break; + + case 0x4000 ... 0x4017: { + switch(offset) { + case 0x4014: { + ppu_dma_4014(state, value); + } break; + + case 0x4016: { + // joypad strobe + uint8_t s = value & 1; + + // if(s) { + uint8_t prev = state->ppu.input_strobe; + state->ppu.input_strobe = s; + + if(prev == 1 && (s) == 0) { + // state->ppu.input[0] = tas_input[tas_frame_count]; + + state->ppu.input_latch[0] = state->ppu.input[0]; + state->ppu.input_latch[1] = state->ppu.input[1]; + state->ppu.input_bit[0] = 0; + state->ppu.input_bit[1] = 0; + } + // } + } break; + + default: { + apu_write(state, offset, value); + } break; } - } + } break; - if(offset != 0x4014 && offset != 0x4016) { - apu_write(state, offset, value); - } - } + case 0x6000 ... 0x7fff: { + state->mapper_function.prg_ram_write(state, offset, value); + } break; - if(offset >= 0x6000 && offset <= 0x7fff) { - state->mapper_function.prg_ram_write(state, offset, value); - } + case 0x8000 ... 0xffff: { + state->mapper_function.prg_rom_write(state, offset, value); + } break; - if(offset >= 0x8000) { - state->mapper_function.prg_rom_write(state, offset, value); } - } __attribute__((hot, flatten)) diff --git a/mknes_ppu.c b/mknes_ppu.c index 64d6821..92b22fc 100644 --- a/mknes_ppu.c +++ b/mknes_ppu.c @@ -25,7 +25,7 @@ static void ppu_reset(struct nes_state *state) { } __attribute__((hot, flatten)) -static inline void ppu_evaluate_sprites(struct nes_state *state) { +static inline void ppu_evaluate_sprites(struct nes_state *state, uint32_t scanline) { struct ppu_state *restrict ppu = &state->ppu; uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8; uint8_t n = 0; @@ -36,7 +36,7 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) { for(uint8_t i = 0; i < 64; i++, src += 4) { uint8_t y = src[0]; - int32_t row = (int32_t)ppu->scanline - y; + int32_t row = (int32_t)scanline - y; if(row >= 0 && row < sprite_height) { if(n < 8) { @@ -63,7 +63,7 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) { } __attribute__((hot)) -static inline void ppu_fetch_sprite_patterns(struct nes_state *state, uint32_t scanline) { +static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state, uint32_t scanline) { struct ppu_state *restrict ppu = &state->ppu; uint8_t * restrict sec_oam = ppu->secondary_oam; @@ -110,8 +110,8 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state, uint32_t s } -__attribute__((always_inline, hot)) -static inline void ppu_render_pixel(struct nes_state *state, uint32_t x, uint32_t y) { +__attribute__((always_inline, hot, optimize("no-jump-tables"))) +static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t x, uint32_t y) { struct ppu_state *restrict ppu = &state->ppu; uint16_t bit = 0x8000 >> ppu->fine_x; @@ -138,100 +138,42 @@ static inline void ppu_render_pixel(struct nes_state *state, uint32_t x, uint32_ uint8_t bg_pixel = ((p1 << 1) | p0) & bg_mask; uint8_t bg_palette = ((a1 << 1) | a0) & bg_mask; - // Sprite - - if(sp_mask && ppu->sprite_count) { - switch(ppu->sprite_count) { - case 8: { - if(!ppu->sprites[7].position) { - sp_pixel = (((ppu->sprites[7].shift_hi & 0x80) >> 6) | ((ppu->sprites[7].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[7].priority; - sp_palette = ppu->sprites[7].palette; - break; - } - } - } __attribute__((fallthrough)); - - case 7: { - if(!ppu->sprites[6].position) { - sp_pixel = (((ppu->sprites[6].shift_hi & 0x80) >> 6) | ((ppu->sprites[6].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[6].priority; - sp_palette = ppu->sprites[6].palette; - break; - } - } - } __attribute__((fallthrough)); - - case 6: { - if(!ppu->sprites[5].position) { - sp_pixel = (((ppu->sprites[5].shift_hi & 0x80) >> 6) | ((ppu->sprites[5].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[5].priority; - sp_palette = ppu->sprites[5].palette; - break; - } - } - } __attribute__((fallthrough)); - - case 5: { - if(!ppu->sprites[4].position) { - sp_pixel = (((ppu->sprites[4].shift_hi & 0x80) >> 6) | ((ppu->sprites[4].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[4].priority; - sp_palette = ppu->sprites[4].palette; - break; - } - } - } __attribute__((fallthrough)); - - case 4: { - if(!ppu->sprites[3].position) { - sp_pixel = (((ppu->sprites[3].shift_hi & 0x80) >> 6) | ((ppu->sprites[3].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[3].priority; - sp_palette = ppu->sprites[3].palette; - break; - } - } - } __attribute__((fallthrough)); - - case 3: { - if(!ppu->sprites[2].position) { - sp_pixel = (((ppu->sprites[2].shift_hi & 0x80) >> 6) | ((ppu->sprites[2].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[2].priority; - sp_palette = ppu->sprites[2].palette; - break; - } - } - } __attribute__((fallthrough)); - - case 2: { - if(!ppu->sprites[1].position) { - sp_pixel = (((ppu->sprites[1].shift_hi & 0x80) >> 6) | ((ppu->sprites[1].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[1].priority; - sp_palette = ppu->sprites[1].palette; - break; - } - } - } __attribute__((fallthrough)); - - case 1: { - if(!ppu->sprites[0].position) { - sp_pixel = (((ppu->sprites[0].shift_hi & 0x80) >> 6) | ((ppu->sprites[0].shift_lo & 0x80) >> 7)); - if(sp_pixel) { - sp_prio = ppu->sprites[0].priority; - sp_palette = ppu->sprites[0].palette; - sp_zero = ppu->sprite_zero_in_range; // Only slot 0 can be sprite 0 - break; - } - } - } - } + // Sprites +#define SPRITE_STEP(N) do { \ + if(!ppu->sprites[(N)].position) { \ + sp_pixel = (((ppu->sprites[(N)].shift_hi & 0x80) >> 6) | ((ppu->sprites[(N)].shift_lo & 0x80) >> 7)); \ + if(sp_pixel) { \ + sp_prio = ppu->sprites[(N)].priority; \ + sp_palette = ppu->sprites[(N)].palette; \ + if((N) == 0) { \ + sp_zero = ppu->sprite_zero_in_range; \ + } \ + goto sprite_done; \ + } \ + } \ +} while (0) + + if(sp_mask && ppu->sprite_count > 0) { + if(ppu->sprite_count == 1) goto sprite_1; + if(ppu->sprite_count == 2) goto sprite_2; + if(ppu->sprite_count == 3) goto sprite_3; + if(ppu->sprite_count == 4) goto sprite_4; + if(ppu->sprite_count == 5) goto sprite_5; + if(ppu->sprite_count == 6) goto sprite_6; + if(ppu->sprite_count == 7) goto sprite_7; + if(ppu->sprite_count == 8) goto sprite_8; + +sprite_8: SPRITE_STEP(7); +sprite_7: SPRITE_STEP(6); +sprite_6: SPRITE_STEP(5); +sprite_5: SPRITE_STEP(4); +sprite_4: SPRITE_STEP(3); +sprite_3: SPRITE_STEP(2); +sprite_2: SPRITE_STEP(1); +sprite_1: SPRITE_STEP(0); } +sprite_done:; + // Final pixel composition uint8_t bg_index = (bg_palette << 2) + bg_pixel; @@ -266,7 +208,7 @@ static void ppu_tick(struct nes_state *state) { if(scanline <= 239) { if(dot == 65) { - ppu_evaluate_sprites(state); // Evaluate sprites early, sets overflow immediately + ppu_evaluate_sprites(state, scanline); // Evaluate sprites early, sets overflow immediately } if(dot >= 1 && dot <= 256) { @@ -371,7 +313,7 @@ stupid: if(ppu->reg_mask & 0x10) { if(scanline == 261) { if(dot == 65) { - ppu_evaluate_sprites(state); // Evaluate sprites early + ppu_evaluate_sprites(state, scanline); // Evaluate sprites early } if(dot >= 1 && dot <= 256) { @@ -424,6 +366,7 @@ stupid2: if(ppu->reg_mask & 0x10) { ppu->bg_shift_attrib_low <<= 1; ppu->bg_shift_attrib_high <<= 1; + switch(dot % 8) { case 1: { uint32_t nt_addr = 0x2000 | (ppu->vram_addr & 0x0fff); @@ -512,9 +455,7 @@ stupid2: if(ppu->reg_mask & 0x10) { if(state->mapper_function.tick) { state->mapper_function.tick(state); } - - ppu->dot = dot; - ppu->scanline = scanline; - } + ppu->dot = dot; + ppu->scanline = scanline; } diff --git a/win32_timer.c b/win32_timer.c index c89000f..250861b 100644 --- a/win32_timer.c +++ b/win32_timer.c @@ -18,8 +18,6 @@ struct timer_handle { #ifdef TIMER_DEBUG uint64_t last_wait_start_ns; - uint32_t overshoot_log[1000000]; - uint32_t overshoot_index; #endif }; @@ -75,12 +73,19 @@ static DWORD WINAPI timer_thread_func(LPVOID arg) { set_realtime_priority(&t->mmcss_handle); while(t->running) { +#ifdef TIMER_DEBUG + int64_t remaining_after_sleep_ns = -1; +#endif uint64_t now = qpc_now_ns(t->qpc_frequency); if(now < t->next_deadline) { uint64_t diff = t->next_deadline - now; if(diff > SPIN_THRESHOLD_NS) { timer_sleep(diff - SPIN_THRESHOLD_NS); +#ifdef TIMER_DEBUG + now = qpc_now_ns(t->qpc_frequency); + remaining_after_sleep_ns = (int64_t)(t->next_deadline - now); +#endif } while(qpc_now_ns(t->qpc_frequency) < t->next_deadline) { _mm_pause(); @@ -92,9 +97,14 @@ static DWORD WINAPI timer_thread_func(LPVOID arg) { #ifdef TIMER_DEBUG if(t->last_wait_start_ns > 0) { - uint64_t overshoot_ns = (now > t->next_deadline) ? (now - t->next_deadline) : 0; - t->overshoot_log[t->overshoot_index % 1000000] = (uint32_t)overshoot_ns; - t->overshoot_index++; + int64_t overshoot_ns = (int64_t)(now - t->next_deadline); + if(overshoot_ns < 0) overshoot_ns = 0; + + if(remaining_after_sleep_ns >= 0) { + DEBUG_PRINT("[DEBUG] Woke up with %lld ns left. Overshoot: %5lld ns\n", remaining_after_sleep_ns, overshoot_ns); + } else { + DEBUG_PRINT("[DEBUG] No sleep. Overshoot: %lld ns\n", overshoot_ns); + } } t->last_wait_start_ns = now; #endif @@ -131,7 +141,6 @@ static struct timer_handle *timer_new(uint64_t interval_ns) { #ifdef TIMER_DEBUG t->last_wait_start_ns = 0; - t->overshoot_index = 0; #endif t->event = CreateEvent(0, FALSE, FALSE, 0); @@ -147,6 +156,8 @@ static uint32_t timer_wait(struct timer_handle *t) { static void timer_destroy(struct timer_handle *t) { t->running = 0; + + SetEvent(t->event); WaitForSingleObject(t->timer_thread, INFINITE); CloseHandle(t->timer_thread); CloseHandle(t->event); @@ -155,17 +166,5 @@ static void timer_destroy(struct timer_handle *t) { AvRevertMmThreadCharacteristics(t->mmcss_handle); } -#ifdef TIMER_DEBUG - uint32_t threshold = 10000; // 10µs - uint32_t overshoot_count = 0; - for(uint32_t i = 0; i < t->overshoot_index && i < 1000000; i++) { - if(t->overshoot_log[i] >= threshold) { - DEBUG_PRINT("Frame %u: overshoot %u ns\n", i, t->overshoot_log[i]); - overshoot_count++; - } - } - DEBUG_PRINT("Total frames: %u, Overshoots >= %u ns: %u\n", t->overshoot_index, threshold, overshoot_count); -#endif - free(t); } |
