diff options
| author | Peter Fors <peter.fors@mindkiller.com> | 2025-10-28 22:18:16 +0100 |
|---|---|---|
| committer | Peter Fors <peter.fors@mindkiller.com> | 2025-10-28 22:18:16 +0100 |
| commit | a3087dd6d0938056f7f0e3d89e60f36e56ac27d2 (patch) | |
| tree | 42e230aa90c1a9932bac340439b5b55b9d2294dc | |
| parent | a12149c9aff979949c86cc5a66929f985db4caa4 (diff) | |
reached 2944fps again
| -rw-r--r-- | .gitignore | 2 | ||||
| -rwxr-xr-x | Bench.sh | 2 | ||||
| -rw-r--r-- | mknes.c | 4 | ||||
| -rw-r--r-- | mknes_apu.c | 3 | ||||
| -rw-r--r-- | mknes_memory.c | 14 | ||||
| -rw-r--r-- | mknes_ppu.c | 15 | ||||
| -rw-r--r-- | mknes_ppu_registers.c | 5 |
7 files changed, 18 insertions, 27 deletions
@@ -20,4 +20,4 @@ record.txt _Bench.sh toolchain accuracycoin.asm - +CLAUDE.md @@ -2,7 +2,7 @@ ./build.sh clean ./build.sh profile -./mknes -n 1 &> /dev/null +./mknes -n 1 -f 8192 &> /dev/null ./build.sh profile_release # Run full benchmark @@ -271,7 +271,7 @@ int main(int argc, char **argv) { // ines2_load(nstate, "data/0000/Excitebike (Japan, USA).nes"); // ines2_load(nstate, "data/0000/Ice Climber (USA, Europe, Korea).nes"); // ines2_load(nstate, "data/0000/Kung Fu (Japan, USA).nes"); - ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).nes"); + // ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).nes"); // ines2_load(nstate, "data/Super Mario Bros. (W) (V1.0) [!].nes"); // ines2_load(nstate, "data/Super Mario Bros. (JU) [!].nes"); // ines2_load(nstate, "data/0000/Urban Champion (World).nes"); @@ -293,7 +293,7 @@ int main(int argc, char **argv) { // ines2_load(nstate, "data/0000/Xevious - The Avenger (USA).zip"); // ines2_load(nstate, "data/tv.nes"); - // ines2_load(nstate, "data/Life Force (USA).nes"); // 2002 + ines2_load(nstate, "data/Life Force (USA).nes"); // 2002 // ines2_load(nstate, "data/0003/Flipull - An Exciting Cube Game (Japan) (En).zip"); // ines2_load(nstate, "data/0003/Friday the 13th (USA).zip"); diff --git a/mknes_apu.c b/mknes_apu.c index 027b1e5..f1c27a8 100644 --- a/mknes_apu.c +++ b/mknes_apu.c @@ -1,8 +1,7 @@ -static inline void ppu_tick(struct nes_state *state); +static void ppu_tick(struct nes_state *state); static uint8_t memory_read(struct nes_state *state, uint32_t offset); -static uint8_t memory_read_dma(struct nes_state *state, uint32_t offset); // DMC frequency table (NTSC) static const uint16_t dmc_rate_table[16] = { diff --git a/mknes_memory.c b/mknes_memory.c index 49546b2..57f527e 100644 --- a/mknes_memory.c +++ b/mknes_memory.c @@ -1,11 +1,12 @@ __attribute__((hot)) static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) { + if(offset >= 0x10000) __builtin_unreachable(); + state->cpu.cycles++; apu_tick(state); ppu_tick(state); - if(offset >= 0x10000) __builtin_unreachable(); if(offset <= 0x1fff) { return state->ram[offset & 0x07ff]; @@ -51,13 +52,14 @@ static inline uint8_t memory_read_dummy(struct nes_state *state, uint32_t offset return result; } -__attribute__((hot, optimize("no-jump-tables"))) +__attribute__((hot)) static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_t value) { + if(offset >= 0x10000) __builtin_unreachable(); + state->cpu.cycles++; apu_tick(state); ppu_tick(state); - if(offset >= 0x10000) __builtin_unreachable(); if(offset <= 0x1fff) { state->ram[offset & 0x07ff] = value; @@ -109,9 +111,3 @@ static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_ return; } } - -__attribute__((hot, flatten)) -static inline uint8_t memory_read_dma(struct nes_state *state, uint32_t offset) { - // NOTE(peter): DO NOT tick CPU/PPU/APU — caller handles timing - return state->ram[offset & 0x07ff]; -} diff --git a/mknes_ppu.c b/mknes_ppu.c index 7c2b4ac..6e8e3ae 100644 --- a/mknes_ppu.c +++ b/mknes_ppu.c @@ -164,21 +164,22 @@ no_sprite: uint8_t bg_index = (bg_palette << 2) + bg_pixel; uint8_t sp_index = (sp_palette << 2) + sp_pixel; uint8_t selector = (bg_pixel ? 2 : 0) | (sp_pixel ? 1 : 0); - - // NOTE(peter): It's actually faster to preset case 3 version of palette_index than to start from zero - uint8_t palette_index = (sp_prio) ? bg_index : 0x10 | sp_index; + uint8_t palette_index = 0; switch(selector) { - case 0: { palette_index = 0; } break; + // case 0: { palette_index = 0; } break; case 1: { palette_index = 0x10 | sp_index; } break; case 2: { palette_index = bg_index; } break; - case 3: { ppu->reg_status |= (sp_zero && x < 255) ? PPU_STATUS_SPRITE_ZERO_HIT : 0; } break; // NOTE(peter): Sprite zero hit! + case 3: { + palette_index = (sp_prio) ? bg_index : 0x10 | sp_index; + ppu->reg_status |= (sp_zero && x < 255) ? PPU_STATUS_SPRITE_ZERO_HIT : 0; + } break; // NOTE(peter): Sprite zero hit! } state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x). } -__attribute__((hot, optimize("no-jump-tables", "unroll-loops"))) +__attribute__((noinline, hot, optimize("no-jump-tables", "unroll-loops"))) static void ppu_tick(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; @@ -387,9 +388,7 @@ stupid2: if(reg_mask & PPU_MASK_SHOW_SPRITES) { if(dot >= 280 && dot <= 304) { ppu->vram_addr = (ppu->vram_addr & ~0x7be0) | (ppu->temp_addr & 0x7be0); } - } - } if(dot == 1) { diff --git a/mknes_ppu_registers.c b/mknes_ppu_registers.c index b66f7e5..536bf2a 100644 --- a/mknes_ppu_registers.c +++ b/mknes_ppu_registers.c @@ -126,9 +126,6 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) { return result; } - -static uint8_t memory_read_dma(struct nes_state *state, uint32_t offset); - __attribute__((hot)) static inline void ppu_dma_4014(struct nes_state *state, uint8_t page) { uint32_t base = page << 8; @@ -147,7 +144,7 @@ static inline void ppu_dma_4014(struct nes_state *state, uint8_t page) { state->cpu.cycles++; ppu_tick(state); apu_tick(state); - uint8_t value = memory_read_dma(state, addr); + uint8_t value = state->ram[addr & 0x07ff]; // NOTE(peter): was; memory_read_dma(state, addr); state->cpu.cycles++; ppu_tick(state); |
