From 1372919415b23a9a596ca7211fd432328b2ac0cc Mon Sep 17 00:00:00 2001 From: Peter Fors Date: Sun, 6 Apr 2025 16:38:53 +0200 Subject: optimized away an if(dot == 256) in the hot path ~5% improvement --- mknes.c | 4 +-- mknes.h | 4 +-- ppu.c | 95 ++++++++++++++++++++++++++++++++++++++--------------------------- 3 files changed, 59 insertions(+), 44 deletions(-) diff --git a/mknes.c b/mknes.c index 7417bed..51e737e 100644 --- a/mknes.c +++ b/mknes.c @@ -227,8 +227,8 @@ int main(int argc, char **argv) { // while(!glfwWindowShouldClose(window)) { for(uint32_t i = 0; i < 0x5000; ++ i) { - // timer_wait(timer); - // glfwPollEvents(); + // timer_wait(timer); + // glfwPollEvents(); // // while(!nstate->ppu.frame_ready) { diff --git a/mknes.h b/mknes.h index a296fe4..10ae358 100644 --- a/mknes.h +++ b/mknes.h @@ -62,7 +62,7 @@ struct ppu_state { uint8_t sprite_zero_hit_possible; uint8_t sprite_count; -} __attribute__((packed, aligned(64))); +} __attribute__((aligned(64))); struct cpu_state { uint32_t pc; // Program Counter @@ -80,7 +80,7 @@ struct cpu_state { uint8_t c; // Carry Flag // -- uint8_t die; // KIL instruction found! -} __attribute__((packed, aligned(64))); +} __attribute__((aligned(64))); struct ines_state { diff --git a/ppu.c b/ppu.c index da360e0..e9a6894 100644 --- a/ppu.c +++ b/ppu.c @@ -280,29 +280,46 @@ static void ppu_tick(struct nes_state *state) { switch(scanline) { case 0 ... 239: { switch(dot) { - case 1: - __attribute__((fallthrough)); - - case 2 ... 256: // fallthrough: this is 1->256 - ppu_render_pixel(state); - - if(UNLIKELY(dot == 256)) { - if((ppu->vram_addr & 0x7000) != 0x7000) { - ppu->vram_addr += 0x1000; + case 256: { + + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; } else { - ppu->vram_addr &= ~0x7000; - uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; - if(y == 29) { - y = 0; - ppu->vram_addr ^= 0x0800; - } else if(y == 31) { - y = 0; - } else { - y++; - } - ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + y++; } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); } + goto render_and_shift; + } break; + + case 1 ... 255: // fallthrough: this is 1->256 +render_and_shift: ppu_render_pixel(state); + + // if(UNLIKELY(dot == 256)) { + // if((ppu->vram_addr & 0x7000) != 0x7000) { + // ppu->vram_addr += 0x1000; + // } else { + // ppu->vram_addr &= ~0x7000; + // uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + // if(y == 29) { + // y = 0; + // ppu->vram_addr ^= 0x0800; + // } else if(y == 31) { + // y = 0; + // } else { + // y++; + // } + // ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + // } + // } __attribute__((fallthrough)); case 321 ... 336: { // fallthrough: the code below has to run 1->256 + 321->336 @@ -386,30 +403,28 @@ static void ppu_tick(struct nes_state *state) { case 261: { switch(dot) { - case 1: - // Clear vblank, sprite 0 hit, etc. - __attribute__((fallthrough)); - - case 2 ... 256: - if(UNLIKELY(dot == 256)) { - if((ppu->vram_addr & 0x7000) != 0x7000) { - ppu->vram_addr += 0x1000; + case 256: { + + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; } else { - ppu->vram_addr &= ~0x7000; - uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; - if(y == 29) { - y = 0; - ppu->vram_addr ^= 0x0800; - } else if(y == 31) { - y = 0; - } else { - y++; - } - ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + y++; } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); } - + } __attribute__((fallthrough)); + + case 1 ... 255: __attribute__((fallthrough)); + case 321 ... 336: { // Rendering and tile fetch if(ppu->reg_mask & 0x10) { for(uint32_t i = 0; i < ppu->sprite_count; i++) { -- cgit v1.2.3