From 1372919415b23a9a596ca7211fd432328b2ac0cc Mon Sep 17 00:00:00 2001 From: Peter Fors Date: Sun, 6 Apr 2025 16:38:53 +0200 Subject: optimized away an if(dot == 256) in the hot path ~5% improvement --- ppu.c | 95 +++++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 40 deletions(-) (limited to 'ppu.c') diff --git a/ppu.c b/ppu.c index da360e0..e9a6894 100644 --- a/ppu.c +++ b/ppu.c @@ -280,29 +280,46 @@ static void ppu_tick(struct nes_state *state) { switch(scanline) { case 0 ... 239: { switch(dot) { - case 1: - __attribute__((fallthrough)); - - case 2 ... 256: // fallthrough: this is 1->256 - ppu_render_pixel(state); - - if(UNLIKELY(dot == 256)) { - if((ppu->vram_addr & 0x7000) != 0x7000) { - ppu->vram_addr += 0x1000; + case 256: { + + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; } else { - ppu->vram_addr &= ~0x7000; - uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; - if(y == 29) { - y = 0; - ppu->vram_addr ^= 0x0800; - } else if(y == 31) { - y = 0; - } else { - y++; - } - ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + y++; } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); } + goto render_and_shift; + } break; + + case 1 ... 255: // fallthrough: this is 1->256 +render_and_shift: ppu_render_pixel(state); + + // if(UNLIKELY(dot == 256)) { + // if((ppu->vram_addr & 0x7000) != 0x7000) { + // ppu->vram_addr += 0x1000; + // } else { + // ppu->vram_addr &= ~0x7000; + // uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + // if(y == 29) { + // y = 0; + // ppu->vram_addr ^= 0x0800; + // } else if(y == 31) { + // y = 0; + // } else { + // y++; + // } + // ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + // } + // } __attribute__((fallthrough)); case 321 ... 336: { // fallthrough: the code below has to run 1->256 + 321->336 @@ -386,30 +403,28 @@ static void ppu_tick(struct nes_state *state) { case 261: { switch(dot) { - case 1: - // Clear vblank, sprite 0 hit, etc. - __attribute__((fallthrough)); - - case 2 ... 256: - if(UNLIKELY(dot == 256)) { - if((ppu->vram_addr & 0x7000) != 0x7000) { - ppu->vram_addr += 0x1000; + case 256: { + + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; } else { - ppu->vram_addr &= ~0x7000; - uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; - if(y == 29) { - y = 0; - ppu->vram_addr ^= 0x0800; - } else if(y == 31) { - y = 0; - } else { - y++; - } - ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + y++; } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); } - + } __attribute__((fallthrough)); + + case 1 ... 255: __attribute__((fallthrough)); + case 321 ... 336: { // Rendering and tile fetch if(ppu->reg_mask & 0x10) { for(uint32_t i = 0; i < ppu->sprite_count; i++) { -- cgit v1.2.3