diff options
| author | Peter Fors <peter.fors@mindkiller.com> | 2025-04-06 16:38:53 +0200 |
|---|---|---|
| committer | Peter Fors <peter.fors@mindkiller.com> | 2025-04-06 16:38:53 +0200 |
| commit | 1372919415b23a9a596ca7211fd432328b2ac0cc (patch) | |
| tree | 3a4d048606970e8aea670fed0f77486a1cffbbc4 | |
| parent | 39715ca6bf65d2e2dd889cdef4b39d584464d9e7 (diff) | |
Optimized away an `if(dot == 256)` check in the PPU hot path by giving dot 256 its own switch case (~5% improvement).
| -rw-r--r-- | mknes.c | 4 | ||||
| -rw-r--r-- | mknes.h | 4 | ||||
| -rw-r--r-- | ppu.c | 95 |
3 files changed, 59 insertions, 44 deletions
@@ -227,8 +227,8 @@ int main(int argc, char **argv) { // while(!glfwWindowShouldClose(window)) { for(uint32_t i = 0; i < 0x5000; ++ i) { - // timer_wait(timer); - // glfwPollEvents(); + // timer_wait(timer); + // glfwPollEvents(); // // while(!nstate->ppu.frame_ready) { @@ -62,7 +62,7 @@ struct ppu_state { uint8_t sprite_zero_hit_possible; uint8_t sprite_count; -} __attribute__((packed, aligned(64))); +} __attribute__((aligned(64))); struct cpu_state { uint32_t pc; // Program Counter @@ -80,7 +80,7 @@ struct cpu_state { uint8_t c; // Carry Flag // -- uint8_t die; // KIL instruction found! -} __attribute__((packed, aligned(64))); +} __attribute__((aligned(64))); struct ines_state { @@ -280,29 +280,46 @@ static void ppu_tick(struct nes_state *state) { switch(scanline) { case 0 ... 239: { switch(dot) { - case 1: - __attribute__((fallthrough)); - - case 2 ... 256: // fallthrough: this is 1->256 - ppu_render_pixel(state); - - if(UNLIKELY(dot == 256)) { - if((ppu->vram_addr & 0x7000) != 0x7000) { - ppu->vram_addr += 0x1000; + case 256: { + + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; } else { - ppu->vram_addr &= ~0x7000; - uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; - if(y == 29) { - y = 0; - ppu->vram_addr ^= 0x0800; - } else if(y == 31) { - y = 0; - } else { - y++; - } - ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + y++; } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); } + goto render_and_shift; + } break; + + case 1 ... 
255: // fallthrough: this is 1->256 +render_and_shift: ppu_render_pixel(state); + + // if(UNLIKELY(dot == 256)) { + // if((ppu->vram_addr & 0x7000) != 0x7000) { + // ppu->vram_addr += 0x1000; + // } else { + // ppu->vram_addr &= ~0x7000; + // uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + // if(y == 29) { + // y = 0; + // ppu->vram_addr ^= 0x0800; + // } else if(y == 31) { + // y = 0; + // } else { + // y++; + // } + // ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + // } + // } __attribute__((fallthrough)); case 321 ... 336: { // fallthrough: the code below has to run 1->256 + 321->336 @@ -386,30 +403,28 @@ static void ppu_tick(struct nes_state *state) { case 261: { switch(dot) { - case 1: - // Clear vblank, sprite 0 hit, etc. - __attribute__((fallthrough)); - - case 2 ... 256: - if(UNLIKELY(dot == 256)) { - if((ppu->vram_addr & 0x7000) != 0x7000) { - ppu->vram_addr += 0x1000; + case 256: { + + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; } else { - ppu->vram_addr &= ~0x7000; - uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; - if(y == 29) { - y = 0; - ppu->vram_addr ^= 0x0800; - } else if(y == 31) { - y = 0; - } else { - y++; - } - ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + y++; } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); } - + } __attribute__((fallthrough)); + + case 1 ... 255: __attribute__((fallthrough)); + case 321 ... 336: { // Rendering and tile fetch if(ppu->reg_mask & 0x10) { for(uint32_t i = 0; i < ppu->sprite_count; i++) { |
