From f1bd6a7d2f4ffe3e5263e0254bcf7522ab381264 Mon Sep 17 00:00:00 2001 From: Peter Fors Date: Sat, 5 Apr 2025 08:58:12 +0200 Subject: transform to switch case for ppu_tick() --- ppu.c | 435 ++++++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 321 insertions(+), 114 deletions(-) (limited to 'ppu.c') diff --git a/ppu.c b/ppu.c index aff5166..02de73a 100644 --- a/ppu.c +++ b/ppu.c @@ -17,7 +17,8 @@ static void ppu_reset(struct nes_state *state) { memset(ppu, 0, sizeof(struct ppu_state)); } -static void ppu_write(struct nes_state *state, uint32_t offset, uint8_t value) { +__attribute__((always_inline, hot)) +static inline void ppu_write(struct nes_state *state, uint32_t offset, uint8_t value) { struct ppu_state *ppu = &state->ppu; switch(offset & 7) { @@ -73,7 +74,7 @@ static void ppu_write(struct nes_state *state, uint32_t offset, uint8_t value) { state->mapper.ciram_write(state, addr, value); } else if(addr < 0x4000) { uint32_t pal_addr = addr & 0x1f; - if((pal_addr & 0x13) == 0x10) { + if((pal_addr & 3) == 0) { pal_addr &= ~0x10; } ppu->palette[pal_addr] = value; @@ -83,7 +84,8 @@ static void ppu_write(struct nes_state *state, uint32_t offset, uint8_t value) { } } -static uint8_t ppu_read(struct nes_state *state, uint32_t offset) { +__attribute__((always_inline, hot)) +static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) { struct ppu_state *ppu = &state->ppu; uint8_t result = ppu->open_bus; @@ -122,7 +124,8 @@ static uint8_t ppu_read(struct nes_state *state, uint32_t offset) { return result; } -static void ppu_evaluate_sprites(struct nes_state *state) { +__attribute__((always_inline, hot)) +static inline void ppu_evaluate_sprites(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8; uint8_t n = 0; @@ -155,7 +158,8 @@ static void ppu_evaluate_sprites(struct nes_state *state) { ppu->sprite_count = n; } -static void ppu_fetch_sprite_patterns(struct nes_state *state) { +__attribute__((always_inline, hot)) +static inline void ppu_fetch_sprite_patterns(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; for(uint8_t i = 0; i < ppu->sprite_count; i++) { uint8_t *s = ppu->secondary_oam + i * 4; @@ -194,7 +198,8 @@ static void ppu_fetch_sprite_patterns(struct nes_state *state) { } } -static void ppu_render_pixel(struct nes_state *state) { +__attribute__((always_inline, hot)) +static inline void ppu_render_pixel(struct nes_state *state) { uint8_t bg_pixel = 0; uint8_t bg_palette = 0; uint8_t sp_pixel = 0; @@ -256,172 +261,374 @@ static void ppu_render_pixel(struct nes_state *state) { ppu->pixels[y * 256 + x] = final_color; } + __attribute__((hot, flatten)) static void ppu_tick(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; - uint32_t dot = ppu->dot; - uint32_t scanline = ppu->scanline; + for(uint32_t ppu_loops = 0; ppu_loops < 3; ++ppu_loops) { + + uint32_t dot = ppu->dot; + uint32_t scanline = ppu->scanline; + + uint8_t rendering = (ppu->reg_mask & 0x18); + +#if 1 +if(rendering) { + switch(scanline) { + case 0 ... 239: { + // if(rendering && (dot >= 1 && dot <= 256)) { + // } + + switch(dot) { + case 1 ... 256: + ppu_render_pixel(state); + + + if(dot == 256) { + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; + } else { + y++; + } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + } + } - uint8_t rendering = (ppu->reg_mask & 0x18) != 0; + __attribute__((fallthrough)); + case 321 ... 336: { + + if(rendering && ((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336))) { + if(ppu->reg_mask & 0x10) { + for(uint32_t i = 0; i < ppu->sprite_count; i++) { + if(ppu->sprite_positions[i] > 0) { + ppu->sprite_positions[i]--; + } else { + ppu->sprite_shift_lo[i] <<= 1; + ppu->sprite_shift_hi[i] <<= 1; + } + } + } + + ppu->bg_shift_pattern_low <<= 1; + ppu->bg_shift_pattern_high <<= 1; + ppu->bg_shift_attrib_low <<= 1; + ppu->bg_shift_attrib_high <<= 1; + } - if(rendering && scanline < 240 && dot >= 1 && dot <= 256) { - ppu_render_pixel(state); - } + switch(dot % 8) { + case 1: { + uint32_t nt_addr = 0x2000 | (ppu->vram_addr & 0x0fff); + ppu->bg_next_tile_id = state->mapper.ciram_read(state, nt_addr); + } break; + case 3: { + uint32_t attr_addr = 0x23c0 | (ppu->vram_addr & 0x0c00) | + ((ppu->vram_addr >> 4) & 0x38) | + ((ppu->vram_addr >> 2) & 0x07); + uint8_t attr = state->mapper.ciram_read(state, attr_addr & 0x0fff); + uint8_t shift = ((ppu->vram_addr >> 4) & 4) | (ppu->vram_addr & 2); + ppu->bg_next_tile_attrib = (attr >> shift) & 3; + } break; + case 5: { + uint32_t base = (ppu->reg_ctrl & 0x10) ? 0x1000 : 0x0000; + uint32_t tile = ppu->bg_next_tile_id; + uint32_t fine_y = (ppu->vram_addr >> 12) & 7; + uint32_t addr_lsb = (base + tile * 16 + fine_y) & 0x1fff; + ppu->bg_next_tile_lsb = state->mapper.chr_read(state, addr_lsb); + } break; + case 7: { + uint32_t base = (ppu->reg_ctrl & 0x10) ? 0x1000 : 0x0000; + uint32_t tile = ppu->bg_next_tile_id; + uint32_t fine_y = (ppu->vram_addr >> 12) & 7; + uint32_t addr_msb = (base + tile * 16 + fine_y + 8) & 0x1fff; + ppu->bg_next_tile_msb = state->mapper.chr_read(state, addr_msb); + + } break; + case 0: { + ppu->bg_shift_pattern_low = (ppu->bg_shift_pattern_low & 0xff00) | ppu->bg_next_tile_lsb; + ppu->bg_shift_pattern_high = (ppu->bg_shift_pattern_high & 0xff00) | ppu->bg_next_tile_msb; + + uint8_t a = ppu->bg_next_tile_attrib; + ppu->bg_shift_attrib_low = (ppu->bg_shift_attrib_low & 0xff00) | ((a & 1) ? 0xff : 0x00); + ppu->bg_shift_attrib_high = (ppu->bg_shift_attrib_high & 0xff00) | ((a & 2) ? 0xff : 0x00); + + if((ppu->vram_addr & 0x001f) == 31) { + ppu->vram_addr &= ~0x001f; + ppu->vram_addr ^= 0x0400; + } else { + ppu->vram_addr++; + } + } break; + } + } break; - if(rendering && ((dot >= 2 && dot <= 257) || (dot >= 322 && dot <= 337))) { + case 257: { + ppu->vram_addr = (ppu->vram_addr & ~0x041f) | (ppu->temp_addr & 0x041f); + ppu_evaluate_sprites(state); + break; + } - if(ppu->reg_mask & 0x10) { - for(uint32_t i = 0; i < ppu->sprite_count; i++) { - if(ppu->sprite_positions[i] > 0) { - ppu->sprite_positions[i]--; - } else { - ppu->sprite_shift_lo[i] <<= 1; - ppu->sprite_shift_hi[i] <<= 1; + case 340: { + ppu_fetch_sprite_patterns(state); + break; } } - } + } break; - ppu->bg_shift_pattern_low <<= 1; - ppu->bg_shift_pattern_high <<= 1; - ppu->bg_shift_attrib_low <<= 1; - ppu->bg_shift_attrib_high <<= 1; + // case 241: { + // if(dot == 1) { + // ppu->reg_status |= 0x80; + // if(ppu->reg_ctrl & 0x80) { + // state->nmi_pending = 1; + // } + // } + // } break; + + // case 261: { + // if(dot == 1) { + // ppu->reg_status &= ~0x80; + // ppu->reg_status &= ~0x40; + // ppu->sprite_zero_hit_possible = 0; + // } + + // if(dot >= 280 && dot <= 304) { + // ppu->vram_addr = (ppu->vram_addr & ~0x7be0) | (ppu->temp_addr & 0x7be0); + // } + + // if(dot == 340) { + // ppu_fetch_sprite_patterns(state); + // } + // } break; + + // // Handle the frame rendering + // if(++dot > 340) { + // dot = 0; + // scanline++; + // if(scanline > 261) { + // scanline = 0; + // ppu->frame_ready = 1; + // } + // } + + // ppu->dot = dot; + // ppu->scanline = scanline; } +} + +#else + + // if(ppu->even_frame && (ppu->reg_mask & 0x18)) { + // // skip this dot + // // call mapper_tick here. + // ppu->dot++; + // } + + + if(rendering && scanline < 240 && dot >= 1 && dot <= 256) { + ppu_render_pixel(state); + } - if(scanline < 240 || scanline == 261) { if(rendering && ((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336))) { - switch(dot % 8) { - case 1: { - uint32_t nt_addr = 0x2000 | (ppu->vram_addr & 0x0fff); - ppu->bg_next_tile_id = state->mapper.ciram_read(state, nt_addr); - break; + + if(ppu->reg_mask & 0x10) { + for(uint32_t i = 0; i < ppu->sprite_count; i++) { + if(ppu->sprite_positions[i] > 0) { + ppu->sprite_positions[i]--; + } else { + ppu->sprite_shift_lo[i] <<= 1; + ppu->sprite_shift_hi[i] <<= 1; + } } - case 3: { - uint32_t attr_addr = 0x23c0 | (ppu->vram_addr & 0x0c00) | ((ppu->vram_addr >> 4) & 0x38) | ((ppu->vram_addr >> 2) & 0x07); - uint8_t attr = state->mapper.ciram_read(state, attr_addr & 0x0fff); - uint8_t shift = ((ppu->vram_addr >> 4) & 4) | (ppu->vram_addr & 2); - ppu->bg_next_tile_attrib = (attr >> shift) & 3; - break; + } + + ppu->bg_shift_pattern_low <<= 1; + ppu->bg_shift_pattern_high <<= 1; + ppu->bg_shift_attrib_low <<= 1; + ppu->bg_shift_attrib_high <<= 1; + } + + + if(scanline < 240 || scanline == 261) { + if(rendering && ((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336))) { + switch(dot % 8) { + case 1: { + uint32_t nt_addr = 0x2000 | (ppu->vram_addr & 0x0fff); + ppu->bg_next_tile_id = state->mapper.ciram_read(state, nt_addr); + break; + } + case 3: { + uint32_t attr_addr = 0x23c0 | (ppu->vram_addr & 0x0c00) | ((ppu->vram_addr >> 4) & 0x38) | ((ppu->vram_addr >> 2) & 0x07); + uint8_t attr = state->mapper.ciram_read(state, attr_addr & 0x0fff); + uint8_t shift = ((ppu->vram_addr >> 4) & 4) | (ppu->vram_addr & 2); + ppu->bg_next_tile_attrib = (attr >> shift) & 3; + break; + } + case 5: { + uint32_t base = (ppu->reg_ctrl & 0x10) ? 0x1000 : 0x0000; + uint32_t tile = ppu->bg_next_tile_id; + uint32_t fine_y = (ppu->vram_addr >> 12) & 7; + uint32_t addr_lsb = (base + tile * 16 + fine_y) & 0x1fff; + ppu->bg_next_tile_lsb = state->mapper.chr_read(state, addr_lsb); + break; + } + case 7: { + uint32_t base = (ppu->reg_ctrl & 0x10) ? 0x1000 : 0x0000; + uint32_t tile = ppu->bg_next_tile_id; + uint32_t fine_y = (ppu->vram_addr >> 12) & 7; + uint32_t addr_msb = (base + tile * 16 + fine_y + 8) & 0x1fff; + ppu->bg_next_tile_msb = state->mapper.chr_read(state, addr_msb); + break; + } + case 0: { + ppu->bg_shift_pattern_low = (ppu->bg_shift_pattern_low & 0xff00) | ppu->bg_next_tile_lsb; + ppu->bg_shift_pattern_high = (ppu->bg_shift_pattern_high & 0xff00) | ppu->bg_next_tile_msb; + + uint8_t a = ppu->bg_next_tile_attrib; + ppu->bg_shift_attrib_low = (ppu->bg_shift_attrib_low & 0xff00) | ((a & 1) ? 0xff : 0x00); + ppu->bg_shift_attrib_high = (ppu->bg_shift_attrib_high & 0xff00) | ((a & 2) ? 0xff : 0x00); + + if((ppu->vram_addr & 0x001f) == 31) { + ppu->vram_addr &= ~0x001f; + ppu->vram_addr ^= 0x0400; + } else { + ppu->vram_addr++; + } + + break; + } } - case 5: { - uint32_t base = (ppu->reg_ctrl & 0x10) ? 0x1000 : 0x0000; - uint32_t tile = ppu->bg_next_tile_id; - uint32_t fine_y = (ppu->vram_addr >> 12) & 7; - uint32_t addr_lsb = (base + tile * 16 + fine_y) & 0x1fff; - ppu->bg_next_tile_lsb = state->mapper.chr_read(state, addr_lsb); - break; + } + + if(rendering) { + if(dot == 256) { + if((ppu->vram_addr & 0x7000) != 0x7000) { + ppu->vram_addr += 0x1000; + } else { + ppu->vram_addr &= ~0x7000; + uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; + if(y == 29) { + y = 0; + ppu->vram_addr ^= 0x0800; + } else if(y == 31) { + y = 0; + } else { + y++; + } + ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + } } - case 7: { - uint32_t base = (ppu->reg_ctrl & 0x10) ? 0x1000 : 0x0000; - uint32_t tile = ppu->bg_next_tile_id; - uint32_t fine_y = (ppu->vram_addr >> 12) & 7; - uint32_t addr_msb = (base + tile * 16 + fine_y + 8) & 0x1fff; - ppu->bg_next_tile_msb = state->mapper.chr_read(state, addr_msb); - break; + + if(dot == 257) { + ppu->vram_addr = (ppu->vram_addr & ~0x041f) | (ppu->temp_addr & 0x041f); } - case 0: { - ppu->bg_shift_pattern_low = (ppu->bg_shift_pattern_low & 0xff00) | ppu->bg_next_tile_lsb; - ppu->bg_shift_pattern_high = (ppu->bg_shift_pattern_high & 0xff00) | ppu->bg_next_tile_msb; - uint8_t a = ppu->bg_next_tile_attrib; - ppu->bg_shift_attrib_low = (ppu->bg_shift_attrib_low & 0xff00) | ((a & 1) ? 0xff : 0x00); - ppu->bg_shift_attrib_high = (ppu->bg_shift_attrib_high & 0xff00) | ((a & 2) ? 0xff : 0x00); + if(UNLIKELY(scanline == 261) && dot >= 280 && dot <= 304) { + ppu->vram_addr = (ppu->vram_addr & ~0x7be0) | (ppu->temp_addr & 0x7be0); + } - if((ppu->vram_addr & 0x001f) == 31) { - ppu->vram_addr &= ~0x001f; - ppu->vram_addr ^= 0x0400; - } else { - ppu->vram_addr++; - } + if(dot == 257 && LIKELY(scanline < 240)) { + ppu_evaluate_sprites(state); + } - break; + if(dot == 340 && (LIKELY(scanline < 240) || UNLIKELY(scanline == 261))) { + ppu_fetch_sprite_patterns(state); } } } +#endif - if(rendering) { - if(dot == 256) { - if((ppu->vram_addr & 0x7000) != 0x7000) { - ppu->vram_addr += 0x1000; - } else { - ppu->vram_addr &= ~0x7000; - uint32_t y = (ppu->vram_addr & 0x03e0) >> 5; - if(y == 29) { - y = 0; - ppu->vram_addr ^= 0x0800; - } else if(y == 31) { - y = 0; - } else { - y++; - } - ppu->vram_addr = (ppu->vram_addr & ~0x03e0) | (y << 5); + + +// TEST SWITCH CODE + + switch(scanline) { + case 241: { + if(dot == 1) { + ppu->reg_status |= 0x80; + if(ppu->reg_ctrl & 0x80) { + state->nmi_pending = 1; } } + break; + } - if(dot == 257) { - ppu->vram_addr = (ppu->vram_addr & ~0x041f) | (ppu->temp_addr & 0x041f); + case 261: { + if(dot == 1) { + ppu->reg_status &= ~0x80; + ppu->reg_status &= ~0x40; + ppu->sprite_zero_hit_possible = 0; } - if(scanline == 261 && dot == 304) { //>= 280 && dot <= 304) { + if(dot >= 280 && dot <= 304) { ppu->vram_addr = (ppu->vram_addr & ~0x7be0) | (ppu->temp_addr & 0x7be0); } - if(dot == 257 && scanline < 240) { - ppu_evaluate_sprites(state); - } - - if(dot == 340 && scanline < 240) { + if(dot == 340) { ppu_fetch_sprite_patterns(state); } + break; } } - if(scanline == 241 && dot == 1) { - ppu->reg_status |= 0x80; - if(ppu->reg_ctrl & 0x80) { - state->nmi_pending = 1; - } - } - - if(scanline == 261 && dot == 1) { - ppu->reg_status &= ~0x80; - ppu->reg_status &= ~0x40; - ppu->sprite_zero_hit_possible = 0; - } - dot++; - if(dot > 340) { - dot = 0; - scanline++; - if(scanline > 261) { - scanline = 0; - ppu->frame_ready = 1; +// TEST SWITCH CODE + + // if(UNLIKELY(scanline == 241) && dot == 1) { + // ppu->reg_status |= 0x80; + // if(ppu->reg_ctrl & 0x80) { + // state->nmi_pending = 1; + // } + // } + + // if(UNLIKELY(scanline == 261) && dot == 1) { + // ppu->reg_status &= ~0x80; + // ppu->reg_status &= ~0x40; + // ppu->sprite_zero_hit_possible = 0; + // } + + dot++; + if(dot > 340) { + dot = 0; + scanline++; + if(scanline > 261) { + scanline = 0; + ppu->frame_ready = 1; + } } - } - ppu->dot = dot; - ppu->scanline = scanline; + ppu->dot = dot; + ppu->scanline = scanline; + } } -static void ppu_dma_4014(struct nes_state *state, uint8_t page) { +__attribute__((always_inline, hot)) +static inline void ppu_dma_4014(struct nes_state *state, uint8_t page) { uint32_t base = page << 8; // Add 1 or 2 idle cycles depending on current CPU cycle uint8_t idle_cycles = (state->cycles & 1) ? 1 : 2; for(uint8_t i = 0; i < idle_cycles; i++) { state->cycles++; - ppu_tick(state); ppu_tick(state); ppu_tick(state); + ppu_tick(state); } for(uint32_t i = 0; i < 256; i++) { uint32_t addr = base + i; state->cycles++; - ppu_tick(state); ppu_tick(state); ppu_tick(state); + ppu_tick(state); uint8_t value = memory_read_dma(state, addr); state->cycles++; - ppu_tick(state); ppu_tick(state); ppu_tick(state); + ppu_tick(state); // ppu_write_2004(state, value); ppu_write(state, 4, value); } -- cgit v1.2.3