diff options
| author | Peter Fors <peter.fors@mindkiller.com> | 2025-04-05 20:26:45 +0200 |
|---|---|---|
| committer | Peter Fors <peter.fors@mindkiller.com> | 2025-04-05 20:26:45 +0200 |
| commit | dabd7a5848e6aa55e91cf4c804f6236b4f7fe30e (patch) | |
| tree | cbde04c65735fb8ecf3e9d433172c942f7bb63db /ppu.c | |
| parent | 8573fb74da28b89f52c168c7f10ddbcaf5858532 (diff) | |
everything working, 2285 fps
Diffstat (limited to 'ppu.c')
| -rw-r--r-- | ppu.c | 180 |
1 files changed, 106 insertions, 74 deletions
@@ -124,8 +124,8 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) { return result; } -__attribute__((always_inline, hot)) -static inline void ppu_evaluate_sprites(struct nes_state *state) { +__attribute__((hot)) +static void ppu_evaluate_sprites(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8; uint8_t n = 0; @@ -143,7 +143,7 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) { dst[2] = src[2]; dst[3] = src[3]; ppu->sprite_indexes[n] = i; - ppu->sprite_zero_hit_possible |= (i == 0) ? 1 : 0; + ppu->sprite_zero_hit_possible |= (i == 0); dst += 4; n++; @@ -158,30 +158,32 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) { ppu->sprite_count = n; } -__attribute__((always_inline, hot)) -static inline void ppu_fetch_sprite_patterns(struct nes_state *state) { +__attribute__((hot)) +static void ppu_fetch_sprite_patterns(struct nes_state *state) { struct ppu_state *ppu = &state->ppu; + uint32_t addr; + uint32_t bank; + + uint8_t *s = ppu->secondary_oam; + uint8_t height = (ppu->reg_ctrl & 0x20) ? 16 : 8; + for(uint8_t i = 0; i < ppu->sprite_count; i++) { - uint8_t *s = ppu->secondary_oam + i * 4; uint8_t y = s[0], tile = s[1], attr = s[2], x = s[3]; uint8_t row = ppu->scanline - y; - uint8_t height = (ppu->reg_ctrl & 0x20) ? 16 : 8; row = (attr & 0x80) ? height - 1 - row : row; - uint32_t addr; if(height == 16) { - uint32_t bank = (tile & 1) ? 0x1000 : 0x0000; + bank = (tile & 1) << 12; tile &= 0xfe; if(row >= 8) { tile++; row -= 8; } - addr = bank + tile * 16 + row; } else { - uint32_t bank = (ppu->reg_ctrl & 0x08) ? 0x1000 : 0x0000; - addr = bank + tile * 16 + row; + bank = (ppu->reg_ctrl & 0x08) << 9; } + addr = bank + tile * 16 + row; uint8_t lsb = state->mapper.chr_read(state, addr); uint8_t msb = state->mapper.chr_read(state, addr + 8); @@ -194,73 +196,107 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state) { ppu->sprite_shift_lo[i] = lsb; ppu->sprite_shift_hi[i] = msb; ppu->sprite_positions[i] = x; - ppu->sprite_priorities[i] = (attr >> 5) & 1; + ppu->sprite_priorities[i] = attr & 0x20; + s += 4; } } -__attribute__((always_inline, hot)) -static inline void ppu_render_pixel(struct nes_state *state) { +__attribute__((hot)) +static void ppu_render_pixel(struct nes_state *state) { + struct ppu_state *ppu = &state->ppu; + + uint32_t x = ppu->dot - 1; + uint32_t y = ppu->scanline; + + // Fine X shift mask + // static const uint16_t fine_shift[8] = { 0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100 }; + uint16_t bit = 0x8000 >> ppu->fine_x;//fine_shift[ppu->fine_x]; + uint8_t bg_pixel = 0; uint8_t bg_palette = 0; uint8_t sp_pixel = 0; uint8_t sp_palette = 0; uint8_t sp_prio = 0; uint8_t sp_zero = 0; - uint8_t final_color = 0; - struct ppu_state *ppu = &state->ppu; +#if 1 // TODO(peter): Decide what I prefer, masking away unlikely path, or LIKELY hint to the compiler + uint8_t bg_mask = (ppu->reg_mask & 0x08) ? 0xff : 0x00; + uint8_t sp_mask = (ppu->reg_mask & 0x10) ? 0xff : 0x00; - uint32_t x = ppu->dot - 1; - uint32_t y = ppu->scanline; + // Background + uint8_t p0 = !!(ppu->bg_shift_pattern_low & bit); + uint8_t p1 = !!(ppu->bg_shift_pattern_high & bit); + uint8_t a0 = !!(ppu->bg_shift_attrib_low & bit); + uint8_t a1 = !!(ppu->bg_shift_attrib_high & bit); + + bg_pixel = ((p1 << 1) | p0) & bg_mask; + bg_palette = ((a1 << 1) | a0) & bg_mask; + + // Sprite + for(uint8_t i = 0; i < ppu->sprite_count; i++) { + if(ppu->sprite_positions[i]) continue; - uint32_t bit = 0x8000 >> ppu->fine_x; + uint8_t lo = ppu->sprite_shift_lo[i]; + uint8_t hi = ppu->sprite_shift_hi[i]; + sp_pixel = (((hi & 0x80) >> 6) | ((lo & 0x80) >> 7)) & sp_mask; - if(ppu->reg_mask & 0x08) { - uint8_t p0 = (ppu->bg_shift_pattern_low & bit) ? 1 : 0; - uint8_t p1 = (ppu->bg_shift_pattern_high & bit) ? 1 : 0; + if(!sp_pixel) continue; + + sp_palette = ppu->secondary_oam[i * 4 + 2] & 3; + sp_prio = ppu->sprite_priorities[i]; + sp_zero = (ppu->sprite_indexes[i] == 0); + break; + } +#else + // Background fetch + if(LIKELY(ppu->reg_mask & 0x08)) { + uint8_t p0 = !!(ppu->bg_shift_pattern_low & bit); + uint8_t p1 = !!(ppu->bg_shift_pattern_high & bit); bg_pixel = (p1 << 1) | p0; - uint8_t a0 = (ppu->bg_shift_attrib_low & bit) ? 1 : 0; - uint8_t a1 = (ppu->bg_shift_attrib_high & bit) ? 1 : 0; + uint8_t a0 = !!(ppu->bg_shift_attrib_low & bit); + uint8_t a1 = !!(ppu->bg_shift_attrib_high & bit); bg_palette = (a1 << 1) | a0; } - if(ppu->reg_mask & 0x10) { + // Sprite fetch + if(LIKELY(ppu->reg_mask & 0x10)) { for(uint8_t i = 0; i < ppu->sprite_count; i++) { - if(ppu->sprite_positions[i] == 0) { - uint8_t p0 = (ppu->sprite_shift_lo[i] & 0x80) ? 1 : 0; - uint8_t p1 = (ppu->sprite_shift_hi[i] & 0x80) ? 1 : 0; - sp_pixel = (p1 << 1) | p0; - - if(sp_pixel) { - sp_palette = ppu->secondary_oam[i * 4 + 2] & 3; - sp_prio = ppu->sprite_priorities[i]; - sp_zero = (ppu->sprite_indexes[i] == 0); - break; - } - } - } - } + if(ppu->sprite_positions[i]) continue; - if(bg_pixel == 0 && sp_pixel == 0) { - final_color = ppu->palette[0]; - } else if(bg_pixel == 0 && sp_pixel != 0) { - final_color = ppu->palette[0x10 | (sp_palette << 2) | sp_pixel]; - } else if(bg_pixel != 0 && sp_pixel == 0) { - final_color = ppu->palette[(bg_palette << 2) | bg_pixel]; - } else { - if(sp_zero && ppu->sprite_zero_hit_possible && x < 255) { - ppu->reg_status |= 0x40; - } - if(sp_prio == 0) { - final_color = ppu->palette[0x10 | (sp_palette << 2) | sp_pixel]; - } else { - final_color = ppu->palette[(bg_palette << 2) | bg_pixel]; + uint8_t lo = ppu->sprite_shift_lo[i]; + uint8_t hi = ppu->sprite_shift_hi[i]; + sp_pixel = ((hi & 0x80) >> 6) | ((lo & 0x80) >> 7); + + if(!sp_pixel) continue; + + sp_palette = ppu->secondary_oam[i * 4 + 2] & 3; + sp_prio = ppu->sprite_priorities[i]; + sp_zero = (ppu->sprite_indexes[i] == 0); + break; } } - ppu->pixels[y * 256 + x] = final_color; -} +#endif + // Final pixel composition + uint8_t palette_index = 0; + uint8_t bg_index = (bg_palette << 2) + bg_pixel; + uint8_t sp_index = (sp_palette << 2) + sp_pixel; + uint8_t selector = (bg_pixel ? 2 : 0) | (sp_pixel ? 1 : 0); + + switch(selector) { + case 0: { palette_index = 0; } break; + case 1: { palette_index = 0x10 | sp_index; } break; + case 2: { palette_index = bg_index; } break; + case 3: { + if(sp_zero && ppu->sprite_zero_hit_possible && x < 255) { + ppu->reg_status |= 0x40; + } + palette_index = (sp_prio) ? bg_index : 0x10 | sp_index; + } break; + } + state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x). +} __attribute__((hot, flatten)) static void ppu_tick(struct nes_state *state) { @@ -272,7 +308,6 @@ static void ppu_tick(struct nes_state *state) { for(uint32_t ppu_loops = 0; ppu_loops < 3; ++ppu_loops) { - if(LIKELY(rendering)) { if(ppu->even_frame && dot == 0) { @@ -284,28 +319,25 @@ static void ppu_tick(struct nes_state *state) { ppu_render_pixel(state); } - if((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336)) { + if(scanline < 240 || scanline == 261) { + if((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336)) { - if(ppu->reg_mask & 0x10) { - for(uint32_t i = 0; i < ppu->sprite_count; i++) { - if(ppu->sprite_positions[i] > 0) { - ppu->sprite_positions[i]--; - } else { - ppu->sprite_shift_lo[i] <<= 1; - ppu->sprite_shift_hi[i] <<= 1; + if(ppu->reg_mask & 0x10) { + for(uint32_t i = 0; i < ppu->sprite_count; i++) { + if(ppu->sprite_positions[i] > 0) { + ppu->sprite_positions[i]--; + } else { + ppu->sprite_shift_lo[i] <<= 1; + ppu->sprite_shift_hi[i] <<= 1; + } } } - } - - ppu->bg_shift_pattern_low <<= 1; - ppu->bg_shift_pattern_high <<= 1; - ppu->bg_shift_attrib_low <<= 1; - ppu->bg_shift_attrib_high <<= 1; - } + ppu->bg_shift_pattern_low <<= 1; + ppu->bg_shift_pattern_high <<= 1; + ppu->bg_shift_attrib_low <<= 1; + ppu->bg_shift_attrib_high <<= 1; - if(scanline < 240 || scanline == 261) { - if((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336)) { switch(dot % 8) { case 1: { uint32_t nt_addr = 0x2000 | (ppu->vram_addr & 0x0fff); |
