summary | refs | log | tree | commit | diff
path: root/mknes_ppu.c
diff options
context:
space:
mode:
Diffstat (limited to 'mknes_ppu.c')
-rw-r--r-- mknes_ppu.c 105
1 files changed, 96 insertions, 9 deletions
diff --git a/mknes_ppu.c b/mknes_ppu.c
index 6e8e3ae..249f9f9 100644
--- a/mknes_ppu.c
+++ b/mknes_ppu.c
@@ -109,6 +109,93 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state,
}
}
+
+#if 0
+// layout reminder
+// struct sprite_data { u8 shift_lo, shift_hi, position, priority, palette; } __attribute__((packed));
+
+__attribute__((always_inline, hot, optimize("no-jump-tables","no-unroll-loops"))) // Disabled (#if 0) variant: resolves one pixel from background + sprite state and stores its palette entry in state->pixels.
+static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t x, uint32_t y, uint8_t mask_reg) { // x, y: pixel coordinates (stored at y * 256 + x); mask_reg: PPU mask bits tested below.
+ struct ppu_state *restrict ppu = &state->ppu;
+
+ // Widen to 32-bit temporaries for the background path; the struct fields themselves stay narrow (presumably 16-bit/8-bit -- confirm against struct ppu_state).
+ const uint32_t s = 15u - (uint32_t)ppu->fine_x; // bit position sampled from each background shift register
+ const uint32_t show_bg = (mask_reg & PPU_MASK_SHOW_BG) != 0;
+ const uint32_t show_sprites = (mask_reg & PPU_MASK_SHOW_SPRITES) != 0;
+ const uint32_t left_bg = (mask_reg & 0x02) != 0; // mask bit 0x02: background allowed where x < 8
+ const uint32_t left_sp = (mask_reg & 0x04) != 0; // mask bit 0x04: sprites allowed where x < 8
+ const uint32_t x_ge_8 = (x & ~7u) != 0; // 1 when x >= 8
+
+ const uint32_t bg_on = show_bg & (left_bg | x_ge_8); // 0 or 1
+ const uint32_t sp_on = show_sprites & (left_sp | x_ge_8); // 0 or 1
+
+ const uint32_t pat_lo = (uint32_t)ppu->bg_shift_pattern_low;
+ const uint32_t pat_hi = (uint32_t)ppu->bg_shift_pattern_high;
+ const uint32_t att_lo = (uint32_t)ppu->bg_shift_attrib_low;
+ const uint32_t att_hi = (uint32_t)ppu->bg_shift_attrib_high;
+
+ const uint32_t p0 = (pat_lo >> s) & 1u;
+ const uint32_t p1 = (pat_hi >> s) & 1u;
+ const uint32_t a0 = (att_lo >> s) & 1u;
+ const uint32_t a1 = (att_hi >> s) & 1u;
+
+ const uint32_t bg_pixel = ((p1 << 1) | p0) & -bg_on; // -bg_on is all-ones when enabled and 0 otherwise, so this zeroes the pixel without a branch
+ const uint32_t bg_palette = ((a1 << 1) | a0) & -bg_on;
+
+ // Sprite resolve: the first sprite (in array order) whose position is 0 and whose pixel is non-zero wins.
+ // Fields are loaded in this order: position (branch filter) -> shift bytes (pixel test) -> meta (on hit)
+ uint32_t sp_pixel = 0, sp_palette = 0, sp_prio = 0, sp_zero = 0;
+
+ if (sp_on) {
+ struct sprite_data * restrict s_ptr = ppu->sprites; // address calc only
+ uint32_t n = ppu->sprite_count; // one load
+ if (n) {
+ uint32_t i = 0;
+ find_sprite: // NOTE(review): the next statement is a declaration; a label directly before a declaration is only standard from C23 (GCC accepts it as an extension) -- confirm toolchain before enabling this path
+ // 1) position
+ uint32_t pos = s_ptr->position; // load 1
+ if (!pos) {
+ // 2) pixel from the top bit (0x80) of each shift byte: shift_hi -> bit 1, shift_lo -> bit 0
+ uint32_t sh = s_ptr->shift_hi; // load 2
+ uint32_t sl = s_ptr->shift_lo; // load 3
+ uint32_t pix = ((sh & 0x80u) >> 6) | ((sl & 0x80u) >> 7);
+ if (pix) {
+ // 3) only now fetch metadata
+ sp_pixel = pix;
+ sp_palette = s_ptr->palette; // load 4 (only on hit)
+ sp_prio = s_ptr->priority; // load 5 (only on hit)
+ sp_zero = (ppu->sprite_zero_in_range != 0u) & (i == 0u); // hit came from slot 0 while sprite_zero_in_range is set
+ goto sprite_done;
+ }
+ }
+ // next sprite
+ ++s_ptr; ++i;
+ if (i < n) goto find_sprite;
+ }
+ }
+sprite_done: ;
+
+ const uint32_t bg_index = (bg_palette << 2) + bg_pixel;
+ const uint32_t sp_index = (sp_palette << 2) + sp_pixel;
+ const uint32_t selector = ((bg_pixel != 0u) << 1) | (sp_pixel != 0u); // bit 1: background opaque, bit 0: sprite opaque
+
+ // Combine by selector: 0 = both transparent (palette_index stays 0), 1 = sprite only, 2 = background only, 3 = both opaque
+ uint32_t palette_index = 0;
+ if (selector == 1u) {
+ palette_index = 0x10u | sp_index;
+ } else if (selector == 2u) {
+ palette_index = bg_index;
+ } else if (selector == 3u) {
+ const uint32_t use_bg = (sp_prio != 0u); // non-zero priority selects the background colour
+ palette_index = use_bg ? bg_index : (0x10u | sp_index);
+ if ((ppu->sprite_zero_in_range != 0u) & (sp_zero != 0u) & (x <= 254u)) { // sprite_zero_in_range is already folded into sp_zero, so the first term is redundant; no hit is flagged at x == 255
+ ppu->reg_status |= PPU_STATUS_SPRITE_ZERO_HIT;
+ }
+ }
+ state->pixels[y * 256 + x] = ppu->palette[palette_index];
+}
+
+#else
__attribute__((always_inline, hot, optimize("no-jump-tables", "no-unroll-loops")))
static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t x, uint32_t y, uint8_t mask_reg) {
struct ppu_state *restrict ppu = &state->ppu;
@@ -159,25 +246,25 @@ sprite_found:
}
no_sprite:
-
// Final pixel composition
uint8_t bg_index = (bg_palette << 2) + bg_pixel;
uint8_t sp_index = (sp_palette << 2) + sp_pixel;
uint8_t selector = (bg_pixel ? 2 : 0) | (sp_pixel ? 1 : 0);
uint8_t palette_index = 0;
- switch(selector) {
- // case 0: { palette_index = 0; } break;
- case 1: { palette_index = 0x10 | sp_index; } break;
- case 2: { palette_index = bg_index; } break;
- case 3: {
- palette_index = (sp_prio) ? bg_index : 0x10 | sp_index;
- ppu->reg_status |= (sp_zero && x < 255) ? PPU_STATUS_SPRITE_ZERO_HIT : 0;
- } break; // NOTE(peter): Sprite zero hit!
+ if(selector == 1) {
+ palette_index = 0x10 | sp_index;
+ } else if(selector == 2) {
+ palette_index = bg_index;
+ } else if(selector == 3) {
+ palette_index = (sp_prio) ? bg_index : (0x10 | sp_index);
+ if (sp_zero && x <= 254) ppu->reg_status |= PPU_STATUS_SPRITE_ZERO_HIT;
}
state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x).
}
+#endif
+
__attribute__((noinline, hot, optimize("no-jump-tables", "unroll-loops")))
static void ppu_tick(struct nes_state *state) {