summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Fors <peter.fors@mindkiller.com> 2025-10-30 05:06:53 +0100
committer Peter Fors <peter.fors@mindkiller.com> 2025-10-30 05:06:53 +0100
commit 4814d60951d5b2c357086d8f8e369bd34793211a (patch)
tree 8820cdebd943c28dff43062aec137c5fd0d6a778
parent fda447031a4314542b0cd5183826ddc46f97204e (diff)
some optimization tests that didn't pan out
-rw-r--r-- mknes.c 4
-rw-r--r-- mknes_ppu.c 88
2 files changed, 2 insertions, 90 deletions
diff --git a/mknes.c b/mknes.c
index 244a032..e3e9ffd 100644
--- a/mknes.c
+++ b/mknes.c
@@ -58,8 +58,8 @@ static void audio_callback(int16_t *data, size_t frames) { }
#ifdef BENCHMARK
// Embed the ROM for benchmarking to eliminate file I/O overhead
// Uncomment the ROM you want to benchmark:
-INCBIN_BYTES(benchmark_rom, "data/Life Force (USA).nes");
-// INCBIN_BYTES(benchmark_rom, "data/0000/Super Mario Bros. (World) (HVC-SM).nes");
+// INCBIN_BYTES(benchmark_rom, "data/Life Force (USA).nes");
+INCBIN_BYTES(benchmark_rom, "data/0000/Super Mario Bros. (World) (HVC-SM).nes");
// INCBIN_BYTES(benchmark_rom, "data/0003/Gradius (USA).nes");
#endif
diff --git a/mknes_ppu.c b/mknes_ppu.c
index b00e380..870439b 100644
--- a/mknes_ppu.c
+++ b/mknes_ppu.c
@@ -109,93 +109,6 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state,
}
}
-
-#if 0
-// layout reminder
-// struct sprite_data { u8 shift_lo, shift_hi, position, priority, palette; } __attribute__((packed));
-
-__attribute__((always_inline, hot, optimize("no-jump-tables","no-unroll-loops")))
-static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t x, uint32_t y, uint8_t mask_reg) {
- struct ppu_state *restrict ppu = &state->ppu;
-
- // 32-bit temps for bg path; keep struct 16-bit/8-bit
- const uint32_t s = 15u - (uint32_t)ppu->fine_x;
- const uint32_t show_bg = (mask_reg & PPU_MASK_SHOW_BG) != 0;
- const uint32_t show_sprites = (mask_reg & PPU_MASK_SHOW_SPRITES) != 0;
- const uint32_t left_bg = (mask_reg & 0x02) != 0;
- const uint32_t left_sp = (mask_reg & 0x04) != 0;
- const uint32_t x_ge_8 = (x & ~7u) != 0;
-
- const uint32_t bg_on = show_bg & (left_bg | x_ge_8);
- const uint32_t sp_on = show_sprites & (left_sp | x_ge_8);
-
- const uint32_t pat_lo = (uint32_t)ppu->bg_shift_pattern_low;
- const uint32_t pat_hi = (uint32_t)ppu->bg_shift_pattern_high;
- const uint32_t att_lo = (uint32_t)ppu->bg_shift_attrib_low;
- const uint32_t att_hi = (uint32_t)ppu->bg_shift_attrib_high;
-
- const uint32_t p0 = (pat_lo >> s) & 1u;
- const uint32_t p1 = (pat_hi >> s) & 1u;
- const uint32_t a0 = (att_lo >> s) & 1u;
- const uint32_t a1 = (att_hi >> s) & 1u;
-
- const uint32_t bg_pixel = ((p1 << 1) | p0) & -bg_on;
- const uint32_t bg_palette = ((a1 << 1) | a0) & -bg_on;
-
- // Sprite resolve: only load fields in this order:
- // position (branch filter) -> shift bytes (pixel test) -> meta (on hit)
- uint32_t sp_pixel = 0, sp_palette = 0, sp_prio = 0, sp_zero = 0;
-
- if (sp_on) {
- struct sprite_data * restrict s_ptr = ppu->sprites; // address calc only
- uint32_t n = ppu->sprite_count; // one load
- if (n) {
- uint32_t i = 0;
- find_sprite:
- // 1) position
- uint32_t pos = s_ptr->position; // load 1
- if (!pos) {
- // 2) pixel from shift bytes
- uint32_t sh = s_ptr->shift_hi; // load 2
- uint32_t sl = s_ptr->shift_lo; // load 3
- uint32_t pix = ((sh & 0x80u) >> 6) | ((sl & 0x80u) >> 7);
- if (pix) {
- // 3) only now fetch metadata
- sp_pixel = pix;
- sp_palette = s_ptr->palette; // load 4 (only on hit)
- sp_prio = s_ptr->priority; // load 5 (only on hit)
- sp_zero = (ppu->sprite_zero_in_range != 0u) & (i == 0u);
- goto sprite_done;
- }
- }
- // next sprite
- ++s_ptr; ++i;
- if (i < n) goto find_sprite;
- }
- }
-sprite_done: ;
-
- const uint32_t bg_index = (bg_palette << 2) + bg_pixel;
- const uint32_t sp_index = (sp_palette << 2) + sp_pixel;
- const uint32_t selector = ((bg_pixel != 0u) << 1) | (sp_pixel != 0u);
-
- // Two-way combine with minimal control flow
- uint32_t palette_index = 0;
- if (selector == 1u) {
- palette_index = 0x10u | sp_index;
- } else if (selector == 2u) {
- palette_index = bg_index;
- } else if (selector == 3u) {
- const uint32_t use_bg = (sp_prio != 0u);
- palette_index = use_bg ? bg_index : (0x10u | sp_index);
- if ((ppu->sprite_zero_in_range != 0u) & (sp_zero != 0u) & (x <= 254u)) {
- ppu->reg_status |= PPU_STATUS_SPRITE_ZERO_HIT;
- }
- }
- state->pixels[y * 256 + x] = ppu->palette[palette_index];
-}
-
-#else
__attribute__((always_inline, hot, optimize("no-jump-tables", "no-unroll-loops")))
static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t x, uint32_t y, uint8_t mask_reg) {
struct ppu_state *restrict ppu = &state->ppu;
@@ -263,7 +176,6 @@ no_sprite:
state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x).
}
-#endif
__attribute__((noinline, hot, optimize("no-jump-tables", "unroll-loops")))