diff options
| author | Peter Fors <peter.fors@mindkiller.com> | 2025-10-25 21:56:37 +0200 |
|---|---|---|
| committer | Peter Fors <peter.fors@mindkiller.com> | 2025-10-25 21:56:37 +0200 |
| commit | 396b10dd5e206462ebeab5fed368ba3ae25c6a51 (patch) | |
| tree | 9d3c759298de673fb6d778b8bbeae4bec2174a9b | |
| parent | a4c261c6ee3940099e653a6f448dc952dfd5899f (diff) | |
Better benchmarking, some small optimizations
| -rwxr-xr-x | Bench.sh | 48 | ||||
| -rwxr-xr-x | build.sh | 9 | ||||
| -rw-r--r-- | mknes.c | 34 | ||||
| -rw-r--r-- | mknes.h | 28 | ||||
| -rw-r--r-- | mknes_cpu_opcodes.c | 1 | ||||
| -rw-r--r-- | mknes_ines2.c | 59 | ||||
| -rw-r--r-- | mknes_memory.c | 79 | ||||
| -rw-r--r-- | mknes_ppu.c | 97 | ||||
| -rw-r--r-- | mknes_ppu_registers.c | 6 |
9 files changed, 211 insertions, 150 deletions
@@ -1,11 +1,45 @@ -#. ~/.local/bin/dev +#!/usr/bin/env bash ./build.sh clean ./build.sh profile ./mknes ./build.sh profile_release -taskset -c 1 chrt -f 99 -- perf stat -- ./mknes -taskset -c 1 chrt -f 99 -- perf stat -- ./mknes -taskset -c 1 chrt -f 99 -- perf stat -- ./mknes -taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes -taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes -taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes + +runs=10 +frames=4096 +events="cycles,instructions,task-clock" +tmp=$(mktemp) + +taskset -c 1 ./mknes + +> "$tmp" +for i in $(seq 1 $runs); do + taskset -c 1 chrt -f 99 perf stat -x, -e $events -- ./mknes 2>>"$tmp" +done + + +awk -F, -v F="$frames" ' + $3=="cycles" { c[++nc]=$1/F } + $3=="instructions" { i[++ni]=$1/F } +# $3=="task-clock" { t[++nt]=$1 } # milliseconds NOTE(peter): changed to nanoseconds... + $3=="task-clock" { t[++nt]=$1/1000000 } + + END { + for(k=1;k<=nc;k++) sumc+=c[k]; mc=sumc/nc + for(k=1;k<=ni;k++) sumi+=i[k]; mi=sumi/ni + for(k=1;k<=nt;k++) sumt+=t[k]; mt=sumt/nt + + for(k=1;k<=nc;k++) sdc+=(c[k]-mc)^2; sdc=sqrt(sdc/(nc-1)) + for(k=1;k<=ni;k++) sdi+=(i[k]-mi)^2; sdi=sqrt(sdi/(ni-1)) + for(k=1;k<=nt;k++) sdt+=(t[k]-mt)^2; sdt=sqrt(sdt/(nt-1)) + + ms_per_frame = mt / F + fps = F / (mt / 1000) + + printf "IPC (insn/cycle) = %.3f\n", mi/mc + printf "cycles/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mc, sdc, 100*sdc/mc, nc + printf "insn/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mi, sdi, 100*sdi/mi, ni + printf "time (ms) mean=%.3f sd=%.3f relSD=%.3f%% n=%d\n", mt, sdt, 100*sdt/mt, nt + printf "FPS (frames/second) = %.2f\n", fps + printf "ms/frame = %.6f\n", ms_per_frame + }' "$tmp" + @@ -53,17 +53,18 @@ case "$BUILD_TYPE" in ;; "release") # CFLAGS+="-s -Wl,--strip-all -O2 " - CFLAGS+="-g -O2 " + CFLAGS+=" -O2 " ;; "profile") - CFLAGS+="-O2 -fprofile-generate -ftest-coverage " + CFLAGS+="-O2 -fprofile-generate -ftest-coverage -DBENCHMARK " ;; "profile_release") # CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use " - CFLAGS+="-g -O2 -fprofile-use " + CFLAGS+="-g -O2 -fprofile-use -DBENCHMARK " ;; "debug") - CFLAGS+="-g -O0 -DTIMER_DEBUG " + CFLAGS+="-g -O0 " +# -DTIMER_DEBUG " LDFLAGS+="-fno-pie -no-pie " ;; "coverage") @@ -37,8 +37,8 @@ #define CIRAM_SIZE 0x1000 #define CHR_RAM_SIZE 0x4000 -uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); -uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); +static uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); +static uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); static void audio_callback(int16_t *data, size_t frames) { } @@ -54,6 +54,14 @@ static void audio_callback(int16_t *data, size_t frames) { } // #include "audio.c" #include "incbin.h" +#ifdef BENCHMARK +// Embed the ROM for benchmarking to eliminate file I/O overhead +// Uncomment the ROM you want to benchmark: +INCBIN_BYTES(benchmark_rom, "data/Life Force (USA).nes"); +// INCBIN_BYTES(benchmark_rom, "data/0000/Super Mario Bros. (World) (HVC-SM).nes"); +// INCBIN_BYTES(benchmark_rom, "data/0003/Gradius (USA).zip"); +#endif + #include "platform_gl_loader.c" @@ -152,7 +160,7 @@ struct main_state state __attribute__((aligned(64))); #include "mkfw.h" #include "platform_opengl.c" - +static size_t sprite_counts[8]; static uint32_t frames; // debug information // static int32_t tas_frame_count; @@ -235,12 +243,17 @@ int main(int argc, char **argv) { memset(nstate, 0, sizeof(struct nes_state)); ppu_reset(nstate); +#ifdef BENCHMARK + // Use embedded ROM for consistent benchmarking without file I/O overhead + ines2_load_from_memory(nstate, benchmark_rom, INCBIN_SIZE(benchmark_rom)); + +#else // ines2_load(nstate, "data/0000/10-Yard Fight (USA, Europe).nes"); // ines2_load(nstate, "data/0000/Balloon Fight (USA).nes"); // ines2_load(nstate, "data/0000/Excitebike (Japan, USA).nes"); // ines2_load(nstate, "data/0000/Ice Climber (USA, Europe, Korea).nes"); // ines2_load(nstate, "data/0000/Kung Fu (Japan, USA).nes"); - ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).nes"); + // ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).nes"); // ines2_load(nstate, "data/Super Mario Bros. (W) (V1.0) [!].nes"); // ines2_load(nstate, "data/Super Mario Bros. (JU) [!].nes"); // ines2_load(nstate, "data/0000/Urban Champion (World).nes"); @@ -253,7 +266,7 @@ int main(int argc, char **argv) { // ines2_load(nstate, "data/0000/raster_demos/RasterTest3.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3a.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3b.NES"); - ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES"); + // ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3d.NES"); // ines2_load(nstate, "data/0000/raster_demos/RasterTest3e.NES"); // ines2_load(nstate, "data/0000/NEStress.NES"); @@ -262,7 +275,7 @@ int main(int argc, char **argv) { // ines2_load(nstate, "data/0000/Xevious - The Avenger (USA).zip"); // ines2_load(nstate, "data/tv.nes"); - // ines2_load(nstate, "data/Life Force (USA).zip"); // 2002 + ines2_load(nstate, "data/Life Force (USA).nes"); // 2002 // ines2_load(nstate, "data/0003/Flipull - An Exciting Cube Game (Japan) (En).zip"); // ines2_load(nstate, "data/0003/Friday the 13th (USA).zip"); @@ -286,12 +299,13 @@ int main(int argc, char **argv) { // ines2_load(nstate, "data/Blaster Master (USA).zip"); // mapper 1 // ines2_load(nstate, "AccuracyCoin.nes"); // mapper 1 +#endif mapper_setup(nstate); cpu_reset(nstate); -#if 0 - for(uint32_t i = 0; i < 0x5000; ++i) { +#ifdef BENCHMARK + for(uint32_t i = 0; i < 0x1000; ++i) { while(!nstate->ppu.frame_ready) { // PROFILE_NAMED("nes emulator"); cpu_tick(nstate); @@ -300,10 +314,14 @@ int main(int argc, char **argv) { frames++; } + // for(size_t i = 0; i < 9; ++i) { + // printf("count %d: %lld\n", i, sprite_counts[i]); + // } // for(size_t i = 0; i < 256; ++i) { // printf("instr %2.2x: %lld\n", i, instr_count[i]); // } + return 0; #else @@ -1,17 +1,27 @@ -#define PPU_CTRL_BG_TILE_SELECT 0x10 -#define PPU_CTRL_SPRITE_TILE_SELECT 0x08 - -// Define constants for PPU control and mask bits +// Define constants for PPU control register bits ($2000) #define PPU_CTRL_NMI 0x80 #define PPU_CTRL_SPRITE_HEIGHT 0x20 +#define PPU_CTRL_BG_TILE_SELECT 0x10 #define PPU_CTRL_SPRITE_TILE 0x08 #define PPU_CTRL_VRAM_INCREMENT 0x04 -#define PPU_MASK_SHOW_BG 0x08 +// Define constants for PPU mask register bits ($2001) #define PPU_MASK_SHOW_SPRITES 0x10 +#define PPU_MASK_SHOW_BG 0x08 + +// Define constants for PPU status register bits ($2002) +#define PPU_STATUS_VBLANK 0x80 +#define PPU_STATUS_SPRITE_ZERO_HIT 0x40 +#define PPU_STATUS_SPRITE_OVERFLOW 0x20 + +// Define constants for sprite attribute bits +#define SPRITE_ATTR_FLIP_VERTICAL 0x80 +#define SPRITE_ATTR_FLIP_HORIZONTAL 0x40 +#define SPRITE_ATTR_PRIORITY 0x20 +#define SPRITE_ATTR_PALETTE_MASK 0x03 // Define mirroring modes #define MIRROR_HORIZONTAL 0 @@ -45,13 +55,11 @@ struct ppu_state { uint8_t vram_read_buffer; // 28 uint8_t open_bus; // 29 - uint8_t sprite_count; // 30 - uint8_t overflow_scheduled_dot; // 31 - uint8_t palette[32]; // 32 + uint8_t sprite_count; // 30 - Number of sprites in secondary OAM (0-8) + uint8_t palette[32]; // 31 // NOTE(peter): CACHELINE 2 uint8_t secondary_oam[32] __attribute__((aligned(64))); - uint8_t temp_secondary_oam[32]; // NOTE(peter): CACHELINE 3 struct sprite_data { @@ -68,8 +76,6 @@ struct ppu_state { uint8_t input_strobe; // 46 - Control bit (0 or 1) uint8_t frame_ready; // 47 uint8_t sprite_zero_in_range; // 48 - Boolean: is sprite 0 in range (will always be slot 0 if true) - uint8_t sprite_count_next; // 49 - Sprite count for next scanline - // 15 bytes left. // NOTE(peter): CACHELINE 4 uint8_t oam[256] __attribute__((aligned(64))); diff --git a/mknes_cpu_opcodes.c b/mknes_cpu_opcodes.c index b2fafd5..6b67f77 100644 --- a/mknes_cpu_opcodes.c +++ b/mknes_cpu_opcodes.c @@ -1758,6 +1758,7 @@ static void opcode_ror_absx(struct nes_state *state) { // SBC +__attribute__((hot, always_inline)) static inline void sbc(struct cpu_state * restrict cpu, uint8_t value) { #ifdef ENABLE_DECIMAL_MODE if(cpu->d) { diff --git a/mknes_ines2.c b/mknes_ines2.c index 29ceb71..686eb38 100644 --- a/mknes_ines2.c +++ b/mknes_ines2.c @@ -1,9 +1,11 @@ +#ifndef BENCHMARK #define USE_LIBARCHIVE #ifdef USE_LIBARCHIVE #include <archive.h> #include <archive_entry.h> #endif +#endif // iNES header fields #define INES_HEADER_SIZE 16 @@ -30,7 +32,7 @@ #define MIRROR_VERTICAL 1 #define MIRROR_FOUR_SCREEN 2 - +#ifndef BENCHMARK static uint8_t *ines2_read_entire_file(const char *path, size_t *out_size) { FILE *f = fopen(path, "rb"); if(!f) return 0; @@ -99,32 +101,16 @@ uint8_t *ines2_unzip_file_to_memory(const char *zip_path, size_t *out_size) { return buffer; } #endif - -static int ines2_load(struct nes_state *state, const char *path) { - uint8_t *data = 0; - size_t size = 0; - -#ifdef USE_LIBARCHIVE - if(strstr(path, ".zip")) { - data = ines2_unzip_file_to_memory(path, &size); - } else { - data = ines2_read_entire_file(path, &size); - } -#else - if(strstr(path, ".zip")) { - fprintf(stderr, "ZIP support not compiled in. Please use .nes files directly.\n"); - return -1; - } - data = ines2_read_entire_file(path, &size); #endif +// Load ROM from memory buffer (for embedded ROMs) +static int ines2_load_from_memory(struct nes_state *state, const uint8_t *data, size_t size) { if(!data || size < INES_HEADER_SIZE) { - free(data); return -1; } - uint8_t *ptr = data; - uint8_t *header = ptr; ptr += INES_HEADER_SIZE; + const uint8_t *ptr = data; + const uint8_t *header = ptr; ptr += INES_HEADER_SIZE; uint8_t prg_lsb = header[INES_PRG_SIZE_LSB]; uint8_t chr_lsb = header[INES_CHR_SIZE_LSB]; @@ -168,9 +154,38 @@ static int ines2_load(struct nes_state *state, const char *path) { memcpy(state->chr_rom, ptr, chr_size); } - free(data); return 0; } +#ifndef BENCHMARK +static int ines2_load(struct nes_state *state, const char *path) { + uint8_t *data = 0; + size_t size = 0; + +#ifdef USE_LIBARCHIVE + if(strstr(path, ".zip")) { + data = ines2_unzip_file_to_memory(path, &size); + } else { + data = ines2_read_entire_file(path, &size); + } +#else + if(strstr(path, ".zip")) { + fprintf(stderr, "ZIP support not compiled in. Please use .nes files directly.\n"); + return -1; + } + data = ines2_read_entire_file(path, &size); +#endif + + if(!data || size < INES_HEADER_SIZE) { + free(data); + return -1; + } + + int result = ines2_load_from_memory(state, data, size); + free(data); + return result; +} +#endif + diff --git a/mknes_memory.c b/mknes_memory.c index 7b74424..49546b2 100644 --- a/mknes_memory.c +++ b/mknes_memory.c @@ -5,6 +5,8 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) { apu_tick(state); ppu_tick(state); + if(offset >= 0x10000) __builtin_unreachable(); + if(offset <= 0x1fff) { return state->ram[offset & 0x07ff]; } @@ -55,55 +57,56 @@ static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_ apu_tick(state); ppu_tick(state); - switch(offset) { - case 0x0000 ... 0x1fff: { - state->ram[offset & 0x07ff] = value; - } break; + if(offset >= 0x10000) __builtin_unreachable(); + if(offset <= 0x1fff) { + state->ram[offset & 0x07ff] = value; + return; + } - case 0x2000 ... 0x3fff: { - ppu_write(state, offset, value); - } break; + if(offset >= 0x8000) { + state->mapper_function.prg_rom_write(state, offset, value); + return; + } - case 0x4000 ... 0x4017: { - switch(offset) { - case 0x4014: { - ppu_dma_4014(state, value); - } break; + if(offset >= 0x2000 && offset <= 0x3fff) { + ppu_write(state, offset, value); + return; + } - case 0x4016: { - // joypad strobe - uint8_t s = value & 1; + if(offset == 0x4014) { + ppu_dma_4014(state, value); + return; + } - // if(s) { - uint8_t prev = state->ppu.input_strobe; - state->ppu.input_strobe = s; + if(offset == 0x4016) { + // joypad strobe + uint8_t s = value & 1; - if(prev == 1 && (s) == 0) { - // state->ppu.input[0] = tas_input[tas_frame_count]; + // if(s) { + uint8_t prev = state->ppu.input_strobe; + state->ppu.input_strobe = s; - state->ppu.input_latch[0] = state->ppu.input[0]; - state->ppu.input_latch[1] = state->ppu.input[1]; - state->ppu.input_bit[0] = 0; - state->ppu.input_bit[1] = 0; - } - // } - } break; + if(prev == 1 && (s) == 0) { + // state->ppu.input[0] = tas_input[tas_frame_count]; - default: { - apu_write(state, offset, value); - } break; + state->ppu.input_latch[0] = state->ppu.input[0]; + state->ppu.input_latch[1] = state->ppu.input[1]; + state->ppu.input_bit[0] = 0; + state->ppu.input_bit[1] = 0; } - } break; - - case 0x6000 ... 0x7fff: { - state->mapper_function.prg_ram_write(state, offset, value); - } break; + // } + return; + } - case 0x8000 ... 0xffff: { - state->mapper_function.prg_rom_write(state, offset, value); - } break; + if(offset >= 0x4000 && offset <= 0x4017) { + apu_write(state, offset, value); + return; + } + if(offset >= 0x6000 && offset <= 0x7fff) { + state->mapper_function.prg_ram_write(state, offset, value); + return; } } diff --git a/mknes_ppu.c b/mknes_ppu.c index 92b22fc..fcaf681 100644 --- a/mknes_ppu.c +++ b/mknes_ppu.c @@ -27,39 +27,32 @@ static void ppu_reset(struct nes_state *state) { __attribute__((hot, flatten)) static inline void ppu_evaluate_sprites(struct nes_state *state, uint32_t scanline) { struct ppu_state *restrict ppu = &state->ppu; - uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8; + uint8_t sprite_height = (ppu->reg_ctrl & PPU_CTRL_SPRITE_HEIGHT) ? 16 : 8; uint8_t n = 0; uint8_t sprite_zero_found = 0; uint8_t * restrict src = ppu->oam; - uint8_t * restrict dst = ppu->temp_secondary_oam; // Write to temp buffer + uint8_t * restrict dst = ppu->secondary_oam; for(uint8_t i = 0; i < 64; i++, src += 4) { - uint8_t y = src[0]; - int32_t row = (int32_t)scanline - y; - - if(row >= 0 && row < sprite_height) { + uint32_t row = scanline - src[0]; + if(row < sprite_height) { if(n < 8) { dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; - - if(i == 0) { - sprite_zero_found = 1; // Sprite 0 is in range, will be in slot 0 - } - dst += 4; + sprite_zero_found |= (i == 0); n++; - } else { - ppu->reg_status |= 0x20; // Set overflow immediately + ppu->reg_status |= PPU_STATUS_SPRITE_OVERFLOW; break; } } } ppu->sprite_zero_in_range = sprite_zero_found; - ppu->sprite_count_next = n; + ppu->sprite_count = n; } __attribute__((hot)) @@ -68,8 +61,8 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state, uint8_t * restrict sec_oam = ppu->secondary_oam; uint8_t ctrl = ppu->reg_ctrl; - uint8_t sprite_height = (ctrl & 0x20) ? 16 : 8; - uint32_t sprite_pattern_table_base = (ctrl & 0x08) << 9; + uint8_t sprite_height = (ctrl & PPU_CTRL_SPRITE_HEIGHT) ? 16 : 8; + uint32_t sprite_pattern_table_base = (ctrl & PPU_CTRL_SPRITE_TILE) << 9; for(uint8_t i = 0; i < ppu->sprite_count; i++, sec_oam += 4) { uint8_t y = sec_oam[0]; @@ -78,9 +71,10 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state, uint8_t x = sec_oam[3]; uint32_t row = scanline - y; - row = (attr & 0x80) ? sprite_height - 1 - row : row; + row = (attr & SPRITE_ATTR_FLIP_VERTICAL) ? sprite_height - 1 - row : row; - uint32_t bank, addr; + uint32_t bank; + uint32_t addr; if(sprite_height == 16) { bank = (tile & 1) << 12; tile &= 0xfe; @@ -97,15 +91,15 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state, uint8_t val_lo = state->mapper_function.chr_read(state, addr); uint8_t val_hi = state->mapper_function.chr_read(state, addr + 8); - uint8_t rev = -(!!(attr & 0x40)); + uint8_t rev = -(!!(attr & SPRITE_ATTR_FLIP_HORIZONTAL)); uint8_t lsb = (rev & ppu_bitreverse_lut[val_lo]) | (~rev & val_lo); uint8_t msb = (rev & ppu_bitreverse_lut[val_hi]) | (~rev & val_hi); ppu->sprites[i].shift_lo = lsb; ppu->sprites[i].shift_hi = msb; ppu->sprites[i].position = x; - ppu->sprites[i].priority = attr & 0x20; - ppu->sprites[i].palette = attr & 0x3; + ppu->sprites[i].priority = attr & SPRITE_ATTR_PRIORITY; + ppu->sprites[i].palette = attr & SPRITE_ATTR_PALETTE_MASK; } } @@ -121,10 +115,11 @@ static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t uint8_t sp_prio = 0; uint8_t sp_zero = 0; - uint8_t show_bg = ppu->reg_mask & 0x08; - uint8_t show_sprites = ppu->reg_mask & 0x10; - uint8_t left_bg = ppu->reg_mask & 0x02; - uint8_t left_sp = ppu->reg_mask & 0x04; + uint8_t mask_reg = ppu->reg_mask; // Single load + uint8_t show_bg = mask_reg & PPU_MASK_SHOW_BG; + uint8_t show_sprites = mask_reg & PPU_MASK_SHOW_SPRITES; + uint8_t left_bg = mask_reg & 0x02; + uint8_t left_sp = mask_reg & 0x04; uint8_t bg_mask = (show_bg && (left_bg || x & ~7)) ? 0xff : 0x00; uint8_t sp_mask = (show_sprites && (left_sp || x & ~7));// ? 0xff : 0x00; @@ -153,15 +148,17 @@ static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t } \ } while (0) + // sprite_counts[ppu->sprite_count]++; if(sp_mask && ppu->sprite_count > 0) { - if(ppu->sprite_count == 1) goto sprite_1; if(ppu->sprite_count == 2) goto sprite_2; + if(ppu->sprite_count == 1) goto sprite_1; if(ppu->sprite_count == 3) goto sprite_3; if(ppu->sprite_count == 4) goto sprite_4; if(ppu->sprite_count == 5) goto sprite_5; if(ppu->sprite_count == 6) goto sprite_6; - if(ppu->sprite_count == 7) goto sprite_7; if(ppu->sprite_count == 8) goto sprite_8; + if(ppu->sprite_count == 7) goto sprite_7; + sprite_8: SPRITE_STEP(7); sprite_7: SPRITE_STEP(6); @@ -172,8 +169,7 @@ sprite_3: SPRITE_STEP(2); sprite_2: SPRITE_STEP(1); sprite_1: SPRITE_STEP(0); } -sprite_done:; - +sprite_done: // Final pixel composition uint8_t bg_index = (bg_palette << 2) + bg_pixel; @@ -187,7 +183,7 @@ sprite_done:; case 0: { palette_index = 0; } break; case 1: { palette_index = 0x10 | sp_index; } break; case 2: { palette_index = bg_index; } break; - case 3: { ppu->reg_status |= (sp_zero && x < 255) ? 0x40 : 0; } break; // NOTE(peter): Sprite zero hit! + case 3: { ppu->reg_status |= (sp_zero && x < 255) ? PPU_STATUS_SPRITE_ZERO_HIT : 0; } break; // NOTE(peter): Sprite zero hit! } state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x). @@ -199,7 +195,7 @@ static void ppu_tick(struct nes_state *state) { uint32_t dot = ppu->dot; uint32_t scanline = ppu->scanline; - uint8_t rendering = (ppu->reg_mask & 0x18); + uint8_t rendering = (ppu->reg_mask & (PPU_MASK_SHOW_SPRITES | PPU_MASK_SHOW_BG)); for(uint8_t ppu_loops = 0; ppu_loops < 3; ++ppu_loops) { @@ -207,10 +203,6 @@ static void ppu_tick(struct nes_state *state) { if(scanline <= 239) { - if(dot == 65) { - ppu_evaluate_sprites(state, scanline); // Evaluate sprites early, sets overflow immediately - } - if(dot >= 1 && dot <= 256) { if(dot == 256) { if((ppu->vram_addr & 0x7000) != 0x7000) { @@ -237,12 +229,11 @@ static void ppu_tick(struct nes_state *state) { if(dot == 257) { ppu->vram_addr = (ppu->vram_addr & ~0x041f) | (ppu->temp_addr & 0x041f); - memcpy(ppu->secondary_oam, ppu->temp_secondary_oam, 32); - ppu->sprite_count = ppu->sprite_count_next; + ppu_evaluate_sprites(state, scanline); } if(dot >= 321 && dot <= 336) { -stupid: if(ppu->reg_mask & 0x10) { +stupid: if(ppu->reg_mask & PPU_MASK_SHOW_SPRITES) { for(uint32_t i = 0; i < ppu->sprite_count; i++) { if(ppu->sprites[i].position > 0) { ppu->sprites[i].position--; @@ -272,7 +263,7 @@ stupid: if(ppu->reg_mask & 0x10) { } break; case 5: { - uint32_t base = (ppu->reg_ctrl & 0x10) << 8; + uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8; uint32_t tile = ppu->bg_next_tile_id; uint32_t fine_y = (ppu->vram_addr >> 12) & 7; uint32_t addr_lsb = (base + tile * 16 + fine_y) & 0x1fff; @@ -280,7 +271,7 @@ stupid: if(ppu->reg_mask & 0x10) { } break; case 7: { - uint32_t base = (ppu->reg_ctrl & 0x10) << 8; + uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8; uint32_t tile = ppu->bg_next_tile_id; uint32_t fine_y = (ppu->vram_addr >> 12) & 7; uint32_t addr_msb = (base + tile * 16 + fine_y + 8) & 0x1fff; @@ -312,9 +303,6 @@ stupid: if(ppu->reg_mask & 0x10) { } if(scanline == 261) { - if(dot == 65) { - ppu_evaluate_sprites(state, scanline); // Evaluate sprites early - } if(dot >= 1 && dot <= 256) { if(dot == 256) { @@ -341,8 +329,6 @@ stupid: if(ppu->reg_mask & 0x10) { if(dot == 257) { ppu->vram_addr = (ppu->vram_addr & ~0x041f) | (ppu->temp_addr & 0x041f); - memcpy(ppu->secondary_oam, ppu->temp_secondary_oam, 32); - ppu->sprite_count = ppu->sprite_count_next; } if(dot >= 280 && dot <= 304) { @@ -350,7 +336,7 @@ stupid: if(ppu->reg_mask & 0x10) { } if(dot >= 321 && dot <= 336) { -stupid2: if(ppu->reg_mask & 0x10) { +stupid2: if(ppu->reg_mask & PPU_MASK_SHOW_SPRITES) { for(uint32_t i = 0; i < ppu->sprite_count; i++) { if(ppu->sprites[i].position > 0) { ppu->sprites[i].position--; @@ -381,7 +367,7 @@ stupid2: if(ppu->reg_mask & 0x10) { } break; case 5: { - uint32_t base = (ppu->reg_ctrl & 0x10) << 8; + uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8; uint32_t tile = ppu->bg_next_tile_id; uint32_t fine_y = (ppu->vram_addr >> 12) & 7; uint32_t addr_lsb = (base + tile * 16 + fine_y) & 0x1fff; @@ -389,7 +375,7 @@ stupid2: if(ppu->reg_mask & 0x10) { } break; case 7: { - uint32_t base = (ppu->reg_ctrl & 0x10) << 8; + uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8; uint32_t tile = ppu->bg_next_tile_id; uint32_t fine_y = (ppu->vram_addr >> 12) & 7; uint32_t addr_msb = (base + tile * 16 + fine_y + 8) & 0x1fff; @@ -413,10 +399,6 @@ stupid2: if(ppu->reg_mask & 0x10) { } break; } } - - if(dot == 340) { - ppu_fetch_sprite_patterns(state, scanline); - } } } @@ -424,13 +406,14 @@ stupid2: if(ppu->reg_mask & 0x10) { if(dot == 1) { switch(scanline) { case 241: { - ppu->reg_status |= 0x80; - state->cpu.nmi_pending = (ppu->reg_ctrl & 0x80); + ppu->reg_status |= PPU_STATUS_VBLANK; + state->cpu.nmi_pending = (ppu->reg_ctrl & PPU_CTRL_NMI); } break; case 261: { - ppu->reg_status &= ~0x80; - ppu->reg_status &= ~0x40; + ppu->reg_status &= ~PPU_STATUS_VBLANK; + ppu->reg_status &= ~PPU_STATUS_SPRITE_ZERO_HIT; + ppu->reg_status &= ~PPU_STATUS_SPRITE_OVERFLOW; } break; } } @@ -440,7 +423,7 @@ stupid2: if(ppu->reg_mask & 0x10) { dot = 0; scanline++; - if(scanline == 261 && !ppu->even_frame && (ppu->reg_mask & 0x18)) { + if(scanline == 261 && !ppu->even_frame && rendering) { dot = 1; } diff --git a/mknes_ppu_registers.c b/mknes_ppu_registers.c index a7aa351..b66f7e5 100644 --- a/mknes_ppu_registers.c +++ b/mknes_ppu_registers.c @@ -65,7 +65,7 @@ static inline void ppu_write(struct nes_state *state, uint32_t offset, uint8_t v } break; } - ppu->vram_addr += (ppu->reg_ctrl & 0x04) ? 32 : 1; + ppu->vram_addr += (ppu->reg_ctrl & PPU_CTRL_VRAM_INCREMENT) ? 32 : 1; } break; } @@ -84,7 +84,7 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) { case 2: { result &= 0x1f; result |= ppu->reg_status & 0xe0; - ppu->reg_status &= ~0x80; + ppu->reg_status &= ~PPU_STATUS_VBLANK; ppu->write_latch = 0; } break; @@ -118,7 +118,7 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) { } - ppu->vram_addr += (ppu->reg_ctrl & 0x04) ? 32 : 1; + ppu->vram_addr += (ppu->reg_ctrl & PPU_CTRL_VRAM_INCREMENT) ? 32 : 1; } break; } |
