summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xBench.sh48
-rwxr-xr-xbuild.sh9
-rw-r--r--mknes.c34
-rw-r--r--mknes.h28
-rw-r--r--mknes_cpu_opcodes.c1
-rw-r--r--mknes_ines2.c59
-rw-r--r--mknes_memory.c79
-rw-r--r--mknes_ppu.c97
-rw-r--r--mknes_ppu_registers.c6
9 files changed, 211 insertions, 150 deletions
diff --git a/Bench.sh b/Bench.sh
index 4a26ab5..157ad57 100755
--- a/Bench.sh
+++ b/Bench.sh
@@ -1,11 +1,45 @@
-#. ~/.local/bin/dev
+#!/usr/bin/env bash
./build.sh clean
./build.sh profile
./mknes
./build.sh profile_release
-taskset -c 1 chrt -f 99 -- perf stat -- ./mknes
-taskset -c 1 chrt -f 99 -- perf stat -- ./mknes
-taskset -c 1 chrt -f 99 -- perf stat -- ./mknes
-taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes
-taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes
-taskset -c 1 chrt -f 99 -- perf stat -e stalled-cycles-frontend:u -e L1-icache-loads -e L1-icache-load-misses -- ./mknes
+
+runs=10
+frames=4096 # must equal the BENCHMARK frame loop count in mknes.c (0x1000)
+events="cycles,instructions,task-clock"
+tmp=$(mktemp) || exit 1
+trap 'rm -f "$tmp"' EXIT # remove the scratch file on every exit path
+
+taskset -c 1 ./mknes
+
+> "$tmp"
+for i in $(seq 1 "$runs"); do
+ taskset -c 1 chrt -f 99 perf stat -x, -e "$events" -- ./mknes 2>>"$tmp"
+done
+
+awk -F, -v F="$frames" '
+ $3=="cycles" { c[++nc]=$1/F }
+ $3=="instructions" { i[++ni]=$1/F }
+ # NOTE(peter): task-clock changed to nanoseconds; a perf that still labels the unit "msec" in $2 is used as-is.
+ $3=="task-clock" { t[++nt]=($2=="msec") ? $1 : $1/1000000 }
+
+ END {
+ if(nc<2 || ni<2 || nt<2) { print "Bench.sh: fewer than 2 perf samples per event, no statistics" > "/dev/stderr"; exit 1 }
+ for(k=1;k<=nc;k++) sumc+=c[k]; mc=sumc/nc
+ for(k=1;k<=ni;k++) sumi+=i[k]; mi=sumi/ni
+ for(k=1;k<=nt;k++) sumt+=t[k]; mt=sumt/nt
+ for(k=1;k<=nc;k++) sdc+=(c[k]-mc)^2; sdc=sqrt(sdc/(nc-1))
+ for(k=1;k<=ni;k++) sdi+=(i[k]-mi)^2; sdi=sqrt(sdi/(ni-1))
+ for(k=1;k<=nt;k++) sdt+=(t[k]-mt)^2; sdt=sqrt(sdt/(nt-1))
+
+ ms_per_frame = mt / F
+ fps = F / (mt / 1000)
+
+ printf "IPC (insn/cycle) = %.3f\n", mi/mc
+ printf "cycles/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mc, sdc, 100*sdc/mc, nc
+ printf "insn/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mi, sdi, 100*sdi/mi, ni
+ printf "time (ms) mean=%.3f sd=%.3f relSD=%.3f%% n=%d\n", mt, sdt, 100*sdt/mt, nt
+ printf "FPS (frames/second) = %.2f\n", fps
+ printf "ms/frame = %.6f\n", ms_per_frame
+ }' "$tmp"
+
diff --git a/build.sh b/build.sh
index b8e4fa9..a8b26e8 100755
--- a/build.sh
+++ b/build.sh
@@ -53,17 +53,18 @@ case "$BUILD_TYPE" in
;;
"release")
# CFLAGS+="-s -Wl,--strip-all -O2 "
- CFLAGS+="-g -O2 "
+ CFLAGS+=" -O2 "
;;
"profile")
- CFLAGS+="-O2 -fprofile-generate -ftest-coverage "
+ CFLAGS+="-O2 -fprofile-generate -ftest-coverage -DBENCHMARK "
;;
"profile_release")
# CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use "
- CFLAGS+="-g -O2 -fprofile-use "
+ CFLAGS+="-g -O2 -fprofile-use -DBENCHMARK "
;;
"debug")
- CFLAGS+="-g -O0 -DTIMER_DEBUG "
+ CFLAGS+="-g -O0 "
+# -DTIMER_DEBUG "
LDFLAGS+="-fno-pie -no-pie "
;;
"coverage")
diff --git a/mknes.c b/mknes.c
index a6d224d..f8850b0 100644
--- a/mknes.c
+++ b/mknes.c
@@ -37,8 +37,8 @@
#define CIRAM_SIZE 0x1000
#define CHR_RAM_SIZE 0x4000
-uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
-uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+static uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
+static uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
static void audio_callback(int16_t *data, size_t frames) { }
@@ -54,6 +54,14 @@ static void audio_callback(int16_t *data, size_t frames) { }
// #include "audio.c"
#include "incbin.h"
+#ifdef BENCHMARK
+// Embed the ROM for benchmarking to eliminate file I/O overhead
+// Uncomment the ROM you want to benchmark:
+INCBIN_BYTES(benchmark_rom, "data/Life Force (USA).nes");
+// INCBIN_BYTES(benchmark_rom, "data/0000/Super Mario Bros. (World) (HVC-SM).nes");
+// INCBIN_BYTES(benchmark_rom, "data/0003/Gradius (USA).zip");
+#endif
+
#include "platform_gl_loader.c"
@@ -152,7 +160,7 @@ struct main_state state __attribute__((aligned(64)));
#include "mkfw.h"
#include "platform_opengl.c"
-
+static size_t sprite_counts[9]; // profiling histogram: one bucket per sprite_count value, 0..8 inclusive
static uint32_t frames; // debug information
// static int32_t tas_frame_count;
@@ -235,12 +243,17 @@ int main(int argc, char **argv) {
memset(nstate, 0, sizeof(struct nes_state));
ppu_reset(nstate);
+#ifdef BENCHMARK
+ // Use embedded ROM for consistent benchmarking without file I/O overhead
+ ines2_load_from_memory(nstate, benchmark_rom, INCBIN_SIZE(benchmark_rom));
+
+#else
// ines2_load(nstate, "data/0000/10-Yard Fight (USA, Europe).nes");
// ines2_load(nstate, "data/0000/Balloon Fight (USA).nes");
// ines2_load(nstate, "data/0000/Excitebike (Japan, USA).nes");
// ines2_load(nstate, "data/0000/Ice Climber (USA, Europe, Korea).nes");
// ines2_load(nstate, "data/0000/Kung Fu (Japan, USA).nes");
- ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).nes");
+ // ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).nes");
// ines2_load(nstate, "data/Super Mario Bros. (W) (V1.0) [!].nes");
// ines2_load(nstate, "data/Super Mario Bros. (JU) [!].nes");
// ines2_load(nstate, "data/0000/Urban Champion (World).nes");
@@ -253,7 +266,7 @@ int main(int argc, char **argv) {
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3a.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3b.NES");
- ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES");
+ // ines2_load(nstate, "data/0000/raster_demos/RasterTest3c.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3d.NES");
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3e.NES");
// ines2_load(nstate, "data/0000/NEStress.NES");
@@ -262,7 +275,7 @@ int main(int argc, char **argv) {
// ines2_load(nstate, "data/0000/Xevious - The Avenger (USA).zip");
// ines2_load(nstate, "data/tv.nes");
- // ines2_load(nstate, "data/Life Force (USA).zip"); // 2002
+ ines2_load(nstate, "data/Life Force (USA).nes"); // 2002
// ines2_load(nstate, "data/0003/Flipull - An Exciting Cube Game (Japan) (En).zip");
// ines2_load(nstate, "data/0003/Friday the 13th (USA).zip");
@@ -286,12 +299,13 @@ int main(int argc, char **argv) {
// ines2_load(nstate, "data/Blaster Master (USA).zip"); // mapper 1
// ines2_load(nstate, "AccuracyCoin.nes"); // mapper 1
+#endif
mapper_setup(nstate);
cpu_reset(nstate);
-#if 0
- for(uint32_t i = 0; i < 0x5000; ++i) {
+#ifdef BENCHMARK
+ for(uint32_t i = 0; i < 0x1000; ++i) {
while(!nstate->ppu.frame_ready) {
// PROFILE_NAMED("nes emulator");
cpu_tick(nstate);
@@ -300,10 +314,14 @@ int main(int argc, char **argv) {
frames++;
}
+ // for(size_t i = 0; i < 9; ++i) {
+ // printf("count %d: %lld\n", i, sprite_counts[i]);
+ // }
// for(size_t i = 0; i < 256; ++i) {
// printf("instr %2.2x: %lld\n", i, instr_count[i]);
// }
+
return 0;
#else
diff --git a/mknes.h b/mknes.h
index d21a9ad..e9d3e0e 100644
--- a/mknes.h
+++ b/mknes.h
@@ -1,17 +1,27 @@
-#define PPU_CTRL_BG_TILE_SELECT 0x10
-#define PPU_CTRL_SPRITE_TILE_SELECT 0x08
-
-// Define constants for PPU control and mask bits
+// Define constants for PPU control register bits ($2000)
#define PPU_CTRL_NMI 0x80
#define PPU_CTRL_SPRITE_HEIGHT 0x20
+#define PPU_CTRL_BG_TILE_SELECT 0x10
#define PPU_CTRL_SPRITE_TILE 0x08
#define PPU_CTRL_VRAM_INCREMENT 0x04
-#define PPU_MASK_SHOW_BG 0x08
+// Define constants for PPU mask register bits ($2001)
#define PPU_MASK_SHOW_SPRITES 0x10
+#define PPU_MASK_SHOW_BG 0x08
+
+// Define constants for PPU status register bits ($2002)
+#define PPU_STATUS_VBLANK 0x80
+#define PPU_STATUS_SPRITE_ZERO_HIT 0x40
+#define PPU_STATUS_SPRITE_OVERFLOW 0x20
+
+// Define constants for sprite attribute bits
+#define SPRITE_ATTR_FLIP_VERTICAL 0x80
+#define SPRITE_ATTR_FLIP_HORIZONTAL 0x40
+#define SPRITE_ATTR_PRIORITY 0x20
+#define SPRITE_ATTR_PALETTE_MASK 0x03
// Define mirroring modes
#define MIRROR_HORIZONTAL 0
@@ -45,13 +55,11 @@ struct ppu_state {
uint8_t vram_read_buffer; // 28
uint8_t open_bus; // 29
- uint8_t sprite_count; // 30
- uint8_t overflow_scheduled_dot; // 31
- uint8_t palette[32]; // 32
+ uint8_t sprite_count; // 30 - Number of sprites in secondary OAM (0-8)
+ uint8_t palette[32]; // 31
// NOTE(peter): CACHELINE 2
uint8_t secondary_oam[32] __attribute__((aligned(64)));
- uint8_t temp_secondary_oam[32];
// NOTE(peter): CACHELINE 3
struct sprite_data {
@@ -68,8 +76,6 @@ struct ppu_state {
uint8_t input_strobe; // 46 - Control bit (0 or 1)
uint8_t frame_ready; // 47
uint8_t sprite_zero_in_range; // 48 - Boolean: is sprite 0 in range (will always be slot 0 if true)
- uint8_t sprite_count_next; // 49 - Sprite count for next scanline
- // 15 bytes left.
// NOTE(peter): CACHELINE 4
uint8_t oam[256] __attribute__((aligned(64)));
diff --git a/mknes_cpu_opcodes.c b/mknes_cpu_opcodes.c
index b2fafd5..6b67f77 100644
--- a/mknes_cpu_opcodes.c
+++ b/mknes_cpu_opcodes.c
@@ -1758,6 +1758,7 @@ static void opcode_ror_absx(struct nes_state *state) {
// SBC
+__attribute__((hot, always_inline))
static inline void sbc(struct cpu_state * restrict cpu, uint8_t value) {
#ifdef ENABLE_DECIMAL_MODE
if(cpu->d) {
diff --git a/mknes_ines2.c b/mknes_ines2.c
index 29ceb71..686eb38 100644
--- a/mknes_ines2.c
+++ b/mknes_ines2.c
@@ -1,9 +1,11 @@
+#ifndef BENCHMARK
#define USE_LIBARCHIVE
#ifdef USE_LIBARCHIVE
#include <archive.h>
#include <archive_entry.h>
#endif
+#endif
// iNES header fields
#define INES_HEADER_SIZE 16
@@ -30,7 +32,7 @@
#define MIRROR_VERTICAL 1
#define MIRROR_FOUR_SCREEN 2
-
+#ifndef BENCHMARK
static uint8_t *ines2_read_entire_file(const char *path, size_t *out_size) {
FILE *f = fopen(path, "rb");
if(!f) return 0;
@@ -99,32 +101,16 @@ uint8_t *ines2_unzip_file_to_memory(const char *zip_path, size_t *out_size) {
return buffer;
}
#endif
-
-static int ines2_load(struct nes_state *state, const char *path) {
- uint8_t *data = 0;
- size_t size = 0;
-
-#ifdef USE_LIBARCHIVE
- if(strstr(path, ".zip")) {
- data = ines2_unzip_file_to_memory(path, &size);
- } else {
- data = ines2_read_entire_file(path, &size);
- }
-#else
- if(strstr(path, ".zip")) {
- fprintf(stderr, "ZIP support not compiled in. Please use .nes files directly.\n");
- return -1;
- }
- data = ines2_read_entire_file(path, &size);
#endif
+// Load ROM from memory buffer (for embedded ROMs)
+static int ines2_load_from_memory(struct nes_state *state, const uint8_t *data, size_t size) {
if(!data || size < INES_HEADER_SIZE) {
- free(data);
return -1;
}
- uint8_t *ptr = data;
- uint8_t *header = ptr; ptr += INES_HEADER_SIZE;
+ const uint8_t *ptr = data;
+ const uint8_t *header = ptr; ptr += INES_HEADER_SIZE;
uint8_t prg_lsb = header[INES_PRG_SIZE_LSB];
uint8_t chr_lsb = header[INES_CHR_SIZE_LSB];
@@ -168,9 +154,38 @@ static int ines2_load(struct nes_state *state, const char *path) {
memcpy(state->chr_rom, ptr, chr_size);
}
- free(data);
return 0;
}
+#ifndef BENCHMARK
+static int ines2_load(struct nes_state *state, const char *path) { // Read the ROM file at path and hand it to ines2_load_from_memory; returns its result, or -1 if reading failed
+ uint8_t *data = 0;
+ size_t size = 0;
+
+#ifdef USE_LIBARCHIVE
+ if(strstr(path, ".zip")) { // substring match anywhere in path, not only the extension
+ data = ines2_unzip_file_to_memory(path, &size);
+ } else {
+ data = ines2_read_entire_file(path, &size);
+ }
+#else
+ if(strstr(path, ".zip")) {
+ fprintf(stderr, "ZIP support not compiled in. Please use .nes files directly.\n");
+ return -1;
+ }
+ data = ines2_read_entire_file(path, &size);
+#endif
+
+ if(!data || size < INES_HEADER_SIZE) { // same precondition ines2_load_from_memory checks; repeated here so the buffer is released
+ free(data); // free(NULL) is a no-op, so this is safe when the read itself failed
+ return -1;
+ }
+
+ int result = ines2_load_from_memory(state, data, size);
+ free(data); // NOTE(review): assumes the loader keeps no pointer into data (a memcpy into state is visible for CHR) - confirm
+ return result;
+}
+#endif
+
diff --git a/mknes_memory.c b/mknes_memory.c
index 7b74424..49546b2 100644
--- a/mknes_memory.c
+++ b/mknes_memory.c
@@ -5,6 +5,8 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) {
apu_tick(state);
ppu_tick(state);
+ if(offset >= 0x10000) __builtin_unreachable();
+
if(offset <= 0x1fff) {
return state->ram[offset & 0x07ff];
}
@@ -55,55 +57,56 @@ static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_
apu_tick(state);
ppu_tick(state);
- switch(offset) {
- case 0x0000 ... 0x1fff: {
- state->ram[offset & 0x07ff] = value;
- } break;
+ if(offset >= 0x10000) __builtin_unreachable();
+ if(offset <= 0x1fff) {
+ state->ram[offset & 0x07ff] = value;
+ return;
+ }
- case 0x2000 ... 0x3fff: {
- ppu_write(state, offset, value);
- } break;
+ if(offset >= 0x8000) {
+ state->mapper_function.prg_rom_write(state, offset, value);
+ return;
+ }
- case 0x4000 ... 0x4017: {
- switch(offset) {
- case 0x4014: {
- ppu_dma_4014(state, value);
- } break;
+ if(offset >= 0x2000 && offset <= 0x3fff) {
+ ppu_write(state, offset, value);
+ return;
+ }
- case 0x4016: {
- // joypad strobe
- uint8_t s = value & 1;
+ if(offset == 0x4014) {
+ ppu_dma_4014(state, value);
+ return;
+ }
- // if(s) {
- uint8_t prev = state->ppu.input_strobe;
- state->ppu.input_strobe = s;
+ if(offset == 0x4016) {
+ // joypad strobe
+ uint8_t s = value & 1;
- if(prev == 1 && (s) == 0) {
- // state->ppu.input[0] = tas_input[tas_frame_count];
+ // if(s) {
+ uint8_t prev = state->ppu.input_strobe;
+ state->ppu.input_strobe = s;
- state->ppu.input_latch[0] = state->ppu.input[0];
- state->ppu.input_latch[1] = state->ppu.input[1];
- state->ppu.input_bit[0] = 0;
- state->ppu.input_bit[1] = 0;
- }
- // }
- } break;
+ if(prev == 1 && (s) == 0) {
+ // state->ppu.input[0] = tas_input[tas_frame_count];
- default: {
- apu_write(state, offset, value);
- } break;
+ state->ppu.input_latch[0] = state->ppu.input[0];
+ state->ppu.input_latch[1] = state->ppu.input[1];
+ state->ppu.input_bit[0] = 0;
+ state->ppu.input_bit[1] = 0;
}
- } break;
-
- case 0x6000 ... 0x7fff: {
- state->mapper_function.prg_ram_write(state, offset, value);
- } break;
+ // }
+ return;
+ }
- case 0x8000 ... 0xffff: {
- state->mapper_function.prg_rom_write(state, offset, value);
- } break;
+ if(offset >= 0x4000 && offset <= 0x4017) {
+ apu_write(state, offset, value);
+ return;
+ }
+ if(offset >= 0x6000 && offset <= 0x7fff) {
+ state->mapper_function.prg_ram_write(state, offset, value);
+ return;
}
}
diff --git a/mknes_ppu.c b/mknes_ppu.c
index 92b22fc..fcaf681 100644
--- a/mknes_ppu.c
+++ b/mknes_ppu.c
@@ -27,39 +27,32 @@ static void ppu_reset(struct nes_state *state) {
__attribute__((hot, flatten))
static inline void ppu_evaluate_sprites(struct nes_state *state, uint32_t scanline) {
struct ppu_state *restrict ppu = &state->ppu;
- uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
+ uint8_t sprite_height = (ppu->reg_ctrl & PPU_CTRL_SPRITE_HEIGHT) ? 16 : 8; // 8x16 sprites when the height bit is set, else 8x8
uint8_t n = 0;
uint8_t sprite_zero_found = 0;
uint8_t * restrict src = ppu->oam;
- uint8_t * restrict dst = ppu->temp_secondary_oam; // Write to temp buffer
+ uint8_t * restrict dst = ppu->secondary_oam; // in-range sprites are copied directly into secondary OAM
for(uint8_t i = 0; i < 64; i++, src += 4) {
- uint8_t y = src[0];
- int32_t row = (int32_t)scanline - y;
-
- if(row >= 0 && row < sprite_height) {
+ uint32_t row = scanline - src[0]; // unsigned subtraction: a sprite Y greater than scanline wraps to a huge value
+ if(row < sprite_height) { // so this single compare is equivalent to row >= 0 && row < sprite_height
if(n < 8) {
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
-
- if(i == 0) {
- sprite_zero_found = 1; // Sprite 0 is in range, will be in slot 0
- }
-
dst += 4;
+ sprite_zero_found |= (i == 0); // OAM entry 0 is in range; being first, it lands in slot 0
n++;
-
} else {
- ppu->reg_status |= 0x20; // Set overflow immediately
+ ppu->reg_status |= PPU_STATUS_SPRITE_OVERFLOW; // a ninth in-range sprite: flag overflow and stop scanning
break;
}
}
}
ppu->sprite_zero_in_range = sprite_zero_found;
- ppu->sprite_count_next = n;
+ ppu->sprite_count = n; // number of entries written to secondary OAM (0-8)
}
__attribute__((hot))
@@ -68,8 +61,8 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state,
uint8_t * restrict sec_oam = ppu->secondary_oam;
uint8_t ctrl = ppu->reg_ctrl;
- uint8_t sprite_height = (ctrl & 0x20) ? 16 : 8;
- uint32_t sprite_pattern_table_base = (ctrl & 0x08) << 9;
+ uint8_t sprite_height = (ctrl & PPU_CTRL_SPRITE_HEIGHT) ? 16 : 8;
+ uint32_t sprite_pattern_table_base = (ctrl & PPU_CTRL_SPRITE_TILE) << 9;
for(uint8_t i = 0; i < ppu->sprite_count; i++, sec_oam += 4) {
uint8_t y = sec_oam[0];
@@ -78,9 +71,10 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state,
uint8_t x = sec_oam[3];
uint32_t row = scanline - y;
- row = (attr & 0x80) ? sprite_height - 1 - row : row;
+ row = (attr & SPRITE_ATTR_FLIP_VERTICAL) ? sprite_height - 1 - row : row;
- uint32_t bank, addr;
+ uint32_t bank;
+ uint32_t addr;
if(sprite_height == 16) {
bank = (tile & 1) << 12;
tile &= 0xfe;
@@ -97,15 +91,15 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state * restrict state,
uint8_t val_lo = state->mapper_function.chr_read(state, addr);
uint8_t val_hi = state->mapper_function.chr_read(state, addr + 8);
- uint8_t rev = -(!!(attr & 0x40));
+ uint8_t rev = -(!!(attr & SPRITE_ATTR_FLIP_HORIZONTAL));
uint8_t lsb = (rev & ppu_bitreverse_lut[val_lo]) | (~rev & val_lo);
uint8_t msb = (rev & ppu_bitreverse_lut[val_hi]) | (~rev & val_hi);
ppu->sprites[i].shift_lo = lsb;
ppu->sprites[i].shift_hi = msb;
ppu->sprites[i].position = x;
- ppu->sprites[i].priority = attr & 0x20;
- ppu->sprites[i].palette = attr & 0x3;
+ ppu->sprites[i].priority = attr & SPRITE_ATTR_PRIORITY;
+ ppu->sprites[i].palette = attr & SPRITE_ATTR_PALETTE_MASK;
}
}
@@ -121,10 +115,11 @@ static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t
uint8_t sp_prio = 0;
uint8_t sp_zero = 0;
- uint8_t show_bg = ppu->reg_mask & 0x08;
- uint8_t show_sprites = ppu->reg_mask & 0x10;
- uint8_t left_bg = ppu->reg_mask & 0x02;
- uint8_t left_sp = ppu->reg_mask & 0x04;
+ uint8_t mask_reg = ppu->reg_mask; // Single load
+ uint8_t show_bg = mask_reg & PPU_MASK_SHOW_BG;
+ uint8_t show_sprites = mask_reg & PPU_MASK_SHOW_SPRITES;
+ uint8_t left_bg = mask_reg & 0x02;
+ uint8_t left_sp = mask_reg & 0x04;
uint8_t bg_mask = (show_bg && (left_bg || x & ~7)) ? 0xff : 0x00;
uint8_t sp_mask = (show_sprites && (left_sp || x & ~7));// ? 0xff : 0x00;
@@ -153,15 +148,17 @@ static inline void ppu_render_pixel(struct nes_state * restrict state, uint32_t
} \
} while (0)
+ // sprite_counts[ppu->sprite_count]++;
if(sp_mask && ppu->sprite_count > 0) {
- if(ppu->sprite_count == 1) goto sprite_1;
if(ppu->sprite_count == 2) goto sprite_2;
+ if(ppu->sprite_count == 1) goto sprite_1;
if(ppu->sprite_count == 3) goto sprite_3;
if(ppu->sprite_count == 4) goto sprite_4;
if(ppu->sprite_count == 5) goto sprite_5;
if(ppu->sprite_count == 6) goto sprite_6;
- if(ppu->sprite_count == 7) goto sprite_7;
if(ppu->sprite_count == 8) goto sprite_8;
+ if(ppu->sprite_count == 7) goto sprite_7;
+
sprite_8: SPRITE_STEP(7);
sprite_7: SPRITE_STEP(6);
@@ -172,8 +169,7 @@ sprite_3: SPRITE_STEP(2);
sprite_2: SPRITE_STEP(1);
sprite_1: SPRITE_STEP(0);
}
-sprite_done:;
-
+sprite_done:; // empty statement: before C23 a label may not be directly followed by a declaration
// Final pixel composition
uint8_t bg_index = (bg_palette << 2) + bg_pixel;
@@ -187,7 +183,7 @@ sprite_done:;
case 0: { palette_index = 0; } break;
case 1: { palette_index = 0x10 | sp_index; } break;
case 2: { palette_index = bg_index; } break;
- case 3: { ppu->reg_status |= (sp_zero && x < 255) ? 0x40 : 0; } break; // NOTE(peter): Sprite zero hit!
+ case 3: { ppu->reg_status |= (sp_zero && x < 255) ? PPU_STATUS_SPRITE_ZERO_HIT : 0; } break; // NOTE(peter): Sprite zero hit!
}
state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x).
@@ -199,7 +195,7 @@ static void ppu_tick(struct nes_state *state) {
uint32_t dot = ppu->dot;
uint32_t scanline = ppu->scanline;
- uint8_t rendering = (ppu->reg_mask & 0x18);
+ uint8_t rendering = (ppu->reg_mask & (PPU_MASK_SHOW_SPRITES | PPU_MASK_SHOW_BG));
for(uint8_t ppu_loops = 0; ppu_loops < 3; ++ppu_loops) {
@@ -207,10 +203,6 @@ static void ppu_tick(struct nes_state *state) {
if(scanline <= 239) {
- if(dot == 65) {
- ppu_evaluate_sprites(state, scanline); // Evaluate sprites early, sets overflow immediately
- }
-
if(dot >= 1 && dot <= 256) {
if(dot == 256) {
if((ppu->vram_addr & 0x7000) != 0x7000) {
@@ -237,12 +229,11 @@ static void ppu_tick(struct nes_state *state) {
if(dot == 257) {
ppu->vram_addr = (ppu->vram_addr & ~0x041f) | (ppu->temp_addr & 0x041f);
- memcpy(ppu->secondary_oam, ppu->temp_secondary_oam, 32);
- ppu->sprite_count = ppu->sprite_count_next;
+ ppu_evaluate_sprites(state, scanline);
}
if(dot >= 321 && dot <= 336) {
-stupid: if(ppu->reg_mask & 0x10) {
+stupid: if(ppu->reg_mask & PPU_MASK_SHOW_SPRITES) {
for(uint32_t i = 0; i < ppu->sprite_count; i++) {
if(ppu->sprites[i].position > 0) {
ppu->sprites[i].position--;
@@ -272,7 +263,7 @@ stupid: if(ppu->reg_mask & 0x10) {
} break;
case 5: {
- uint32_t base = (ppu->reg_ctrl & 0x10) << 8;
+ uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8;
uint32_t tile = ppu->bg_next_tile_id;
uint32_t fine_y = (ppu->vram_addr >> 12) & 7;
uint32_t addr_lsb = (base + tile * 16 + fine_y) & 0x1fff;
@@ -280,7 +271,7 @@ stupid: if(ppu->reg_mask & 0x10) {
} break;
case 7: {
- uint32_t base = (ppu->reg_ctrl & 0x10) << 8;
+ uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8;
uint32_t tile = ppu->bg_next_tile_id;
uint32_t fine_y = (ppu->vram_addr >> 12) & 7;
uint32_t addr_msb = (base + tile * 16 + fine_y + 8) & 0x1fff;
@@ -312,9 +303,6 @@ stupid: if(ppu->reg_mask & 0x10) {
}
if(scanline == 261) {
- if(dot == 65) {
- ppu_evaluate_sprites(state, scanline); // Evaluate sprites early
- }
if(dot >= 1 && dot <= 256) {
if(dot == 256) {
@@ -341,8 +329,6 @@ stupid: if(ppu->reg_mask & 0x10) {
if(dot == 257) {
ppu->vram_addr = (ppu->vram_addr & ~0x041f) | (ppu->temp_addr & 0x041f);
- memcpy(ppu->secondary_oam, ppu->temp_secondary_oam, 32);
- ppu->sprite_count = ppu->sprite_count_next;
}
if(dot >= 280 && dot <= 304) {
@@ -350,7 +336,7 @@ stupid: if(ppu->reg_mask & 0x10) {
}
if(dot >= 321 && dot <= 336) {
-stupid2: if(ppu->reg_mask & 0x10) {
+stupid2: if(ppu->reg_mask & PPU_MASK_SHOW_SPRITES) {
for(uint32_t i = 0; i < ppu->sprite_count; i++) {
if(ppu->sprites[i].position > 0) {
ppu->sprites[i].position--;
@@ -381,7 +367,7 @@ stupid2: if(ppu->reg_mask & 0x10) {
} break;
case 5: {
- uint32_t base = (ppu->reg_ctrl & 0x10) << 8;
+ uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8;
uint32_t tile = ppu->bg_next_tile_id;
uint32_t fine_y = (ppu->vram_addr >> 12) & 7;
uint32_t addr_lsb = (base + tile * 16 + fine_y) & 0x1fff;
@@ -389,7 +375,7 @@ stupid2: if(ppu->reg_mask & 0x10) {
} break;
case 7: {
- uint32_t base = (ppu->reg_ctrl & 0x10) << 8;
+ uint32_t base = (ppu->reg_ctrl & PPU_CTRL_BG_TILE_SELECT) << 8;
uint32_t tile = ppu->bg_next_tile_id;
uint32_t fine_y = (ppu->vram_addr >> 12) & 7;
uint32_t addr_msb = (base + tile * 16 + fine_y + 8) & 0x1fff;
@@ -413,10 +399,6 @@ stupid2: if(ppu->reg_mask & 0x10) {
} break;
}
}
-
- if(dot == 340) {
- ppu_fetch_sprite_patterns(state, scanline);
- }
}
}
@@ -424,13 +406,14 @@ stupid2: if(ppu->reg_mask & 0x10) {
if(dot == 1) {
switch(scanline) {
case 241: {
- ppu->reg_status |= 0x80;
- state->cpu.nmi_pending = (ppu->reg_ctrl & 0x80);
+ ppu->reg_status |= PPU_STATUS_VBLANK;
+ state->cpu.nmi_pending = (ppu->reg_ctrl & PPU_CTRL_NMI);
} break;
case 261: {
- ppu->reg_status &= ~0x80;
- ppu->reg_status &= ~0x40;
+ ppu->reg_status &= ~PPU_STATUS_VBLANK;
+ ppu->reg_status &= ~PPU_STATUS_SPRITE_ZERO_HIT;
+ ppu->reg_status &= ~PPU_STATUS_SPRITE_OVERFLOW;
} break;
}
}
@@ -440,7 +423,7 @@ stupid2: if(ppu->reg_mask & 0x10) {
dot = 0;
scanline++;
- if(scanline == 261 && !ppu->even_frame && (ppu->reg_mask & 0x18)) {
+ if(scanline == 261 && !ppu->even_frame && rendering) {
dot = 1;
}
diff --git a/mknes_ppu_registers.c b/mknes_ppu_registers.c
index a7aa351..b66f7e5 100644
--- a/mknes_ppu_registers.c
+++ b/mknes_ppu_registers.c
@@ -65,7 +65,7 @@ static inline void ppu_write(struct nes_state *state, uint32_t offset, uint8_t v
} break;
}
- ppu->vram_addr += (ppu->reg_ctrl & 0x04) ? 32 : 1;
+ ppu->vram_addr += (ppu->reg_ctrl & PPU_CTRL_VRAM_INCREMENT) ? 32 : 1;
} break;
}
@@ -84,7 +84,7 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) {
case 2: {
result &= 0x1f;
result |= ppu->reg_status & 0xe0;
- ppu->reg_status &= ~0x80;
+ ppu->reg_status &= ~PPU_STATUS_VBLANK;
ppu->write_latch = 0;
} break;
@@ -118,7 +118,7 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) {
}
- ppu->vram_addr += (ppu->reg_ctrl & 0x04) ? 32 : 1;
+ ppu->vram_addr += (ppu->reg_ctrl & PPU_CTRL_VRAM_INCREMENT) ? 32 : 1;
} break;
}