From 3d70e69f6c9fbdcb890c6986096330e4f6611a32 Mon Sep 17 00:00:00 2001 From: Peter Fors Date: Thu, 29 May 2025 19:19:59 +0200 Subject: added mapper_tick() functionality, regressed 200fps, and optimized it back up to ~1940fps --- build.sh | 21 +++++++++++---------- callbacks.c | 6 ------ cpu.c | 5 +---- mappers/mapper_002_2.c | 9 +++------ mappers/mapper_066_0.c | 16 +++++----------- memory.c | 40 ++++++++++++++++++++-------------------- mknes.c | 5 ----- ppu.c | 29 +++++++++++++++-------------- ppu_registers.c | 16 ++++++++-------- 9 files changed, 63 insertions(+), 84 deletions(-) diff --git a/build.sh b/build.sh index c796e72..1fbf82c 100755 --- a/build.sh +++ b/build.sh @@ -6,7 +6,6 @@ PROJECT_NAME="mknes" # Change this for each new project CC=gcc # Base configuration common to all builds -# CFLAGS="-std=gnu11 -mtune=generic " CFLAGS="-std=gnu11 -mtune=generic " CFLAGS+="-mbmi " CFLAGS+="-mfunction-return=keep -mindirect-branch=keep " @@ -25,11 +24,12 @@ CFLAGS+="-U_FORTIFY_SOURCE -fno-pic " LDFLAGS="-Wl,--gc-sections -Wl,--as-needed " # Base include paths (common for all platforms) -INCLUDE_PATHS="-Ibase -I.." +INCLUDE_PATHS="-Ibase -I.. " # Linux-specific includes and libraries -LINUX_INCLUDE="-I/usr/include/pipewire-0.3 -I/usr/include/spa-0.2" +# LINUX_INCLUDE="-I/usr/include/pipewire-0.3 -I/usr/include/spa-0.2" #LINUX_LIBS="-lpipewire-0.3 -lXi -lX11 -lGL -lm -ldl -pthread -lglfw -larchive " +LINUX_INCLUDE=" " LINUX_LIBS="-lXi -lX11 -lGL -lm -ldl -pthread -lglfw -larchive " # Windows-specific includes and libraries @@ -45,22 +45,22 @@ fi case "$BUILD_TYPE" in "normal") - CFLAGS+=" -ggdb -fno-omit-frame-pointer -O2 -DDEBUG_INTERNAL" - # CFLAGS+=" -fsanitize=address,undefined,alignment,object-size,unreachable -fno-omit-frame-pointer" + CFLAGS+="-ggdb -fno-omit-frame-pointer -O2 -DDEBUG_INTERNAL " + # CFLAGS+="-fsanitize=address,undefined,alignment,object-size,unreachable " # -pg # for gprof ;; "release") - CFLAGS+=" -s -Wl,--strip-all -O2" + CFLAGS+="-s -Wl,--strip-all -O2 " ;; "profile") - CFLAGS+=" -O2 -fprofile-generate -ftest-coverage" + CFLAGS+="-O2 -fprofile-generate -ftest-coverage " ;; "profile_release") - CFLAGS+=" -s -Wl,--strip-all -O2 -fprofile-use" + CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use " ;; "debug") - CFLAGS+=" -g -O0" - LDFLAGS+=" -fno-pie -no-pie" + CFLAGS+="-g -O0 " + LDFLAGS+="-fno-pie -no-pie " ;; "coverage") gcov -b -c *.c @@ -86,6 +86,7 @@ set -e # Build Linux version ( + . dev # ../bin/ctime -begin .${PROJECT_NAME}_linux $CC $CFLAGS ${PROJECT_NAME}.c -o ${PROJECT_NAME} $INCLUDE_PATHS $LINUX_INCLUDE $LDFLAGS $LINUX_LIBS objdump -d -Mintel mknes > mknes.s diff --git a/callbacks.c b/callbacks.c index f32c59d..e43826d 100644 --- a/callbacks.c +++ b/callbacks.c @@ -45,12 +45,6 @@ static void toggle_fullscreen(bool enable) { static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) { struct nes_state *nes_state = (struct nes_state*)glfwGetWindowUserPointer(window); - if(key == GLFW_KEY_ESCAPE) { - if(action == GLFW_PRESS) { - glfwSetWindowShouldClose(window, 1); - } - } - if(action == GLFW_RELEASE) { switch(key) { case GLFW_KEY_F12: { diff --git a/cpu.c b/cpu.c index 23825aa..f6a070e 100644 --- a/cpu.c +++ b/cpu.c @@ -80,13 +80,10 @@ static inline void check_interrupts(struct nes_state *state) { static inline void cpu_tick(struct nes_state *state) { struct cpu_state * restrict cpu = &state->cpu; - check_interrupts(state); - uint8_t opcode; - // printf("%4.4x: ", cpu->pc); - opcode = memory_read(state, cpu->pc++); + uint8_t opcode = memory_read(state, cpu->pc++); // printf("%2.2x a:%2.2x x:%2.2x y:%2.2x p:%2.2x sp:%2.2x cycle: %ld\n", opcode, cpu->a, cpu->x, cpu->y, pack_flags(cpu), cpu->sp, state->cycles); opcode_lut[opcode](state); } diff --git a/mappers/mapper_002_2.c b/mappers/mapper_002_2.c index e6cddf7..d3df77d 100644 --- a/mappers/mapper_002_2.c +++ b/mappers/mapper_002_2.c @@ -4,11 +4,10 @@ __attribute__((section(".mapper_002_2"))) static uint8_t mapper_002_2_prg_rom_read(struct nes_state *state, uint32_t addr) { struct mapper_002_2 *mapper = &state->mapper_data.m002_2; - - if(addr >= 0x8000 && addr < 0xc000) { + if(addr < 0xc000) { return mapper->prg_bank0[addr & 0x3fff]; - } else if(addr >= 0xc000) { + } else { return mapper->prg_bank1[addr & 0x3fff]; } return 0; @@ -18,9 +17,7 @@ __attribute__((section(".mapper_002_2"))) static void mapper_002_2_prg_rom_write(struct nes_state *state, uint32_t addr, uint8_t value) { struct mapper_002_2 *mapper = &state->mapper_data.m002_2; - if(addr >= 0x8000) { - mapper->prg_bank0 = state->prg_rom + ((value & 0x0f) * 0x4000); - } + mapper->prg_bank0 = state->prg_rom + ((value & 0x0f) * 0x4000); } __attribute__((section(".mapper_002_2"))) diff --git a/mappers/mapper_066_0.c b/mappers/mapper_066_0.c index ccd00fe..bd4c124 100644 --- a/mappers/mapper_066_0.c +++ b/mappers/mapper_066_0.c @@ -3,23 +3,18 @@ __attribute__((section(".mapper_066_0"))) static uint8_t mapper_066_0_prg_rom_read(struct nes_state *state, uint32_t addr) { struct mapper_066_0 *mapper = &state->mapper_data.m066_0; - if(addr >= 0x8000) { - return state->prg_rom[addr & 0x7fff]; - } - return 0; + return mapper->prg_offset[addr & 0x7fff]; } __attribute__((section(".mapper_066_0"))) static void mapper_066_0_prg_rom_write(struct nes_state *state, uint32_t addr, uint8_t value) { struct mapper_066_0 *mapper = &state->mapper_data.m066_0; - if(addr >= 0x8000) { - uint32_t prg_bank = (value >> 4) & 3; - uint32_t chr_bank = (value >> 0) & 3; + uint32_t prg_bank = (value >> 4) & 3; + uint32_t chr_bank = (value >> 0) & 3; - mapper->prg_offset = state->prg_rom + (prg_bank * 0x8000); - mapper->chr_offset = state->chr_rom + (chr_bank * 0x2000); - } + mapper->prg_offset = state->prg_rom + (prg_bank * 0x8000); + mapper->chr_offset = state->chr_rom + (chr_bank * 0x2000); } __attribute__((section(".mapper_066_0"))) @@ -39,4 +34,3 @@ static void mapper_066_0_init(struct nes_state *state) { state->mapper_function.prg_rom_write = mapper_066_0_prg_rom_write; state->mapper_function.chr_read = mapper_066_0_chr_read; } - diff --git a/memory.c b/memory.c index 55c7015..096189b 100644 --- a/memory.c +++ b/memory.c @@ -6,18 +6,18 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) { ppu_tick(state); // apu_tick(state); - if(LIKELY(offset >= 0x6000)) { // MOST - if(UNLIKELY(offset < 0x8000)) { - return state->mapper_function.prg_ram_read(state, offset); - } + if(offset >= 0x8000) { // MOST return state->mapper_function.prg_rom_read(state, offset); - } else if(LIKELY(offset < 0x2000)) { // SECOND + } else if((offset < 0x2000)) { // SECOND return state->ram[offset & 0x07ff]; } else if(offset < 0x4000) { // THIRD return ppu_read(state, offset); + } else if(offset >= 0x6000) { + return state->mapper_function.prg_ram_read(state, offset); + } else if(offset == 0x4016 || offset == 0x4017) { uint32_t index = offset & 1; uint8_t value = (state->ppu.input_latch[index] >> state->ppu.input_bit[index]) & 1; @@ -28,13 +28,25 @@ static inline uint8_t memory_read(struct nes_state *state, uint32_t offset) { return 0; } +__attribute__((hot)) +static inline uint8_t memory_read_dummy(struct nes_state *state, uint32_t offset) { + state->cpu.cycles++; + ppu_tick(state); + // apu_tick(state); + + if(offset >= 0x2000 && offset < 0x4000) { + return ppu_read(state, offset); + } + return 0; +} + __attribute__((hot)) static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_t value) { state->cpu.cycles++; ppu_tick(state); // apu_tick(state); - if(LIKELY(offset < 0x2000)) { + if(offset < 0x2000) { state->ram[offset & 0x07ff] = value; } else if(offset < 0x4000) { @@ -78,20 +90,8 @@ static inline void memory_write(struct nes_state *state, uint32_t offset, uint8_ } } - +__attribute__((hot, flatten)) static inline uint8_t memory_read_dma(struct nes_state *state, uint32_t offset) { - // Do not tick CPU/PPU/APU — caller handles timing + // NOTE(peter): DO NOT tick CPU/PPU/APU — caller handles timing return state->ram[offset & 0x07ff]; } - -static inline uint8_t memory_read_dummy(struct nes_state *state, uint32_t offset) { - state->cpu.cycles++; - ppu_tick(state); - // apu_tick(state); - - if(UNLIKELY(offset >= 0x2000 && offset < 0x4000)) { - return ppu_read(state, offset); - } - return 0; -} - diff --git a/mknes.c b/mknes.c index 29d705e..77bdd45 100644 --- a/mknes.c +++ b/mknes.c @@ -8,8 +8,6 @@ #include #include -#define printf(...) - #define BUFFER_WIDTH 256 #define BUFFER_HEIGHT 240 #define WINDOW_WIDTH 320 * 3 @@ -23,9 +21,6 @@ #define CIRAM_SIZE 0x1000 #define CHR_RAM_SIZE 0x4000 -#define LIKELY(x) __builtin_expect(!!(x), 1) -#define UNLIKELY(x) __builtin_expect(!!(x), 0) - #ifndef restrict # if defined(__cplusplus) # define restrict __restrict diff --git a/ppu.c b/ppu.c index 3096573..8280265 100644 --- a/ppu.c +++ b/ppu.c @@ -19,14 +19,12 @@ static uint8_t __attribute__((aligned(64))) ppu_bitreverse_lut[256] = { 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff }; - static void ppu_reset(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; memset(ppu, 0, sizeof(struct ppu_state)); } - -// __attribute__((hot)) +__attribute__((hot)) static inline void ppu_evaluate_sprites(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8; @@ -59,7 +57,7 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) { ppu->sprite_count = n; } -// __attribute__((hot)) +__attribute__((hot)) static inline void ppu_fetch_sprite_patterns(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; uint32_t addr; @@ -105,7 +103,7 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state) { } } -__attribute__((hot)) +__attribute__((hot, flatten)) static inline void ppu_render_pixel(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; @@ -182,7 +180,7 @@ static inline void ppu_tick(struct nes_state *state) { for(uint32_t ppu_loops = 0; ppu_loops < 3; ++ppu_loops) { - if(LIKELY(rendering)) { + if(rendering) { switch(scanline) { case 0 ... 239: { switch(dot) { @@ -358,12 +356,12 @@ static inline void ppu_tick(struct nes_state *state) { } break; case 0: { - ppu->bg_shift_pattern_low = (ppu->bg_shift_pattern_low & 0xff00) | ppu->bg_next_tile_lsb; - ppu->bg_shift_pattern_high = (ppu->bg_shift_pattern_high & 0xff00) | ppu->bg_next_tile_msb; + ppu->bg_shift_pattern_low = (ppu->bg_shift_pattern_low & 0xff00) | ppu->bg_next_tile_lsb; + ppu->bg_shift_pattern_high = (ppu->bg_shift_pattern_high & 0xff00) | ppu->bg_next_tile_msb; uint8_t a = ppu->bg_next_tile_attrib; - ppu->bg_shift_attrib_low = (ppu->bg_shift_attrib_low & 0xff00) | ((a & 1) ? 0xff : 0x00); - ppu->bg_shift_attrib_high = (ppu->bg_shift_attrib_high & 0xff00) | ((a & 2) ? 0xff : 0x00); + ppu->bg_shift_attrib_low = (ppu->bg_shift_attrib_low & 0xff00) | ((a & 1) ? 0xff : 0x00); + ppu->bg_shift_attrib_high = (ppu->bg_shift_attrib_high & 0xff00) | ((a & 2) ? 0xff : 0x00); if((ppu->vram_addr & 0x001f) == 31) { ppu->vram_addr &= ~0x001f; @@ -394,7 +392,7 @@ static inline void ppu_tick(struct nes_state *state) { } } - if(UNLIKELY(scanline == 241) && dot == 1) { + if(scanline == 241 && dot == 1) { // static int32_t tas_frame = 0; // state->input[0] = tas_input[tas_frame++]; @@ -402,7 +400,7 @@ static inline void ppu_tick(struct nes_state *state) { state->cpu.nmi_pending = (ppu->reg_ctrl & 0x80); // NOTE(peter): Set NMI if enabled. } - if(UNLIKELY(scanline == 261) && dot == 1) { + if(scanline == 261 && dot == 1) { ppu->reg_status &= ~0x80; ppu->reg_status &= ~0x40; } @@ -412,7 +410,7 @@ static inline void ppu_tick(struct nes_state *state) { dot = 0; scanline++; - if(UNLIKELY(scanline == 261 && !ppu->even_frame && (ppu->reg_mask & 0x18))) { + if(scanline == 261 && !ppu->even_frame && (ppu->reg_mask & 0x18)) { dot = 1; } @@ -426,6 +424,9 @@ static inline void ppu_tick(struct nes_state *state) { ppu->dot = dot; ppu->scanline = scanline; - if(UNLIKELY(state->mapper_function.tick)) state->mapper_function.tick(state); + + if(state->mapper_function.tick) { + state->mapper_function.tick(state); + } } } diff --git a/ppu_registers.c b/ppu_registers.c index 0e67890..6217908 100644 --- a/ppu_registers.c +++ b/ppu_registers.c @@ -1,10 +1,10 @@ -__attribute__((hot)) +__attribute__((hot, flatten)) static inline void ppu_write(struct nes_state *state, uint32_t offset, uint8_t value) { struct ppu_state *ppu = &state->ppu; uint32_t reg = offset & 0x7; - if(LIKELY(reg == 4)) { + if(reg == 4) { ppu->oam[ppu->oam_addr++] = value; ppu->open_bus = value; return; @@ -54,7 +54,7 @@ static inline void ppu_write(struct nes_state *state, uint32_t offset, uint8_t v if(addr < 0x2000) { state->mapper_function.chr_write(state, addr, value); - } else if(LIKELY(addr < 0x3f00)) { + } else if(addr < 0x3f00) { state->mapper_function.ciram_write(state, addr, value); } else if(addr < 0x4000) { @@ -70,21 +70,21 @@ static inline void ppu_write(struct nes_state *state, uint32_t offset, uint8_t v } } -__attribute__((hot)) +__attribute__((hot, flatten)) static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) { struct ppu_state *ppu = &state->ppu; uint8_t result = ppu->open_bus; uint32_t reg = offset & 0x7; - if(LIKELY(reg == 2)) { + if(reg == 2) { result = ppu->reg_status; ppu->reg_status &= ~0x80; ppu->write_latch = 0; ppu->open_bus = result; return result; - } else if(LIKELY(reg == 4)) { + } else if(reg == 4) { result = ppu->oam[ppu->oam_addr]; ppu->open_bus = result; return result; @@ -92,11 +92,11 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) { } else if(reg == 7) { uint32_t addr = ppu->vram_addr; - if(LIKELY(addr < 0x2000)) { + if(addr < 0x2000) { result = ppu->vram_read_buffer; ppu->vram_read_buffer = state->mapper_function.chr_read(state, addr); - } else if(LIKELY(addr < 0x3f00)) { + } else if(addr < 0x3f00) { result = ppu->vram_read_buffer; ppu->vram_read_buffer = state->mapper_function.ciram_read(state, addr); -- cgit v1.2.3