diff options
| author | Peter Fors <peter.fors@mindkiller.com> | 2025-06-08 17:32:05 +0200 |
|---|---|---|
| committer | Peter Fors <peter.fors@mindkiller.com> | 2025-06-08 17:32:05 +0200 |
| commit | 412b2ef851516c1de8ba5006ddd284192cbcaf9b (patch) | |
| tree | e8b68d60125bf89e375ec9acc4e7833825abbfaa | |
| parent | 6dd73982c514445c4d2a4787c37666d0812a3dad (diff) | |
tests
| -rw-r--r-- | apu.c | 26 | ||||
| -rwxr-xr-x | build.sh | 9 | ||||
| -rw-r--r-- | cpu.c | 2 | ||||
| -rw-r--r-- | cpu_opcodes.c | 73 | ||||
| -rw-r--r-- | cpu_opcodes_ud.c | 18 | ||||
| -rw-r--r-- | mappers/mapper.c | 10 | ||||
| -rw-r--r-- | mappers/mapper_002_2.c | 7 | ||||
| -rw-r--r-- | mknes.c | 16 | ||||
| -rw-r--r-- | mknes.h | 3 | ||||
| -rw-r--r-- | ppu.c | 75 |
10 files changed, 116 insertions, 123 deletions
@@ -66,6 +66,7 @@ static void apu_write(struct nes_state *state, uint16_t addr, uint8_t val) { // APU tick static inline void apu_tick(struct nes_state *state) { + return; struct apu_state *apu = &state->apu; apu->frame_cycle++; @@ -103,31 +104,6 @@ static inline void apu_tick(struct nes_state *state) { } } - // if(apu->mode == 0) { - // if(apu->frame_cycle == 7457 || apu->frame_cycle == 14913 || apu->frame_cycle == 22371) { - // // Quarter frame - // } - // if(apu->frame_cycle == 14913 || apu->frame_cycle == 29829) { - // // Half frame - // } - // if(apu->frame_cycle == 29829 && !apu->irq_inhibit) { - // apu->irq_pending = 1; - // } - // if(apu->frame_cycle >= 29830) { - // apu->frame_cycle = 0; - // } - // } else { - // if(apu->frame_cycle == 7457 || apu->frame_cycle == 14913 || apu->frame_cycle == 22371 || apu->frame_cycle == 37281) { - // // Quarter frame - // } - // if(apu->frame_cycle == 14913 || apu->frame_cycle == 29829) { - // // Half frame - // } - // if(apu->frame_cycle >= 37282) { - // apu->frame_cycle = 0; - // } - // } - if(apu->dmc_dma_enabled && apu->dmc_bytes_remaining > 0) { apu->dmc_sample_timer++; if(apu->dmc_sample_timer >= dmc_rate_table[apu->dmc_freq_index]) { @@ -6,8 +6,8 @@ PROJECT_NAME="mknes" # Change this for each new project CC=gcc # Base configuration common to all builds -CFLAGS="-std=gnu11 -mtune=generic " -CFLAGS+="-mbmi " +CFLAGS="-std=gnu11 -mtune=generic -fdump-tree-alias " +CFLAGS+="-mbmi -fno-jump-tables -fno-argument-alias " CFLAGS+="-mfunction-return=keep -mindirect-branch=keep " CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fvisibility=hidden " CFLAGS+="-fno-stack-protector -fno-PIE -no-pie -fcf-protection=none -ffunction-sections -fdata-sections " @@ -56,7 +56,8 @@ case "$BUILD_TYPE" in CFLAGS+="-O2 -fprofile-generate -ftest-coverage " ;; "profile_release") - CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use " + # CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use " + CFLAGS+="-O2 -fprofile-use " ;; "debug") CFLAGS+="-g -O0 " @@ -68,7 +69,7 @@ case "$BUILD_TYPE" in ;; "clean") - rm -f *.gcda *.gcno *.gcov + rm -f *.gcda *.gcno *.gcov perf.data* *.alias exit 0 ;; *) @@ -5,6 +5,8 @@ // DO NOT ENABLE FOR NES!!!!! // #define ENABLE_DECIMAL_MODE +#define PAGE_CROSSED(base, addr) (((base ^ addr) > 0xff)) + static inline uint8_t pack_flags(struct cpu_state *cpu) { return (cpu->n << 7) | (cpu->v << 6) | (1 << 5) | (cpu->d << 3) | (cpu->i << 2) | (cpu->z << 1) | cpu->c; } diff --git a/cpu_opcodes.c b/cpu_opcodes.c index b401910..1a70cce 100644 --- a/cpu_opcodes.c +++ b/cpu_opcodes.c @@ -88,7 +88,7 @@ static void opcode_adc_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 (dummy if crossed) } @@ -117,7 +117,7 @@ static void opcode_adc_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -134,7 +134,7 @@ static void opcode_adc_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -205,7 +205,7 @@ static void opcode_and_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy } @@ -236,7 +236,7 @@ static void opcode_and_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -254,7 +254,7 @@ static void opcode_and_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -406,9 +406,9 @@ static void opcode_brk(struct nes_state *state) { // BRANCHES -static inline int page_crossed(uint16_t a, uint16_t b) { - return (a & 0xff00) != (b & 0xff00); -} +// static inline int page_crossed(uint16_t a, uint16_t b) { +// return (a & 0xff00) != (b & 0xff00); +// } static void opcode_bpl(struct nes_state *state) { struct cpu_state * restrict cpu = &state->cpu; @@ -418,7 +418,7 @@ static void opcode_bpl(struct nes_state *state) { if(!cpu->n) { memory_read_dummy(state, cpu->pc); // T2 - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); // T3 } cpu->pc = new_pc; // T3 or T4 @@ -433,7 +433,7 @@ static void opcode_bmi(struct nes_state *state) { if(cpu->n) { memory_read_dummy(state, cpu->pc); - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); } cpu->pc = new_pc; @@ -448,7 +448,7 @@ static void opcode_bvc(struct nes_state *state) { if(!cpu->v) { memory_read_dummy(state, cpu->pc); - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); } cpu->pc = new_pc; @@ -463,7 +463,7 @@ static void opcode_bvs(struct nes_state *state) { if(cpu->v) { memory_read_dummy(state, cpu->pc); - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); } cpu->pc = new_pc; @@ -478,7 +478,7 @@ static void opcode_bcc(struct nes_state *state) { if(!cpu->c) { memory_read_dummy(state, cpu->pc); - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); } cpu->pc = new_pc; @@ -493,7 +493,7 @@ static void opcode_bcs(struct nes_state *state) { if(cpu->c) { memory_read_dummy(state, cpu->pc); - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); } cpu->pc = new_pc; @@ -508,7 +508,7 @@ static void opcode_bne(struct nes_state *state) { if(!cpu->z) { memory_read_dummy(state, cpu->pc); - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); } cpu->pc = new_pc; @@ -523,7 +523,7 @@ static void opcode_beq(struct nes_state *state) { if(cpu->z) { memory_read_dummy(state, cpu->pc); - if(page_crossed(cpu->pc, new_pc)) { + if(PAGE_CROSSED(cpu->pc, new_pc)) { memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); } cpu->pc = new_pc; @@ -584,7 +584,9 @@ static void opcode_sei(struct nes_state *state) { static inline void cmp(struct cpu_state * restrict cpu, uint8_t value) { uint8_t result = cpu->a - value; cpu->c = (cpu->a >= value); - update_zn(cpu, result); + cpu->z = (result == 0); + cpu->n = (result & 0x80) != 0; + // update_zn(cpu, result); } static void opcode_cmp_indx(struct nes_state *state) { @@ -640,7 +642,7 @@ static void opcode_cmp_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy } @@ -667,7 +669,7 @@ static void opcode_cmp_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -683,7 +685,7 @@ static void opcode_cmp_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -885,7 +887,7 @@ static void opcode_eor_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy } @@ -914,7 +916,7 @@ static void opcode_eor_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -931,7 +933,7 @@ static void opcode_eor_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy } @@ -1024,6 +1026,7 @@ static void opcode_jmp_ind(struct nes_state *state) { uint8_t lo = memory_read(state, ptr); // T3 uint8_t hi; + if((ptr & 0x00ff) == 0x00ff) { hi = memory_read(state, ptr & 0xff00); // Emulate 6502 bug } else { @@ -1108,7 +1111,7 @@ static void opcode_lda_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy } @@ -1137,7 +1140,7 @@ static void opcode_lda_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 } @@ -1154,7 +1157,7 @@ static void opcode_lda_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 } @@ -1216,7 +1219,7 @@ static void opcode_ldx_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -1278,7 +1281,7 @@ static void opcode_ldy_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -1436,7 +1439,7 @@ static void opcode_ora_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -1465,7 +1468,7 @@ static void opcode_ora_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -1482,7 +1485,7 @@ static void opcode_ora_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -1828,7 +1831,7 @@ static void opcode_sbc_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -1855,7 +1858,7 @@ static void opcode_sbc_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -1871,7 +1874,7 @@ static void opcode_sbc_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } diff --git a/cpu_opcodes_ud.c b/cpu_opcodes_ud.c index 44c1c8a..e884ed4 100644 --- a/cpu_opcodes_ud.c +++ b/cpu_opcodes_ud.c @@ -43,7 +43,7 @@ static void opcode_nop_absx(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->x; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy read } @@ -131,7 +131,7 @@ static void opcode_lax_absy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -150,7 +150,7 @@ static void opcode_lax_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -293,7 +293,7 @@ static void opcode_dcp_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -422,7 +422,7 @@ static void opcode_isc_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -552,7 +552,7 @@ static void opcode_slo_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -697,7 +697,7 @@ static void opcode_rla_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -843,7 +843,7 @@ static void opcode_sre_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } @@ -985,7 +985,7 @@ static void opcode_rra_indy(struct nes_state *state) { uint16_t base = lo | (hi << 8); uint16_t addr = base + cpu->y; - if((base & 0xff00) != (addr & 0xff00)) { + if(PAGE_CROSSED(base, addr)) { memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); } diff --git a/mappers/mapper.c b/mappers/mapper.c index 0b5612b..84586e9 100644 --- a/mappers/mapper.c +++ b/mappers/mapper.c @@ -24,23 +24,23 @@ static void mapper_default_ciram_write(struct nes_state *state, uint32_t addr, u // static void mapper_default_chr_write(struct nes_state *state, uint32_t addr, uint8_t value) { } // static void mapper_default_tick(struct nes_state *state) { } -__attribute__((naked)) void mapper_default_prg_rom_write(struct nes_state *state, uint32_t addr, uint8_t value) { +__attribute__((naked)) static void mapper_default_prg_rom_write(struct nes_state *state, uint32_t addr, uint8_t value) { __asm__ __volatile__("ret"); } -__attribute__((naked)) uint8_t mapper_default_prg_ram_read(struct nes_state *state, uint32_t addr) { +__attribute__((naked)) static uint8_t mapper_default_prg_ram_read(struct nes_state *state, uint32_t addr) { __asm__ __volatile__("xor %%al, %%al\n\t" "ret" : : : "al"); } -__attribute__((naked)) void mapper_default_prg_ram_write(struct nes_state *state, uint32_t addr, uint8_t value) { +__attribute__((naked)) static void mapper_default_prg_ram_write(struct nes_state *state, uint32_t addr, uint8_t value) { __asm__ __volatile__("ret"); } -__attribute__((naked)) void mapper_default_chr_write(struct nes_state *state, uint32_t addr, uint8_t value) { +__attribute__((naked)) static void mapper_default_chr_write(struct nes_state *state, uint32_t addr, uint8_t value) { __asm__ __volatile__("ret"); } -__attribute__((naked)) void mapper_default_tick(struct nes_state *state) { +__attribute__((naked)) static void mapper_default_tick(struct nes_state *state) { __asm__ __volatile__("ret"); } diff --git a/mappers/mapper_002_2.c b/mappers/mapper_002_2.c index d3df77d..b65ce33 100644 --- a/mappers/mapper_002_2.c +++ b/mappers/mapper_002_2.c @@ -4,13 +4,10 @@ __attribute__((section(".mapper_002_2"))) static uint8_t mapper_002_2_prg_rom_read(struct nes_state *state, uint32_t addr) { struct mapper_002_2 *mapper = &state->mapper_data.m002_2; - if(addr < 0xc000) { - return mapper->prg_bank0[addr & 0x3fff]; - - } else { + if(addr >= 0xc000) { return mapper->prg_bank1[addr & 0x3fff]; } - return 0; + return mapper->prg_bank0[addr & 0x3fff]; } __attribute__((section(".mapper_002_2"))) @@ -7,6 +7,9 @@ #include <math.h> #include <string.h> #include <immintrin.h> +#include <sys/mman.h> +#include <sys/stat.h> /* For mode constants */ +#include <fcntl.h> /* For O_* constants */ #define BUFFER_WIDTH 256 #define BUFFER_HEIGHT 240 @@ -67,7 +70,7 @@ struct main_state { uint8_t toggle_crt_emulation; }; -struct main_state state; +struct main_state state __attribute__((aligned(64))); uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096))); @@ -116,7 +119,16 @@ int main(int argc, char **argv) { init_opcode_ud_lut(); // protect_opcode_lut(); +#if DEBUG_SHARED_MEM + // int fd = shm_open("/mknes_dbg", O_CREAT | O_RDWR, 0666); + // ftruncate(fd, sizeof(struct nes_state)); + // struct nes_state *nstate = mmap(0, sizeof(struct nes_state), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + // close(fd); +#else + // struct nes_state *nstate = mmap(0, sizeof(struct nes_state), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); struct nes_state *nstate = aligned_alloc(4096, (sizeof(struct nes_state) + 4095) & ~4095); + memset(nstate, 0, sizeof(struct nes_state)); +#endif ppu_reset(nstate); // ines2_load(nstate, "data/0000/10-Yard Fight (USA, Europe).nes"); @@ -142,7 +154,7 @@ int main(int argc, char **argv) { // ines2_load(nstate, "data/0000/raster_demos/RasterTest3e.NES"); // ines2_load(nstate, "data/0000/NEStress.NES"); // ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).zip"); - ines2_load(nstate, "data/0042/Super Mario Bros. + Duck Hunt (USA).zip"); + // ines2_load(nstate, "data/0042/Super Mario Bros. + Duck Hunt (USA).zip"); // ines2_load(nstate, "data/0000/Xevious - The Avenger (USA).zip"); // ines2_load(nstate, "data/tv.nes"); @@ -48,7 +48,7 @@ struct ppu_state { uint8_t sprite_count; uint8_t palette[32]; -// NOTE(peter): one byte left on cacheline + uint8_t frame_ready; // NOTE(peter): CACHELINE 2 start here! uint8_t sprite_indexes[8]; @@ -63,7 +63,6 @@ struct ppu_state { uint8_t input_latch[2]; // Latched inputs after strobe uint8_t input_bit[2]; // Current bit position being shifted out uint8_t input_strobe; // Control bit (0 or 1) - uint8_t frame_ready; } __attribute__((packed, aligned(64))); struct apu_state { @@ -60,20 +60,23 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) { __attribute__((hot)) static inline void ppu_fetch_sprite_patterns(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; - uint32_t addr; - uint32_t bank; - uint8_t lsb; - uint8_t msb; uint8_t * restrict s = ppu->secondary_oam; uint8_t height = (ppu->reg_ctrl & 0x20) ? 16 : 8; + uint8_t ctrl = ppu->reg_ctrl; + uint8_t scanline = ppu->scanline; + uint32_t sprite_pattern_table_base = (ctrl & 0x08) << 9; for(uint8_t i = 0; i < ppu->sprite_count; i++) { - uint8_t y = s[0], tile = s[1], attr = s[2], x = s[3]; - uint8_t row = ppu->scanline - y; + uint8_t y = s[0]; + uint8_t tile = s[1]; + uint8_t attr = s[2]; + uint8_t x = s[3]; + uint8_t row = scanline - y; row = (attr & 0x80) ? height - 1 - row : row; + uint32_t bank, addr; if(height == 16) { bank = (tile & 1) << 12; tile &= 0xfe; @@ -81,29 +84,29 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state) { tile++; row -= 8; } - } else { - bank = (ppu->reg_ctrl & 0x08) << 9; - } - addr = bank + tile * 16 + row; + addr = bank + tile * 16 + row; - if(attr & 0x40) { - lsb = ppu_bitreverse_lut[state->mapper_function.chr_read(state, addr)]; - msb = ppu_bitreverse_lut[state->mapper_function.chr_read(state, addr + 8)]; } else { - lsb = state->mapper_function.chr_read(state, addr); - msb = state->mapper_function.chr_read(state, addr + 8); + addr = sprite_pattern_table_base + tile * 16 + row; } + uint8_t val_lo = state->mapper_function.chr_read(state, addr); + uint8_t val_hi = state->mapper_function.chr_read(state, addr + 8); + + uint8_t rev = -(!!(attr & 0x40)); + uint8_t lsb = (rev & ppu_bitreverse_lut[val_lo]) | (~rev & val_lo); + uint8_t msb = (rev & ppu_bitreverse_lut[val_hi]) | (~rev & val_hi); + ppu->sprite_shift_lo[i] = lsb; ppu->sprite_shift_hi[i] = msb; - ppu->sprite_positions[i] = x; ppu->sprite_priorities[i] = attr & 0x20; + s += 4; } } -__attribute__((always_inline, hot, flatten)) +__attribute__((always_inline, hot)) static inline void ppu_render_pixel(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; @@ -112,8 +115,6 @@ static inline void ppu_render_pixel(struct nes_state *state) { uint16_t bit = 0x8000 >> ppu->fine_x; - uint8_t bg_pixel = 0; - uint8_t bg_palette = 0; uint8_t sp_pixel = 0; uint8_t sp_palette = 0; uint8_t sp_prio = 0; @@ -133,23 +134,25 @@ static inline void ppu_render_pixel(struct nes_state *state) { uint8_t a0 = !!(ppu->bg_shift_attrib_low & bit); uint8_t a1 = !!(ppu->bg_shift_attrib_high & bit); - bg_pixel = ((p1 << 1) | p0) & bg_mask; - bg_palette = ((a1 << 1) | a0) & bg_mask; + uint8_t bg_pixel = ((p1 << 1) | p0) & bg_mask; + uint8_t bg_palette = ((a1 << 1) | a0) & bg_mask; // Sprite - for(uint8_t i = 0; i < ppu->sprite_count; i++) { - if(ppu->sprite_positions[i]) continue; + if(sp_mask) { + for(uint8_t i = 0; i < ppu->sprite_count; i++) { + if(ppu->sprite_positions[i]) continue; - uint8_t lo = ppu->sprite_shift_lo[i]; - uint8_t hi = ppu->sprite_shift_hi[i]; - sp_pixel = (((hi & 0x80) >> 6) | ((lo & 0x80) >> 7)) & sp_mask; + uint8_t lo = ppu->sprite_shift_lo[i]; + uint8_t hi = ppu->sprite_shift_hi[i]; + sp_pixel = (((hi & 0x80) >> 6) | ((lo & 0x80) >> 7)); - if(!sp_pixel) continue; + if(!sp_pixel) continue; - sp_palette = ppu->secondary_oam[i * 4 + 2] & 3; - sp_prio = ppu->sprite_priorities[i]; - sp_zero = (ppu->sprite_indexes[i] == 0); - break; + sp_palette = ppu->secondary_oam[i * 4 + 2] & 3; + sp_prio = ppu->sprite_priorities[i]; + sp_zero = (ppu->sprite_indexes[i] == 0); + break; + } } // Final pixel composition @@ -170,8 +173,8 @@ static inline void ppu_render_pixel(struct nes_state *state) { state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x). } -__attribute__((hot, flatten)) -__attribute__((optimize("no-jump-tables"))) + +__attribute__((hot, optimize("no-jump-tables"))) static inline void ppu_tick(struct nes_state *state) { struct ppu_state *restrict ppu = &state->ppu; @@ -208,10 +211,10 @@ static inline void ppu_tick(struct nes_state *state) { case 1 ... 255: // fallthrough: this is 1->256 ppu_render_pixel(state); - __attribute__((fallthrough)); + __attribute__((fallthrough)); // fallthrough: the code below has to run 1->256 + 321->336 + + case 321 ... 336: { // Rendering and tile fetch; - case 321 ... 336: { // fallthrough: the code below has to run 1->256 + 321->336 - // Rendering and tile fetch goes here if(ppu->reg_mask & 0x10) { for(uint32_t i = 0; i < ppu->sprite_count; i++) { if(ppu->sprite_positions[i] > 0) { |
