summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apu.c26
-rwxr-xr-xbuild.sh9
-rw-r--r--cpu.c2
-rw-r--r--cpu_opcodes.c73
-rw-r--r--cpu_opcodes_ud.c18
-rw-r--r--mappers/mapper.c10
-rw-r--r--mappers/mapper_002_2.c7
-rw-r--r--mknes.c16
-rw-r--r--mknes.h3
-rw-r--r--ppu.c75
10 files changed, 116 insertions, 123 deletions
diff --git a/apu.c b/apu.c
index 0c16123..46a484d 100644
--- a/apu.c
+++ b/apu.c
@@ -66,6 +66,7 @@ static void apu_write(struct nes_state *state, uint16_t addr, uint8_t val) {
// APU tick
static inline void apu_tick(struct nes_state *state) {
+ return;
struct apu_state *apu = &state->apu;
apu->frame_cycle++;
@@ -103,31 +104,6 @@ static inline void apu_tick(struct nes_state *state) {
}
}
- // if(apu->mode == 0) {
- // if(apu->frame_cycle == 7457 || apu->frame_cycle == 14913 || apu->frame_cycle == 22371) {
- // // Quarter frame
- // }
- // if(apu->frame_cycle == 14913 || apu->frame_cycle == 29829) {
- // // Half frame
- // }
- // if(apu->frame_cycle == 29829 && !apu->irq_inhibit) {
- // apu->irq_pending = 1;
- // }
- // if(apu->frame_cycle >= 29830) {
- // apu->frame_cycle = 0;
- // }
- // } else {
- // if(apu->frame_cycle == 7457 || apu->frame_cycle == 14913 || apu->frame_cycle == 22371 || apu->frame_cycle == 37281) {
- // // Quarter frame
- // }
- // if(apu->frame_cycle == 14913 || apu->frame_cycle == 29829) {
- // // Half frame
- // }
- // if(apu->frame_cycle >= 37282) {
- // apu->frame_cycle = 0;
- // }
- // }
-
if(apu->dmc_dma_enabled && apu->dmc_bytes_remaining > 0) {
apu->dmc_sample_timer++;
if(apu->dmc_sample_timer >= dmc_rate_table[apu->dmc_freq_index]) {
diff --git a/build.sh b/build.sh
index 1fbf82c..5ba1fb6 100755
--- a/build.sh
+++ b/build.sh
@@ -6,8 +6,8 @@ PROJECT_NAME="mknes" # Change this for each new project
CC=gcc
# Base configuration common to all builds
-CFLAGS="-std=gnu11 -mtune=generic "
-CFLAGS+="-mbmi "
+CFLAGS="-std=gnu11 -mtune=generic -fdump-tree-alias "
+CFLAGS+="-mbmi -fno-jump-tables -fno-argument-alias "
CFLAGS+="-mfunction-return=keep -mindirect-branch=keep "
CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fvisibility=hidden "
CFLAGS+="-fno-stack-protector -fno-PIE -no-pie -fcf-protection=none -ffunction-sections -fdata-sections "
@@ -56,7 +56,8 @@ case "$BUILD_TYPE" in
CFLAGS+="-O2 -fprofile-generate -ftest-coverage "
;;
"profile_release")
- CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use "
+ # CFLAGS+="-s -Wl,--strip-all -O2 -fprofile-use "
+ CFLAGS+="-O2 -fprofile-use "
;;
"debug")
CFLAGS+="-g -O0 "
@@ -68,7 +69,7 @@ case "$BUILD_TYPE" in
;;
"clean")
- rm -f *.gcda *.gcno *.gcov
+ rm -f *.gcda *.gcno *.gcov perf.data* *.alias
exit 0
;;
*)
diff --git a/cpu.c b/cpu.c
index f6a070e..e3e83e6 100644
--- a/cpu.c
+++ b/cpu.c
@@ -5,6 +5,8 @@
// DO NOT ENABLE FOR NES!!!!!
// #define ENABLE_DECIMAL_MODE
+#define PAGE_CROSSED(base, addr) (((base ^ addr) > 0xff))
+
static inline uint8_t pack_flags(struct cpu_state *cpu) {
return (cpu->n << 7) | (cpu->v << 6) | (1 << 5) | (cpu->d << 3) | (cpu->i << 2) | (cpu->z << 1) | cpu->c;
}
diff --git a/cpu_opcodes.c b/cpu_opcodes.c
index b401910..1a70cce 100644
--- a/cpu_opcodes.c
+++ b/cpu_opcodes.c
@@ -88,7 +88,7 @@ static void opcode_adc_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 (dummy if crossed)
}
@@ -117,7 +117,7 @@ static void opcode_adc_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -134,7 +134,7 @@ static void opcode_adc_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -205,7 +205,7 @@ static void opcode_and_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy
}
@@ -236,7 +236,7 @@ static void opcode_and_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -254,7 +254,7 @@ static void opcode_and_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -406,9 +406,9 @@ static void opcode_brk(struct nes_state *state) {
// BRANCHES
-static inline int page_crossed(uint16_t a, uint16_t b) {
- return (a & 0xff00) != (b & 0xff00);
-}
+// static inline int page_crossed(uint16_t a, uint16_t b) {
+// return (a & 0xff00) != (b & 0xff00);
+// }
static void opcode_bpl(struct nes_state *state) {
struct cpu_state * restrict cpu = &state->cpu;
@@ -418,7 +418,7 @@ static void opcode_bpl(struct nes_state *state) {
if(!cpu->n) {
memory_read_dummy(state, cpu->pc); // T2
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff)); // T3
}
cpu->pc = new_pc; // T3 or T4
@@ -433,7 +433,7 @@ static void opcode_bmi(struct nes_state *state) {
if(cpu->n) {
memory_read_dummy(state, cpu->pc);
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff));
}
cpu->pc = new_pc;
@@ -448,7 +448,7 @@ static void opcode_bvc(struct nes_state *state) {
if(!cpu->v) {
memory_read_dummy(state, cpu->pc);
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff));
}
cpu->pc = new_pc;
@@ -463,7 +463,7 @@ static void opcode_bvs(struct nes_state *state) {
if(cpu->v) {
memory_read_dummy(state, cpu->pc);
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff));
}
cpu->pc = new_pc;
@@ -478,7 +478,7 @@ static void opcode_bcc(struct nes_state *state) {
if(!cpu->c) {
memory_read_dummy(state, cpu->pc);
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff));
}
cpu->pc = new_pc;
@@ -493,7 +493,7 @@ static void opcode_bcs(struct nes_state *state) {
if(cpu->c) {
memory_read_dummy(state, cpu->pc);
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff));
}
cpu->pc = new_pc;
@@ -508,7 +508,7 @@ static void opcode_bne(struct nes_state *state) {
if(!cpu->z) {
memory_read_dummy(state, cpu->pc);
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff));
}
cpu->pc = new_pc;
@@ -523,7 +523,7 @@ static void opcode_beq(struct nes_state *state) {
if(cpu->z) {
memory_read_dummy(state, cpu->pc);
- if(page_crossed(cpu->pc, new_pc)) {
+ if(PAGE_CROSSED(cpu->pc, new_pc)) {
memory_read_dummy(state, (cpu->pc & 0xff00) | (new_pc & 0x00ff));
}
cpu->pc = new_pc;
@@ -584,7 +584,9 @@ static void opcode_sei(struct nes_state *state) {
static inline void cmp(struct cpu_state * restrict cpu, uint8_t value) {
uint8_t result = cpu->a - value;
cpu->c = (cpu->a >= value);
- update_zn(cpu, result);
+ cpu->z = (result == 0);
+ cpu->n = (result & 0x80) != 0;
+ // update_zn(cpu, result);
}
static void opcode_cmp_indx(struct nes_state *state) {
@@ -640,7 +642,7 @@ static void opcode_cmp_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy
}
@@ -667,7 +669,7 @@ static void opcode_cmp_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -683,7 +685,7 @@ static void opcode_cmp_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -885,7 +887,7 @@ static void opcode_eor_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy
}
@@ -914,7 +916,7 @@ static void opcode_eor_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -931,7 +933,7 @@ static void opcode_eor_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy
}
@@ -1024,6 +1026,7 @@ static void opcode_jmp_ind(struct nes_state *state) {
uint8_t lo = memory_read(state, ptr); // T3
uint8_t hi;
+
if((ptr & 0x00ff) == 0x00ff) {
hi = memory_read(state, ptr & 0xff00); // Emulate 6502 bug
} else {
@@ -1108,7 +1111,7 @@ static void opcode_lda_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T4 dummy
}
@@ -1137,7 +1140,7 @@ static void opcode_lda_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3
}
@@ -1154,7 +1157,7 @@ static void opcode_lda_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3
}
@@ -1216,7 +1219,7 @@ static void opcode_ldx_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -1278,7 +1281,7 @@ static void opcode_ldy_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -1436,7 +1439,7 @@ static void opcode_ora_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -1465,7 +1468,7 @@ static void opcode_ora_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -1482,7 +1485,7 @@ static void opcode_ora_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -1828,7 +1831,7 @@ static void opcode_sbc_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -1855,7 +1858,7 @@ static void opcode_sbc_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -1871,7 +1874,7 @@ static void opcode_sbc_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
diff --git a/cpu_opcodes_ud.c b/cpu_opcodes_ud.c
index 44c1c8a..e884ed4 100644
--- a/cpu_opcodes_ud.c
+++ b/cpu_opcodes_ud.c
@@ -43,7 +43,7 @@ static void opcode_nop_absx(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->x;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff)); // T3 dummy read
}
@@ -131,7 +131,7 @@ static void opcode_lax_absy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -150,7 +150,7 @@ static void opcode_lax_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -293,7 +293,7 @@ static void opcode_dcp_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -422,7 +422,7 @@ static void opcode_isc_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -552,7 +552,7 @@ static void opcode_slo_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -697,7 +697,7 @@ static void opcode_rla_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -843,7 +843,7 @@ static void opcode_sre_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
@@ -985,7 +985,7 @@ static void opcode_rra_indy(struct nes_state *state) {
uint16_t base = lo | (hi << 8);
uint16_t addr = base + cpu->y;
- if((base & 0xff00) != (addr & 0xff00)) {
+ if(PAGE_CROSSED(base, addr)) {
memory_read_dummy(state, (base & 0xff00) | (addr & 0x00ff));
}
diff --git a/mappers/mapper.c b/mappers/mapper.c
index 0b5612b..84586e9 100644
--- a/mappers/mapper.c
+++ b/mappers/mapper.c
@@ -24,23 +24,23 @@ static void mapper_default_ciram_write(struct nes_state *state, uint32_t addr, u
// static void mapper_default_chr_write(struct nes_state *state, uint32_t addr, uint8_t value) { }
// static void mapper_default_tick(struct nes_state *state) { }
-__attribute__((naked)) void mapper_default_prg_rom_write(struct nes_state *state, uint32_t addr, uint8_t value) {
+__attribute__((naked)) static void mapper_default_prg_rom_write(struct nes_state *state, uint32_t addr, uint8_t value) {
__asm__ __volatile__("ret");
}
-__attribute__((naked)) uint8_t mapper_default_prg_ram_read(struct nes_state *state, uint32_t addr) {
+__attribute__((naked)) static uint8_t mapper_default_prg_ram_read(struct nes_state *state, uint32_t addr) {
__asm__ __volatile__("xor %%al, %%al\n\t" "ret" : : : "al");
}
-__attribute__((naked)) void mapper_default_prg_ram_write(struct nes_state *state, uint32_t addr, uint8_t value) {
+__attribute__((naked)) static void mapper_default_prg_ram_write(struct nes_state *state, uint32_t addr, uint8_t value) {
__asm__ __volatile__("ret");
}
-__attribute__((naked)) void mapper_default_chr_write(struct nes_state *state, uint32_t addr, uint8_t value) {
+__attribute__((naked)) static void mapper_default_chr_write(struct nes_state *state, uint32_t addr, uint8_t value) {
__asm__ __volatile__("ret");
}
-__attribute__((naked)) void mapper_default_tick(struct nes_state *state) {
+__attribute__((naked)) static void mapper_default_tick(struct nes_state *state) {
__asm__ __volatile__("ret");
}
diff --git a/mappers/mapper_002_2.c b/mappers/mapper_002_2.c
index d3df77d..b65ce33 100644
--- a/mappers/mapper_002_2.c
+++ b/mappers/mapper_002_2.c
@@ -4,13 +4,10 @@ __attribute__((section(".mapper_002_2")))
static uint8_t mapper_002_2_prg_rom_read(struct nes_state *state, uint32_t addr) {
struct mapper_002_2 *mapper = &state->mapper_data.m002_2;
- if(addr < 0xc000) {
- return mapper->prg_bank0[addr & 0x3fff];
-
- } else {
+ if(addr >= 0xc000) {
return mapper->prg_bank1[addr & 0x3fff];
}
- return 0;
+ return mapper->prg_bank0[addr & 0x3fff];
}
__attribute__((section(".mapper_002_2")))
diff --git a/mknes.c b/mknes.c
index 068bd83..154ca95 100644
--- a/mknes.c
+++ b/mknes.c
@@ -7,6 +7,9 @@
#include <math.h>
#include <string.h>
#include <immintrin.h>
+#include <sys/mman.h>
+#include <sys/stat.h> /* For mode constants */
+#include <fcntl.h> /* For O_* constants */
#define BUFFER_WIDTH 256
#define BUFFER_HEIGHT 240
@@ -67,7 +70,7 @@ struct main_state {
uint8_t toggle_crt_emulation;
};
-struct main_state state;
+struct main_state state __attribute__((aligned(64)));
uint32_t buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
uint32_t display_buffer[BUFFER_WIDTH * BUFFER_HEIGHT] __attribute__((section(".bss"), aligned(4096)));
@@ -116,7 +119,16 @@ int main(int argc, char **argv) {
init_opcode_ud_lut();
// protect_opcode_lut();
+#if DEBUG_SHARED_MEM
+ // int fd = shm_open("/mknes_dbg", O_CREAT | O_RDWR, 0666);
+ // ftruncate(fd, sizeof(struct nes_state));
+ // struct nes_state *nstate = mmap(0, sizeof(struct nes_state), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ // close(fd);
+#else
+ // struct nes_state *nstate = mmap(0, sizeof(struct nes_state), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
struct nes_state *nstate = aligned_alloc(4096, (sizeof(struct nes_state) + 4095) & ~4095);
+ memset(nstate, 0, sizeof(struct nes_state));
+#endif
ppu_reset(nstate);
// ines2_load(nstate, "data/0000/10-Yard Fight (USA, Europe).nes");
@@ -142,7 +154,7 @@ int main(int argc, char **argv) {
// ines2_load(nstate, "data/0000/raster_demos/RasterTest3e.NES");
// ines2_load(nstate, "data/0000/NEStress.NES");
// ines2_load(nstate, "data/0000/Super Mario Bros. (World) (HVC-SM).zip");
- ines2_load(nstate, "data/0042/Super Mario Bros. + Duck Hunt (USA).zip");
+ // ines2_load(nstate, "data/0042/Super Mario Bros. + Duck Hunt (USA).zip");
// ines2_load(nstate, "data/0000/Xevious - The Avenger (USA).zip");
// ines2_load(nstate, "data/tv.nes");
diff --git a/mknes.h b/mknes.h
index 0ae37f4..d3e3840 100644
--- a/mknes.h
+++ b/mknes.h
@@ -48,7 +48,7 @@ struct ppu_state {
uint8_t sprite_count;
uint8_t palette[32];
-// NOTE(peter): one byte left on cacheline
+ uint8_t frame_ready;
// NOTE(peter): CACHELINE 2 start here!
uint8_t sprite_indexes[8];
@@ -63,7 +63,6 @@ struct ppu_state {
uint8_t input_latch[2]; // Latched inputs after strobe
uint8_t input_bit[2]; // Current bit position being shifted out
uint8_t input_strobe; // Control bit (0 or 1)
- uint8_t frame_ready;
} __attribute__((packed, aligned(64)));
struct apu_state {
diff --git a/ppu.c b/ppu.c
index e0fce44..3b6200e 100644
--- a/ppu.c
+++ b/ppu.c
@@ -60,20 +60,23 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) {
__attribute__((hot))
static inline void ppu_fetch_sprite_patterns(struct nes_state *state) {
struct ppu_state *restrict ppu = &state->ppu;
- uint32_t addr;
- uint32_t bank;
- uint8_t lsb;
- uint8_t msb;
uint8_t * restrict s = ppu->secondary_oam;
uint8_t height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
+ uint8_t ctrl = ppu->reg_ctrl;
+ uint8_t scanline = ppu->scanline;
+ uint32_t sprite_pattern_table_base = (ctrl & 0x08) << 9;
for(uint8_t i = 0; i < ppu->sprite_count; i++) {
- uint8_t y = s[0], tile = s[1], attr = s[2], x = s[3];
- uint8_t row = ppu->scanline - y;
+ uint8_t y = s[0];
+ uint8_t tile = s[1];
+ uint8_t attr = s[2];
+ uint8_t x = s[3];
+ uint8_t row = scanline - y;
row = (attr & 0x80) ? height - 1 - row : row;
+ uint32_t bank, addr;
if(height == 16) {
bank = (tile & 1) << 12;
tile &= 0xfe;
@@ -81,29 +84,29 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state) {
tile++;
row -= 8;
}
- } else {
- bank = (ppu->reg_ctrl & 0x08) << 9;
- }
- addr = bank + tile * 16 + row;
+ addr = bank + tile * 16 + row;
- if(attr & 0x40) {
- lsb = ppu_bitreverse_lut[state->mapper_function.chr_read(state, addr)];
- msb = ppu_bitreverse_lut[state->mapper_function.chr_read(state, addr + 8)];
} else {
- lsb = state->mapper_function.chr_read(state, addr);
- msb = state->mapper_function.chr_read(state, addr + 8);
+ addr = sprite_pattern_table_base + tile * 16 + row;
}
+ uint8_t val_lo = state->mapper_function.chr_read(state, addr);
+ uint8_t val_hi = state->mapper_function.chr_read(state, addr + 8);
+
+ uint8_t rev = -(!!(attr & 0x40));
+ uint8_t lsb = (rev & ppu_bitreverse_lut[val_lo]) | (~rev & val_lo);
+ uint8_t msb = (rev & ppu_bitreverse_lut[val_hi]) | (~rev & val_hi);
+
ppu->sprite_shift_lo[i] = lsb;
ppu->sprite_shift_hi[i] = msb;
-
ppu->sprite_positions[i] = x;
ppu->sprite_priorities[i] = attr & 0x20;
+
s += 4;
}
}
-__attribute__((always_inline, hot, flatten))
+__attribute__((always_inline, hot))
static inline void ppu_render_pixel(struct nes_state *state) {
struct ppu_state *restrict ppu = &state->ppu;
@@ -112,8 +115,6 @@ static inline void ppu_render_pixel(struct nes_state *state) {
uint16_t bit = 0x8000 >> ppu->fine_x;
- uint8_t bg_pixel = 0;
- uint8_t bg_palette = 0;
uint8_t sp_pixel = 0;
uint8_t sp_palette = 0;
uint8_t sp_prio = 0;
@@ -133,23 +134,25 @@ static inline void ppu_render_pixel(struct nes_state *state) {
uint8_t a0 = !!(ppu->bg_shift_attrib_low & bit);
uint8_t a1 = !!(ppu->bg_shift_attrib_high & bit);
- bg_pixel = ((p1 << 1) | p0) & bg_mask;
- bg_palette = ((a1 << 1) | a0) & bg_mask;
+ uint8_t bg_pixel = ((p1 << 1) | p0) & bg_mask;
+ uint8_t bg_palette = ((a1 << 1) | a0) & bg_mask;
// Sprite
- for(uint8_t i = 0; i < ppu->sprite_count; i++) {
- if(ppu->sprite_positions[i]) continue;
+ if(sp_mask) {
+ for(uint8_t i = 0; i < ppu->sprite_count; i++) {
+ if(ppu->sprite_positions[i]) continue;
- uint8_t lo = ppu->sprite_shift_lo[i];
- uint8_t hi = ppu->sprite_shift_hi[i];
- sp_pixel = (((hi & 0x80) >> 6) | ((lo & 0x80) >> 7)) & sp_mask;
+ uint8_t lo = ppu->sprite_shift_lo[i];
+ uint8_t hi = ppu->sprite_shift_hi[i];
+ sp_pixel = (((hi & 0x80) >> 6) | ((lo & 0x80) >> 7));
- if(!sp_pixel) continue;
+ if(!sp_pixel) continue;
- sp_palette = ppu->secondary_oam[i * 4 + 2] & 3;
- sp_prio = ppu->sprite_priorities[i];
- sp_zero = (ppu->sprite_indexes[i] == 0);
- break;
+ sp_palette = ppu->secondary_oam[i * 4 + 2] & 3;
+ sp_prio = ppu->sprite_priorities[i];
+ sp_zero = (ppu->sprite_indexes[i] == 0);
+ break;
+ }
}
// Final pixel composition
@@ -170,8 +173,8 @@ static inline void ppu_render_pixel(struct nes_state *state) {
state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x).
}
-__attribute__((hot, flatten))
-__attribute__((optimize("no-jump-tables")))
+
+__attribute__((hot, optimize("no-jump-tables")))
static inline void ppu_tick(struct nes_state *state) {
struct ppu_state *restrict ppu = &state->ppu;
@@ -208,10 +211,10 @@ static inline void ppu_tick(struct nes_state *state) {
case 1 ... 255: // fallthrough: this is 1->256
ppu_render_pixel(state);
- __attribute__((fallthrough));
+ __attribute__((fallthrough)); // fallthrough: the code below has to run 1->256 + 321->336
+
+ case 321 ... 336: { // Rendering and tile fetch;
- case 321 ... 336: { // fallthrough: the code below has to run 1->256 + 321->336
- // Rendering and tile fetch goes here
if(ppu->reg_mask & 0x10) {
for(uint32_t i = 0; i < ppu->sprite_count; i++) {
if(ppu->sprite_positions[i] > 0) {