summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x build.sh 9
-rw-r--r-- mknes.c 112
-rw-r--r-- mknes.h 14
-rw-r--r-- ppu.c 180
4 files changed, 193 insertions, 122 deletions
diff --git a/build.sh b/build.sh
index 5487439..845da73 100755
--- a/build.sh
+++ b/build.sh
@@ -8,7 +8,7 @@ CFLAGS="-std=gnu++23 "
CFLAGS+="-mavx2 -mbmi2 -mtune=native -mfunction-return=keep -mindirect-branch=keep "
CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fwhole-program "
CFLAGS+="-fno-stack-protector -fno-PIE -no-pie -fno-strict-aliasing -ffunction-sections -fdata-sections "
-CFLAGS+="-fno-exceptions -fno-rtti -fno-use-cxa-atexit -fno-non-call-exceptions "
+CFLAGS+="-fno-exceptions -fno-rtti -fno-use-cxa-atexit -fno-non-call-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables "
CFLAGS+="-Wall -Wextra "
CFLAGS+="-Wno-unused-parameter -Wno-sign-compare -Wno-trigraphs -Wno-maybe-uninitialized "
CFLAGS+="-Wno-unused-variable -Wno-unused-const-variable -Wno-unused-function -Wno-write-strings -Wno-missing-field-initializers "
@@ -51,12 +51,7 @@ case "$BUILD_TYPE" in
;;
esac
-# Rebuild assets every time we compile
-#rm -rf data
-#mkdir -p data/p{1,2,3,4,5,6,7,8}
-#env -C org_assets ../../bin/mks_time ./process.sh
-
-# Make sure the <basecode> shaders are up to date if you are experimenting with them.
+# Make sure the shaders are up to date
shader2h 330 vertex_shader vertex_shader.glsl
shader2h 330 fragment_shader shader.h fragment_shader.glsl
diff --git a/mknes.c b/mknes.c
index 5fbea56..25e920f 100644
--- a/mknes.c
+++ b/mknes.c
@@ -100,45 +100,88 @@ struct nes_state nstate;
static uint32_t frames;
+
+#define PRG_ROM_SIZE (512 * 1024)
+#define CHR_ROM_SIZE (512 * 1024)
+#define PIXELS_SIZE (256 * 240)
+#define RAM_SIZE 0x800
+#define SRAM_SIZE 0x2000
+#define CIRAM_SIZE 0x1000
+
+// Allocates a zero-initialised nes_state together with one 4096-byte aligned,
+// zero-filled backing block that is carved, in order, into prg_rom, chr_rom,
+// pixels, ram, sram and ciram.
+//
+// Returns the new state, or 0 if either allocation fails (in which case nothing
+// is leaked). The caller owns both allocations; the backing block's base
+// address is state->prg_rom, since that region sits at offset 0.
+static struct nes_state *allocate_nes_state(void) {
+	struct nes_state *state = (struct nes_state*)calloc(1, sizeof(struct nes_state));
+	if(!state) return 0;
+
+	// Round the combined size up to a whole number of 4096-byte pages so the
+	// size handed to aligned_alloc is a multiple of the requested alignment.
+	size_t total_size = (PRG_ROM_SIZE + CHR_ROM_SIZE + PIXELS_SIZE + RAM_SIZE + SRAM_SIZE + CIRAM_SIZE + 4095) & ~0xfff;
+
+	uint8_t *m = (uint8_t*)aligned_alloc(4096, total_size);
+	if(!m) {
+		// Without this check the memset below would write through a null pointer,
+		// and the already allocated state would be leaked.
+		free(state);
+		return 0;
+	}
+	memset(m, 0, total_size);
+
+	// Hand out consecutive slices of the backing block.
+	size_t offset = 0;
+
+	state->prg_rom = m + offset;
+	offset += PRG_ROM_SIZE;
+
+	state->chr_rom = m + offset;
+	offset += CHR_ROM_SIZE;
+
+	state->pixels = m + offset;
+	offset += PIXELS_SIZE;
+
+	state->ram = m + offset;
+	offset += RAM_SIZE;
+
+	state->sram = m + offset;
+	offset += SRAM_SIZE;
+
+	state->ciram = m + offset;
+	offset += CIRAM_SIZE;
+
+	return state;
+}
+
+
int main(int argc, char **argv) {
#ifdef _WIN32
timeBeginPeriod(1);
#endif
+ struct nes_state *nstate = allocate_nes_state();
+
state.toggle_crt_emulation = 1;
setbuf(stdout, 0);
init_opcode_lut();
init_opcode_ud_lut();
// protect_opcode_lut();
- ppu_reset(&nstate);
- // ines2_load(&nstate, "data/nrom/10-Yard Fight (USA, Europe).nes");
- // ines2_load(&nstate, "data/nrom/Balloon Fight (USA).nes");
- // ines2_load(&nstate, "data/nrom/Excitebike (Japan, USA).nes");
- // ines2_load(&nstate, "data/nrom/Ice Climber (USA, Europe, Korea).nes");
- // ines2_load(&nstate, "data/nrom/Kung Fu (Japan, USA).nes");
- ines2_load(&nstate, "data/nrom/Super Mario Bros. (World) (HVC-SM).nes");
- // ines2_load(&nstate, "data/nrom/Urban Champion (World).nes");
- // ines2_load(&nstate, "data/nrom/Wrecking Crew (World).nes");
- // ines2_load(&nstate, "data/nrom/scanline.nes");
- // ines2_load(&nstate, "data/nrom/Sayoonara!.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterChromaLuma.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest1.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest2.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest3.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest3a.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest3b.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest3c.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest3d.NES");
- // ines2_load(&nstate, "data/nrom/raster_demos/RasterTest3e.NES");
- // ines2_load(&nstate, "data/nrom/NEStress.NES");
- // ines2_load(&nstate, "data/tv.nes");
- // ines2_load(&nstate, "data/Super Mario Bros. (World) (HVC-SM).zip");
- // ines2_load(&nstate, "data/Super Mario Bros. + Duck Hunt (USA).zip");
-
- mapper_setup(&nstate);
- uint32_t lo = nstate.mapper.prg_read(&nstate, 0xfffc);
- uint32_t hi = nstate.mapper.prg_read(&nstate, 0xfffd);
- nstate.cpu.pc = (hi << 8) | lo;
+ ppu_reset(nstate);
+ // ines2_load(nstate, "data/nrom/10-Yard Fight (USA, Europe).nes");
+ // ines2_load(nstate, "data/nrom/Balloon Fight (USA).nes");
+ // ines2_load(nstate, "data/nrom/Excitebike (Japan, USA).nes");
+ // ines2_load(nstate, "data/nrom/Ice Climber (USA, Europe, Korea).nes");
+ // ines2_load(nstate, "data/nrom/Kung Fu (Japan, USA).nes");
+ ines2_load(nstate, "data/nrom/Super Mario Bros. (World) (HVC-SM).nes");
+ // ines2_load(nstate, "data/nrom/Urban Champion (World).nes");
+ // ines2_load(nstate, "data/nrom/Wrecking Crew (World).nes");
+ // ines2_load(nstate, "data/nrom/scanline.nes");
+ // ines2_load(nstate, "data/nrom/Sayoonara!.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterChromaLuma.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest1.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest2.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest3.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest3a.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest3b.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest3c.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest3d.NES");
+ // ines2_load(nstate, "data/nrom/raster_demos/RasterTest3e.NES");
+ // ines2_load(nstate, "data/nrom/NEStress.NES");
+ // ines2_load(nstate, "data/tv.nes");
+ // ines2_load(nstate, "data/Super Mario Bros. (World) (HVC-SM).zip");
+ // ines2_load(nstate, "data/Super Mario Bros. + Duck Hunt (USA).zip");
+
+ mapper_setup(nstate);
+ uint32_t lo = nstate->mapper.prg_read(nstate, 0xfffc);
+ uint32_t hi = nstate->mapper.prg_read(nstate, 0xfffd);
+ nstate->cpu.pc = (hi << 8) | lo;
struct timer_handle *timer = timer_new(FRAME_INTERVAL_NS);
if(!timer) {
@@ -178,19 +221,20 @@ int main(int argc, char **argv) {
timer_start(timer);
while(!glfwWindowShouldClose(window)) {
+ // for(uint32_t i = 0; i < 0x5000; ++ i) {
timer_wait(timer);
glfwPollEvents();
// //
- while(!nstate.ppu.frame_ready) {
+ while(!nstate->ppu.frame_ready) {
// PROFILE_NAMED("nes emulator");
- cpu_tick(&nstate);
+ cpu_tick(nstate);
}
- nstate.ppu.frame_ready = 0;
+ nstate->ppu.frame_ready = 0;
frames++;
uint32_t * restrict dst = buffer;
- uint8_t * restrict src = nstate.ppu.pixels;
+ uint8_t * restrict src = nstate->pixels;
for(uint32_t y = 0; y < 240; ++y) {
for(uint32_t x = 0; x < 256; ++x) {
uint8_t val = *src++;
@@ -203,7 +247,7 @@ int main(int argc, char **argv) {
render_frame();
glfwSwapBuffers(window);
}
-printf("total frames: %6.6d total cycles: %ld\n", frames, nstate.cycles);
+printf("total frames: %6.6d total cycles: %ld\n", frames, nstate->cycles);
glfwDestroyWindow(window);
} else {
fprintf(stderr, "Failed to create window\n");
diff --git a/mknes.h b/mknes.h
index 2d04add..c1784f6 100644
--- a/mknes.h
+++ b/mknes.h
@@ -35,7 +35,6 @@ struct ppu_state {
uint8_t oam_data;
uint8_t even_frame;
- uint8_t pixels[256 * 240] __attribute__((aligned(64)));
uint8_t oam[256];
uint8_t secondary_oam[32];
uint8_t palette[0x20];
@@ -101,12 +100,13 @@ struct nes_state {
struct ppu_state ppu;
struct mapper_entry mapper;
union mapper_data map;
- uint8_t ram[0x800] __attribute__((aligned(64)));
- uint8_t sram[0x2000] __attribute__((aligned(64)));
- uint8_t ciram[0x1000] __attribute__((aligned(64))); // NOTE(peter): Originally 0x800 bytes, but extended as it should work for up to fourway, this is optimization, reality is 2kb, but there is no side-effects, so this is fine!
- uint8_t prg_rom[4 * 1024 * 1024] __attribute__((aligned(64)));
- uint8_t chr_rom[4 * 1024 * 1024] __attribute__((aligned(64)));
-} __attribute__((aligned(64)));
+ uint8_t *pixels;
+ uint8_t *ram;
+ uint8_t *sram;
+ uint8_t *ciram;
+ uint8_t *prg_rom;
+ uint8_t *chr_rom;
+};
__attribute__((aligned(4096))) static uint32_t nes_palette[64] = {
diff --git a/ppu.c b/ppu.c
index d67cff3..db78b91 100644
--- a/ppu.c
+++ b/ppu.c
@@ -124,8 +124,8 @@ static inline uint8_t ppu_read(struct nes_state *state, uint32_t offset) {
return result;
}
-__attribute__((always_inline, hot))
-static inline void ppu_evaluate_sprites(struct nes_state *state) {
+__attribute__((hot))
+static void ppu_evaluate_sprites(struct nes_state *state) {
struct ppu_state *ppu = &state->ppu;
uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
uint8_t n = 0;
@@ -143,7 +143,7 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) {
dst[2] = src[2];
dst[3] = src[3];
ppu->sprite_indexes[n] = i;
- ppu->sprite_zero_hit_possible |= (i == 0) ? 1 : 0;
+ ppu->sprite_zero_hit_possible |= (i == 0);
dst += 4;
n++;
@@ -158,30 +158,32 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) {
ppu->sprite_count = n;
}
-__attribute__((always_inline, hot))
-static inline void ppu_fetch_sprite_patterns(struct nes_state *state) {
+__attribute__((hot))
+static void ppu_fetch_sprite_patterns(struct nes_state *state) {
struct ppu_state *ppu = &state->ppu;
+ uint32_t addr;
+ uint32_t bank;
+
+ uint8_t *s = ppu->secondary_oam;
+ uint8_t height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
+
for(uint8_t i = 0; i < ppu->sprite_count; i++) {
- uint8_t *s = ppu->secondary_oam + i * 4;
uint8_t y = s[0], tile = s[1], attr = s[2], x = s[3];
uint8_t row = ppu->scanline - y;
- uint8_t height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
row = (attr & 0x80) ? height - 1 - row : row;
- uint32_t addr;
if(height == 16) {
- uint32_t bank = (tile & 1) ? 0x1000 : 0x0000;
+ bank = (tile & 1) << 12;
tile &= 0xfe;
if(row >= 8) {
tile++;
row -= 8;
}
- addr = bank + tile * 16 + row;
} else {
- uint32_t bank = (ppu->reg_ctrl & 0x08) ? 0x1000 : 0x0000;
- addr = bank + tile * 16 + row;
+ bank = (ppu->reg_ctrl & 0x08) << 9;
}
+ addr = bank + tile * 16 + row;
uint8_t lsb = state->mapper.chr_read(state, addr);
uint8_t msb = state->mapper.chr_read(state, addr + 8);
@@ -194,73 +196,107 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state) {
ppu->sprite_shift_lo[i] = lsb;
ppu->sprite_shift_hi[i] = msb;
ppu->sprite_positions[i] = x;
- ppu->sprite_priorities[i] = (attr >> 5) & 1;
+ ppu->sprite_priorities[i] = attr & 0x20;
+ s += 4;
}
}
-__attribute__((always_inline, hot))
-static inline void ppu_render_pixel(struct nes_state *state) {
+__attribute__((hot))
+static void ppu_render_pixel(struct nes_state *state) {
+ struct ppu_state *ppu = &state->ppu;
+
+ uint32_t x = ppu->dot - 1;
+ uint32_t y = ppu->scanline;
+
+ // Fine X shift mask
+ // static const uint16_t fine_shift[8] = { 0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100 };
+ uint16_t bit = 0x8000 >> ppu->fine_x;//fine_shift[ppu->fine_x];
+
uint8_t bg_pixel = 0;
uint8_t bg_palette = 0;
uint8_t sp_pixel = 0;
uint8_t sp_palette = 0;
uint8_t sp_prio = 0;
uint8_t sp_zero = 0;
- uint8_t final_color = 0;
- struct ppu_state *ppu = &state->ppu;
+#if 1 // TODO(peter): Decide what I prefer, masking away unlikely path, or LIKELY hint to the compiler
+ uint8_t bg_mask = (ppu->reg_mask & 0x08) ? 0xff : 0x00;
+ uint8_t sp_mask = (ppu->reg_mask & 0x10) ? 0xff : 0x00;
- uint32_t x = ppu->dot - 1;
- uint32_t y = ppu->scanline;
+ // Background
+ uint8_t p0 = !!(ppu->bg_shift_pattern_low & bit);
+ uint8_t p1 = !!(ppu->bg_shift_pattern_high & bit);
+ uint8_t a0 = !!(ppu->bg_shift_attrib_low & bit);
+ uint8_t a1 = !!(ppu->bg_shift_attrib_high & bit);
+
+ bg_pixel = ((p1 << 1) | p0) & bg_mask;
+ bg_palette = ((a1 << 1) | a0) & bg_mask;
+
+ // Sprite
+ for(uint8_t i = 0; i < ppu->sprite_count; i++) {
+ if(ppu->sprite_positions[i]) continue;
- uint32_t bit = 0x8000 >> ppu->fine_x;
+ uint8_t lo = ppu->sprite_shift_lo[i];
+ uint8_t hi = ppu->sprite_shift_hi[i];
+ sp_pixel = (((hi & 0x80) >> 6) | ((lo & 0x80) >> 7)) & sp_mask;
- if(ppu->reg_mask & 0x08) {
- uint8_t p0 = (ppu->bg_shift_pattern_low & bit) ? 1 : 0;
- uint8_t p1 = (ppu->bg_shift_pattern_high & bit) ? 1 : 0;
+ if(!sp_pixel) continue;
+
+ sp_palette = ppu->secondary_oam[i * 4 + 2] & 3;
+ sp_prio = ppu->sprite_priorities[i];
+ sp_zero = (ppu->sprite_indexes[i] == 0);
+ break;
+ }
+#else
+ // Background fetch
+ if(LIKELY(ppu->reg_mask & 0x08)) {
+ uint8_t p0 = !!(ppu->bg_shift_pattern_low & bit);
+ uint8_t p1 = !!(ppu->bg_shift_pattern_high & bit);
bg_pixel = (p1 << 1) | p0;
- uint8_t a0 = (ppu->bg_shift_attrib_low & bit) ? 1 : 0;
- uint8_t a1 = (ppu->bg_shift_attrib_high & bit) ? 1 : 0;
+ uint8_t a0 = !!(ppu->bg_shift_attrib_low & bit);
+ uint8_t a1 = !!(ppu->bg_shift_attrib_high & bit);
bg_palette = (a1 << 1) | a0;
}
- if(ppu->reg_mask & 0x10) {
+ // Sprite fetch
+ if(LIKELY(ppu->reg_mask & 0x10)) {
for(uint8_t i = 0; i < ppu->sprite_count; i++) {
- if(ppu->sprite_positions[i] == 0) {
- uint8_t p0 = (ppu->sprite_shift_lo[i] & 0x80) ? 1 : 0;
- uint8_t p1 = (ppu->sprite_shift_hi[i] & 0x80) ? 1 : 0;
- sp_pixel = (p1 << 1) | p0;
-
- if(sp_pixel) {
- sp_palette = ppu->secondary_oam[i * 4 + 2] & 3;
- sp_prio = ppu->sprite_priorities[i];
- sp_zero = (ppu->sprite_indexes[i] == 0);
- break;
- }
- }
- }
- }
+ if(ppu->sprite_positions[i]) continue;
- if(bg_pixel == 0 && sp_pixel == 0) {
- final_color = ppu->palette[0];
- } else if(bg_pixel == 0 && sp_pixel != 0) {
- final_color = ppu->palette[0x10 | (sp_palette << 2) | sp_pixel];
- } else if(bg_pixel != 0 && sp_pixel == 0) {
- final_color = ppu->palette[(bg_palette << 2) | bg_pixel];
- } else {
- if(sp_zero && ppu->sprite_zero_hit_possible && x < 255) {
- ppu->reg_status |= 0x40;
- }
- if(sp_prio == 0) {
- final_color = ppu->palette[0x10 | (sp_palette << 2) | sp_pixel];
- } else {
- final_color = ppu->palette[(bg_palette << 2) | bg_pixel];
+ uint8_t lo = ppu->sprite_shift_lo[i];
+ uint8_t hi = ppu->sprite_shift_hi[i];
+ sp_pixel = ((hi & 0x80) >> 6) | ((lo & 0x80) >> 7);
+
+ if(!sp_pixel) continue;
+
+ sp_palette = ppu->secondary_oam[i * 4 + 2] & 3;
+ sp_prio = ppu->sprite_priorities[i];
+ sp_zero = (ppu->sprite_indexes[i] == 0);
+ break;
}
}
- ppu->pixels[y * 256 + x] = final_color;
-}
+#endif
+ // Final pixel composition
+ uint8_t palette_index = 0;
+ uint8_t bg_index = (bg_palette << 2) + bg_pixel;
+ uint8_t sp_index = (sp_palette << 2) + sp_pixel;
+ uint8_t selector = (bg_pixel ? 2 : 0) | (sp_pixel ? 1 : 0);
+
+ switch(selector) {
+ case 0: { palette_index = 0; } break;
+ case 1: { palette_index = 0x10 | sp_index; } break;
+ case 2: { palette_index = bg_index; } break;
+ case 3: {
+ if(sp_zero && ppu->sprite_zero_hit_possible && x < 255) {
+ ppu->reg_status |= 0x40;
+ }
+ palette_index = (sp_prio) ? bg_index : 0x10 | sp_index;
+ } break;
+ }
+ state->pixels[y * 256 + x] = ppu->palette[palette_index]; // NOTE(peter): Add color_emphasis bits (expand palette to 8x).
+}
__attribute__((hot, flatten))
static void ppu_tick(struct nes_state *state) {
@@ -272,7 +308,6 @@ static void ppu_tick(struct nes_state *state) {
for(uint32_t ppu_loops = 0; ppu_loops < 3; ++ppu_loops) {
-
if(LIKELY(rendering)) {
if(ppu->even_frame && dot == 0) {
@@ -284,28 +319,25 @@ static void ppu_tick(struct nes_state *state) {
ppu_render_pixel(state);
}
- if((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336)) {
+ if(scanline < 240 || scanline == 261) {
+ if((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336)) {
- if(ppu->reg_mask & 0x10) {
- for(uint32_t i = 0; i < ppu->sprite_count; i++) {
- if(ppu->sprite_positions[i] > 0) {
- ppu->sprite_positions[i]--;
- } else {
- ppu->sprite_shift_lo[i] <<= 1;
- ppu->sprite_shift_hi[i] <<= 1;
+ if(ppu->reg_mask & 0x10) {
+ for(uint32_t i = 0; i < ppu->sprite_count; i++) {
+ if(ppu->sprite_positions[i] > 0) {
+ ppu->sprite_positions[i]--;
+ } else {
+ ppu->sprite_shift_lo[i] <<= 1;
+ ppu->sprite_shift_hi[i] <<= 1;
+ }
}
}
- }
-
- ppu->bg_shift_pattern_low <<= 1;
- ppu->bg_shift_pattern_high <<= 1;
- ppu->bg_shift_attrib_low <<= 1;
- ppu->bg_shift_attrib_high <<= 1;
- }
+ ppu->bg_shift_pattern_low <<= 1;
+ ppu->bg_shift_pattern_high <<= 1;
+ ppu->bg_shift_attrib_low <<= 1;
+ ppu->bg_shift_attrib_high <<= 1;
- if(scanline < 240 || scanline == 261) {
- if((dot >= 1 && dot <= 256) || (dot >= 321 && dot <= 336)) {
switch(dot % 8) {
case 1: {
uint32_t nt_addr = 0x2000 | (ppu->vram_addr & 0x0fff);