summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x build.sh 2
-rw-r--r-- mapper.c 1
-rw-r--r-- mapper_0003.c 5
-rw-r--r-- mknes.c 10
-rw-r--r-- ppu.c 18
-rw-r--r-- render.c 58
6 files changed, 47 insertions, 47 deletions
diff --git a/build.sh b/build.sh
index 3e878d3..b8f6a4e 100755
--- a/build.sh
+++ b/build.sh
@@ -5,7 +5,7 @@ PROJECT_NAME="mknes" # Change this for each new project
# Base configuration common to all builds
CFLAGS="-std=gnu11 "
-CFLAGS+="-mavx2 -mbmi2 -march=native "
+CFLAGS+="-mbmi2 "
CFLAGS+="-mfunction-return=keep "
CFLAGS+="-mindirect-branch=keep "
CFLAGS+="-fwrapv -ffast-math -fno-trapping-math -fwhole-program "
diff --git a/mapper.c b/mapper.c
index 188d88b..e10c97c 100644
--- a/mapper.c
+++ b/mapper.c
@@ -42,6 +42,7 @@ static struct mapper_entry mapper_table[] = {
/* Mapper: b */ { 0x0b, mapper_000b_prg_read, mapper_000b_prg_write, mapper_000b_chr_read, mapper_000b_chr_write, mapper_default_ciram_read, mapper_default_ciram_write, mapper_default_tick, mapper_000b_init },
/* Mapper: 66 */ { 0x42, mapper_0042_prg_read, mapper_0042_prg_write, mapper_0042_chr_read, mapper_0042_chr_write, mapper_default_ciram_read, mapper_default_ciram_write, mapper_default_tick, mapper_0042_init },
/* Mapper: 2002 */ { 0x2002, mapper_2002_prg_read, mapper_2002_prg_write, mapper_2002_chr_read, mapper_2002_chr_write, mapper_default_ciram_read, mapper_default_ciram_write, mapper_default_tick, mapper_2002_init },
+/* Mapper: 2003 */ { 0x2003, mapper_0003_prg_read, mapper_0003_prg_write, mapper_0003_chr_read, mapper_0003_chr_write, mapper_default_ciram_read, mapper_default_ciram_write, mapper_default_tick, mapper_0003_init },
};
static void mapper_setup(struct nes_state *state) {
diff --git a/mapper_0003.c b/mapper_0003.c
index 743df51..a4ae56c 100644
--- a/mapper_0003.c
+++ b/mapper_0003.c
@@ -5,10 +5,7 @@ static void mapper_0003_init(struct nes_state *state) {
}
static uint8_t mapper_0003_prg_read(struct nes_state *state, uint32_t addr) {
- if(addr >= 0x8000) {
- return state->prg_rom[addr - 0x8000];
- }
- return 0;
+ return state->prg_rom[addr & 0x7fff];
}
static void mapper_0003_prg_write(struct nes_state *state, uint32_t addr, uint8_t value) {
diff --git a/mknes.c b/mknes.c
index ea735f6..09da608 100644
--- a/mknes.c
+++ b/mknes.c
@@ -173,7 +173,7 @@ int main(int argc, char **argv) {
// ines2_load(nstate, "data/0003/Flipull - An Exciting Cube Game (Japan) (En).zip");
// ines2_load(nstate, "data/0003/Friday the 13th (USA).zip");
// ines2_load(nstate, "data/0003/Ghostbusters (Japan).zip");
-
+ // ines2_load(nstate, "data/0003/Gradius (USA).zip");
// ines2_load(nstate, "data/0007/Battletoads (USA).zip");
// ines2_load(nstate, "data/0007/Beetlejuice (USA).zip");
// ines2_load(nstate, "data/0007/Cabal (USA).zip");
@@ -226,7 +226,7 @@ int main(int argc, char **argv) {
}
}
- set_decay(10);
+ set_decay(20);
timer_start(timer);
@@ -241,7 +241,7 @@ int main(int argc, char **argv) {
}
#else
while(!glfwWindowShouldClose(window)) {
- timer_wait(timer);
+ // timer_wait(timer);
glfwPollEvents();
while(!nstate->ppu.frame_ready) {
@@ -251,7 +251,7 @@ int main(int argc, char **argv) {
nstate->ppu.frame_ready = 0;
frames++;
- uint32_t * restrict dst = buffer;
+ uint32_t * restrict dst = display_buffer; //buffer;
uint8_t * restrict src = nstate->pixels;
for(uint32_t y = 0; y < 240; ++y) {
for(uint32_t x = 0; x < 256; ++x) {
@@ -261,7 +261,7 @@ int main(int argc, char **argv) {
}
dst += BUFFER_WIDTH;
}
- apply_phosphor_decay();
+ // apply_phosphor_decay();
render_frame();
glfwSwapBuffers(window);
}
diff --git a/ppu.c b/ppu.c
index cef45fa..bab97d5 100644
--- a/ppu.c
+++ b/ppu.c
@@ -22,8 +22,8 @@ static inline void ppu_evaluate_sprites(struct nes_state *state) {
uint8_t sprite_height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
uint8_t n = 0;
- uint8_t *src = ppu->oam;
- uint8_t *dst = ppu->secondary_oam;
+ uint8_t * restrict src = ppu->oam;
+ uint8_t * restrict dst = ppu->secondary_oam;
for(uint8_t i = 0; i < 64; i++) {
uint8_t y = src[0];
int32_t row = (int32_t)ppu->scanline - y;
@@ -55,8 +55,10 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state) {
struct ppu_state *restrict ppu = &state->ppu;
uint32_t addr;
uint32_t bank;
+ uint8_t lsb;
+ uint8_t msb;
- uint8_t *s = ppu->secondary_oam;
+ uint8_t * restrict s = ppu->secondary_oam;
uint8_t height = (ppu->reg_ctrl & 0x20) ? 16 : 8;
for(uint8_t i = 0; i < ppu->sprite_count; i++) {
@@ -77,12 +79,12 @@ static inline void ppu_fetch_sprite_patterns(struct nes_state *state) {
}
addr = bank + tile * 16 + row;
- uint8_t lsb = state->mapper.chr_read(state, addr);
- uint8_t msb = state->mapper.chr_read(state, addr + 8);
-
if(attr & 0x40) {
- lsb = ppu_bitreverse_lut[lsb];
- msb = ppu_bitreverse_lut[msb];
+ lsb = ppu_bitreverse_lut[state->mapper.chr_read(state, addr)];
+ msb = ppu_bitreverse_lut[state->mapper.chr_read(state, addr + 8)];
+ } else {
+ lsb = state->mapper.chr_read(state, addr);
+ msb = state->mapper.chr_read(state, addr + 8);
}
ppu->sprite_shift_lo[i] = lsb;
diff --git a/render.c b/render.c
index 26b3c19..6e399ef 100644
--- a/render.c
+++ b/render.c
@@ -16,32 +16,32 @@ static void set_decay(uint16_t old_weight) {
}
/* [=]===^=[ apply_phosphor_decay ]=================================================================^===[=] */
-__attribute__((always_inline, hot))
-static inline void apply_phosphor_decay(void) {
- // PROFILE_FUNCTION();
- __m256i old_weight = _mm256_set1_epi16(_old_weight);
- __m256i new_weight = _mm256_set1_epi16(_new_weight);
- __m128i alpha_mask = _mm_set1_epi32(0x000000ff);
- uint32_t * restrict src = buffer;
- uint32_t * restrict dst = display_buffer;
-
- for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) {
- for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) {
- _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
- _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
-
- __m128i new_pixels = _mm_load_si128((__m128i*)&src[x]);
- __m128i old_pixels = _mm_load_si128((__m128i*)&dst[x]);
-
- __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
- __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);
-
- __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
- blended = _mm256_srli_epi16(blended, 8);
-
- __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
- final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
- _mm_store_si128((__m128i*)&dst[x], final_pixels);
- }
- }
-}
+// __attribute__((always_inline, hot))
+// static inline void apply_phosphor_decay(void) {
+// // PROFILE_FUNCTION();
+// __m256i old_weight = _mm256_set1_epi16(_old_weight);
+// __m256i new_weight = _mm256_set1_epi16(_new_weight);
+// __m128i alpha_mask = _mm_set1_epi32(0x000000ff);
+// uint32_t * restrict src = buffer;
+// uint32_t * restrict dst = display_buffer;
+
+// for(uint32_t y = 0; y < BUFFER_HEIGHT; ++y, src += BUFFER_WIDTH, dst += BUFFER_WIDTH) {
+// for(uint32_t x = 0; x < BUFFER_WIDTH; x += 4) {
+// _mm_prefetch((char*)&src[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
+// _mm_prefetch((char*)&dst[x + 2 * BUFFER_WIDTH], _MM_HINT_T0);
+
+// __m128i new_pixels = _mm_load_si128((__m128i*)&src[x]);
+// __m128i old_pixels = _mm_load_si128((__m128i*)&dst[x]);
+
+// __m256i old_lo = _mm256_cvtepu8_epi16(old_pixels);
+// __m256i new_lo = _mm256_cvtepu8_epi16(new_pixels);
+
+// __m256i blended = _mm256_adds_epu16(_mm256_mullo_epi16(old_lo, old_weight), _mm256_mullo_epi16(new_lo, new_weight));
+// blended = _mm256_srli_epi16(blended, 8);
+
+// __m128i final_pixels = _mm_packus_epi16(_mm256_castsi256_si128(blended), _mm256_extracti128_si256(blended, 1));
+// final_pixels = _mm_or_si128(final_pixels, _mm_and_si128(old_pixels, alpha_mask));
+// _mm_store_si128((__m128i*)&dst[x], final_pixels);
+// }
+// }
+// }