Skip to content
Snippets Groups Projects
Commit e0d4aa83 authored by João Magalhães's avatar João Magalhães :rocket:
Browse files

Merge branch 'joamag/performance' into 'master'

Performance optimisations

See merge request !24
parents 87f655e9 fe23dd35
No related branches found
No related tags found
1 merge request: !24 Performance optimisations
Pipeline #2838 passed
......@@ -129,3 +129,5 @@ jobs:
run: cd frontends/sdl && cargo run --release -- --headless --cycles 10000000
- name: Run benchmark
run: cd frontends/sdl && cargo run --release -- --benchmark
- name: Run benchmark, only for CPU
run: cd frontends/sdl && cargo run --release -- --benchmark --cpu-only
......@@ -33,6 +33,7 @@ built = "0.5"
debug = false
lto = true
opt-level = 3
codegen-units = 1
[workspace]
members = [".", "frontends/sdl"]
......
......@@ -28,6 +28,8 @@ version = "0.4"
[dependencies.sdl2]
version = "0.35"
git = "https://github.com/Rust-SDL2/rust-sdl2"
rev = "27cd1fd67c811e06b9d997a77bb6089a1b65070d"
features = ["ttf", "image", "gfx", "mixer", "static-link", "use-vcpkg"]
# For MacOS running under arm64 architecture there may be linking issues
......
......@@ -39,12 +39,12 @@ const VOLUME: f32 = 64.0;
pub struct Benchmark {
count: usize,
chunk_size: Option<usize>,
cpu_only: Option<bool>,
}
impl Benchmark {
pub fn new(count: usize, chunk_size: Option<usize>) -> Self {
Self { count, chunk_size }
pub fn new(count: usize, cpu_only: Option<bool>) -> Self {
Self { count, cpu_only }
}
}
......@@ -221,23 +221,21 @@ impl Emulator {
self.load_rom(None);
}
pub fn benchmark(&mut self, params: Benchmark) {
pub fn benchmark(&mut self, params: &Benchmark) {
println!("Going to run benchmark...");
let count = params.count;
let chunk_size = params.chunk_size.unwrap_or(1);
let cpu_only = params.cpu_only.unwrap_or(false);
let mut cycles = 0u64;
if cpu_only {
self.system.set_all_enabled(false);
}
let initial = SystemTime::now();
if chunk_size > 1 {
for _ in 0..(count / chunk_size) {
cycles += self.system.clock_m(chunk_size) as u64;
}
} else {
for _ in 0..count {
cycles += self.system.clock() as u64;
}
for _ in 0..count {
cycles += self.system.clock() as u64;
}
let delta = initial.elapsed().unwrap().as_millis() as f64 / 1000.0;
......@@ -320,7 +318,7 @@ impl Emulator {
Event::KeyDown {
keycode: Some(Keycode::B),
..
} => self.benchmark(Benchmark::default()),
} => self.benchmark(&Benchmark::default()),
Event::KeyDown {
keycode: Some(Keycode::T),
..
......@@ -491,21 +489,19 @@ impl Emulator {
}
}
pub fn run_benchmark(&mut self, params: Benchmark) {
pub fn run_benchmark(&mut self, params: &Benchmark) {
let count = params.count;
let chunk_size = params.chunk_size.unwrap_or(1);
let cpu_only = params.cpu_only.unwrap_or(false);
let mut cycles = 0u64;
if cpu_only {
self.system.set_all_enabled(false);
}
let initial = SystemTime::now();
if chunk_size > 1 {
for _ in 0..(count / chunk_size) {
cycles += self.system.clock_m(chunk_size) as u64;
}
} else {
for _ in 0..count {
cycles += self.system.clock() as u64;
}
for _ in 0..count {
cycles += self.system.clock() as u64;
}
let delta = initial.elapsed().unwrap().as_millis() as f64 / 1000.0;
......@@ -640,6 +636,16 @@ struct Args {
)]
benchmark: bool,
#[arg(
long,
default_value_t = 500000000,
help = "The size of the benchmark in clock ticks"
)]
benchmark_count: usize,
#[arg(long, default_value_t = false, help = "Run benchmark only for the CPU")]
benchmark_cpu: bool,
#[arg(
long,
default_value_t = false,
......@@ -665,6 +671,26 @@ struct Args {
rom_path: String,
}
/// Dispatches execution to the mode selected through the command line.
///
/// The benchmark flag is checked first, then the headless flag; when
/// neither is set the emulator goes through its regular `run` entry point.
fn run(args: Args, emulator: &mut Emulator) {
    if args.benchmark {
        // both the benchmark size and the CPU only flag come straight
        // from the parsed command line arguments
        let params = Benchmark::new(args.benchmark_count, Some(args.benchmark_cpu));
        emulator.run_benchmark(&params);
        return;
    }

    if args.headless {
        // only a cycle count greater than zero is forwarded to the
        // headless runner, any other value is passed along as `None`
        let cycles = Some(args.cycles).filter(|cycles| *cycles > 0);
        emulator.run_headless(cycles);
        return;
    }

    emulator.run();
}
fn main() {
// parses the provided command line arguments and uses them to
// obtain structured values
......@@ -707,20 +733,7 @@ fn main() {
emulator.load_rom(Some(&args.rom_path));
emulator.toggle_palette();
// determines if the emulator should run in headless mode or
// not and runs it accordingly, note that if running in headless
// mode the number of cycles to be run may be specified
if args.benchmark {
emulator.run_benchmark(Benchmark::new(500000000, None));
} else if args.headless {
emulator.run_headless(if args.cycles > 0 {
Some(args.cycles)
} else {
None
});
} else {
emulator.run();
}
run(args, &mut emulator);
}
fn build_device(device: &str) -> Box<dyn SerialDevice> {
......
......@@ -58,7 +58,7 @@ impl GameBoyMode {
}
}
pub fn from_u8(value: u8) -> GameBoyMode {
pub fn from_u8(value: u8) -> Self {
match value {
1 => GameBoyMode::Dmg,
2 => GameBoyMode::Cgb,
......@@ -67,7 +67,7 @@ impl GameBoyMode {
}
}
pub fn from_string(value: &str) -> GameBoyMode {
pub fn from_string(value: &str) -> Self {
match value {
"dmg" => GameBoyMode::Dmg,
"cgb" => GameBoyMode::Cgb,
......@@ -75,6 +75,18 @@ impl GameBoyMode {
_ => panic!("Invalid mode value: {}", value),
}
}
/// Returns `true` when this mode is the `Dmg` variant.
pub fn is_dmg(&self) -> bool {
    matches!(*self, GameBoyMode::Dmg)
}
/// Returns `true` when this mode is the `Cgb` variant.
pub fn is_cgb(&self) -> bool {
    matches!(*self, GameBoyMode::Cgb)
}
/// Returns `true` when this mode is the `Sgb` variant.
pub fn is_sgb(&self) -> bool {
    matches!(*self, GameBoyMode::Sgb)
}
}
impl Display for GameBoyMode {
......@@ -98,7 +110,7 @@ impl GameBoySpeed {
}
}
pub fn switch(&self) -> GameBoySpeed {
pub fn switch(&self) -> Self {
match self {
GameBoySpeed::Normal => GameBoySpeed::Double,
GameBoySpeed::Double => GameBoySpeed::Normal,
......@@ -112,7 +124,7 @@ impl GameBoySpeed {
}
}
pub fn from_u8(value: u8) -> GameBoySpeed {
pub fn from_u8(value: u8) -> Self {
match value {
0 => GameBoySpeed::Normal,
1 => GameBoySpeed::Double,
......@@ -763,6 +775,14 @@ impl GameBoy {
(*self.gbc).borrow_mut().set_serial_enabled(value);
}
/// Sets the enabled state of the PPU, APU, DMA, timer and serial
/// components in a single call, by forwarding `value` to each of
/// the individual `set_*_enabled` setters (in that order).
pub fn set_all_enabled(&mut self, value: bool) {
self.set_ppu_enabled(value);
self.set_apu_enabled(value);
self.set_dma_enabled(value);
self.set_timer_enabled(value);
self.set_serial_enabled(value);
}
pub fn clock_freq(&self) -> u32 {
self.clock_freq
}
......
......@@ -1063,11 +1063,33 @@ impl Ppu {
}
/// Dispatches the rendering of a line to the renderer that matches
/// the Game Boy mode in use: DMG has a dedicated path and every
/// other mode goes through the CGB one.
fn render_line(&mut self) {
    match self.gb_mode {
        GameBoyMode::Dmg => self.render_line_dmg(),
        // any mode other than DMG is handled by the CGB renderer
        _ => self.render_line_cgb(),
    }
}
fn render_line_dmg(&mut self) {
if self.first_frame {
return;
}
let switch_bg_window =
(self.gb_mode == GameBoyMode::Cgb && !self.dmg_compat) || self.switch_bg;
if self.switch_bg {
self.render_map_dmg(self.bg_map, self.scx, self.scy, 0, 0, self.ly);
}
if self.switch_bg && self.switch_window {
self.render_map_dmg(self.window_map, 0, 0, self.wx, self.wy, self.window_counter);
}
if self.switch_obj {
self.render_objects();
}
}
fn render_line_cgb(&mut self) {
if self.first_frame {
return;
}
let switch_bg_window = (self.gb_mode.is_cgb() && !self.dmg_compat) || self.switch_bg;
if switch_bg_window {
self.render_map(self.bg_map, self.scx, self.scy, 0, 0, self.ly);
}
......@@ -1088,6 +1110,7 @@ impl Ppu {
// selects the correct background attributes map based on the bg map flag
// because the attributes are separated according to the map they represent
// this is only relevant for CGB mode
let bg_map_attrs = if map {
self.bg_map_attrs_1
} else {
......@@ -1122,8 +1145,6 @@ impl Ppu {
// obtains the reference to the attributes of the new tile in
// drawing for meta processing (CGB only)
// @TODO: This strategy seems a bit naive, need to figure out
// if there's a better way to do this and a more performant one
let mut tile_attr = if self.dmg_compat {
&DEFAULT_TILE_ATTR
} else {
......@@ -1237,6 +1258,114 @@ impl Ppu {
}
}
/// Renders a single line of a tile map (background or window) through
/// the DMG specific path, storing the raw tile pixel values in the color
/// buffer and the background palette mapped RGB values in the frame
/// buffer, both at the row given by `self.ly`.
///
/// - `map`: selects the tile map base offset in VRAM, `0x1c00` when
///   `true` and `0x1800` when `false`.
/// - `scx`, `scy`: horizontal and vertical scroll applied to the map.
/// - `wx`, `wy`: window position, nothing is drawn while `self.ly < wy`
///   and drawing starts at column `max(wx - 7, 0)`.
/// - `ld`: line value that is combined with `scy` to select both the
///   map row and the row within the tile.
fn render_map_dmg(&mut self, map: bool, scx: u8, scy: u8, wx: u8, wy: u8, ld: u8) {
// in case the target window Y position has not yet been reached
// then there's nothing to be done, returns control flow immediately
if self.ly < wy {
return;
}
// obtains the base address of the background map using the bg map flag
// that control which background map is going to be used
let map_offset: usize = if map { 0x1c00 } else { 0x1800 };
// calculates the map row index for the tile by using the current line
// index and the DY (scroll Y) divided by 8 (as the tiles are 8x8 pixels),
// on top of that ensures that the result is modulus 32 meaning that the
// drawing wraps around the Y axis
let row_index = (((ld as usize + scy as usize) & 0xff) >> 3) % 32;
// calculates the map offset by the row offset multiplied by the number
// of tiles in each row (32)
let row_offset = row_index * 32;
// calculates the sprite line offset by using the SCX register
// shifted by 3 meaning that the tiles are 8x8
let mut line_offset = (scx >> 3) as usize;
// calculates the index of the initial tile in drawing,
// if the tile data set in use is #1, the indexes are
// signed, then calculates a real tile offset
let mut tile_index = self.vram[map_offset + row_offset + line_offset] as usize;
if !self.bg_tile && tile_index < 128 {
tile_index += 256;
}
// obtains the reference to the tile that is going to be drawn
let mut tile = &self.tiles[tile_index];
// calculates the offset that is going to be used in the update of the color buffer
// which stores Game Boy colors from 0 to 3
let mut color_offset = self.ly as usize * DISPLAY_WIDTH;
// calculates the frame buffer offset position assuming the proper
// Game Boy screen width and RGB pixel (3 bytes) size
let mut frame_offset = self.ly as usize * DISPLAY_WIDTH * RGB_SIZE;
// calculates both the current Y and X positions within the tiles
// using the bitwise and operation as an effective modulus 8
let y = (ld as usize + scy as usize) & 0x07;
let mut x = (scx & 0x07) as usize;
// calculates the initial tile X position in drawing, doing this
// allows us to position the background map properly in the display
let initial_index = max(wx as i16 - 7, 0) as usize;
color_offset += initial_index;
frame_offset += initial_index * RGB_SIZE;
// iterates over all the pixels in the current line of the display
// to draw the background map, note that the initial index is used
// to skip the drawing of the tiles that are not visible (WX)
for _ in initial_index..DISPLAY_WIDTH {
// obtains the current pixel data from the tile and
// re-maps it according to the current palette
let pixel = tile.get(x, y);
let color = &self.palette_bg[pixel as usize];
// updates the pixel in the color buffer, which stores
// the raw pixel color information (unmapped)
self.color_buffer[color_offset] = pixel;
// set the color pixel in the frame buffer
self.frame_buffer[frame_offset] = color[0];
self.frame_buffer[frame_offset + 1] = color[1];
self.frame_buffer[frame_offset + 2] = color[2];
// increments the current tile X position in drawing
x += 1;
// in case the end of tile width has been reached then
// a new tile must be retrieved for rendering
if x == TILE_WIDTH {
// resets the tile X position to the base value
// as a new tile is going to be drawn
x = 0;
// calculates the new line tile offset making sure that
// the maximum of 32 is not overflown
line_offset = (line_offset + 1) % 32;
// calculates the tile index and makes sure the value
// takes into consideration the bg tile value
tile_index = self.vram[map_offset + row_offset + line_offset] as usize;
if !self.bg_tile && tile_index < 128 {
tile_index += 256;
}
// obtains the reference to the new tile in drawing
tile = &self.tiles[tile_index];
}
// increments the color offset by one, representing
// the drawing of one pixel
color_offset += 1;
// increments the offset of the frame buffer by the
// size of an RGB pixel (which is 3 bytes)
frame_offset += RGB_SIZE;
}
}
fn render_objects(&mut self) {
let mut draw_count = 0u8;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment