diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 7e001c454c158a38343f60174ed2024f9a2c232d..8a3becd911ca237aa181267efa334e2639c22266 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -129,3 +129,5 @@ jobs:
         run: cd frontends/sdl && cargo run --release -- --headless --cycles 10000000
       - name: Run benchmark
         run: cd frontends/sdl && cargo run --release -- --benchmark
+      - name: Run benchmark, only for CPU
+        run: cd frontends/sdl && cargo run --release -- --benchmark --benchmark-cpu
diff --git a/Cargo.toml b/Cargo.toml
index fcb0edf008acaf09b9247899e02dbacf79b740bd..052b987ff0db5ae62486d6b9f3e48bd0398b93fb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -33,6 +33,7 @@ built = "0.5"
 debug = false
 lto = true
 opt-level = 3
+codegen-units = 1
 
 [workspace]
 members = [".", "frontends/sdl"]
diff --git a/frontends/sdl/Cargo.toml b/frontends/sdl/Cargo.toml
index 20ae6478ace6e45ac99b125cfff97e90c009c9ad..7d916f682ab6a5d176504a9b03f7f5efcab6309a 100644
--- a/frontends/sdl/Cargo.toml
+++ b/frontends/sdl/Cargo.toml
@@ -28,6 +28,8 @@ version = "0.4"
 
 [dependencies.sdl2]
 version = "0.35"
+git = "https://github.com/Rust-SDL2/rust-sdl2"
+rev = "27cd1fd67c811e06b9d997a77bb6089a1b65070d"
 features = ["ttf", "image", "gfx", "mixer", "static-link", "use-vcpkg"]
 
 # For MacOS running under arm64 architecture there may be linking issues
diff --git a/frontends/sdl/src/main.rs b/frontends/sdl/src/main.rs
index ccba9557a9d38279a15a292f56e34a4da5663d44..c5ddebe19adef5ef50e91c051cdb4304f09846bd 100644
--- a/frontends/sdl/src/main.rs
+++ b/frontends/sdl/src/main.rs
@@ -39,12 +39,12 @@ const VOLUME: f32 = 64.0;
 
 pub struct Benchmark {
     count: usize,
-    chunk_size: Option<usize>,
+    cpu_only: Option<bool>,
 }
 
 impl Benchmark {
-    pub fn new(count: usize, chunk_size: Option<usize>) -> Self {
-        Self { count, chunk_size }
+    pub fn new(count: usize, cpu_only: Option<bool>) -> Self {
+        Self { count, cpu_only }
     }
 }
 
@@ -221,23 +221,21 @@ impl Emulator {
         self.load_rom(None);
     }
 
-    pub fn benchmark(&mut self, params: Benchmark) {
+    pub fn benchmark(&mut self, params: &Benchmark) {
         println!("Going to run benchmark...");
 
         let count = params.count;
-        let chunk_size = params.chunk_size.unwrap_or(1);
+        let cpu_only = params.cpu_only.unwrap_or(false);
         let mut cycles = 0u64;
 
+        if cpu_only {
+            self.system.set_all_enabled(false);
+        }
+
         let initial = SystemTime::now();
 
-        if chunk_size > 1 {
-            for _ in 0..(count / chunk_size) {
-                cycles += self.system.clock_m(chunk_size) as u64;
-            }
-        } else {
-            for _ in 0..count {
-                cycles += self.system.clock() as u64;
-            }
+        for _ in 0..count {
+            cycles += self.system.clock() as u64;
         }
 
         let delta = initial.elapsed().unwrap().as_millis() as f64 / 1000.0;
@@ -320,7 +318,7 @@ impl Emulator {
                     Event::KeyDown {
                         keycode: Some(Keycode::B),
                         ..
-                    } => self.benchmark(Benchmark::default()),
+                    } => self.benchmark(&Benchmark::default()),
                     Event::KeyDown {
                         keycode: Some(Keycode::T),
                         ..
@@ -491,21 +489,19 @@ impl Emulator {
         }
     }
 
-    pub fn run_benchmark(&mut self, params: Benchmark) {
+    pub fn run_benchmark(&mut self, params: &Benchmark) {
         let count = params.count;
-        let chunk_size = params.chunk_size.unwrap_or(1);
+        let cpu_only = params.cpu_only.unwrap_or(false);
         let mut cycles = 0u64;
 
+        if cpu_only {
+            self.system.set_all_enabled(false);
+        }
+
         let initial = SystemTime::now();
 
-        if chunk_size > 1 {
-            for _ in 0..(count / chunk_size) {
-                cycles += self.system.clock_m(chunk_size) as u64;
-            }
-        } else {
-            for _ in 0..count {
-                cycles += self.system.clock() as u64;
-            }
+        for _ in 0..count {
+            cycles += self.system.clock() as u64;
         }
 
         let delta = initial.elapsed().unwrap().as_millis() as f64 / 1000.0;
@@ -640,6 +636,16 @@ struct Args {
     )]
     benchmark: bool,
 
+    #[arg(
+        long,
+        default_value_t = 500000000,
+        help = "The size of the benchmark in clock ticks"
+    )]
+    benchmark_count: usize,
+
+    #[arg(long, default_value_t = false, help = "Run benchmark only for the CPU")]
+    benchmark_cpu: bool,
+
     #[arg(
         long,
         default_value_t = false,
@@ -665,6 +671,26 @@ struct Args {
     rom_path: String,
 }
 
+fn run(args: Args, emulator: &mut Emulator) {
+    // selects the execution mode from the parsed arguments: benchmark
+    // takes precedence over headless, and headless over the default
+    // run, a cycles value of zero is passed on to headless as `None`
+    if args.benchmark {
+        emulator.run_benchmark(&Benchmark::new(
+            args.benchmark_count,
+            Some(args.benchmark_cpu),
+        ));
+    } else if args.headless {
+        emulator.run_headless(if args.cycles > 0 {
+            Some(args.cycles)
+        } else {
+            None
+        });
+    } else {
+        emulator.run();
+    }
+}
+
 fn main() {
     // parses the provided command line arguments and uses them to
     // obtain structured values
@@ -707,20 +733,7 @@ fn main() {
     emulator.load_rom(Some(&args.rom_path));
     emulator.toggle_palette();
 
-    // determines if the emulator should run in headless mode or
-    // not and runs it accordingly, note that if running in headless
-    // mode the number of cycles to be run may be specified
-    if args.benchmark {
-        emulator.run_benchmark(Benchmark::new(500000000, None));
-    } else if args.headless {
-        emulator.run_headless(if args.cycles > 0 {
-            Some(args.cycles)
-        } else {
-            None
-        });
-    } else {
-        emulator.run();
-    }
+    run(args, &mut emulator);
 }
 
 fn build_device(device: &str) -> Box<dyn SerialDevice> {
diff --git a/src/gb.rs b/src/gb.rs
index c6b0cd4314a14174c92811292ae04195e259fa2d..7b18a41fdc68b5947b2078d4ece2b4d58edd79ff 100644
--- a/src/gb.rs
+++ b/src/gb.rs
@@ -58,7 +58,7 @@ impl GameBoyMode {
         }
     }
 
-    pub fn from_u8(value: u8) -> GameBoyMode {
+    pub fn from_u8(value: u8) -> Self {
         match value {
             1 => GameBoyMode::Dmg,
             2 => GameBoyMode::Cgb,
@@ -67,7 +67,7 @@ impl GameBoyMode {
         }
     }
 
-    pub fn from_string(value: &str) -> GameBoyMode {
+    pub fn from_string(value: &str) -> Self {
         match value {
             "dmg" => GameBoyMode::Dmg,
             "cgb" => GameBoyMode::Cgb,
@@ -75,6 +75,18 @@ impl GameBoyMode {
             _ => panic!("Invalid mode value: {}", value),
         }
     }
+
+    pub fn is_dmg(&self) -> bool {
+        matches!(self, GameBoyMode::Dmg)
+    }
+
+    pub fn is_cgb(&self) -> bool {
+        matches!(self, GameBoyMode::Cgb)
+    }
+
+    pub fn is_sgb(&self) -> bool {
+        matches!(self, GameBoyMode::Sgb)
+    }
 }
 
 impl Display for GameBoyMode {
@@ -98,7 +110,7 @@ impl GameBoySpeed {
         }
     }
 
-    pub fn switch(&self) -> GameBoySpeed {
+    pub fn switch(&self) -> Self {
         match self {
             GameBoySpeed::Normal => GameBoySpeed::Double,
             GameBoySpeed::Double => GameBoySpeed::Normal,
@@ -112,7 +124,7 @@ impl GameBoySpeed {
         }
     }
 
-    pub fn from_u8(value: u8) -> GameBoySpeed {
+    pub fn from_u8(value: u8) -> Self {
         match value {
             0 => GameBoySpeed::Normal,
             1 => GameBoySpeed::Double,
@@ -763,6 +775,14 @@ impl GameBoy {
         (*self.gbc).borrow_mut().set_serial_enabled(value);
     }
 
+    pub fn set_all_enabled(&mut self, value: bool) {
+        self.set_ppu_enabled(value);
+        self.set_apu_enabled(value);
+        self.set_dma_enabled(value);
+        self.set_timer_enabled(value);
+        self.set_serial_enabled(value);
+    }
+
     pub fn clock_freq(&self) -> u32 {
         self.clock_freq
     }
diff --git a/src/ppu.rs b/src/ppu.rs
index 7df9a92314551da829a7af19492b949161e742cf..690da47c5f3554bc31057dcf02c0e4ecddf060a9 100644
--- a/src/ppu.rs
+++ b/src/ppu.rs
@@ -1063,11 +1063,33 @@ impl Ppu {
     }
 
     fn render_line(&mut self) {
+        if self.gb_mode.is_dmg() {
+            self.render_line_dmg();
+        } else {
+            self.render_line_cgb();
+        }
+    }
+
+    fn render_line_dmg(&mut self) {
         if self.first_frame {
             return;
         }
-        let switch_bg_window =
-            (self.gb_mode == GameBoyMode::Cgb && !self.dmg_compat) || self.switch_bg;
+        if self.switch_bg {
+            self.render_map_dmg(self.bg_map, self.scx, self.scy, 0, 0, self.ly);
+        }
+        if self.switch_bg && self.switch_window {
+            self.render_map_dmg(self.window_map, 0, 0, self.wx, self.wy, self.window_counter);
+        }
+        if self.switch_obj {
+            self.render_objects();
+        }
+    }
+
+    fn render_line_cgb(&mut self) {
+        if self.first_frame {
+            return;
+        }
+        let switch_bg_window = (self.gb_mode.is_cgb() && !self.dmg_compat) || self.switch_bg;
         if switch_bg_window {
             self.render_map(self.bg_map, self.scx, self.scy, 0, 0, self.ly);
         }
@@ -1088,6 +1110,7 @@ impl Ppu {
 
         // selects the correct background attributes map based on the bg map flag
         // because the attributes are separated according to the map they represent
+        // this is only relevant for CGB mode
         let bg_map_attrs = if map {
             self.bg_map_attrs_1
         } else {
@@ -1122,8 +1145,6 @@ impl Ppu {
 
         // obtains the reference to the attributes of the new tile in
         // drawing for meta processing (CGB only)
-        // @TODO: This strategy seems a bit naive, need to figure out
-        // if there's a better way to do this and a more performant one
         let mut tile_attr = if self.dmg_compat {
             &DEFAULT_TILE_ATTR
         } else {
@@ -1237,6 +1258,114 @@ impl Ppu {
         }
     }
 
+    /// Renders a single line of a tile map (background or window) through the
+    /// DMG path: raw pixel values go to `color_buffer` and `palette_bg` mapped
+    /// RGB values go to `frame_buffer`; `scx`/`scy` scroll the map, `wx`/`wy`
+    /// position it on the screen and `ld` is the map line that is drawn.
+    fn render_map_dmg(&mut self, map: bool, scx: u8, scy: u8, wx: u8, wy: u8, ld: u8) {
+        // nothing to draw until the current line (LY) reaches the target Y position
+        if self.ly < wy {
+            return;
+        }
+
+        // selects the base offset of the tile map inside VRAM according to
+        // the map flag (0x1c00 when set, 0x1800 otherwise)
+        let map_offset: usize = if map { 0x1c00 } else { 0x1800 };
+
+        // calculates the map row index for the tile by using the line being drawn
+        // and the SCY (scroll Y) value divided by 8 (as the tiles are 8x8 pixels),
+        // the sum is first masked to 8 bits so that the drawing wraps around the
+        // Y axis, which also keeps the resulting row index within the 32 map rows
+        let row_index = (((ld as usize + scy as usize) & 0xff) >> 3) % 32;
+
+        // converts the row index into an offset within the map by multiplying
+        // it by the number of tiles in each row (32)
+        let row_offset = row_index * 32;
+
+        // calculates the initial tile column within the map row by using the
+        // SCX (scroll X) value shifted by 3 (as the tiles are 8 pixels wide)
+        let mut line_offset = (scx >> 3) as usize;
+
+        // obtains the index of the initial tile in drawing, when the bg tile
+        // flag is not set indexes below 128 are moved up by 256 (presumably
+        // the signed tile addressing mode, to be confirmed against LCDC docs)
+        let mut tile_index = self.vram[map_offset + row_offset + line_offset] as usize;
+        if !self.bg_tile && tile_index < 128 {
+            tile_index += 256;
+        }
+
+        // obtains the reference to the tile that is going to be drawn
+        let mut tile = &self.tiles[tile_index];
+
+        // calculates the offset of the current line within the color buffer,
+        // which stores the raw (unmapped) pixel values returned by the tile
+        let mut color_offset = self.ly as usize * DISPLAY_WIDTH;
+
+        // calculates the offset of the current line within the frame buffer,
+        // where every pixel of the display takes RGB_SIZE bytes
+        let mut frame_offset = self.ly as usize * DISPLAY_WIDTH * RGB_SIZE;
+
+        // calculates both the current Y and X positions within the tile
+        // using the bitwise and operation as an effective modulus 8
+        let y = (ld as usize + scy as usize) & 0x07;
+        let mut x = (scx & 0x07) as usize;
+
+        // calculates the first display column to be drawn (WX - 7 clamped at
+        // zero) and moves both buffer offsets forward to that column
+        let initial_index = max(wx as i16 - 7, 0) as usize;
+        color_offset += initial_index;
+        frame_offset += initial_index * RGB_SIZE;
+
+        // iterates over the remaining pixels in the current line of the display
+        // to draw the map, note that the initial index is used to skip the
+        // columns that sit to the left of the map position (WX)
+        for _ in initial_index..DISPLAY_WIDTH {
+            // obtains the current pixel data from the tile and
+            // re-maps it according to the background palette
+            let pixel = tile.get(x, y);
+            let color = &self.palette_bg[pixel as usize];
+
+            // updates the pixel in the color buffer, which stores
+            // the raw pixel color information (unmapped)
+            self.color_buffer[color_offset] = pixel;
+
+            // sets the mapped color of the pixel in the frame buffer
+            self.frame_buffer[frame_offset] = color[0];
+            self.frame_buffer[frame_offset + 1] = color[1];
+            self.frame_buffer[frame_offset + 2] = color[2];
+
+            // increments the current tile X position in drawing
+            x += 1;
+
+            // in case the end of tile width has been reached then
+            // a new tile must be retrieved for rendering
+            if x == TILE_WIDTH {
+                // restarts the X position as a new tile is going to be drawn
+                x = 0;
+
+                // moves to the next tile column in the map row, wrapping
+                // around once the 32 tiles of the row have been consumed
+                line_offset = (line_offset + 1) % 32;
+
+                // obtains the index of the new tile applying the same bg
+                // tile flag adjustment that is used for the initial tile
+                tile_index = self.vram[map_offset + row_offset + line_offset] as usize;
+                if !self.bg_tile && tile_index < 128 {
+                    tile_index += 256;
+                }
+
+                // obtains the reference to the new tile in drawing
+                tile = &self.tiles[tile_index];
+            }
+
+            // moves the color offset forward by one pixel
+            color_offset += 1;
+
+            // moves the frame offset forward by one RGB pixel (RGB_SIZE bytes)
+            frame_offset += RGB_SIZE;
+        }
+    }
+
     fn render_objects(&mut self) {
         let mut draw_count = 0u8;