From 9b538419de240f8b06326173e0b697d789865ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Magalh=C3=A3es?= <joamag@gmail.com> Date: Sat, 4 Mar 2023 17:57:08 +0000 Subject: [PATCH] feat: efficient multi-cycle per clock APU Improves performance of APU by an order of maginute at the cost of less accurate emulation. --- src/apu.rs | 150 +++++++++++++++++++++++++---------------------------- 1 file changed, 71 insertions(+), 79 deletions(-) diff --git a/src/apu.rs b/src/apu.rs index deb08753..03893a90 100644 --- a/src/apu.rs +++ b/src/apu.rs @@ -17,7 +17,7 @@ pub enum Channel { } pub struct Apu { - ch1_timer: u16, + ch1_timer: i16, ch1_sequence: u8, ch1_envelope_sequence: u8, ch1_envelope_enabled: bool, @@ -35,7 +35,7 @@ pub struct Apu { ch1_length_stop: bool, ch1_enabled: bool, - ch2_timer: u16, + ch2_timer: i16, ch2_sequence: u8, ch2_envelope_sequence: u8, ch2_envelope_enabled: bool, @@ -49,7 +49,7 @@ pub struct Apu { ch2_length_stop: bool, ch2_enabled: bool, - ch3_timer: u16, + ch3_timer: i16, ch3_position: u8, ch3_output: u8, ch3_dac: bool, @@ -67,7 +67,7 @@ pub struct Apu { sampling_rate: u16, sequencer: u16, sequencer_step: u8, - output_timer: u16, + output_timer: i16, audio_buffer: VecDeque<u8>, audio_buffer_max: usize, } @@ -190,9 +190,60 @@ impl Apu { } pub fn clock(&mut self, cycles: u8) { - // @TODO the performance here requires improvement - for _ in 0..cycles { - self.tick(); + self.sequencer += cycles as u16; + if self.sequencer >= 8192 { + // each of these steps runs at 512/8 Hz = 64Hz, + // meaning a complete loop runs at 512 Hz + match self.sequencer_step { + 0 => { + self.tick_length_all(); + } + 1 => (), + 2 => { + self.tick_ch1_sweep(); + self.tick_length_all(); + } + 3 => (), + 4 => { + self.tick_length_all(); + } + 5 => (), + 6 => { + self.tick_ch1_sweep(); + self.tick_length_all(); + } + 7 => { + self.tick_envelope_all(); + } + _ => (), + } + + self.sequencer -= 8192; + self.sequencer_step = (self.sequencer_step + 1) & 7; + } + + self.tick_ch_all(cycles); + + self.output_timer = self.output_timer.saturating_sub(cycles as i16); + if self.output_timer <= 0 { + // verifies if we've reached the maximum allowed size for the + // audio buffer and if that's the case an item is removed from + // the buffer (avoiding overflow) and then then the new audio + // volume item is added to the queue + if self.audio_buffer.len() >= self.audio_buffer_max { + self.audio_buffer.pop_front(); + self.audio_buffer.pop_front(); + } + if self.left_enabled { + self.audio_buffer.push_back(self.output()); + } + if self.right_enabled { + self.audio_buffer.push_back(self.output()); + } + + // @TODO the CPU clock is hardcoded here, we must handle situations + // where there's some kind of overclock + self.output_timer += (4194304.0 / self.sampling_rate as f32) as i16; } } @@ -309,65 +360,6 @@ impl Apu { self.audio_buffer.clear(); } - #[inline(always)] - fn tick(&mut self) { - self.sequencer += 1; - if self.sequencer >= 8192 { - // each of these steps runs at 512/8 Hz = 64Hz, - // meaning a complete loop runs at 512 Hz - match self.sequencer_step { - 0 => { - self.tick_length_all(); - } - 1 => (), - 2 => { - self.tick_ch1_sweep(); - self.tick_length_all(); - } - 3 => (), - 4 => { - self.tick_length_all(); - } - 5 => (), - 6 => { - self.tick_ch1_sweep(); - self.tick_length_all(); - } - 7 => { - self.tick_envelope_all(); - } - _ => (), - } - - self.sequencer = 0; - self.sequencer_step = (self.sequencer_step + 1) & 7; - } - - self.tick_ch_all(); - - self.output_timer = self.output_timer.saturating_sub(1); - if self.output_timer == 0 { - // verifies if we've reached the maximum allowed size for the - // audio buffer and if that's the case an item is removed from - // the buffer (avoiding overflow) and then then the new audio - // volume item is added to the queue - if self.audio_buffer.len() >= self.audio_buffer_max { - self.audio_buffer.pop_front(); - self.audio_buffer.pop_front(); - } - if self.left_enabled { - self.audio_buffer.push_back(self.output()); - } - if self.right_enabled { - self.audio_buffer.push_back(self.output()); - } - - // @TODO the CPU clock is hardcoded here, we must handle situations - // where there's some kind of overclock - self.output_timer = (4194304.0 / self.sampling_rate as f32) as u16; - } - } - #[inline(always)] fn tick_length_all(&mut self) { self.tick_length(Channel::Ch1); @@ -477,15 +469,15 @@ impl Apu { } #[inline(always)] - fn tick_ch_all(&mut self) { - self.tick_ch1(); - self.tick_ch2(); - self.tick_ch3(); + fn tick_ch_all(&mut self, cycles: u8) { + self.tick_ch1(cycles); + self.tick_ch2(cycles); + self.tick_ch3(cycles); } #[inline(always)] - fn tick_ch1(&mut self) { - self.ch1_timer = self.ch1_timer.saturating_sub(1); + fn tick_ch1(&mut self, cycles: u8) { + self.ch1_timer = self.ch1_timer.saturating_sub(cycles as i16); if self.ch1_timer > 0 { return; } @@ -501,13 +493,13 @@ impl Apu { self.ch1_output = 0; } - self.ch1_timer = (2048 - self.ch1_wave_length) << 2; + self.ch1_timer += ((2048 - self.ch1_wave_length) << 2) as i16; self.ch1_sequence = (self.ch1_sequence + 1) & 7; } #[inline(always)] - fn tick_ch2(&mut self) { - self.ch2_timer = self.ch2_timer.saturating_sub(1); + fn tick_ch2(&mut self, cycles: u8) { + self.ch2_timer = self.ch2_timer.saturating_sub(cycles as i16); if self.ch2_timer > 0 { return; } @@ -523,13 +515,13 @@ impl Apu { self.ch2_output = 0; } - self.ch2_timer = (2048 - self.ch2_wave_length) << 2; + self.ch2_timer += ((2048 - self.ch2_wave_length) << 2) as i16; self.ch2_sequence = (self.ch2_sequence + 1) & 7; } #[inline(always)] - fn tick_ch3(&mut self) { - self.ch3_timer = self.ch3_timer.saturating_sub(1); + fn tick_ch3(&mut self, cycles: u8) { + self.ch3_timer = self.ch3_timer.saturating_sub(cycles as i16); if self.ch3_timer > 0 { return; } @@ -552,7 +544,7 @@ impl Apu { self.ch3_output = 0; } - self.ch3_timer = (2048 - self.ch3_wave_length) << 1; + self.ch3_timer += ((2048 - self.ch3_wave_length) << 1) as i16; self.ch3_position = (self.ch3_position + 1) & 31; } } -- GitLab