Skip to content

Commit 530b1f7

Browse files
committed
Copy DWT management and profiling features from stm32f4xx-hal (#50)
The DWT unit allows cycle count based profiling/tracing for performance testing. The [module in the stm32f4xx-hal](https://github.com/stm32-rs/stm32f4xx-hal/blob/master/src/dwt.rs) is a useful tool for managing this functionality, so I'm copying it verbatim (with very minor changes for imports) here.
1 parent cb0ea8a commit 530b1f7

File tree

4 files changed

+343
-0
lines changed

4 files changed

+343
-0
lines changed

examples/dwt-blinky.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#![deny(unsafe_code)]
2+
#![deny(warnings)]
3+
#![no_main]
4+
#![no_std]
5+
6+
mod utilities;
7+
8+
use cortex_m_rt::entry;
9+
use embedded_hal::delay::DelayNs;
10+
use log::info;
11+
use stm32h5xx_hal::{
12+
dwt::{ClockDuration, DwtExt},
13+
pac,
14+
prelude::*,
15+
};
16+
17+
#[entry]
18+
fn main() -> ! {
19+
utilities::logger::init();
20+
21+
let cp = cortex_m::Peripherals::take().unwrap();
22+
let dp = pac::Peripherals::take().unwrap();
23+
24+
info!("Setup PWR... ");
25+
let pwr = dp.PWR.constrain();
26+
let pwrcfg = pwr.vos0().freeze();
27+
28+
// Constrain and Freeze clock
29+
info!("Setup RCC... ");
30+
let rcc = dp.RCC.constrain();
31+
let ccdr = rcc.sys_ck(250.MHz()).freeze(pwrcfg, &dp.SBS);
32+
33+
let gpioa = dp.GPIOA.split(ccdr.peripheral.GPIOA);
34+
let mut led = gpioa.pa5.into_push_pull_output();
35+
36+
// Create a delay abstraction based on DWT cycle counter
37+
let dwt = cp.DWT.constrain(cp.DCB, &ccdr.clocks);
38+
let mut delay = dwt.delay();
39+
40+
// Create a stopwatch for maximum 9 laps
41+
// Note: it starts immediately
42+
let mut lap_times = [0u32; 10];
43+
let mut sw = dwt.stopwatch(&mut lap_times);
44+
loop {
45+
// On for 1s, off for 1s.
46+
led.set_high();
47+
delay.delay_ms(1000);
48+
sw.lap();
49+
led.set_low();
50+
delay.delay_ms(900);
51+
52+
// Also you can measure with almost clock precision
53+
let cd: ClockDuration = dwt.measure(|| delay.delay_ms(100));
54+
info!("Ticks: {}", cd.as_ticks()); // Should return 250MHz * 0.1s as u32
55+
info!("Secs (f32): {}", cd.as_secs_f32()); // Should return ~0.1s as a f32
56+
info!("Secs (f64): {}", cd.as_secs_f64()); // Should return ~0.1s as a f64
57+
info!("Nanos: {}", cd.as_nanos()); // Should return 100000000ns as a u64
58+
59+
sw.lap();
60+
61+
// Get all the lap times
62+
{
63+
let mut lap = 1;
64+
while let Some(lap_time) = sw.lap_time(lap) {
65+
let _t = lap_time.as_secs_f64();
66+
lap += 1;
67+
}
68+
}
69+
70+
// Reset stopwatch
71+
sw.reset();
72+
}
73+
}

src/dwt.rs

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
//! Debug and trace management for profiling/tracing operations
2+
3+
use cortex_m::peripheral::{DCB, DWT};
4+
use fugit::HertzU32 as Hertz;
5+
6+
use crate::rcc::CoreClocks;
7+
8+
pub trait DwtExt {
9+
fn constrain(self, dcb: DCB, clocks: &CoreClocks) -> Dwt;
10+
}
11+
impl DwtExt for DWT {
12+
/// Enable trace unit and cycle counter
13+
fn constrain(mut self, mut dcb: DCB, clocks: &CoreClocks) -> Dwt {
14+
dcb.enable_trace();
15+
self.enable_cycle_counter();
16+
Dwt {
17+
dwt: self,
18+
dcb,
19+
clock: clocks.hclk(),
20+
}
21+
}
22+
}
23+
24+
/// DWT (Data Watchpoint and Trace) unit
25+
pub struct Dwt {
26+
dwt: DWT,
27+
dcb: DCB,
28+
clock: Hertz,
29+
}
30+
impl Dwt {
31+
/// Release the dwt and dcb control
32+
/// # Safety
33+
/// All instances of Delay and StopWatch become invalid after this
34+
pub unsafe fn release(self) -> (DWT, DCB) {
35+
(self.dwt, self.dcb)
36+
}
37+
/// Create a delay instance
38+
pub fn delay(&self) -> Delay {
39+
Delay { clock: self.clock }
40+
}
41+
/// Create a stopwatch instance
42+
/// # Arguments
43+
/// * `times` - Array which will be holding the timings in ticks (max laps == times.len()-1)
44+
pub fn stopwatch<'i>(&self, times: &'i mut [u32]) -> StopWatch<'i> {
45+
StopWatch::new(times, self.clock)
46+
}
47+
/// Measure cycles it takes to execute closure `f`.
48+
///
49+
/// Since DWT Cycle Counter is a 32-bit counter that wraps around to 0 on overflow,
50+
/// users should be aware that `Dwt::measure` cannot correctly measure running time of
51+
/// closures which take longer than `u32::MAX` cycles
52+
pub fn measure<F: FnOnce()>(&self, f: F) -> ClockDuration {
53+
let mut times: [u32; 2] = [0; 2];
54+
let mut sw = self.stopwatch(&mut times);
55+
f();
56+
sw.lap().lap_time(1).unwrap()
57+
}
58+
}
59+
60+
#[derive(Clone, Copy)]
61+
pub struct Delay {
62+
clock: Hertz,
63+
}
64+
impl Delay {
65+
/// Delay for `ClockDuration::ticks`
66+
pub fn delay(duration: ClockDuration) {
67+
let ticks = duration.ticks as u64;
68+
Delay::delay_ticks(DWT::cycle_count(), ticks);
69+
}
70+
/// Delay ticks
71+
/// NOTE DCB and DWT need to be set up for this to work, so it is private
72+
fn delay_ticks(mut start: u32, ticks: u64) {
73+
if ticks < (core::u32::MAX / 2) as u64 {
74+
// Simple delay
75+
let ticks = ticks as u32;
76+
while (DWT::cycle_count().wrapping_sub(start)) < ticks {}
77+
} else if ticks <= core::u32::MAX as u64 {
78+
// Try to avoid race conditions by limiting delay to u32::MAX / 2
79+
let mut ticks = ticks as u32;
80+
ticks -= core::u32::MAX / 2;
81+
while (DWT::cycle_count().wrapping_sub(start)) < core::u32::MAX / 2
82+
{
83+
}
84+
start -= core::u32::MAX / 2;
85+
while (DWT::cycle_count().wrapping_sub(start)) < ticks {}
86+
} else {
87+
// Delay for ticks, then delay for rest * u32::MAX
88+
let mut rest = (ticks >> 32) as u32;
89+
let ticks = (ticks & core::u32::MAX as u64) as u32;
90+
loop {
91+
while (DWT::cycle_count().wrapping_sub(start)) < ticks {}
92+
if rest == 0 {
93+
break;
94+
}
95+
rest -= 1;
96+
while (DWT::cycle_count().wrapping_sub(start)) > ticks {}
97+
}
98+
}
99+
}
100+
}
101+
102+
impl embedded_hal::delay::DelayNs for Delay {
103+
fn delay_ns(&mut self, ns: u32) {
104+
// Convert us to ticks
105+
let start = DWT::cycle_count();
106+
let ticks = (ns as u64 * self.clock.raw() as u64) / 1_000_000_000;
107+
Delay::delay_ticks(start, ticks);
108+
}
109+
110+
fn delay_us(&mut self, us: u32) {
111+
// Convert us to ticks
112+
let start = DWT::cycle_count();
113+
let ticks = (us as u64 * self.clock.raw() as u64) / 1_000_000;
114+
Delay::delay_ticks(start, ticks);
115+
}
116+
117+
fn delay_ms(&mut self, ms: u32) {
118+
// Convert ms to ticks
119+
let start = DWT::cycle_count();
120+
let ticks = (ms as u64 * self.clock.raw() as u64) / 1_000;
121+
Delay::delay_ticks(start, ticks);
122+
}
123+
}
124+
125+
/// Very simple stopwatch which reads from DWT Cycle Counter to record timing.
126+
///
127+
/// Since DWT Cycle Counter is a 32-bit counter that wraps around to 0 on overflow,
128+
/// users should be aware that `StopWatch` cannot correctly measure laps
129+
/// which take longer than `u32::MAX` cycles
130+
pub struct StopWatch<'l> {
131+
times: &'l mut [u32],
132+
timei: usize,
133+
clock: Hertz,
134+
}
135+
impl<'l> StopWatch<'l> {
136+
/// Create a new instance (Private because dwt/dcb should be set up)
137+
/// # Arguments
138+
/// * `times` - Array which will be holding the timings (max laps == times.len()-1)
139+
/// * `clock` - The DWT cycle counters clock
140+
fn new(times: &'l mut [u32], clock: Hertz) -> Self {
141+
assert!(times.len() >= 2);
142+
let mut sw = StopWatch {
143+
times,
144+
timei: 0,
145+
clock,
146+
};
147+
sw.reset();
148+
sw
149+
}
150+
/// Returns the numbers of laps recorded
151+
pub fn lap_count(&self) -> usize {
152+
self.timei
153+
}
154+
/// Resets recorded laps to 0 and sets 0 offset
155+
pub fn reset(&mut self) {
156+
self.timei = 0;
157+
self.times[0] = DWT::cycle_count();
158+
}
159+
/// Record a new lap.
160+
///
161+
/// If lap count exceeds maximum, the last lap is updated
162+
pub fn lap(&mut self) -> &mut Self {
163+
let c = DWT::cycle_count();
164+
if self.timei < self.times.len() {
165+
self.timei += 1;
166+
}
167+
self.times[self.timei] = c;
168+
self
169+
}
170+
/// Calculate the time of lap n (n starting with 1).
171+
///
172+
/// Returns None if `n` is out of range
173+
pub fn lap_time(&self, n: usize) -> Option<ClockDuration> {
174+
if (n < 1) || (self.timei < n) {
175+
None
176+
} else {
177+
Some(ClockDuration {
178+
ticks: self.times[n].wrapping_sub(self.times[n - 1]),
179+
clock: self.clock,
180+
})
181+
}
182+
}
183+
}
184+
185+
/// Clock difference with capability to calculate SI units (s)
186+
#[derive(Clone, Copy)]
187+
pub struct ClockDuration {
188+
ticks: u32,
189+
clock: Hertz,
190+
}
191+
impl ClockDuration {
192+
/// Returns ticks
193+
pub fn as_ticks(self) -> u32 {
194+
self.ticks
195+
}
196+
/// Returns calculated milliseconds as integer
197+
pub fn as_millis(self) -> u64 {
198+
self.ticks as u64 * 1_000 / self.clock.raw() as u64
199+
}
200+
/// Returns calculated microseconds as integer
201+
pub fn as_micros(self) -> u64 {
202+
self.ticks as u64 * 1_000_000 / self.clock.raw() as u64
203+
}
204+
/// Returns calculated nanoseconds as integer
205+
pub fn as_nanos(self) -> u64 {
206+
self.ticks as u64 * 1_000_000_000 / self.clock.raw() as u64
207+
}
208+
/// Return calculated seconds as 32-bit float
209+
pub fn as_secs_f32(self) -> f32 {
210+
self.ticks as f32 / self.clock.raw() as f32
211+
}
212+
/// Return calculated seconds as 64-bit float
213+
pub fn as_secs_f64(self) -> f64 {
214+
self.ticks as f64 / self.clock.raw() as f64
215+
}
216+
}
217+
218+
/// A monotonic non-decreasing timer
219+
///
220+
/// This uses the timer in the debug watch trace peripheral. This means, that if the
221+
/// core is stopped, the timer does not count up. This may be relevant if you are using
222+
/// cortex_m_semihosting::hprintln for debugging in which case the timer will be stopped
223+
/// while printing
224+
#[derive(Clone, Copy)]
225+
pub struct MonoTimer {
226+
frequency: Hertz,
227+
}
228+
229+
impl MonoTimer {
230+
/// Creates a new `Monotonic` timer
231+
pub fn new(mut dwt: DWT, mut dcb: DCB, clocks: &CoreClocks) -> Self {
232+
dcb.enable_trace();
233+
dwt.enable_cycle_counter();
234+
235+
// now the CYCCNT counter can't be stopped or reset
236+
237+
MonoTimer {
238+
frequency: clocks.hclk(),
239+
}
240+
}
241+
242+
/// Returns the frequency at which the monotonic timer is operating at
243+
pub fn frequency(self) -> Hertz {
244+
self.frequency
245+
}
246+
247+
/// Returns an `Instant` corresponding to "now"
248+
pub fn now(self) -> Instant {
249+
Instant {
250+
now: DWT::cycle_count(),
251+
}
252+
}
253+
}
254+
255+
/// A single reading of a monotonically non-decreasing clock
#[derive(Clone, Copy)]
pub struct Instant {
    /// Cycle counter value captured when the `Instant` was created
    now: u32,
}
260+
261+
impl Instant {
262+
/// Ticks elapsed since the `Instant` was created
263+
pub fn elapsed(self) -> u32 {
264+
DWT::cycle_count().wrapping_sub(self.now)
265+
}
266+
}

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ pub mod delay;
7676
#[cfg(feature = "device-selected")]
7777
pub mod spi;
7878

79+
#[cfg(feature = "device-selected")]
80+
pub mod dwt;
81+
7982
#[cfg(feature = "device-selected")]
8083
mod sealed {
8184
pub trait Sealed {}

src/prelude.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Prelude
22
33
pub use crate::delay::DelayExt as _stm32h5xx_hal_delay_DelayExt;
4+
pub use crate::dwt::DwtExt as _stm32h5xx_hal_delay_DwtExt;
45
pub use crate::gpio::GpioExt as _stm32h5xx_hal_gpio_GpioExt;
56
pub use crate::i2c::I2cExt as _stm32h5xx_hal_i2c_I2cExt;
67
pub use crate::icache::ICacheExt as _stm32h5xx_hal_icache_ICacheExt;

0 commit comments

Comments
 (0)