Skip to content

Commit bcd0425

Browse files
astapleton and usbalbin authored
dwt: copy DWT management and profiling features from stm32f4xx-hal (#50) (#61)
The DWT unit allows cycle count based profiling/tracing for performance testing. The [module in the stm32f4xx-hal](https://github.com/stm32-rs/stm32f4xx-hal/blob/master/src/dwt.rs) is a useful tool for managing this functionality, so I'm copying it verbatim (with very minor changes for imports) here. --------- Co-authored-by: Albin Hedman <[email protected]>
1 parent 4b6cc4d commit bcd0425

File tree

4 files changed

+344
-0
lines changed

4 files changed

+344
-0
lines changed

examples/dwt-blinky.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#![deny(unsafe_code)]
2+
#![deny(warnings)]
3+
#![no_main]
4+
#![no_std]
5+
6+
mod utilities;
7+
8+
use cortex_m_rt::entry;
9+
use embedded_hal::delay::DelayNs;
10+
use log::info;
11+
use stm32h5xx_hal::{
12+
dwt::{ClockDuration, DwtExt},
13+
pac,
14+
prelude::*,
15+
};
16+
17+
#[entry]
18+
fn main() -> ! {
19+
utilities::logger::init();
20+
21+
let cp = cortex_m::Peripherals::take().unwrap();
22+
let dp = pac::Peripherals::take().unwrap();
23+
24+
info!("Setup PWR... ");
25+
let pwr = dp.PWR.constrain();
26+
let pwrcfg = pwr.vos0().freeze();
27+
28+
// Constrain and Freeze clock
29+
info!("Setup RCC... ");
30+
let rcc = dp.RCC.constrain();
31+
let ccdr = rcc.sys_ck(250.MHz()).freeze(pwrcfg, &dp.SBS);
32+
33+
let gpioa = dp.GPIOA.split(ccdr.peripheral.GPIOA);
34+
let mut led = gpioa.pa5.into_push_pull_output();
35+
36+
// Create a delay abstraction based on DWT cycle counter
37+
let dwt = cp.DWT.constrain(cp.DCB, &ccdr.clocks);
38+
let mut delay = dwt.delay();
39+
40+
// Create a stopwatch for maximum 9 laps
41+
// Note: it starts immediately
42+
let mut lap_times = [0u32; 10];
43+
let mut sw = dwt.stopwatch(&mut lap_times);
44+
loop {
45+
// On for 1s, off for 1s.
46+
led.set_high();
47+
delay.delay_ms(1000);
48+
sw.lap();
49+
led.set_low();
50+
delay.delay_ms(900);
51+
52+
// Also you can measure with almost clock precision
53+
let cd: ClockDuration = dwt.measure(|| delay.delay_ms(100));
54+
info!("Ticks: {}", cd.as_ticks()); // Should return 250MHz * 0.1s as u32
55+
info!("Secs (f32): {}", cd.as_secs_f32()); // Should return ~0.1s as a f32
56+
info!("Secs (f64): {}", cd.as_secs_f64()); // Should return ~0.1s as a f64
57+
info!("Nanos: {}", cd.as_nanos()); // Should return 100000000ns as a u64
58+
59+
sw.lap();
60+
61+
// Get all the lap times
62+
{
63+
let mut lap = 1;
64+
while let Some(lap_time) = sw.lap_time(lap) {
65+
let _t = lap_time.as_secs_f64();
66+
lap += 1;
67+
}
68+
}
69+
70+
// Reset stopwatch
71+
sw.reset();
72+
}
73+
}

src/dwt.rs

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
//! Debug and trace management for profiling/tracing operations
2+
//! This module provides an interface for using the DWT (Data Watchpoint and Trace)
3+
//! unit on Cortex-M microcontrollers, allowing for cycle counting and tracing
4+
//! of code execution.
5+
6+
use cortex_m::peripheral::{DCB, DWT};
7+
use fugit::HertzU32 as Hertz;
8+
9+
use crate::rcc::CoreClocks;
10+
11+
pub trait DwtExt {
12+
fn constrain(self, dcb: DCB, clocks: &CoreClocks) -> Dwt;
13+
}
14+
impl DwtExt for DWT {
15+
/// Enable trace unit and cycle counter
16+
fn constrain(mut self, mut dcb: DCB, clocks: &CoreClocks) -> Dwt {
17+
dcb.enable_trace();
18+
self.enable_cycle_counter();
19+
Dwt {
20+
dwt: self,
21+
dcb,
22+
clock: clocks.hclk(),
23+
}
24+
}
25+
}
26+
27+
/// DWT (Data Watchpoint and Trace) unit
28+
pub struct Dwt {
29+
dwt: DWT,
30+
dcb: DCB,
31+
clock: Hertz,
32+
}
33+
impl Dwt {
34+
/// Release the dwt and dcb control
35+
/// # Safety
36+
/// All instances of Delay and StopWatch become invalid after this
37+
pub unsafe fn release(self) -> (DWT, DCB) {
38+
(self.dwt, self.dcb)
39+
}
40+
/// Create a delay instance
41+
pub fn delay(&self) -> Delay {
42+
Delay { clock: self.clock }
43+
}
44+
/// Create a stopwatch instance
45+
/// # Arguments
46+
/// * `times` - Array which will be holding the timings in ticks (max laps == times.len()-1)
47+
pub fn stopwatch<'i>(&self, times: &'i mut [u32]) -> StopWatch<'i> {
48+
StopWatch::new(times, self.clock)
49+
}
50+
/// Measure cycles it takes to execute closure `f`.
51+
///
52+
/// Since DWT Cycle Counter is a 32-bit counter that wraps around to 0 on overflow,
53+
/// users should be aware that `Dwt::measure` cannot correctly measure running time of
54+
/// closures which take longer than `u32::MAX` cycles
55+
pub fn measure<F: FnOnce()>(&self, f: F) -> ClockDuration {
56+
let mut times: [u32; 2] = [0; 2];
57+
let mut sw = self.stopwatch(&mut times);
58+
f();
59+
sw.lap().lap_time(1).unwrap()
60+
}
61+
}
62+
63+
#[derive(Clone, Copy)]
64+
pub struct Delay {
65+
clock: Hertz,
66+
}
67+
impl Delay {
68+
/// Delay for `ClockDuration::ticks`
69+
pub fn delay(duration: ClockDuration) {
70+
let ticks = duration.ticks as u64;
71+
Delay::delay_ticks(DWT::cycle_count(), ticks);
72+
}
73+
/// Delay ticks
74+
/// NOTE DCB and DWT need to be set up for this to work, so it is private
75+
fn delay_ticks(mut start: u32, ticks: u64) {
76+
if ticks < (u32::MAX / 2) as u64 {
77+
// Simple delay
78+
let ticks = ticks as u32;
79+
while (DWT::cycle_count().wrapping_sub(start)) < ticks {}
80+
} else if ticks <= u32::MAX as u64 {
81+
// Try to avoid race conditions by limiting delay to u32::MAX / 2
82+
let mut ticks = ticks as u32;
83+
ticks -= u32::MAX / 2;
84+
while (DWT::cycle_count().wrapping_sub(start)) < u32::MAX / 2 {}
85+
start -= u32::MAX / 2;
86+
while (DWT::cycle_count().wrapping_sub(start)) < ticks {}
87+
} else {
88+
// Delay for ticks, then delay for rest * u32::MAX
89+
let mut rest = (ticks >> 32) as u32;
90+
let ticks = (ticks & u32::MAX as u64) as u32;
91+
loop {
92+
while (DWT::cycle_count().wrapping_sub(start)) < ticks {}
93+
if rest == 0 {
94+
break;
95+
}
96+
rest -= 1;
97+
while (DWT::cycle_count().wrapping_sub(start)) > ticks {}
98+
}
99+
}
100+
}
101+
}
102+
103+
impl embedded_hal::delay::DelayNs for Delay {
104+
fn delay_ns(&mut self, ns: u32) {
105+
// Convert ns to ticks
106+
let start = DWT::cycle_count();
107+
let ticks = (ns as u64 * self.clock.raw() as u64) / 1_000_000_000;
108+
Delay::delay_ticks(start, ticks);
109+
}
110+
111+
fn delay_us(&mut self, us: u32) {
112+
// Convert us to ticks
113+
let start = DWT::cycle_count();
114+
let ticks = (us as u64 * self.clock.raw() as u64) / 1_000_000;
115+
Delay::delay_ticks(start, ticks);
116+
}
117+
118+
fn delay_ms(&mut self, ms: u32) {
119+
// Convert ms to ticks
120+
let start = DWT::cycle_count();
121+
let ticks = (ms as u64 * self.clock.raw() as u64) / 1_000;
122+
Delay::delay_ticks(start, ticks);
123+
}
124+
}
125+
126+
/// Very simple stopwatch which reads from DWT Cycle Counter to record timing.
127+
///
128+
/// Since DWT Cycle Counter is a 32-bit counter that wraps around to 0 on overflow,
129+
/// users should be aware that `StopWatch` cannot correctly measure laps
130+
/// which take longer than `u32::MAX` cycles
131+
pub struct StopWatch<'l> {
132+
times: &'l mut [u32],
133+
timei: usize,
134+
clock: Hertz,
135+
}
136+
impl<'l> StopWatch<'l> {
137+
/// Create a new instance (Private because dwt/dcb should be set up)
138+
/// # Arguments
139+
/// * `times` - Array which will be holding the timings (max laps == times.len()-1)
140+
/// * `clock` - The DWT cycle counters clock
141+
fn new(times: &'l mut [u32], clock: Hertz) -> Self {
142+
assert!(times.len() >= 2);
143+
let mut sw = StopWatch {
144+
times,
145+
timei: 0,
146+
clock,
147+
};
148+
sw.reset();
149+
sw
150+
}
151+
/// Returns the numbers of laps recorded
152+
pub fn lap_count(&self) -> usize {
153+
self.timei
154+
}
155+
/// Resets recorded laps to 0 and sets 0 offset
156+
pub fn reset(&mut self) {
157+
self.timei = 0;
158+
self.times[0] = DWT::cycle_count();
159+
}
160+
/// Record a new lap.
161+
///
162+
/// If lap count exceeds maximum, the last lap is updated
163+
pub fn lap(&mut self) -> &mut Self {
164+
let c = DWT::cycle_count();
165+
if self.timei < self.times.len() {
166+
self.timei += 1;
167+
}
168+
self.times[self.timei] = c;
169+
self
170+
}
171+
/// Calculate the time of lap n (n starting with 1).
172+
///
173+
/// Returns None if `n` is out of range
174+
pub fn lap_time(&self, n: usize) -> Option<ClockDuration> {
175+
if (n < 1) || (self.timei < n) {
176+
None
177+
} else {
178+
Some(ClockDuration {
179+
ticks: self.times[n].wrapping_sub(self.times[n - 1]),
180+
clock: self.clock,
181+
})
182+
}
183+
}
184+
}
185+
186+
/// Clock difference with capability to calculate SI units (s)
187+
#[derive(Clone, Copy)]
188+
pub struct ClockDuration {
189+
ticks: u32,
190+
clock: Hertz,
191+
}
192+
impl ClockDuration {
193+
/// Returns ticks
194+
pub fn as_ticks(self) -> u32 {
195+
self.ticks
196+
}
197+
/// Returns calculated milliseconds as integer
198+
pub fn as_millis(self) -> u64 {
199+
self.ticks as u64 * 1_000 / self.clock.raw() as u64
200+
}
201+
/// Returns calculated microseconds as integer
202+
pub fn as_micros(self) -> u64 {
203+
self.ticks as u64 * 1_000_000 / self.clock.raw() as u64
204+
}
205+
/// Returns calculated nanoseconds as integer
206+
pub fn as_nanos(self) -> u64 {
207+
self.ticks as u64 * 1_000_000_000 / self.clock.raw() as u64
208+
}
209+
/// Return calculated seconds as 32-bit float
210+
pub fn as_secs_f32(self) -> f32 {
211+
self.ticks as f32 / self.clock.raw() as f32
212+
}
213+
/// Return calculated seconds as 64-bit float
214+
pub fn as_secs_f64(self) -> f64 {
215+
self.ticks as f64 / self.clock.raw() as f64
216+
}
217+
}
218+
219+
/// A monotonic non-decreasing timer
220+
///
221+
/// This uses the timer in the debug watch trace peripheral. This means, that if the
222+
/// core is stopped, the timer does not count up. This may be relevant if you are using
223+
/// cortex_m_semihosting::hprintln for debugging in which case the timer will be stopped
224+
/// while printing
225+
#[derive(Clone, Copy)]
226+
pub struct MonoTimer {
227+
frequency: Hertz,
228+
}
229+
230+
impl MonoTimer {
231+
/// Creates a new `Monotonic` timer
232+
pub fn new(mut dwt: DWT, mut dcb: DCB, clocks: &CoreClocks) -> Self {
233+
dcb.enable_trace();
234+
dwt.enable_cycle_counter();
235+
236+
// now the CYCCNT counter can't be stopped or reset
237+
238+
MonoTimer {
239+
frequency: clocks.hclk(),
240+
}
241+
}
242+
243+
/// Returns the frequency at which the monotonic timer is operating at
244+
pub fn frequency(self) -> Hertz {
245+
self.frequency
246+
}
247+
248+
/// Returns an `Instant` corresponding to "now"
249+
pub fn now(self) -> Instant {
250+
Instant {
251+
now: DWT::cycle_count(),
252+
}
253+
}
254+
}
255+
256+
/// A measurement of a monotonically non-decreasing clock
257+
#[derive(Clone, Copy)]
258+
pub struct Instant {
259+
now: u32,
260+
}
261+
262+
impl Instant {
263+
/// Ticks elapsed since the `Instant` was created
264+
pub fn elapsed(self) -> u32 {
265+
DWT::cycle_count().wrapping_sub(self.now)
266+
}
267+
}

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ pub mod delay;
7676
#[cfg(feature = "device-selected")]
7777
pub mod spi;
7878

79+
#[cfg(feature = "device-selected")]
80+
pub mod dwt;
81+
7982
#[cfg(feature = "device-selected")]
8083
mod sealed {
8184
pub trait Sealed {}

src/prelude.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Prelude
22
33
pub use crate::delay::DelayExt as _stm32h5xx_hal_delay_DelayExt;
4+
pub use crate::dwt::DwtExt as _stm32h5xx_hal_delay_DwtExt;
45
pub use crate::gpio::GpioExt as _stm32h5xx_hal_gpio_GpioExt;
56
pub use crate::i2c::I2cExt as _stm32h5xx_hal_i2c_I2cExt;
67
pub use crate::icache::ICacheExt as _stm32h5xx_hal_icache_ICacheExt;

0 commit comments

Comments
 (0)