Skip to content

Commit 067a294

Browse files
bors[bot] and mdonoughe committed
603: add demo to simulate usage in a game engine r=cuviper a=mdonoughe It has been observed in amethyst/amethyst#780 that if you use Rayon in an environment where you are not CPU bound, you may end up using many more CPU cycles than necessary. This pull request adds an example of this problem to the demo project as a starting point towards finding a way to improve efficiency in this scenario. The life demo now has a `play` mode where the same comparison of serial and parallel occurs, but with a frame (generation) per second limit in place and comparing CPU time measurements at the end. Example output for my Ryzen 7 1800X with hyperthreading (16 logical processors) in Windows 10: $ rayon-demo life play serial: 59.93 fps cpu usage: 4.7% parallel: 59.97 fps cpu usage: 55.3% par_bridge: 59.94 fps cpu usage: 1164.1% Once there are ways to improve the overhead they should be incorporated into this demo as an example. If there are already ways to improve the demo let me know and I can try to implement them now. Co-authored-by: Matthew Donoughe <[email protected]>
2 parents df86443 + 7222d9b commit 067a294

File tree

6 files changed

+153
-2
lines changed

6 files changed

+153
-2
lines changed

rayon-demo/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,11 @@ serde = "1"
1818
serde_derive = "1"
1919
time = "0.1"
2020

21+
[target.'cfg(unix)'.dependencies]
22+
libc = "0.2"
23+
24+
[target.'cfg(windows)'.dependencies]
25+
winapi = { version = "0.3", features = ["processthreadsapi"] }
26+
2127
[dev-dependencies]
2228
num = "0.2"

rayon-demo/src/life/cpu_time/mod.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
use time::Duration;
2+
3+
#[cfg(windows)]
4+
mod win;
5+
#[cfg(windows)]
6+
pub use self::win::get_cpu_time;
7+
8+
#[cfg(unix)]
9+
mod unix;
10+
#[cfg(unix)]
11+
pub use self::unix::get_cpu_time;
12+
13+
#[cfg(not(any(unix, windows)))]
14+
/// Fallback used on targets that are neither Unix nor Windows: no CPU-time source is
/// wired up for them here, so this always reports `None`.
pub fn get_cpu_time() -> Option<u64> {
15+
None
16+
}
17+
18+
pub fn get_cpu_duration(start: Option<u64>, stop: Option<u64>) -> Option<Duration> {
19+
start.and_then(|start| stop.and_then(|stop| Some(Duration::nanoseconds((stop - start) as i64))))
20+
}

rayon-demo/src/life/cpu_time/unix.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use libc::{getrusage, rusage, RUSAGE_SELF};
2+
use std::mem;
3+
4+
pub fn get_cpu_time() -> Option<u64> {
5+
unsafe {
6+
let mut usage: rusage = mem::uninitialized();
7+
getrusage(RUSAGE_SELF, &mut usage);
8+
let user = 1_000_000_000 * (usage.ru_utime.tv_sec as u64)
9+
+ 1_000 * (usage.ru_utime.tv_usec as u64);
10+
let system = 1_000_000_000 * (usage.ru_stime.tv_sec as u64)
11+
+ 1_000 * (usage.ru_stime.tv_usec as u64);
12+
Some(user + system)
13+
}
14+
}

rayon-demo/src/life/cpu_time/win.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
use std::mem;
2+
use winapi::shared::minwindef::FILETIME;
3+
use winapi::um::processthreadsapi::{GetCurrentProcess, GetProcessTimes};
4+
5+
pub fn get_cpu_time() -> Option<u64> {
6+
unsafe {
7+
let process = GetCurrentProcess();
8+
let mut _creation: FILETIME = mem::uninitialized();
9+
let mut _exit: FILETIME = mem::uninitialized();
10+
let mut kernel: FILETIME = mem::uninitialized();
11+
let mut user: FILETIME = mem::uninitialized();
12+
GetProcessTimes(process, &mut _creation, &mut _exit, &mut kernel, &mut user);
13+
let kernel = (kernel.dwHighDateTime as u64) << 32 | kernel.dwLowDateTime as u64;
14+
let user = (user.dwHighDateTime as u64) << 32 | user.dwLowDateTime as u64;
15+
Some(100 * (kernel + user))
16+
}
17+
}

rayon-demo/src/life/mod.rs

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
const USAGE: &'static str = "
22
Usage: life bench [--size N] [--gens N]
3+
life play [--size N] [--gens N] [--fps N]
34
life --help
45
Conway's Game of Life.
56
67
Commands:
78
bench Run the benchmark in different modes and print the timings.
9+
play Run with a max frame rate and monitor CPU resources.
810
Options:
911
--size N Size of the game board (N x N) [default: 200]
1012
--gens N Simulate N generations [default: 100]
13+
--fps N Maximum frame rate [default: 60]
1114
-h, --help Show this message.
1215
";
1316

@@ -17,6 +20,7 @@ use rand::distributions::Standard;
1720
use std::iter::repeat;
1821
use std::num::Wrapping;
1922
use std::sync::Arc;
23+
use std::thread;
2024
use time;
2125

2226
use docopt::Docopt;
@@ -25,12 +29,15 @@ use rayon::iter::ParallelBridge;
2529

2630
#[cfg(test)]
2731
mod bench;
32+
mod cpu_time;
2833

2934
#[derive(Deserialize)]
3035
/// Parsed command-line arguments for the `life` demo; `main` builds this from
/// `Docopt::new(USAGE)`.
pub struct Args {
3136
// Presumably true when the `bench` command is selected (docopt `cmd_` naming) — confirm in `main`.
cmd_bench: bool,
37+
// True selects the frame-rate-limited `play` comparison in `main`.
cmd_play: bool,
3238
// `--size N`: the board is created as N x N.
flag_size: usize,
3339
// `--gens N`: number of generations to simulate.
flag_gens: usize,
40+
// `--fps N`: maximum frame rate used by the `play` command.
flag_fps: usize,
3441
}
3542

3643
#[derive(PartialEq, Eq, Clone, Debug)]
@@ -161,6 +168,44 @@ fn par_bridge_generations(board: Board, gens: usize) {
161168
for _ in 0..gens { brd = brd.par_bridge_next_generation(); }
162169
}
163170

171+
fn delay(last_start: u64, min_interval_ns: u64) -> u64 {
172+
let mut current_time = time::precise_time_ns();
173+
let elapsed = current_time - last_start;
174+
if elapsed < min_interval_ns {
175+
let delay = min_interval_ns - elapsed;
176+
thread::sleep(::std::time::Duration::from_nanos(delay));
177+
current_time += delay;
178+
}
179+
current_time
180+
}
181+
182+
fn generations_limited(board: Board, gens: usize, min_interval_ns: u64) {
183+
let mut brd = board;
184+
let mut time = time::precise_time_ns();
185+
for _ in 0..gens {
186+
brd = brd.next_generation();
187+
time = delay(time, min_interval_ns);
188+
}
189+
}
190+
191+
fn parallel_generations_limited(board: Board, gens: usize, min_interval_ns: u64) {
192+
let mut brd = board;
193+
let mut time = time::precise_time_ns();
194+
for _ in 0..gens {
195+
brd = brd.parallel_next_generation();
196+
time = delay(time, min_interval_ns);
197+
}
198+
}
199+
200+
fn par_bridge_generations_limited(board: Board, gens: usize, min_interval_ns: u64) {
201+
let mut brd = board;
202+
let mut time = time::precise_time_ns();
203+
for _ in 0..gens {
204+
brd = brd.par_bridge_next_generation();
205+
time = delay(time, min_interval_ns);
206+
}
207+
}
208+
164209
fn measure(f: fn(Board, usize) -> (), args: &Args) -> u64 {
165210
let (n, gens) = (args.flag_size, args.flag_gens);
166211
let brd = Board::new(n, n).random();
@@ -171,6 +216,31 @@ fn measure(f: fn(Board, usize) -> (), args: &Args) -> u64 {
171216
time::precise_time_ns() - start
172217
}
173218

219+
/// Outcome of one frame-rate-limited run, as produced by `measure_cpu`.
struct CpuResult {
220+
// Generations completed per second of wall-clock time.
actual_fps: f64,
221+
// CPU time as a percentage of wall-clock time; `None` when no CPU-time reading is available.
cpu_usage_percent: Option<f64>,
222+
}
223+
224+
fn measure_cpu(f: fn(Board, usize, u64) -> (), args: &Args) -> CpuResult {
225+
let (n, gens, rate) = (args.flag_size, args.flag_gens, args.flag_fps);
226+
let interval = 1_000_000_000 / rate as u64;
227+
let brd = Board::new(n, n).random();
228+
let start = time::precise_time_ns();
229+
let cpu_start = cpu_time::get_cpu_time();
230+
231+
f(brd, gens, interval);
232+
233+
let cpu_stop = cpu_time::get_cpu_time();
234+
let duration = time::precise_time_ns() - start;
235+
236+
CpuResult {
237+
actual_fps: (1_000_000_000.0 * gens as f64) / duration as f64,
238+
cpu_usage_percent: cpu_time::get_cpu_duration(cpu_start, cpu_stop)
239+
.and_then(|cpu| cpu.num_nanoseconds())
240+
.and_then(|cpu| Some(100.0 * cpu as f64 / duration as f64)),
241+
}
242+
}
243+
174244
pub fn main(args: &[String]) {
175245
let args: Args =
176246
Docopt::new(USAGE)
@@ -183,10 +253,30 @@ pub fn main(args: &[String]) {
183253

184254
let parallel = measure(parallel_generations, &args);
185255
println!("parallel: {:10} ns -> {:.2}x speedup", parallel,
186-
serial as f64 / parallel as f64);
256+
serial as f64 / parallel as f64);
187257

188258
let par_bridge = measure(par_bridge_generations, &args);
189259
println!("par_bridge: {:10} ns -> {:.2}x speedup", par_bridge,
190-
serial as f64 / par_bridge as f64);
260+
serial as f64 / par_bridge as f64);
261+
}
262+
263+
if args.cmd_play {
264+
let serial = measure_cpu(generations_limited, &args);
265+
println!(" serial: {:.2} fps", serial.actual_fps);
266+
if let Some(cpu_usage) = serial.cpu_usage_percent {
267+
println!(" cpu usage: {:.1}%", cpu_usage);
268+
}
269+
270+
let parallel = measure_cpu(parallel_generations_limited, &args);
271+
println!("parallel: {:.2} fps", parallel.actual_fps);
272+
if let Some(cpu_usage) = parallel.cpu_usage_percent {
273+
println!(" cpu usage: {:.1}%", cpu_usage);
274+
}
275+
276+
let par_bridge = measure_cpu(par_bridge_generations_limited, &args);
277+
println!("par_bridge: {:.2} fps", par_bridge.actual_fps);
278+
if let Some(cpu_usage) = par_bridge.cpu_usage_percent {
279+
println!(" cpu usage: {:.1}%", cpu_usage);
280+
}
191281
}
192282
}

rayon-demo/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ extern crate serde; // all
3333
extern crate cgmath; // nbody
3434
#[macro_use]
3535
extern crate glium; // nbody
36+
#[cfg(unix)]
37+
extern crate libc; // life
3638
extern crate rand; // nbody
3739
extern crate time; // nbody, sieve
3840
extern crate odds; // sieve
@@ -42,6 +44,8 @@ extern crate num; // factorial
4244
extern crate lazy_static; // find
4345
extern crate fixedbitset; // tsp
4446
extern crate regex; // tsp
47+
#[cfg(windows)]
48+
extern crate winapi; // life
4549

4650
#[cfg(test)]
4751
extern crate test;

0 commit comments

Comments
 (0)