Skip to content

Commit 2eeb527

Browse files
committed
GPU upgrade
1 parent b2c23cd commit 2eeb527

File tree

11 files changed

+1745
-78
lines changed

11 files changed

+1745
-78
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Generated by Cargo
22
# will have compiled files and executables
33
/target/
4+
/.vs/
5+
/bin/
6+
/obj/
7+
/packages/
48

59
# These are backup files generated by rustfmt
610
**/*.rs.bk

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "engraver"
3-
version = "2.0.5"
3+
version = "2.2.0"
44
license = "GPL-3.0"
55
authors = ["PoC Consortium <bots@cryptoguru.org>"]
66
description = """

src/cpu_hasher.rs

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
use libc::{c_void, size_t, uint64_t};
2+
use std::sync::mpsc::Sender;
3+
4+
extern "C" {
5+
pub fn noncegen(
6+
cache: *mut c_void,
7+
cache_size: size_t,
8+
chunk_offset: size_t,
9+
numeric_ID: uint64_t,
10+
local_startnonce: uint64_t,
11+
local_nonces: uint64_t,
12+
);
13+
pub fn noncegen_sse(
14+
cache: *mut c_void,
15+
cache_size: size_t,
16+
chunk_offset: size_t,
17+
numeric_ID: uint64_t,
18+
local_startnonce: uint64_t,
19+
local_nonces: uint64_t,
20+
);
21+
pub fn noncegen_avx(
22+
cache: *mut c_void,
23+
cache_size: size_t,
24+
chunk_offset: size_t,
25+
numeric_ID: uint64_t,
26+
local_startnonce: uint64_t,
27+
local_nonces: uint64_t,
28+
);
29+
pub fn noncegen_avx2(
30+
cache: *mut c_void,
31+
cache_size: size_t,
32+
chunk_offset: size_t,
33+
numeric_ID: uint64_t,
34+
local_startnonce: uint64_t,
35+
local_nonces: uint64_t,
36+
);
37+
pub fn noncegen_avx512(
38+
cache: *mut c_void,
39+
cache_size: size_t,
40+
chunk_offset: size_t,
41+
numeric_ID: uint64_t,
42+
local_startnonce: uint64_t,
43+
local_nonces: uint64_t,
44+
);
45+
}
46+
/// Thin wrapper around a raw `*mut c_void` that lets the pointer be moved
/// into another thread as part of a `CpuTask`.
pub struct SafeCVoid {
    /// The wrapped raw pointer; never dereferenced by this type itself.
    pub ptr: *mut c_void,
}

// NOTE(review): raw pointers are not `Send` by default. This impl asserts
// that handing the pointer to another thread is sound, which presumably
// relies on the pointed-to buffer outliving the task and on tasks not
// touching overlapping regions - nothing here enforces that; confirm at the
// call sites.
unsafe impl Send for SafeCVoid {}
50+
51+
pub struct CpuTask {
52+
pub cache: SafeCVoid,
53+
pub cache_size: size_t,
54+
pub chunk_offset: size_t,
55+
pub numeric_id: uint64_t,
56+
pub local_startnonce: uint64_t,
57+
pub local_nonces: uint64_t,
58+
}
59+
60+
pub fn hash_cpu(
61+
tx: Sender<(u8, u8, u64)>,
62+
hasher_task: CpuTask,
63+
simd_ext: String,
64+
) -> impl FnOnce() {
65+
move || {
66+
unsafe {
67+
match &*simd_ext {
68+
"AVX512F" => noncegen_avx512(
69+
hasher_task.cache.ptr,
70+
hasher_task.cache_size,
71+
hasher_task.chunk_offset,
72+
hasher_task.numeric_id,
73+
hasher_task.local_startnonce,
74+
hasher_task.local_nonces,
75+
),
76+
"AVX2" => noncegen_avx2(
77+
hasher_task.cache.ptr,
78+
hasher_task.cache_size,
79+
hasher_task.chunk_offset,
80+
hasher_task.numeric_id,
81+
hasher_task.local_startnonce,
82+
hasher_task.local_nonces,
83+
),
84+
"AVX" => noncegen_avx(
85+
hasher_task.cache.ptr,
86+
hasher_task.cache_size,
87+
hasher_task.chunk_offset,
88+
hasher_task.numeric_id,
89+
hasher_task.local_startnonce,
90+
hasher_task.local_nonces,
91+
),
92+
"SSE2" => noncegen_sse(
93+
hasher_task.cache.ptr,
94+
hasher_task.cache_size,
95+
hasher_task.chunk_offset,
96+
hasher_task.numeric_id,
97+
hasher_task.local_startnonce,
98+
hasher_task.local_nonces,
99+
),
100+
_ => noncegen(
101+
hasher_task.cache.ptr,
102+
hasher_task.cache_size,
103+
hasher_task.chunk_offset,
104+
hasher_task.numeric_id,
105+
hasher_task.local_startnonce,
106+
hasher_task.local_nonces,
107+
),
108+
}
109+
}
110+
// report hashing done
111+
tx.send((0u8, 1u8, 0))
112+
.expect("CPU task can't communicate with scheduler thread.");
113+
// report data in hostmem
114+
tx.send((0u8, 0u8, hasher_task.local_nonces))
115+
.expect("CPU task can't communicate with scheduler thread.");
116+
}
117+
}

src/gpu_hasher.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
use chan::Receiver;
2+
use ocl::{gpu_hash, gpu_hash_and_transfer_to_host, gpu_transfer_to_host, GpuContext};
3+
use std::sync::mpsc::Sender;
4+
use std::sync::{Arc, Mutex};
5+
6+
/// Thin wrapper around a raw `*mut u8` that allows the pointer to be moved
/// to, and shared between, threads as part of a `GpuTask`.
pub struct SafePointer {
    /// The wrapped raw pointer; never dereferenced by this type itself.
    pub ptr: *mut u8,
}

// NOTE(review): raw pointers are neither `Send` nor `Sync` by default. These
// impls assert that cross-thread use is sound, which presumably relies on
// the target buffer outliving every task and on tasks not touching
// overlapping regions - nothing here enforces that; confirm at the call
// sites.
unsafe impl Send for SafePointer {}
unsafe impl Sync for SafePointer {}
11+
12+
pub struct GpuTask {
13+
pub cache: SafePointer,
14+
pub cache_size: u64,
15+
pub chunk_offset: u64,
16+
pub numeric_id: u64,
17+
pub local_startnonce: u64,
18+
pub local_nonces: u64,
19+
}
20+
21+
pub fn create_gpu_hasher_thread(
22+
gpu_id: u8,
23+
gpu_context: Arc<Mutex<GpuContext>>,
24+
tx: Sender<(u8, u8, u64)>,
25+
rx_hasher_task: Receiver<Option<GpuTask>>,
26+
) -> impl FnOnce() {
27+
move || {
28+
let mut first_run = true;
29+
let mut buffer_id = 0u8;
30+
let mut last_task = GpuTask {
31+
cache: SafePointer { ptr: &mut 0u8 },
32+
cache_size: 0,
33+
chunk_offset: 0,
34+
numeric_id: 0,
35+
local_startnonce: 0,
36+
local_nonces: 0,
37+
};
38+
for task in rx_hasher_task {
39+
// check if new task or termination
40+
match task {
41+
// new task
42+
Some(task) => {
43+
// first run - just hash
44+
if first_run {
45+
if task.local_nonces != 0 {
46+
first_run = false;
47+
gpu_hash(&gpu_context, &task);
48+
buffer_id = 1 - buffer_id;
49+
last_task = task;
50+
tx.send((gpu_id, 1u8, 0))
51+
.expect("GPU task can't communicate with scheduler thread.");
52+
}
53+
// last run - just transfer
54+
} else if task.local_nonces == 0 {
55+
gpu_transfer_to_host(&gpu_context, buffer_id, &last_task);
56+
first_run = true;
57+
buffer_id = 0;
58+
tx.send((gpu_id, 0u8, last_task.local_nonces))
59+
.expect("GPU task can't communicate with scheduler thread.");
60+
// normal run - hash and transfer async
61+
} else {
62+
gpu_hash_and_transfer_to_host(&gpu_context, buffer_id, &task, &last_task);
63+
buffer_id = 1 - buffer_id;
64+
tx.send((gpu_id, 0u8, last_task.local_nonces))
65+
.expect("GPU task can't communicate with scheduler thread.");
66+
last_task = task;
67+
tx.send((gpu_id, 1u8, 0))
68+
.expect("GPU task can't communicate with scheduler thread.");
69+
}
70+
}
71+
// termination
72+
None => {
73+
break;
74+
}
75+
}
76+
}
77+
}
78+
}

src/main.rs

Lines changed: 67 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,13 @@ extern crate pbr;
99
extern crate stopwatch;
1010
extern crate sys_info;
1111

12-
mod hasher;
12+
mod cpu_hasher;
13+
#[cfg(feature = "opencl")]
14+
mod gpu_hasher;
15+
#[cfg(feature = "opencl")]
16+
mod ocl;
1317
mod plotter;
18+
mod scheduler;
1419
mod utils;
1520
mod writer;
1621

@@ -19,13 +24,10 @@ use clap::AppSettings::{ArgRequiredElseHelp, DeriveDisplayOrder, VersionlessSubc
1924
use clap::ArgGroup;
2025
use clap::{App, Arg};
2126
use plotter::{Plotter, PlotterTask};
27+
use std::cmp::min;
2228
use utils::set_low_prio;
2329

2430
fn main() {
25-
#[cfg(not(feature = "opencl"))]
26-
let _opencl = false;
27-
#[cfg(feature = "opencl")]
28-
let opencl = true;
2931
let arg = App::new("Engraver")
3032
.version(crate_version!())
3133
.author(crate_authors!())
@@ -60,6 +62,12 @@ fn main() {
6062
.long("quiet")
6163
.help("Runs engraver in non-verbose mode")
6264
.global(true),
65+
).arg(
66+
Arg::with_name("benchmark")
67+
.short("b")
68+
.long("bench")
69+
.help("Runs engraver in xPU benchmark mode")
70+
.global(true),
6371
)
6472
/*
6573
.subcommand(
@@ -74,23 +82,23 @@ fn main() {
7482
.value_name("numeric_ID")
7583
.help("your numeric Burst ID")
7684
.takes_value(true)
77-
.required(true),
85+
.required_unless("ocl-devices"),
7886
).arg(
7987
Arg::with_name("start nonce")
8088
.short("s")
8189
.long("sn")
8290
.value_name("start_nonce")
8391
.help("where you want to start plotting")
8492
.takes_value(true)
85-
.required(true),
93+
.required_unless("ocl-devices"),
8694
).arg(
8795
Arg::with_name("nonces")
8896
.short("n")
8997
.long("n")
9098
.value_name("nonces")
9199
.help("how many nonces you want to plot")
92100
.takes_value(true)
93-
.required(true),
101+
.required_unless("ocl-devices"),
94102
).arg(
95103
Arg::with_name("path")
96104
.short("p")
@@ -120,14 +128,13 @@ fn main() {
120128
.short("g")
121129
.long("gpu")
122130
.value_name("platform_id:device_id")
123-
.help("*GPU(s) you want to use for plotting")
131+
.help("GPU(s) you want to use for plotting (optional)")
124132
.multiple(true)
125133
.takes_value(true),
126134
]).groups(&[#[cfg(feature = "opencl")]
127135
ArgGroup::with_name("processing")
128136
.args(&["cpu", "gpu"])
129-
.multiple(true)
130-
.required(true)])
137+
.multiple(true)])
131138
/*
132139
.arg(
133140
Arg::with_name("ssd buffer")
@@ -168,14 +175,35 @@ fn main() {
168175
169176
)*/;
170177

178+
#[cfg(feature = "opencl")]
179+
let arg = arg
180+
.arg(
181+
Arg::with_name("ocl-devices")
182+
.short("o")
183+
.long("opencl")
184+
.help("Display OpenCL platforms and devices")
185+
.global(true),
186+
).arg(
187+
Arg::with_name("zero-copy")
188+
.short("z")
189+
.long("zcb")
190+
.help("Enables zero copy buffers for shared mem (integrated) gpus")
191+
.global(true),
192+
);
171193
let matches = &arg.get_matches();
172194

173195
if matches.is_present("low priority") {
174196
set_low_prio();
175197
}
176198

199+
if matches.is_present("ocl-devices") {
200+
#[cfg(feature = "opencl")]
201+
ocl::platform_info();
202+
return;
203+
}
204+
177205
// plotting
178-
/*
206+
/* subcommand
179207
if let Some(matches) = matches.subcommand_matches("plot") {
180208
*/
181209
let numeric_id = value_t!(matches, "numeric id", u64).unwrap_or_else(|e| e.exit());
@@ -189,8 +217,30 @@ fn main() {
189217
.unwrap()
190218
});
191219
let mem = value_t!(matches, "memory", String).unwrap_or_else(|_| "0B".to_owned());
192-
let cpu_threads =
193-
value_t!(matches, "cpu", u8).unwrap_or_else(|_| sys_info::cpu_num().unwrap() as u8);
220+
let cpu_threads = value_t!(matches, "cpu", u8).unwrap_or(0u8);
221+
222+
let gpus = if matches.occurrences_of("gpu") > 0 {
223+
let gpu = values_t!(matches, "gpu", String);
224+
Some(gpu.unwrap())
225+
} else {
226+
None
227+
};
228+
229+
// work out number of cpu threads to use
230+
let cores = sys_info::cpu_num().unwrap() as u8;
231+
let cpu_threads = if cpu_threads == 0 {
232+
cores
233+
} else {
234+
min(cores, cpu_threads)
235+
};
236+
237+
// special case: don't use the cpu if only a gpu is defined
238+
#[cfg(feature = "opencl")]
239+
let cpu_threads = if matches.occurrences_of("gpu") > 0 && matches.occurrences_of("cpu") == 0 {
240+
0u8
241+
} else {
242+
cpu_threads
243+
};
194244

195245
let p = Plotter::new();
196246
p.run(PlotterTask {
@@ -200,8 +250,11 @@ fn main() {
200250
output_path,
201251
mem,
202252
cpu_threads,
253+
gpus,
203254
direct_io: !matches.is_present("disable direct i/o"),
204255
async_io: !matches.is_present("disable async i/o"),
205256
quiet: matches.is_present("non-verbosity"),
257+
benchmark: matches.is_present("benchmark"),
258+
zcb: matches.is_present("zero-copy"),
206259
});
207260
}

0 commit comments

Comments
 (0)