Skip to content

Commit 3ee57fe

Browse files
committed
Emulate i128 ops
LLVM 7.1 (and thus nvvm) lacks native 128-bit integer intrinsics, so emulate them using 64-bit operations. I also added an example with `sha2` to confirm it works and show folks that crates.io crates can often be used directly unmodified. Fixes #207.
1 parent 9fa4487 commit 3ee57fe

File tree

15 files changed

+793
-20
lines changed

15 files changed

+793
-20
lines changed

.github/workflows/ci_windows.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ jobs:
7777
run: cargo build --all-features -p cust_raw
7878

7979
- name: Build
80-
run: cargo build --workspace --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*"
80+
run: cargo build --workspace --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*" --exclude "sha2*"
8181

8282
# Don't currently test because many tests rely on the system having a CUDA GPU
8383
# - name: Test
@@ -86,7 +86,7 @@ jobs:
8686
- name: Check documentation
8787
env:
8888
RUSTDOCFLAGS: -Dwarnings
89-
run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*" --exclude "cust_raw"
89+
run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*" --exclude "sha2*" --exclude "cust_raw"
9090
# Disabled due to dll issues, someone with Windows knowledge needed
9191
# - name: Compiletest
9292
# run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ members = [
1414
"examples/cuda/gemm/kernels",
1515
"examples/cuda/path_tracer",
1616
"examples/cuda/path_tracer/kernels",
17+
"examples/cuda/sha2_crates_io",
18+
"examples/cuda/sha2_crates_io/kernels",
1719

1820
"examples/optix/*",
1921
"tests/compiletests",

crates/optix-sys/build/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ fn main() {
1616

1717
println!("cargo::rerun-if-changed=build");
1818
for e in sdk.related_optix_envs() {
19-
println!("cargo::rerun-if-env-changed={}", e);
19+
println!("cargo::rerun-if-env-changed={e}");
2020
}
2121
// Emit metadata for the build script.
2222
println!("cargo::metadata=root={}", sdk.optix_root().display());
@@ -36,7 +36,7 @@ fn main() {
3636
let metadata_optix_include = env::join_paths(sdk.optix_include_paths())
3737
.map(|s| s.to_string_lossy().to_string())
3838
.expect("Failed to build metadata for include.");
39-
println!("cargo::metadata=include_dir={}", metadata_optix_include);
39+
println!("cargo::metadata=include_dir={metadata_optix_include}");
4040

4141
// Generate optix bindings.
4242
create_optix_bindings(&sdk, &cuda_include_paths);

crates/optix-sys/build/optix_sdk.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ impl OptiXSdk {
8989
.ok_or("Cannot find OPTIX_VERSION from OptiX header file.")?;
9090
let version = version
9191
.parse::<u32>()
92-
.map_err(|_| format!("Cannot parse OPTIX_VERSION as u32: '{}'", version))?;
92+
.map_err(|_| format!("Cannot parse OPTIX_VERSION as u32: '{version}'"))?;
9393
Ok(version)
9494
}
9595
}
1.19 KB
Binary file not shown.

crates/rustc_codegen_nvvm/libintrinsics.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,66 @@ start:
286286
}
287287
declare {<2 x i64>, i1} @__rust_u128_mulo(<2 x i64>, <2 x i64>) #0
288288

289+
; Division operations from compiler-builtins
; @__nvvm_divti3 is a thin forwarding wrapper: it takes two <2 x i64> operands,
; passes them unchanged to @__divti3, and returns that call's result.
; @__divti3 is only declared in this block; its body must be supplied elsewhere
; (the heading above names compiler-builtins as the source).
; NOTE(review): the <2 x i64> vectors presumably carry a 128-bit integer as two
; 64-bit halves, and __divti3 is presumably the signed variant - confirm.
290+
define <2 x i64> @__nvvm_divti3(<2 x i64>, <2 x i64>) #0 {
291+
start:
292+
; %0 and %1 are the two unnamed parameters; %2 receives the callee's result.
%2 = call <2 x i64> @__divti3(<2 x i64> %0, <2 x i64> %1)
293+
ret <2 x i64> %2
294+
}
295+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__divti3(<2 x i64>, <2 x i64>) #0
296+
297+
; Forwarding wrapper: passes both <2 x i64> operands unchanged to @__udivti3
; and returns its result. @__udivti3 is only declared here, not defined.
; NOTE(review): the `u` prefix presumably marks the unsigned counterpart of
; __divti3 - confirm against compiler-builtins.
define <2 x i64> @__nvvm_udivti3(<2 x i64>, <2 x i64>) #0 {
298+
start:
299+
; %0 and %1 are the two unnamed parameters; %2 receives the callee's result.
%2 = call <2 x i64> @__udivti3(<2 x i64> %0, <2 x i64> %1)
300+
ret <2 x i64> %2
301+
}
302+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__udivti3(<2 x i64>, <2 x i64>) #0
303+
304+
; Remainder operations from compiler-builtins
; @__nvvm_modti3 is a thin forwarding wrapper: it passes its two <2 x i64>
; operands unchanged to @__modti3 and returns that call's result.
; @__modti3 is only declared in this block; its body must be supplied elsewhere.
; NOTE(review): presumably the signed 128-bit remainder - confirm.
305+
define <2 x i64> @__nvvm_modti3(<2 x i64>, <2 x i64>) #0 {
306+
start:
307+
; %0 and %1 are the two unnamed parameters; %2 receives the callee's result.
%2 = call <2 x i64> @__modti3(<2 x i64> %0, <2 x i64> %1)
308+
ret <2 x i64> %2
309+
}
310+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__modti3(<2 x i64>, <2 x i64>) #0
311+
312+
; Forwarding wrapper: passes both <2 x i64> operands unchanged to @__umodti3
; and returns its result. @__umodti3 is only declared here, not defined.
; NOTE(review): the `u` prefix presumably marks the unsigned counterpart of
; __modti3 - confirm against compiler-builtins.
define <2 x i64> @__nvvm_umodti3(<2 x i64>, <2 x i64>) #0 {
313+
start:
314+
; %0 and %1 are the two unnamed parameters; %2 receives the callee's result.
%2 = call <2 x i64> @__umodti3(<2 x i64> %0, <2 x i64> %1)
315+
ret <2 x i64> %2
316+
}
317+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__umodti3(<2 x i64>, <2 x i64>) #0
318+
319+
; Multiplication from compiler-builtins
; @__nvvm_multi3 is a thin forwarding wrapper: it passes its two <2 x i64>
; operands unchanged to @__multi3 and returns that call's result.
; @__multi3 is only declared in this block; its body must be supplied elsewhere.
320+
define <2 x i64> @__nvvm_multi3(<2 x i64>, <2 x i64>) #0 {
321+
start:
322+
; %0 and %1 are the two unnamed parameters; %2 receives the callee's result.
%2 = call <2 x i64> @__multi3(<2 x i64> %0, <2 x i64> %1)
323+
ret <2 x i64> %2
324+
}
325+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__multi3(<2 x i64>, <2 x i64>) #0
326+
327+
; Shift operations from compiler-builtins
; @__nvvm_ashlti3 is a thin forwarding wrapper: it passes a <2 x i64> value and
; an i32 second argument unchanged to @__ashlti3 and returns that call's result.
; @__ashlti3 is only declared in this block; its body must be supplied elsewhere.
; NOTE(review): the i32 is presumably the shift amount for a left shift - confirm.
328+
define <2 x i64> @__nvvm_ashlti3(<2 x i64>, i32) #0 {
329+
start:
330+
; %0 is the <2 x i64> parameter, %1 the i32 parameter; %2 receives the callee's result.
%2 = call <2 x i64> @__ashlti3(<2 x i64> %0, i32 %1)
331+
ret <2 x i64> %2
332+
}
333+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__ashlti3(<2 x i64>, i32) #0
334+
335+
; Forwarding wrapper: passes a <2 x i64> value and an i32 second argument
; unchanged to @__ashrti3 and returns its result. @__ashrti3 is only declared
; here, not defined.
; NOTE(review): presumably an arithmetic (sign-propagating) right shift with the
; i32 as the shift amount - confirm against compiler-builtins.
define <2 x i64> @__nvvm_ashrti3(<2 x i64>, i32) #0 {
336+
start:
337+
; %0 is the <2 x i64> parameter, %1 the i32 parameter; %2 receives the callee's result.
%2 = call <2 x i64> @__ashrti3(<2 x i64> %0, i32 %1)
338+
ret <2 x i64> %2
339+
}
340+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__ashrti3(<2 x i64>, i32) #0
341+
342+
; Forwarding wrapper: passes a <2 x i64> value and an i32 second argument
; unchanged to @__lshrti3 and returns its result. @__lshrti3 is only declared
; here, not defined.
; NOTE(review): presumably a logical (zero-filling) right shift with the i32 as
; the shift amount - confirm against compiler-builtins.
define <2 x i64> @__nvvm_lshrti3(<2 x i64>, i32) #0 {
343+
start:
344+
; %0 is the <2 x i64> parameter, %1 the i32 parameter; %2 receives the callee's result.
%2 = call <2 x i64> @__lshrti3(<2 x i64> %0, i32 %1)
345+
ret <2 x i64> %2
346+
}
347+
; External symbol called by the wrapper above; same signature and attribute group (#0).
declare <2 x i64> @__lshrti3(<2 x i64>, i32) #0
348+
289349
; Required because the following intrinsics must explicitly return { i32, i1 };
290350
; rustc will not generate that type (it emits { i32, i8 } instead), which libnvvm rejects.
291351

0 commit comments

Comments
 (0)