Skip to content

Commit bc9f721

Browse files
committed
Emulate all 128 bit ops
I thought we could use the fallback code from compiler-builtins, but apparently not: most things were broken. I had AI generate our implementations; I did not write them myself. This generates a lot of code, so it may be better to generate a function once and have everything call it. I added an example and confirmed that it works and passes.
1 parent 1a0874d commit bc9f721

File tree

11 files changed

+1110
-302
lines changed

11 files changed

+1110
-302
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ members = [
1616
"examples/cuda/path_tracer/kernels",
1717
"examples/cuda/sha2_crates_io",
1818
"examples/cuda/sha2_crates_io/kernels",
19+
"examples/cuda/i128_demo",
20+
"examples/cuda/i128_demo/kernels",
1921

2022
"examples/optix/*",
2123
"tests/compiletests",
-2.14 KB
Binary file not shown.

crates/rustc_codegen_nvvm/libintrinsics.ll

Lines changed: 0 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -239,113 +239,6 @@ start:
239239
}
240240
declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) #0
241241

242-
; This is a bit weird: we need to use functions defined in Rust crates (compiler_builtins)
243-
; as intrinsics in the codegen, but we can't directly use their name, otherwise we will have
244-
; really odd and incorrect behavior in the crate they're defined in. So we need to make a wrapper for them that is opaque
245-
; to the codegen, which is what this is doing.
246-
247-
define {<2 x i64>, i1} @__nvvm_i128_addo(<2 x i64>, <2 x i64>) #0 {
248-
start:
249-
%2 = call {<2 x i64>, i1} @__rust_i128_addo(<2 x i64> %0, <2 x i64> %1)
250-
ret {<2 x i64>, i1} %2
251-
}
252-
declare {<2 x i64>, i1} @__rust_i128_addo(<2 x i64>, <2 x i64>) #0
253-
254-
define {<2 x i64>, i1} @__nvvm_u128_addo(<2 x i64>, <2 x i64>) #0 {
255-
start:
256-
%2 = call {<2 x i64>, i1} @__rust_u128_addo(<2 x i64> %0, <2 x i64> %1)
257-
ret {<2 x i64>, i1} %2
258-
}
259-
declare {<2 x i64>, i1} @__rust_u128_addo(<2 x i64>, <2 x i64>) #0
260-
261-
define {<2 x i64>, i1} @__nvvm_i128_subo(<2 x i64>, <2 x i64>) #0 {
262-
start:
263-
%2 = call {<2 x i64>, i1} @__rust_i128_subo(<2 x i64> %0, <2 x i64> %1)
264-
ret {<2 x i64>, i1} %2
265-
}
266-
declare {<2 x i64>, i1} @__rust_i128_subo(<2 x i64>, <2 x i64>) #0
267-
268-
define {<2 x i64>, i1} @__nvvm_u128_subo(<2 x i64>, <2 x i64>) #0 {
269-
start:
270-
%2 = call {<2 x i64>, i1} @__rust_u128_subo(<2 x i64> %0, <2 x i64> %1)
271-
ret {<2 x i64>, i1} %2
272-
}
273-
declare {<2 x i64>, i1} @__rust_u128_subo(<2 x i64>, <2 x i64>) #0
274-
275-
define {<2 x i64>, i1} @__nvvm_i128_mulo(<2 x i64>, <2 x i64>) #0 {
276-
start:
277-
%2 = call {<2 x i64>, i1} @__rust_i128_mulo(<2 x i64> %0, <2 x i64> %1)
278-
ret {<2 x i64>, i1} %2
279-
}
280-
declare {<2 x i64>, i1} @__rust_i128_mulo(<2 x i64>, <2 x i64>) #0
281-
282-
define {<2 x i64>, i1} @__nvvm_u128_mulo(<2 x i64>, <2 x i64>) #0 {
283-
start:
284-
%2 = call {<2 x i64>, i1} @__rust_u128_mulo(<2 x i64> %0, <2 x i64> %1)
285-
ret {<2 x i64>, i1} %2
286-
}
287-
declare {<2 x i64>, i1} @__rust_u128_mulo(<2 x i64>, <2 x i64>) #0
288-
289-
; Division operations from compiler-builtins
290-
define <2 x i64> @__nvvm_divti3(<2 x i64>, <2 x i64>) #0 {
291-
start:
292-
%2 = call <2 x i64> @__divti3(<2 x i64> %0, <2 x i64> %1)
293-
ret <2 x i64> %2
294-
}
295-
declare <2 x i64> @__divti3(<2 x i64>, <2 x i64>) #0
296-
297-
define <2 x i64> @__nvvm_udivti3(<2 x i64>, <2 x i64>) #0 {
298-
start:
299-
%2 = call <2 x i64> @__udivti3(<2 x i64> %0, <2 x i64> %1)
300-
ret <2 x i64> %2
301-
}
302-
declare <2 x i64> @__udivti3(<2 x i64>, <2 x i64>) #0
303-
304-
; Remainder operations from compiler-builtins
305-
define <2 x i64> @__nvvm_modti3(<2 x i64>, <2 x i64>) #0 {
306-
start:
307-
%2 = call <2 x i64> @__modti3(<2 x i64> %0, <2 x i64> %1)
308-
ret <2 x i64> %2
309-
}
310-
declare <2 x i64> @__modti3(<2 x i64>, <2 x i64>) #0
311-
312-
define <2 x i64> @__nvvm_umodti3(<2 x i64>, <2 x i64>) #0 {
313-
start:
314-
%2 = call <2 x i64> @__umodti3(<2 x i64> %0, <2 x i64> %1)
315-
ret <2 x i64> %2
316-
}
317-
declare <2 x i64> @__umodti3(<2 x i64>, <2 x i64>) #0
318-
319-
; Multiplication from compiler-builtins
320-
define <2 x i64> @__nvvm_multi3(<2 x i64>, <2 x i64>) #0 {
321-
start:
322-
%2 = call <2 x i64> @__multi3(<2 x i64> %0, <2 x i64> %1)
323-
ret <2 x i64> %2
324-
}
325-
declare <2 x i64> @__multi3(<2 x i64>, <2 x i64>) #0
326-
327-
; Shift operations from compiler-builtins
328-
define <2 x i64> @__nvvm_ashlti3(<2 x i64>, i32) #0 {
329-
start:
330-
%2 = call <2 x i64> @__ashlti3(<2 x i64> %0, i32 %1)
331-
ret <2 x i64> %2
332-
}
333-
declare <2 x i64> @__ashlti3(<2 x i64>, i32) #0
334-
335-
define <2 x i64> @__nvvm_ashrti3(<2 x i64>, i32) #0 {
336-
start:
337-
%2 = call <2 x i64> @__ashrti3(<2 x i64> %0, i32 %1)
338-
ret <2 x i64> %2
339-
}
340-
declare <2 x i64> @__ashrti3(<2 x i64>, i32) #0
341-
342-
define <2 x i64> @__nvvm_lshrti3(<2 x i64>, i32) #0 {
343-
start:
344-
%2 = call <2 x i64> @__lshrti3(<2 x i64> %0, i32 %1)
345-
ret <2 x i64> %2
346-
}
347-
declare <2 x i64> @__lshrti3(<2 x i64>, i32) #0
348-
349242
; Required because we need to explicitly generate { i32, i1 } for the following intrinsics
350243
; except rustc will not generate them (it will make { i32, i8 }) which libnvvm rejects.
351244

0 commit comments

Comments
 (0)