Skip to content

Commit 51eca39

Browse files
nnethercote authored and LegNeato committed
Improve math_builder_methods.
It currently produces silly verbose code like this:

```
fn add(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
    #[allow(unused_variables)]
    let needs_i128_emulation = false || self.is_i128(a) || self.is_i128(b);
    if needs_i128_emulation {
        let args_vec: Vec<&'ll Value> = <[_]>::into_vec(
            ::alloc::boxed::box_new([a, b]),
        );
        match "add" {
            "add" | "unchecked_uadd" | "unchecked_sadd" => {
                assert_eq!(args_vec.len(), 2);
                return self.emulate_i128_add(args_vec[0], args_vec[1]);
            }
            "sub" | "unchecked_usub" | "unchecked_ssub" => { ... }
            "mul" | "unchecked_umul" | "unchecked_smul" => { ... }
            "and" => { ... }
            ...
            _ => {
                self.cx.fatal(...);
            }
        }
    }
    unsafe { ... }
}
```

and relies on constant folding to eliminate most of the code.

This commit changes the macro to produce code like this:

```
fn add(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
    if self.is_i128(a) || self.is_i128(b) {
        self.emulate_i128_add(a, b)
    } else {
        unsafe { ... }
    }
}
```

and float ops are even shorter, because they don't need an i128 check and are now done with a new, separate macro:

```
fn fadd(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
    unsafe { ... }
}
```

The generated code is much shorter: `cargo expand`'s length has reduced from 34,636 lines to 26,356 lines, a 24% reduction. The source code (before expansion) is also a little shorter.

`args_vec` is gone because it isn't needed. The asserts are gone because they were very low value. There is now a tiny bit of code duplication, but it's trivial and not worth trying to avoid.
1 parent 1a0874d commit 51eca39

File tree

1 file changed

+78
-125
lines changed

1 file changed

+78
-125
lines changed

crates/rustc_codegen_nvvm/src/builder.rs

Lines changed: 78 additions & 125 deletions
Original file line number | Diff line number | Diff line change
@@ -130,108 +130,34 @@ impl<'ll, 'tcx> Deref for Builder<'_, 'll, 'tcx> {
130130
}
131131
}
132132

133-
macro_rules! math_builder_methods {
134-
($($name:ident($($arg:ident),*) => $llvm_capi:ident),+ $(,)?) => {
135-
$(fn $name(&mut self, $($arg: &'ll Value),*) -> &'ll Value {
136-
// Check if we're dealing with 128-bit integers and need emulation
137-
#[allow(unused_variables)]
138-
let needs_i128_emulation = false $(|| self.is_i128($arg))*;
139-
140-
if needs_i128_emulation {
141-
142-
// Collect arguments into a vector for easier handling
143-
let args_vec: Vec<&'ll Value> = vec![$($arg),*];
144-
145-
// Dispatch to i128 emulation or `compiler_builtins`-based intrinsic
146-
match stringify!($name) {
147-
"add" | "unchecked_uadd" | "unchecked_sadd" => {
148-
assert_eq!(args_vec.len(), 2);
149-
return self.emulate_i128_add(args_vec[0], args_vec[1]);
150-
}
151-
"sub" | "unchecked_usub" | "unchecked_ssub" => {
152-
assert_eq!(args_vec.len(), 2);
153-
return self.emulate_i128_sub(args_vec[0], args_vec[1]);
154-
}
155-
"mul" | "unchecked_umul" | "unchecked_smul" => {
156-
assert_eq!(args_vec.len(), 2);
157-
return self.call_intrinsic("__nvvm_multi3", &[args_vec[0], args_vec[1]]);
158-
}
159-
"and" => {
160-
assert_eq!(args_vec.len(), 2);
161-
return self.emulate_i128_and(args_vec[0], args_vec[1]);
162-
}
163-
"or" => {
164-
assert_eq!(args_vec.len(), 2);
165-
return self.emulate_i128_or(args_vec[0], args_vec[1]);
166-
}
167-
"xor" => {
168-
assert_eq!(args_vec.len(), 2);
169-
return self.emulate_i128_xor(args_vec[0], args_vec[1]);
170-
}
171-
"shl" => {
172-
assert_eq!(args_vec.len(), 2);
173-
// Convert shift amount to i32 for compiler-builtins
174-
let shift_amt = self.trunc(args_vec[1], self.type_i32());
175-
return self.call_intrinsic("__nvvm_ashlti3", &[args_vec[0], shift_amt]);
176-
}
177-
"lshr" => {
178-
assert_eq!(args_vec.len(), 2);
179-
// Convert shift amount to i32 for compiler-builtins
180-
let shift_amt = self.trunc(args_vec[1], self.type_i32());
181-
return self.call_intrinsic("__nvvm_lshrti3", &[args_vec[0], shift_amt]);
182-
}
183-
"ashr" => {
184-
assert_eq!(args_vec.len(), 2);
185-
// Convert shift amount to i32 for compiler-builtins
186-
let shift_amt = self.trunc(args_vec[1], self.type_i32());
187-
return self.call_intrinsic("__nvvm_ashrti3", &[args_vec[0], shift_amt]);
188-
}
189-
"neg" => {
190-
assert_eq!(args_vec.len(), 1);
191-
return self.emulate_i128_neg(args_vec[0]);
192-
}
193-
"not" => {
194-
assert_eq!(args_vec.len(), 1);
195-
return self.emulate_i128_not(args_vec[0]);
196-
}
197-
"udiv" | "exactudiv" => {
198-
assert_eq!(args_vec.len(), 2);
199-
return self.call_intrinsic("__nvvm_udivti3", &[args_vec[0], args_vec[1]]);
200-
}
201-
"sdiv" | "exactsdiv" => {
202-
assert_eq!(args_vec.len(), 2);
203-
return self.call_intrinsic("__nvvm_divti3", &[args_vec[0], args_vec[1]]);
204-
}
205-
"urem" => {
206-
assert_eq!(args_vec.len(), 2);
207-
return self.call_intrinsic("__nvvm_umodti3", &[args_vec[0], args_vec[1]]);
208-
}
209-
"srem" => {
210-
assert_eq!(args_vec.len(), 2);
211-
return self.call_intrinsic("__nvvm_modti3", &[args_vec[0], args_vec[1]]);
212-
}
213-
_ => {
214-
self.cx.fatal(format!(
215-
"Unimplemented 128-bit integer operation '{}' with {} arguments. \
216-
This operation is not yet supported in Rust CUDA. \
217-
Consider using 64-bit integers or filing an issue at \
218-
https://github.com/Rust-GPU/rust-cuda/issues",
219-
stringify!($name),
220-
args_vec.len()
221-
));
222-
}
133+
macro_rules! imath_builder_methods {
134+
($($self_:ident.$name:ident($($arg:ident),*) => $llvm_capi:ident => $op:block)+) => {
135+
$(fn $name(&mut $self_, $($arg: &'ll Value),*) -> &'ll Value {
136+
// Dispatch to i128 emulation or `compiler_builtins`-based intrinsic
137+
if $($self_.is_i128($arg))||*
138+
$op
139+
else {
140+
unsafe {
141+
trace!("binary expr: {:?} with args {:?}", stringify!($name), [$($arg),*]);
142+
llvm::$llvm_capi($self_.llbuilder, $($arg,)* UNNAMED)
223143
}
224144
}
145+
})+
146+
}
147+
}
225148

149+
macro_rules! fmath_builder_methods {
150+
($($self_:ident.$name:ident($($arg:ident),*) => $llvm_capi:ident)+) => {
151+
$(fn $name(&mut $self_, $($arg: &'ll Value),*) -> &'ll Value {
226152
unsafe {
227153
trace!("binary expr: {:?} with args {:?}", stringify!($name), [$($arg),*]);
228-
llvm::$llvm_capi(self.llbuilder, $($arg,)* UNNAMED)
154+
llvm::$llvm_capi($self_.llbuilder, $($arg,)* UNNAMED)
229155
}
230156
})+
231157
}
232158
}
233159

234-
macro_rules! set_math_builder_methods {
160+
macro_rules! set_fmath_builder_methods {
235161
($($name:ident($($arg:ident),*) => ($llvm_capi:ident, $llvm_set_math:ident)),+ $(,)?) => {
236162
$(fn $name(&mut self, $($arg: &'ll Value),*) -> &'ll Value {
237163
unsafe {
@@ -384,39 +310,66 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
384310
}
385311
}
386312

387-
math_builder_methods! {
388-
add(a, b) => LLVMBuildAdd,
389-
fadd(a, b) => LLVMBuildFAdd,
390-
sub(a, b) => LLVMBuildSub,
391-
fsub(a, b) => LLVMBuildFSub,
392-
mul(a, b) => LLVMBuildMul,
393-
fmul(a, b) => LLVMBuildFMul,
394-
udiv(a, b) => LLVMBuildUDiv,
395-
exactudiv(a, b) => LLVMBuildExactUDiv,
396-
sdiv(a, b) => LLVMBuildSDiv,
397-
exactsdiv(a, b) => LLVMBuildExactSDiv,
398-
fdiv(a, b) => LLVMBuildFDiv,
399-
urem(a, b) => LLVMBuildURem,
400-
srem(a, b) => LLVMBuildSRem,
401-
frem(a, b) => LLVMBuildFRem,
402-
shl(a, b) => LLVMBuildShl,
403-
lshr(a, b) => LLVMBuildLShr,
404-
ashr(a, b) => LLVMBuildAShr,
405-
and(a, b) => LLVMBuildAnd,
406-
or(a, b) => LLVMBuildOr,
407-
xor(a, b) => LLVMBuildXor,
408-
neg(x) => LLVMBuildNeg,
409-
fneg(x) => LLVMBuildFNeg,
410-
not(x) => LLVMBuildNot,
411-
unchecked_sadd(x, y) => LLVMBuildNSWAdd,
412-
unchecked_uadd(x, y) => LLVMBuildNUWAdd,
413-
unchecked_ssub(x, y) => LLVMBuildNSWSub,
414-
unchecked_usub(x, y) => LLVMBuildNUWSub,
415-
unchecked_smul(x, y) => LLVMBuildNSWMul,
416-
unchecked_umul(x, y) => LLVMBuildNUWMul,
417-
}
418-
419-
set_math_builder_methods! {
313+
imath_builder_methods! {
314+
self.add(a, b) => LLVMBuildAdd => { self.emulate_i128_add(a, b) }
315+
self.unchecked_uadd(a, b) => LLVMBuildNUWAdd => { self.emulate_i128_add(a, b) }
316+
self.unchecked_sadd(a, b) => LLVMBuildNSWAdd => { self.emulate_i128_add(a, b) }
317+
318+
self.sub(a, b) => LLVMBuildSub => { self.emulate_i128_sub(a, b) }
319+
self.unchecked_usub(a, b) => LLVMBuildNUWSub => { self.emulate_i128_sub(a, b) }
320+
self.unchecked_ssub(a, b) => LLVMBuildNSWSub => { self.emulate_i128_sub(a, b) }
321+
322+
self.mul(a, b) => LLVMBuildMul => { self.call_intrinsic("__nvvm_multi3", &[a, b]) }
323+
self.unchecked_umul(a, b) => LLVMBuildNUWMul => {
324+
self.call_intrinsic("__nvvm_multi3", &[a, b])
325+
}
326+
self.unchecked_smul(a, b) => LLVMBuildNSWMul => {
327+
self.call_intrinsic("__nvvm_multi3", &[a, b])
328+
}
329+
330+
self.udiv(a, b) => LLVMBuildUDiv => { self.call_intrinsic("__nvvm_udivti3", &[a, b]) }
331+
self.exactudiv(a, b) => LLVMBuildExactUDiv => {
332+
self.call_intrinsic("__nvvm_udivti3", &[a, b])
333+
}
334+
self.sdiv(a, b) => LLVMBuildSDiv => { self.call_intrinsic("__nvvm_divti3", &[a, b]) }
335+
self.exactsdiv(a, b) => LLVMBuildExactSDiv => {
336+
self.call_intrinsic("__nvvm_divti3", &[a, b])
337+
}
338+
self.urem(a, b) => LLVMBuildURem => { self.call_intrinsic("__nvvm_umodti3", &[a, b]) }
339+
self.srem(a, b) => LLVMBuildSRem => { self.call_intrinsic("__nvvm_modti3", &[a, b]) }
340+
341+
self.shl(a, b) => LLVMBuildShl => {
342+
// Convert shift amount to i32 for compiler-builtins.
343+
let b = self.trunc(b, self.type_i32());
344+
self.call_intrinsic("__nvvm_ashlti3", &[a, b])
345+
}
346+
self.lshr(a, b) => LLVMBuildLShr => {
347+
// Convert shift amount to i32 for compiler-builtins.
348+
let b = self.trunc(b, self.type_i32());
349+
self.call_intrinsic("__nvvm_lshrti3", &[a, b])
350+
}
351+
self.ashr(a, b) => LLVMBuildAShr => {
352+
// Convert shift amount to i32 for compiler-builtins.
353+
let b = self.trunc(b, self.type_i32());
354+
self.call_intrinsic("__nvvm_ashrti3", &[a, b])
355+
}
356+
self.and(a, b) => LLVMBuildAnd => { self.emulate_i128_and(a, b) }
357+
self.or(a, b) => LLVMBuildOr => { self.emulate_i128_or(a, b) }
358+
self.xor(a, b) => LLVMBuildXor => { self.emulate_i128_xor(a, b) }
359+
self.neg(a) => LLVMBuildNeg => { self.emulate_i128_neg(a) }
360+
self.not(a) => LLVMBuildNot => { self.emulate_i128_not(a) }
361+
}
362+
363+
fmath_builder_methods! {
364+
self.fadd(a, b) => LLVMBuildFAdd
365+
self.fsub(a, b) => LLVMBuildFSub
366+
self.fmul(a, b) => LLVMBuildFMul
367+
self.fdiv(a, b) => LLVMBuildFDiv
368+
self.frem(a, b) => LLVMBuildFRem
369+
self.fneg(a) => LLVMBuildFNeg
370+
}
371+
372+
set_fmath_builder_methods! {
420373
fadd_fast(x, y) => (LLVMBuildFAdd, LLVMRustSetFastMath),
421374
fsub_fast(x, y) => (LLVMBuildFSub, LLVMRustSetFastMath),
422375
fmul_fast(x, y) => (LLVMBuildFMul, LLVMRustSetFastMath),

0 commit comments

Comments
 (0)