Skip to content

Commit 62ec2ce

Browse files
committed
Fix i128 saturating ops
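It turns out that the LLVM saturating add/sub intrinsics (`llvm.{s,u}{add,sub}.sat.i128`) are not available in LLVM 7.1, so we need to implement the saturating operations directly. This is done without branches to try to get fast codegen.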
1 parent 3ee57fe commit 62ec2ce

File tree

1 file changed

+76
-51
lines changed

1 file changed

+76
-51
lines changed

crates/rustc_codegen_nvvm/src/intrinsic.rs

Lines changed: 76 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ fn saturating_intrinsic_impl<'ll, 'tcx>(
8989
is_add: bool,
9090
args: &[OperandRef<'tcx, &'ll Value>],
9191
) -> &'ll Value {
92+
use crate::intrinsic::OverflowOp;
93+
use rustc_codegen_ssa::common::IntPredicate;
9294
use rustc_middle::ty::IntTy::*;
9395
use rustc_middle::ty::UintTy::*;
9496

@@ -107,65 +109,88 @@ fn saturating_intrinsic_impl<'ll, 'tcx>(
107109
_ => unreachable!(),
108110
};
109111

110-
// For 128-bit, we need to handle the constants differently
111-
if width == 128 {
112-
// For 128-bit saturating operations, use LLVM's saturating intrinsics directly
113-
let lhs = args[0].immediate();
114-
let rhs = args[1].immediate();
115-
let llvm_name = format!(
116-
"llvm.{}{}.sat.i128",
117-
if signed { 's' } else { 'u' },
118-
if is_add { "add" } else { "sub" }
119-
);
120-
return b.call_intrinsic(&llvm_name, &[lhs, rhs]);
112+
let llty = b.type_ix(width as u64);
113+
let a = args[0].immediate();
114+
let c = args[1].immediate();
115+
116+
// Perform the operation and capture overflow flag
117+
let (val, ov_i1) = b.checked_binop(
118+
if is_add {
119+
OverflowOp::Add
120+
} else {
121+
OverflowOp::Sub
122+
},
123+
ty,
124+
a,
125+
c,
126+
);
127+
128+
// Common constants
129+
let zero = b.const_int(llty, 0);
130+
let one = b.const_int(llty, 1);
131+
let all1 = b.const_int(llty, -1);
132+
let sh = b.const_int(llty, (width - 1) as i64);
133+
134+
// Convert overflow flag to mask: -1 if overflow, else 0
135+
let ov_m = b.select(ov_i1, all1, zero);
136+
137+
if !signed {
138+
// MAX for add, 0 for sub
139+
let clamp = if is_add { all1 } else { zero };
140+
141+
// Mask for non-overflow path
142+
let keep = b.not(ov_m);
143+
144+
// Overflow → clamp
145+
let l = b.and(ov_m, clamp);
146+
147+
// No overflow → result
148+
let r = b.and(keep, val);
149+
150+
return b.or(l, r);
121151
}
122152

123-
let unsigned_max_value = match width {
124-
8 => u8::MAX as i64,
125-
16 => u16::MAX as i64,
126-
32 => u32::MAX as i64,
127-
64 => u64::MAX as i64,
128-
_ => unreachable!(),
129-
};
153+
// Compute INT_MIN = 1 << (w-1)
154+
let int_min = b.shl(one, sh);
130155

131-
let (min_value, max_value) = if signed {
132-
(-((unsigned_max_value / 2) + 1), (unsigned_max_value / 2))
133-
} else {
134-
(0, unsigned_max_value)
135-
};
156+
// Compute INT_MAX = INT_MIN - 1
157+
let int_max = b.sub(int_min, one);
158+
159+
// Mask for a < 0
160+
let a_lt0 = b.icmp(IntPredicate::IntSLT, a, zero);
161+
let a_neg = b.select(a_lt0, all1, zero);
136162

137-
let overflow_op = if is_add {
138-
OverflowOp::Add
163+
// Mask for a >= 0
164+
let a_non = b.not(a_neg);
165+
166+
// Mask for c < 0
167+
let c_lt0 = b.icmp(IntPredicate::IntSLT, c, zero);
168+
let c_neg = b.select(c_lt0, all1, zero);
169+
170+
// Signed add overflow: a<0 → MIN, a>=0 → MAX
171+
let sat = if is_add {
172+
let p1 = b.and(a_neg, int_min);
173+
let p2 = b.and(a_non, int_max);
174+
b.or(p1, p2)
139175
} else {
140-
OverflowOp::Sub
176+
// Signed sub overflow: (a>=0 && c<0) → MAX, else MIN
177+
let to_max = b.and(a_non, c_neg);
178+
let to_min = b.not(to_max);
179+
let p1 = b.and(to_max, int_max);
180+
let p2 = b.and(to_min, int_min);
181+
b.or(p1, p2)
141182
};
142-
let llty = b.type_ix(width as u64);
143-
let lhs = args[0].immediate();
144-
let rhs = args[1].immediate();
145183

146-
let (val, overflowed) = b.checked_binop(overflow_op, ty, lhs, rhs);
184+
// Mask for non-overflow path
185+
let keep = b.not(ov_m);
147186

148-
if !signed {
149-
let select_val = if is_add {
150-
b.const_int(llty, -1)
151-
} else {
152-
b.const_int(llty, 0)
153-
};
154-
b.select(overflowed, select_val, val)
155-
} else {
156-
let const_val = b.const_int(llty, (width - 1) as i64);
157-
let first_val = if is_add {
158-
b.ashr(rhs, const_val)
159-
} else {
160-
b.lshr(rhs, const_val)
161-
};
162-
let second_val = if is_add {
163-
b.unchecked_uadd(first_val, b.const_int(llty, max_value))
164-
} else {
165-
b.xor(first_val, b.const_int(llty, min_value))
166-
};
167-
b.select(overflowed, second_val, val)
168-
}
187+
// Overflow → sat value
188+
let l = b.and(ov_m, sat);
189+
190+
// No overflow → result
191+
let r = b.and(keep, val);
192+
193+
b.or(l, r)
169194
}
170195

171196
fn get_simple_intrinsic<'ll>(

0 commit comments

Comments
 (0)