Skip to content

Commit 2adc3bf

Browse files
committed
Fix i128 saturating ops
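It turns out the LLVM saturating-arithmetic intrinsics (`llvm.{s,u}{add,sub}.sat.i128`) are not available in LLVM 7.1, so we need to implement the saturating operations directly. This is done with selects instead of branches to try to get fast codegen.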
1 parent 3ee57fe commit 2adc3bf

File tree

1 file changed

+43
-52
lines changed

1 file changed

+43
-52
lines changed

crates/rustc_codegen_nvvm/src/intrinsic.rs

Lines changed: 43 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ fn saturating_intrinsic_impl<'ll, 'tcx>(
8989
is_add: bool,
9090
args: &[OperandRef<'tcx, &'ll Value>],
9191
) -> &'ll Value {
92+
use crate::intrinsic::OverflowOp;
93+
use rustc_codegen_ssa::common::IntPredicate;
9294
use rustc_middle::ty::IntTy::*;
9395
use rustc_middle::ty::UintTy::*;
9496

@@ -107,65 +109,54 @@ fn saturating_intrinsic_impl<'ll, 'tcx>(
107109
_ => unreachable!(),
108110
};
109111

110-
// For 128-bit, we need to handle the constants differently
111-
if width == 128 {
112-
// For 128-bit saturating operations, use LLVM's saturating intrinsics directly
113-
let lhs = args[0].immediate();
114-
let rhs = args[1].immediate();
115-
let llvm_name = format!(
116-
"llvm.{}{}.sat.i128",
117-
if signed { 's' } else { 'u' },
118-
if is_add { "add" } else { "sub" }
119-
);
120-
return b.call_intrinsic(&llvm_name, &[lhs, rhs]);
112+
let llty = b.type_ix(width as u64);
113+
let a = args[0].immediate();
114+
let c = args[1].immediate();
115+
116+
// Perform the add or sub, returning the result and an overflow flag
117+
let (val, ov) = b.checked_binop(
118+
if is_add {
119+
OverflowOp::Add
120+
} else {
121+
OverflowOp::Sub
122+
},
123+
ty,
124+
a,
125+
c,
126+
);
127+
128+
let zero = b.const_int(llty, 0);
129+
130+
// Unsigned case: overflow means clamp to either max or min value
131+
if !signed {
132+
let all1 = b.not(zero);
133+
let clamp = if is_add { all1 } else { zero };
134+
return b.select(ov, clamp, val);
121135
}
122136

123-
let unsigned_max_value = match width {
124-
8 => u8::MAX as i64,
125-
16 => u16::MAX as i64,
126-
32 => u32::MAX as i64,
127-
64 => u64::MAX as i64,
128-
_ => unreachable!(),
129-
};
137+
// Signed case: compute INT_MIN and INT_MAX
138+
let one = b.const_int(llty, 1);
139+
let sh = b.const_int(llty, (width - 1) as i64);
140+
let int_min = b.shl(one, sh);
141+
let int_max = b.sub(int_min, one);
130142

131-
let (min_value, max_value) = if signed {
132-
(-((unsigned_max_value / 2) + 1), (unsigned_max_value / 2))
133-
} else {
134-
(0, unsigned_max_value)
135-
};
143+
// Check if a is negative
144+
let a_lt0 = b.icmp(IntPredicate::IntSLT, a, zero);
136145

137-
let overflow_op = if is_add {
138-
OverflowOp::Add
146+
// Pick the saturation value depending on operation and operand signs
147+
let sat = if is_add {
148+
// Add overflow: if a is negative → INT_MIN, else → INT_MAX
149+
b.select(a_lt0, int_min, int_max)
139150
} else {
140-
OverflowOp::Sub
151+
// Sub overflow: if a is non-negative and c is negative → INT_MAX, else → INT_MIN
152+
let a_ge0 = b.not(a_lt0);
153+
let c_lt0 = b.icmp(IntPredicate::IntSLT, c, zero);
154+
let to_max = b.and(a_ge0, c_lt0);
155+
b.select(to_max, int_max, int_min)
141156
};
142-
let llty = b.type_ix(width as u64);
143-
let lhs = args[0].immediate();
144-
let rhs = args[1].immediate();
145-
146-
let (val, overflowed) = b.checked_binop(overflow_op, ty, lhs, rhs);
147157

148-
if !signed {
149-
let select_val = if is_add {
150-
b.const_int(llty, -1)
151-
} else {
152-
b.const_int(llty, 0)
153-
};
154-
b.select(overflowed, select_val, val)
155-
} else {
156-
let const_val = b.const_int(llty, (width - 1) as i64);
157-
let first_val = if is_add {
158-
b.ashr(rhs, const_val)
159-
} else {
160-
b.lshr(rhs, const_val)
161-
};
162-
let second_val = if is_add {
163-
b.unchecked_uadd(first_val, b.const_int(llty, max_value))
164-
} else {
165-
b.xor(first_val, b.const_int(llty, min_value))
166-
};
167-
b.select(overflowed, second_val, val)
168-
}
158+
// Return the saturation value if overflow, else the computed result
159+
b.select(ov, sat, val)
169160
}
170161

171162
fn get_simple_intrinsic<'ll>(

0 commit comments

Comments
 (0)