Skip to content

Commit e6e046d

Browse files
committed
Replace incorrect, UB-prone fptoui_sat with a correct implementation.
1 parent 7a1cb56 commit e6e046d

File tree

1 file changed

+36
-7
lines changed

1 file changed

+36
-7
lines changed

crates/rustc_codegen_nvvm/src/builder.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -805,13 +805,42 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
805805
}
806806

807807
fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
808-
// NVVM does not have support for saturated conversion. Setting rustc flag
809-
// `-Z saturating_float_casts=false` falls back to non-saturated, UB-prone
810-
// conversion, and should prevent this codegen. Otherwise, fall back to UB
811-
// prone conversion.
812-
self.cx().sess().dcx()
813-
.warn("Saturated float to int conversion is not supported on NVVM. Defaulting to UB prone conversion.");
814-
self.fptoui(val, dest_ty)
808+
// NVVM does not support saturating casts; however, they are relatively simple to implement
809+
// (at least for unsigned ints), so we emulate them here.
810+
811+
// In order to clamp the value, we need to know its type.
812+
let val_ty = self.val_ty(val);
813+
// Find the min / max intrinsics
814+
let (min, max) = match self.cx().float_width(val_ty) {
815+
64 => ("__nv_fmin", "__nv_fmax"),
816+
32 => ("__nv_fminf", "__nv_fmaxf"),
817+
_ => {
818+
self.cx().sess().dcx()
819+
.warn("Saturated float to int conversion is not supported in NVVM for type {val_ty:?}. Defaulting to UB prone conversion.");
820+
return self.fptoui(val, dest_ty);
821+
}
822+
};
823+
let (max_ty, max) = self.cx().get_intrinsic(max);
824+
let (min_ty, min) = self.cx().get_intrinsic(min);
825+
// Find the zero value, and the max value of a given int.
826+
let zero = self.const_real(val_ty, 0.0);
827+
let max_value = match self.int_width(dest_ty) {
828+
8 => u8::MAX as f64,
829+
16 => u16::MAX as f64,
830+
32 => u32::MAX as f64,
831+
64 => u64::MAX as f64,
832+
128 => u128::MAX as f64,
833+
_ => todo!("Unsupported int type {dest_ty:?}"),
834+
};
835+
let max_value = self.const_real(val_ty, max_value);
836+
// Compute max(val, 0). This will clamp negative values to zero **AND**
837+
// replace NaNs with 0s (just like how Rust is specified to behave).
838+
let res = self.call(max_ty, None, None, max, &[val, zero], None, None);
839+
// Clamp all values higher than max to max
840+
let res = self.call(min_ty, None, None, min, &[res, max_value], None, None);
841+
// Now, we know that `res` is not NaN and lies in the range [0, max_value], so the conversion
842+
// is well-defined for all inputs :D!
843+
self.fptoui(res, dest_ty)
815844
}
816845

817846
fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {

0 commit comments

Comments
 (0)