Skip to content

Commit 5857477

Browse files
committed
Replace incorrect, UB-prone fptoui_sat with a correct implementation.
1 parent 5fb10d3 commit 5857477

File tree

1 file changed

+36
-7
lines changed

1 file changed

+36
-7
lines changed

crates/rustc_codegen_nvvm/src/builder.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -835,13 +835,42 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
835835
}
836836

837837
fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
838-
// NVVM does not have support for saturated conversion. Setting rustc flag
839-
// `-Z saturating_float_casts=false` falls back to non-saturated, UB-prone
840-
// conversion, and should prevent this codegen. Otherwise, fall back to UB
841-
// prone conversion.
842-
self.cx().sess().dcx()
843-
.warn("Saturated float to int conversion is not supported on NVVM. Defaulting to UB prone conversion.");
844-
self.fptoui(val, dest_ty)
838+
// NVVM does not support saturating casts, however, they are relatively simple to implement.
839+
// (at least for unsigned ints). So, we emulate them here.
840+
841+
// In order to clamp the value, we need to know it's type.
842+
let val_ty = self.val_ty(val);
843+
// Find the min / max intrinsics
844+
let (min, max) = match self.cx().float_width(val_ty) {
845+
64 => ("__nv_fmin", "__nv_fmax"),
846+
32 => ("__nv_fminf", "__nv_fmaxf"),
847+
_ => {
848+
self.cx().sess().dcx()
849+
.warn("Saturated float to int conversion is not supported in NVVM for type {val_ty:?}. Defaulting to UB prone conversion.");
850+
return self.fptoui(val, dest_ty);
851+
}
852+
};
853+
let (max_ty, max) = self.cx().get_intrinsic(max);
854+
let (min_ty, min) = self.cx().get_intrinsic(min);
855+
// Find the zero value, and the max value of a given int.
856+
let zero = self.const_real(val_ty, 0.0);
857+
let max_value = match self.int_width(dest_ty) {
858+
8 => u8::MAX as f64,
859+
16 => u16::MAX as f64,
860+
32 => u32::MAX as f64,
861+
64 => u64::MAX as f64,
862+
128 => u128::MAX as f64,
863+
_ => todo!("Unsupported int type {dest_ty:?}"),
864+
};
865+
let max_value = self.const_real(val_ty, max_value);
866+
// Compute max(val, 0). This will clamp negative values to zero **AND**
867+
// replace NaNs with 0s(just like how Rust is specified to behave)
868+
let res = self.call(max_ty, None, None, max, &[val, zero], None, None);
869+
// Clamp all values higher than max to max
870+
let res = self.call(min_ty, None, None, min, &[res, max_value], None, None);
871+
// Now, we know that `res` is non-nan, and in range (min, max). So, it is well-defined
872+
// for all inputs :D!
873+
self.fptoui(res, dest_ty)
845874
}
846875

847876
fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {

0 commit comments

Comments
 (0)