@@ -835,13 +835,42 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
835
835
}
836
836
837
837
fn fptoui_sat ( & mut self , val : & ' ll Value , dest_ty : & ' ll Type ) -> & ' ll Value {
838
- // NVVM does not have support for saturated conversion. Setting rustc flag
839
- // `-Z saturating_float_casts=false` falls back to non-saturated, UB-prone
840
- // conversion, and should prevent this codegen. Otherwise, fall back to UB
841
- // prone conversion.
842
- self . cx ( ) . sess ( ) . dcx ( )
843
- . warn ( "Saturated float to int conversion is not supported on NVVM. Defaulting to UB prone conversion." ) ;
844
- self . fptoui ( val, dest_ty)
838
+ // NVVM does not support saturating casts, however, they are relatively simple to implement.
839
+ // (at least for unsigned ints). So, we emulate them here.
840
+
841
+ // In order to clamp the value, we need to know it's type.
842
+ let val_ty = self . val_ty ( val) ;
843
+ // Find the min / max intrinsics
844
+ let ( min, max) = match self . cx ( ) . float_width ( val_ty) {
845
+ 64 => ( "__nv_fmin" , "__nv_fmax" ) ,
846
+ 32 => ( "__nv_fminf" , "__nv_fmaxf" ) ,
847
+ _ => {
848
+ self . cx ( ) . sess ( ) . dcx ( )
849
+ . warn ( "Saturated float to int conversion is not supported in NVVM for type {val_ty:?}. Defaulting to UB prone conversion." ) ;
850
+ return self . fptoui ( val, dest_ty) ;
851
+ }
852
+ } ;
853
+ let ( max_ty, max) = self . cx ( ) . get_intrinsic ( max) ;
854
+ let ( min_ty, min) = self . cx ( ) . get_intrinsic ( min) ;
855
+ // Find the zero value, and the max value of a given int.
856
+ let zero = self . const_real ( val_ty, 0.0 ) ;
857
+ let max_value = match self . int_width ( dest_ty) {
858
+ 8 => u8:: MAX as f64 ,
859
+ 16 => u16:: MAX as f64 ,
860
+ 32 => u32:: MAX as f64 ,
861
+ 64 => u64:: MAX as f64 ,
862
+ 128 => u128:: MAX as f64 ,
863
+ _ => todo ! ( "Unsupported int type {dest_ty:?}" ) ,
864
+ } ;
865
+ let max_value = self . const_real ( val_ty, max_value) ;
866
+ // Compute max(val, 0). This will clamp negative values to zero **AND**
867
+ // replace NaNs with 0s(just like how Rust is specified to behave)
868
+ let res = self . call ( max_ty, None , None , max, & [ val, zero] , None , None ) ;
869
+ // Clamp all values higher than max to max
870
+ let res = self . call ( min_ty, None , None , min, & [ res, max_value] , None , None ) ;
871
+ // Now, we know that `res` is non-nan, and in range (min, max). So, it is well-defined
872
+ // for all inputs :D!
873
+ self . fptoui ( res, dest_ty)
845
874
}
846
875
847
876
fn fptosi_sat ( & mut self , val : & ' ll Value , dest_ty : & ' ll Type ) -> & ' ll Value {
0 commit comments