@@ -410,7 +410,11 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
410
410
| sym:: saturating_sub => {
411
411
match int_type_width_signed ( args[ 0 ] . layout . ty , self ) {
412
412
Some ( ( width, signed) ) => match name {
413
- sym:: ctlz | sym:: cttz => {
413
+ sym:: ctlz => self . count_leading_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
414
+
415
+
416
+
417
+ sym:: cttz => {
414
418
let func = self . current_func ( ) ;
415
419
let then_block = func. new_block ( "then" ) ;
416
420
let else_block = func. new_block ( "else" ) ;
@@ -445,7 +449,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
445
449
446
450
result. to_rvalue ( )
447
451
}
448
- sym:: ctlz_nonzero => self . count_leading_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
452
+ sym:: ctlz_nonzero => self . count_leading_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) ) ,
449
453
sym:: cttz_nonzero => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
450
454
sym:: ctpop => self . pop_count ( args[ 0 ] . immediate ( ) ) ,
451
455
sym:: bswap => {
@@ -886,6 +890,38 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
886
890
}
887
891
888
892
fn count_leading_zeroes ( & mut self , width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
893
+ let func = self . current_func ( ) ;
894
+ let then_block = func. new_block ( "then" ) ;
895
+ let else_block = func. new_block ( "else" ) ;
896
+ let after_block = func. new_block ( "after" ) ;
897
+
898
+ let result = func. new_local ( None , self . u32_type , "zeros" ) ;
899
+ let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
900
+ let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
901
+ self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
902
+
903
+ let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
904
+ then_block. add_assignment ( None , result, zero_result) ;
905
+ then_block. end_with_jump ( None , after_block) ;
906
+
907
+ // NOTE: since jumps were added in a place
908
+ // count_leading_zeroes() does not expect, the current block
909
+ // in the state needs to be updated.
910
+ self . switch_to_block ( else_block) ;
911
+
912
+ let zeros =self . count_leading_zeroes_nonzero ( width, arg) ;
913
+ self . llbb ( ) . add_assignment ( None , result, zeros) ;
914
+ self . llbb ( ) . end_with_jump ( None , after_block) ;
915
+
916
+ // NOTE: since jumps were added in a place rustc does not
917
+ // expect, the current block in the state needs to be updated.
918
+ self . switch_to_block ( after_block) ;
919
+
920
+ result. to_rvalue ( )
921
+
922
+ }
923
+
924
+ fn count_leading_zeroes_nonzero ( & mut self , width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
889
925
// TODO(antoyo): use width?
890
926
let arg_type = arg. get_type ( ) ;
891
927
let result_type = self . u32_type ;
@@ -902,51 +938,44 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
902
938
"__builtin_clzll"
903
939
}
904
940
else if width == 128 {
905
- // Algorithm from: https://stackoverflow.com/a/28433850/389119
906
- let array_type = self . context . new_array_type ( None , arg_type, 3 ) ;
907
- let result = self . current_func ( )
908
- . new_local ( None , array_type, "count_loading_zeroes_results" ) ;
941
+ // __builtin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0,
942
+ // else call it on the 64 low bits and add 64. In the else case, the 64 low bits can't be 0 because
943
+ // this function requires arg to be non-zero (callers must guard the zero case themselves).
909
944
945
+ let result = self . current_func ( )
946
+ . new_local ( None , result_type, "count_loading_zeroes_results" ) ;
947
+
948
+ let ctlz_then_block = self . current_func ( ) . new_block ( "ctlz_then" ) ;
949
+ let ctlz_else_block = self . current_func ( ) . new_block ( "ctlz_else" ) ;
950
+ let ctlz_after_block = self . current_func ( ) . new_block ( "ctlz_after" )
951
+ ;
910
952
let sixty_four = self . const_uint ( arg_type, 64 ) ;
911
953
let shift = self . lshr ( arg, sixty_four) ;
912
954
let high = self . gcc_int_cast ( shift, self . u64_type ) ;
913
- let low = self . gcc_int_cast ( arg, self . u64_type ) ;
914
-
915
- let zero = self . context . new_rvalue_zero ( self . usize_type ) ;
916
- let one = self . context . new_rvalue_one ( self . usize_type ) ;
917
- let two = self . context . new_rvalue_from_long ( self . usize_type , 2 ) ;
918
955
919
956
let clzll = self . context . get_builtin_function ( "__builtin_clzll" ) ;
920
957
921
- let first_elem = self . context . new_array_access ( None , result , zero ) ;
922
- let first_value = self . gcc_int_cast ( self . context . new_call ( None , clzll , & [ high] ) , arg_type ) ;
923
- self . llbb ( )
924
- . add_assignment ( self . location , first_elem , first_value ) ;
958
+ let zero_hi = self . const_uint ( high . get_type ( ) , 0 ) ;
959
+ let cond = self . gcc_icmp ( IntPredicate :: IntNE , high, zero_hi ) ;
960
+ self . llbb ( ) . end_with_conditional ( self . location , cond , ctlz_then_block , ctlz_else_block ) ;
961
+ self . switch_to_block ( ctlz_then_block ) ;
925
962
926
- let second_elem = self . context . new_array_access ( self . location , result, one) ;
927
- let cast = self . gcc_int_cast ( self . context . new_call ( self . location , clzll, & [ low] ) , arg_type) ;
928
- let second_value = self . add ( cast, sixty_four) ;
929
- self . llbb ( )
930
- . add_assignment ( self . location , second_elem, second_value) ;
963
+ let result_128 =
964
+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ high] ) , result_type) ;
931
965
932
- let third_elem = self . context . new_array_access ( self . location , result, two) ;
933
- let third_value = self . const_uint ( arg_type, 128 ) ;
934
- self . llbb ( )
935
- . add_assignment ( self . location , third_elem, third_value) ;
966
+ ctlz_then_block. add_assignment ( self . location , result, result_128) ;
967
+ ctlz_then_block. end_with_jump ( self . location , ctlz_after_block) ;
936
968
937
- let not_high = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , high) ;
938
- let not_low = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , low) ;
939
- let not_low_and_not_high = not_low & not_high;
940
- let index = not_high + not_low_and_not_high;
941
- // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
942
- // gcc.
943
- // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
944
- // compilation stage.
945
- let index = self . context . new_cast ( self . location , index, self . i32_type ) ;
946
-
947
- let res = self . context . new_array_access ( self . location , result, index) ;
948
-
949
- return self . gcc_int_cast ( res. to_rvalue ( ) , result_type) ;
969
+ self . switch_to_block ( ctlz_else_block) ;
970
+ let low = self . gcc_int_cast ( arg, self . u64_type ) ;
971
+ let low_leading_zeroes =
972
+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ low] ) , result_type) ;
973
+ let sixty_four_result_type = self . const_uint ( result_type, 64 ) ;
974
+ let result_128 = self . add ( low_leading_zeroes, sixty_four_result_type) ;
975
+ ctlz_else_block. add_assignment ( self . location , result, result_128) ;
976
+ ctlz_else_block. end_with_jump ( self . location , ctlz_after_block) ;
977
+ self . switch_to_block ( ctlz_after_block) ;
978
+ return result. to_rvalue ( ) ;
950
979
}
951
980
else {
952
981
let count_leading_zeroes = self . context . get_builtin_function ( "__builtin_clzll" ) ;
0 commit comments