@@ -6,7 +6,7 @@ use std::iter;
 
 #[cfg(feature = "master")]
 use gccjit::Type;
-use gccjit::{ComparisonOp, Function, FunctionType, RValue, ToRValue, UnaryOp};
+use gccjit::{Block, ComparisonOp, Function, FunctionType, LValue, RValue, ToRValue, UnaryOp};
 #[cfg(feature = "master")]
 use rustc_abi::ExternAbi;
 use rustc_abi::{BackendRepr, HasDataLayout};
@@ -410,7 +410,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
             | sym::saturating_sub => {
                 match int_type_width_signed(args[0].layout.ty, self) {
                     Some((width, signed)) => match name {
-                        sym::ctlz | sym::cttz => {
+                        sym::cttz => {
                             let func = self.current_func();
                             let then_block = func.new_block("then");
                             let else_block = func.new_block("else");
@@ -426,16 +426,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
                             then_block.add_assignment(None, result, zero_result);
                             then_block.end_with_jump(None, after_block);
 
-                            // NOTE: since jumps were added in a place
-                            // count_leading_zeroes() does not expect, the current block
-                            // in the state need to be updated.
                             self.switch_to_block(else_block);
 
-                            let zeros = match name {
-                                sym::ctlz => self.count_leading_zeroes(width, arg),
-                                sym::cttz => self.count_trailing_zeroes(width, arg),
-                                _ => unreachable!(),
-                            };
+                            let zeros = self.count_trailing_zeroes(width, arg);
                             self.llbb().add_assignment(None, result, zeros);
                             self.llbb().end_with_jump(None, after_block);
 
@@ -445,7 +438,10 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
 
                             result.to_rvalue()
                         }
-                        sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()),
+                        sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),
+                        sym::ctlz_nonzero => {
+                            self.count_leading_zeroes_nonzero(width, args[0].immediate())
+                        }
                         sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
                         sym::ctpop => self.pop_count(args[0].immediate()),
                         sym::bswap => {
@@ -887,78 +883,115 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
     fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
         // TODO(antoyo): use width?
-        let arg_type = arg.get_type();
         let result_type = self.u32_type;
-        let count_leading_zeroes =
-            // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
-            // instead of using is_uint().
-            if arg_type.is_uint(self.cx) {
-                "__builtin_clz"
-            }
-            else if arg_type.is_ulong(self.cx) {
-                "__builtin_clzl"
-            }
-            else if arg_type.is_ulonglong(self.cx) {
-                "__builtin_clzll"
-            }
-            else if width == 128 {
-                // Algorithm from: https://stackoverflow.com/a/28433850/389119
-                let array_type = self.context.new_array_type(None, arg_type, 3);
-                let result = self.current_func()
-                    .new_local(None, array_type, "count_loading_zeroes_results");
-
-                let sixty_four = self.const_uint(arg_type, 64);
-                let shift = self.lshr(arg, sixty_four);
-                let high = self.gcc_int_cast(shift, self.u64_type);
-                let low = self.gcc_int_cast(arg, self.u64_type);
+        let result = self.current_func().new_local(None, result_type, "zeros");
 
-                let zero = self.context.new_rvalue_zero(self.usize_type);
-                let one = self.context.new_rvalue_one(self.usize_type);
-                let two = self.context.new_rvalue_from_long(self.usize_type, 2);
+        let then_block = self.current_func().new_block("then");
+        let else_block = self.current_func().new_block("else");
+        let after_block = self.current_func().new_block("after");
 
-                let clzll = self.context.get_builtin_function("__builtin_clzll");
+        let zero = self.cx.const_uint(arg.get_type(), 0);
+        let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
+        self.llbb().end_with_conditional(None, cond, then_block, else_block);
 
-                let first_elem = self.context.new_array_access(None, result, zero);
-                let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type);
-                self.llbb()
-                    .add_assignment(self.location, first_elem, first_value);
+        let zero_result = self.cx.gcc_uint(self.u32_type, width);
+        then_block.add_assignment(None, result, zero_result);
+        then_block.end_with_jump(None, after_block);
+        self.switch_to_block(else_block);
 
-                let second_elem = self.context.new_array_access(self.location, result, one);
-                let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type);
-                let second_value = self.add(cast, sixty_four);
-                self.llbb()
-                    .add_assignment(self.location, second_elem, second_value);
-
-                let third_elem = self.context.new_array_access(self.location, result, two);
-                let third_value = self.const_uint(arg_type, 128);
-                self.llbb()
-                    .add_assignment(self.location, third_elem, third_value);
-
-                let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high);
-                let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low);
-                let not_low_and_not_high = not_low & not_high;
-                let index = not_high + not_low_and_not_high;
-                // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
-                // gcc.
-                // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
-                // compilation stage.
-                let index = self.context.new_cast(self.location, index, self.i32_type);
-
-                let res = self.context.new_array_access(self.location, result, index);
+        self.count_leading_zeroes_nonzero_impl(width, arg, Some((result, else_block, after_block)));
+        self.switch_to_block(after_block);
+        result.to_rvalue()
+    }
 
-                return self.gcc_int_cast(res.to_rvalue(), result_type);
+    fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
+        self.count_leading_zeroes_nonzero_impl(width, arg, None)
+    }
+    fn count_leading_zeroes_nonzero_impl(
+        &mut self,
+        width: u64,
+        arg: RValue<'gcc>,
+        block: Option<(LValue<'gcc>, Block<'gcc>, Block<'gcc>)>,
+    ) -> RValue<'gcc> {
+        // TODO(antoyo): use width?
+        let arg_type = arg.get_type();
+        let result_type = self.u32_type;
+        // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
+        // instead of using is_uint().
+        let func_name = if arg_type.is_uint(self.cx) {
+            Some("__builtin_clz")
+        } else if arg_type.is_ulong(self.cx) {
+            Some("__builtin_clzl")
+        } else if arg_type.is_ulonglong(self.cx) {
+            Some("__builtin_clzll")
+        } else {
+            None
+        };
+        if let Some(func_name) = func_name {
+            let count_leading_zeroes = self.context.get_builtin_function(func_name);
+            let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]);
+            let res = self.context.new_cast(self.location, res, result_type);
+            if let Some((result, else_block, after_block)) = block {
+                else_block.add_assignment(None, result, res);
+                else_block.end_with_jump(None, after_block);
             }
-            else {
-                let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
-                let arg = self.context.new_cast(self.location, arg, self.ulonglong_type);
-                let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
-                let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
-                let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff;
-                return self.context.new_cast(self.location, res, result_type);
+            res
+        } else if width == 128 {
+            // if arg is 0 return 128
+            // else if the 64 high bits of arg are not 0, return clzll(64 high bits of arg)
+            // else return 64 + clzll(64 low bits of arg)
+
+            let ctlz_then_block = self.current_func().new_block("ctlz_then");
+            let ctlz_else_block = self.current_func().new_block("ctlz_else");
+            let (result, block, after_block) = if let Some(block) = block {
+                block
+            } else {
+                (
+                    self.current_func().new_local(None, result_type, "zeros"),
+                    self.llbb(),
+                    self.current_func().new_block("ctlz_after"),
+                )
             };
-        let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes);
-        let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]);
-        self.context.new_cast(self.location, res, result_type)
+            let sixty_four = self.const_uint(arg_type, 64);
+            let shift = self.lshr(arg, sixty_four);
+            let high = self.gcc_int_cast(shift, self.u64_type);
+
+            let clzll = self.context.get_builtin_function("__builtin_clzll");
+
+            let zero_hi = self.const_uint(high.get_type(), 0);
+            let cond = self.gcc_icmp(IntPredicate::IntNE, high, zero_hi);
+            block.end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block);
+
+            let leading_zeroes =
+                self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type);
+
+            ctlz_then_block.add_assignment(None, result, leading_zeroes);
+            ctlz_then_block.end_with_jump(None, after_block);
+            self.switch_to_block(ctlz_else_block);
+
+            let low = self.gcc_int_cast(arg, self.u64_type);
+            let low_leading_zeroes =
+                self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type);
+            let sixty_four_result_type = self.const_uint(result_type, 64);
+            let leading_zeroes = self.add(low_leading_zeroes, sixty_four_result_type);
+            ctlz_else_block.add_assignment(None, result, leading_zeroes);
+            ctlz_else_block.end_with_jump(None, after_block);
+            self.switch_to_block(after_block);
+
+            result.to_rvalue()
+        } else {
+            let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
+            let arg = self.context.new_cast(self.location, arg, self.ulonglong_type);
+            let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
+            let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
+            let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff;
+            let res = self.context.new_cast(self.location, res, result_type);
+            if let Some((result, else_block, after_block)) = block {
+                else_block.add_assignment(None, result, res);
+                else_block.end_with_jump(None, after_block);
+            }
+            res
+        }
     }
 
     fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
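
For reference, here is a minimal standalone sketch of the two fallback paths in count_leading_zeroes_nonzero_impl (the 128-bit split and the narrow-width widening), with u64::leading_zeros standing in for __builtin_clzll. The clz128 and clz_narrow_u16 helpers are hypothetical and not part of this patch.

// Standalone sketch (not part of the patch): the fallback logic of
// count_leading_zeroes_nonzero_impl, with u64::leading_zeros standing in
// for __builtin_clzll. Helper names are hypothetical.

// 128-bit path: split the value into two 64-bit halves.
fn clz128(value: u128) -> u32 {
    if value == 0 {
        return 128; // ctlz of zero yields the full bit width
    }
    let high = (value >> 64) as u64;
    if high != 0 {
        // the leading zeroes of the high half are already the answer
        high.leading_zeros()
    } else {
        // the high half is all zeroes: add 64 to the count of the low half
        64 + (value as u64).leading_zeros()
    }
}

// Narrow-width path: widen to 64 bits, count, then subtract the zeroes
// introduced by the widening (assumes a non-zero input, like ctlz_nonzero).
fn clz_narrow_u16(value: u16) -> u32 {
    let widened = value as u64;
    let diff = u64::BITS - u16::BITS; // 48 extra high bits from widening
    widened.leading_zeros() - diff
}

fn main() {
    assert_eq!(clz128(0), 128);
    assert_eq!(clz128(1u128 << 100), 27);
    assert_eq!(clz128(u128::MAX), 0);
    assert_eq!(clz_narrow_u16(1), 15);
    assert_eq!(clz_narrow_u16(u16::MAX), 0);
}

As in the patch, the zero check lives only in the wrapper: the GCC __builtin_clz* builtins are undefined for a zero argument, so the nonzero variant may call them directly.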