Skip to content

Commit aa6c559

Browse files
committed
fix 128bit ctlz intrinsic UB
1 parent f682d09 commit aa6c559

File tree

1 file changed

+66
-37
lines changed

1 file changed

+66
-37
lines changed

src/intrinsic/mod.rs

Lines changed: 66 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,11 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
410410
| sym::saturating_sub => {
411411
match int_type_width_signed(args[0].layout.ty, self) {
412412
Some((width, signed)) => match name {
413-
sym::ctlz | sym::cttz => {
413+
sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),
414+
415+
416+
417+
sym::cttz => {
414418
let func = self.current_func();
415419
let then_block = func.new_block("then");
416420
let else_block = func.new_block("else");
@@ -445,7 +449,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
445449

446450
result.to_rvalue()
447451
}
448-
sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()),
452+
sym::ctlz_nonzero => self.count_leading_zeroes_nonzero(width, args[0].immediate()),
449453
sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
450454
sym::ctpop => self.pop_count(args[0].immediate()),
451455
sym::bswap => {
@@ -886,6 +890,38 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
886890
}
887891

888892
/// Emits code computing the number of leading zero bits of `arg`, with a
/// defined result when `arg` is zero.
///
/// The current block is ended with a branch on `arg == 0`:
/// * "then" block: stores `width` into a `u32` local named "zeros";
/// * "else" block: stores the value produced by
///   `count_leading_zeroes_nonzero(width, arg)` into the same local.
///
/// Both paths jump to the "after" block, which is made the current block
/// before returning. The returned rvalue reads the "zeros" local.
///
/// `width` is the value used as the result for a zero `arg` (the call site
/// passes the width obtained from `int_type_width_signed`).
fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
893+
let func = self.current_func();
894+
let then_block = func.new_block("then");
895+
let else_block = func.new_block("else");
896+
let after_block = func.new_block("after");
897+
898+
// Local that receives the result from whichever branch is taken.
let result = func.new_local(None, self.u32_type, "zeros");
899+
let zero = self.cx.gcc_zero(arg.get_type());
900+
// Branch to `then_block` when `arg == 0`, otherwise to `else_block`.
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
901+
self.llbb().end_with_conditional(None, cond, then_block, else_block);
902+
903+
// Zero input: the result is `width`, without calling the nonzero helper.
let zero_result = self.cx.gcc_uint(self.u32_type, width);
904+
then_block.add_assignment(None, result, zero_result);
905+
then_block.end_with_jump(None, after_block);
906+
907+
// NOTE: since jumps were added in a place
908+
// count_leading_zeroes_nonzero() does not expect, the current block
909+
// in the state needs to be updated.
910+
self.switch_to_block(else_block);
911+
912+
// Non-zero input: the helper emits into the current block (and may switch it),
// so the assignment and jump below go through `self.llbb()`.
let zeros =self.count_leading_zeroes_nonzero(width, arg);
913+
self.llbb().add_assignment(None, result, zeros);
914+
self.llbb().end_with_jump(None, after_block);
915+
916+
// NOTE: since jumps were added in a place rustc does not
917+
// expect, the current block in the state needs to be updated.
918+
self.switch_to_block(after_block);
919+
920+
result.to_rvalue()
921+
922+
}
923+
924+
fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
889925
// TODO(antoyo): use width?
890926
let arg_type = arg.get_type();
891927
let result_type = self.u32_type;
@@ -902,51 +938,44 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
902938
"__builtin_clzll"
903939
}
904940
else if width == 128 {
905-
// Algorithm from: https://stackoverflow.com/a/28433850/389119
906-
let array_type = self.context.new_array_type(None, arg_type, 3);
907-
let result = self.current_func()
908-
.new_local(None, array_type, "count_loading_zeroes_results");
941+
// __builtin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0,
942+
// else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0 because arg is not 0
943+
// (this function must only be called with a non-zero arg).
909944

945+
let result = self.current_func()
946+
.new_local(None, result_type, "count_loading_zeroes_results");
947+
948+
let ctlz_then_block = self.current_func().new_block("ctlz_then");
949+
let ctlz_else_block = self.current_func().new_block("ctlz_else");
950+
let ctlz_after_block = self.current_func().new_block("ctlz_after")
951+
;
910952
let sixty_four = self.const_uint(arg_type, 64);
911953
let shift = self.lshr(arg, sixty_four);
912954
let high = self.gcc_int_cast(shift, self.u64_type);
913-
let low = self.gcc_int_cast(arg, self.u64_type);
914-
915-
let zero = self.context.new_rvalue_zero(self.usize_type);
916-
let one = self.context.new_rvalue_one(self.usize_type);
917-
let two = self.context.new_rvalue_from_long(self.usize_type, 2);
918955

919956
let clzll = self.context.get_builtin_function("__builtin_clzll");
920957

921-
let first_elem = self.context.new_array_access(None, result, zero);
922-
let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type);
923-
self.llbb()
924-
.add_assignment(self.location, first_elem, first_value);
958+
let zero_hi = self.const_uint(high.get_type(), 0);
959+
let cond = self.gcc_icmp(IntPredicate::IntNE, high, zero_hi);
960+
self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block);
961+
self.switch_to_block(ctlz_then_block);
925962

926-
let second_elem = self.context.new_array_access(self.location, result, one);
927-
let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type);
928-
let second_value = self.add(cast, sixty_four);
929-
self.llbb()
930-
.add_assignment(self.location, second_elem, second_value);
963+
let result_128 =
964+
self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type);
931965

932-
let third_elem = self.context.new_array_access(self.location, result, two);
933-
let third_value = self.const_uint(arg_type, 128);
934-
self.llbb()
935-
.add_assignment(self.location, third_elem, third_value);
966+
ctlz_then_block.add_assignment(self.location, result, result_128);
967+
ctlz_then_block.end_with_jump(self.location, ctlz_after_block);
936968

937-
let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high);
938-
let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low);
939-
let not_low_and_not_high = not_low & not_high;
940-
let index = not_high + not_low_and_not_high;
941-
// NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
942-
// gcc.
943-
// TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
944-
// compilation stage.
945-
let index = self.context.new_cast(self.location, index, self.i32_type);
946-
947-
let res = self.context.new_array_access(self.location, result, index);
948-
949-
return self.gcc_int_cast(res.to_rvalue(), result_type);
969+
self.switch_to_block(ctlz_else_block);
970+
let low = self.gcc_int_cast(arg, self.u64_type);
971+
let low_leading_zeroes =
972+
self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type);
973+
let sixty_four_result_type = self.const_uint(result_type, 64);
974+
let result_128 = self.add(low_leading_zeroes, sixty_four_result_type);
975+
ctlz_else_block.add_assignment(self.location, result, result_128);
976+
ctlz_else_block.end_with_jump(self.location, ctlz_after_block);
977+
self.switch_to_block(ctlz_after_block);
978+
return result.to_rvalue();
950979
}
951980
else {
952981
let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");

0 commit comments

Comments
 (0)