Skip to content

Commit 7831340

Browse files
committed
fix 128bit ctlz intrinsic UB
1 parent f682d09 commit 7831340

File tree

1 file changed

+65
-41
lines changed

1 file changed

+65
-41
lines changed

src/intrinsic/mod.rs

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
410410
| sym::saturating_sub => {
411411
match int_type_width_signed(args[0].layout.ty, self) {
412412
Some((width, signed)) => match name {
413-
sym::ctlz | sym::cttz => {
413+
sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),
414+
415+
sym::cttz => {
414416
let func = self.current_func();
415417
let then_block = func.new_block("then");
416418
let else_block = func.new_block("else");
@@ -431,11 +433,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
431433
// in the state needs to be updated.
432434
self.switch_to_block(else_block);
433435

434-
let zeros = match name {
435-
sym::ctlz => self.count_leading_zeroes(width, arg),
436-
sym::cttz => self.count_trailing_zeroes(width, arg),
437-
_ => unreachable!(),
438-
};
436+
let zeros = self.count_trailing_zeroes(width, arg);
439437
self.llbb().add_assignment(None, result, zeros);
440438
self.llbb().end_with_jump(None, after_block);
441439

@@ -445,7 +443,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
445443

446444
result.to_rvalue()
447445
}
448-
sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()),
446+
sym::ctlz_nonzero => {
447+
self.count_leading_zeroes_nonzero(width, args[0].immediate())
448+
}
449449
sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
450450
sym::ctpop => self.pop_count(args[0].immediate()),
451451
sym::bswap => {
@@ -886,6 +886,37 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
886886
}
887887

888888
fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
889+
let func = self.current_func();
890+
let then_block = func.new_block("then");
891+
let else_block = func.new_block("else");
892+
let after_block = func.new_block("after");
893+
894+
let result = func.new_local(None, self.u32_type, "zeros");
895+
let zero = self.cx.gcc_zero(arg.get_type());
896+
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
897+
self.llbb().end_with_conditional(None, cond, then_block, else_block);
898+
899+
let zero_result = self.cx.gcc_uint(self.u32_type, width);
900+
then_block.add_assignment(None, result, zero_result);
901+
then_block.end_with_jump(None, after_block);
902+
903+
// NOTE: since jumps were added in a place
904+
// count_leading_zeroes_nonzero() does not expect, the current block
905+
// in the state needs to be updated.
906+
self.switch_to_block(else_block);
907+
908+
let zeros = self.count_leading_zeroes_nonzero(width, arg);
909+
self.llbb().add_assignment(None, result, zeros);
910+
self.llbb().end_with_jump(None, after_block);
911+
912+
// NOTE: since jumps were added in a place rustc does not
913+
// expect, the current block in the state needs to be updated.
914+
self.switch_to_block(after_block);
915+
916+
result.to_rvalue()
917+
}
918+
919+
fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
889920
// TODO(antoyo): use width?
890921
let arg_type = arg.get_type();
891922
let result_type = self.u32_type;
@@ -902,51 +933,44 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
902933
"__builtin_clzll"
903934
}
904935
else if width == 128 {
905-
// Algorithm from: https://stackoverflow.com/a/28433850/389119
906-
let array_type = self.context.new_array_type(None, arg_type, 3);
936+
// __builtin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0,
937+
// else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0 because arg is not 0
938+
// (this function is only called with a nonzero arg).
939+
907940
let result = self.current_func()
908-
.new_local(None, array_type, "count_loading_zeroes_results");
941+
.new_local(None, result_type, "count_loading_zeroes_results");
909942

943+
let ctlz_then_block = self.current_func().new_block("ctlz_then");
944+
let ctlz_else_block = self.current_func().new_block("ctlz_else");
945+
let ctlz_after_block = self.current_func().new_block("ctlz_after")
946+
;
910947
let sixty_four = self.const_uint(arg_type, 64);
911948
let shift = self.lshr(arg, sixty_four);
912949
let high = self.gcc_int_cast(shift, self.u64_type);
913-
let low = self.gcc_int_cast(arg, self.u64_type);
914-
915-
let zero = self.context.new_rvalue_zero(self.usize_type);
916-
let one = self.context.new_rvalue_one(self.usize_type);
917-
let two = self.context.new_rvalue_from_long(self.usize_type, 2);
918950

919951
let clzll = self.context.get_builtin_function("__builtin_clzll");
920952

921-
let first_elem = self.context.new_array_access(None, result, zero);
922-
let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type);
923-
self.llbb()
924-
.add_assignment(self.location, first_elem, first_value);
925-
926-
let second_elem = self.context.new_array_access(self.location, result, one);
927-
let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type);
928-
let second_value = self.add(cast, sixty_four);
929-
self.llbb()
930-
.add_assignment(self.location, second_elem, second_value);
931-
932-
let third_elem = self.context.new_array_access(self.location, result, two);
933-
let third_value = self.const_uint(arg_type, 128);
934-
self.llbb()
935-
.add_assignment(self.location, third_elem, third_value);
953+
let zero_hi = self.const_uint(high.get_type(), 0);
954+
let cond = self.gcc_icmp(IntPredicate::IntNE, high, zero_hi);
955+
self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block);
956+
self.switch_to_block(ctlz_then_block);
936957

937-
let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high);
938-
let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low);
939-
let not_low_and_not_high = not_low & not_high;
940-
let index = not_high + not_low_and_not_high;
941-
// NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
942-
// gcc.
943-
// TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
944-
// compilation stage.
945-
let index = self.context.new_cast(self.location, index, self.i32_type);
958+
let result_128 =
959+
self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type);
946960

947-
let res = self.context.new_array_access(self.location, result, index);
961+
ctlz_then_block.add_assignment(self.location, result, result_128);
962+
ctlz_then_block.end_with_jump(self.location, ctlz_after_block);
948963

949-
return self.gcc_int_cast(res.to_rvalue(), result_type);
964+
self.switch_to_block(ctlz_else_block);
965+
let low = self.gcc_int_cast(arg, self.u64_type);
966+
let low_leading_zeroes =
967+
self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type);
968+
let sixty_four_result_type = self.const_uint(result_type, 64);
969+
let result_128 = self.add(low_leading_zeroes, sixty_four_result_type);
970+
ctlz_else_block.add_assignment(self.location, result, result_128);
971+
ctlz_else_block.end_with_jump(self.location, ctlz_after_block);
972+
self.switch_to_block(ctlz_after_block);
973+
return result.to_rvalue();
950974
}
951975
else {
952976
let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");

0 commit comments

Comments
 (0)