Skip to content

Commit 3b0e6af

Browse files
committed
fix 128-bit ctlz intrinsic UB
1 parent f682d09 commit 3b0e6af

File tree

1 file changed

+108
-75
lines changed

1 file changed

+108
-75
lines changed

src/intrinsic/mod.rs

Lines changed: 108 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::iter;
66

77
#[cfg(feature = "master")]
88
use gccjit::Type;
9-
use gccjit::{ComparisonOp, Function, FunctionType, RValue, ToRValue, UnaryOp};
9+
use gccjit::{Block, ComparisonOp, Function, FunctionType, LValue, RValue, ToRValue, UnaryOp};
1010
#[cfg(feature = "master")]
1111
use rustc_abi::ExternAbi;
1212
use rustc_abi::{BackendRepr, HasDataLayout};
@@ -410,7 +410,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
410410
| sym::saturating_sub => {
411411
match int_type_width_signed(args[0].layout.ty, self) {
412412
Some((width, signed)) => match name {
413-
sym::ctlz | sym::cttz => {
413+
sym::cttz => {
414414
let func = self.current_func();
415415
let then_block = func.new_block("then");
416416
let else_block = func.new_block("else");
@@ -426,16 +426,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
426426
then_block.add_assignment(None, result, zero_result);
427427
then_block.end_with_jump(None, after_block);
428428

429-
// NOTE: since jumps were added in a place
430-
// count_leading_zeroes() does not expect, the current block
431-
// in the state need to be updated.
432429
self.switch_to_block(else_block);
433430

434-
let zeros = match name {
435-
sym::ctlz => self.count_leading_zeroes(width, arg),
436-
sym::cttz => self.count_trailing_zeroes(width, arg),
437-
_ => unreachable!(),
438-
};
431+
let zeros = self.count_trailing_zeroes(width, arg);
439432
self.llbb().add_assignment(None, result, zeros);
440433
self.llbb().end_with_jump(None, after_block);
441434

@@ -445,7 +438,10 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
445438

446439
result.to_rvalue()
447440
}
448-
sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()),
441+
sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),
442+
sym::ctlz_nonzero => {
443+
self.count_leading_zeroes_nonzero(width, args[0].immediate())
444+
}
449445
sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
450446
sym::ctpop => self.pop_count(args[0].immediate()),
451447
sym::bswap => {
@@ -887,78 +883,115 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
887883

888884
fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
889885
// TODO(antoyo): use width?
890-
let arg_type = arg.get_type();
891886
let result_type = self.u32_type;
892-
let count_leading_zeroes =
893-
// TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
894-
// instead of using is_uint().
895-
if arg_type.is_uint(self.cx) {
896-
"__builtin_clz"
897-
}
898-
else if arg_type.is_ulong(self.cx) {
899-
"__builtin_clzl"
900-
}
901-
else if arg_type.is_ulonglong(self.cx) {
902-
"__builtin_clzll"
903-
}
904-
else if width == 128 {
905-
// Algorithm from: https://stackoverflow.com/a/28433850/389119
906-
let array_type = self.context.new_array_type(None, arg_type, 3);
907-
let result = self.current_func()
908-
.new_local(None, array_type, "count_loading_zeroes_results");
909-
910-
let sixty_four = self.const_uint(arg_type, 64);
911-
let shift = self.lshr(arg, sixty_four);
912-
let high = self.gcc_int_cast(shift, self.u64_type);
913-
let low = self.gcc_int_cast(arg, self.u64_type);
887+
let result = self.current_func().new_local(None, result_type, "zeros");
914888

915-
let zero = self.context.new_rvalue_zero(self.usize_type);
916-
let one = self.context.new_rvalue_one(self.usize_type);
917-
let two = self.context.new_rvalue_from_long(self.usize_type, 2);
889+
let then_block = self.current_func().new_block("then");
890+
let else_block = self.current_func().new_block("else");
891+
let after_block = self.current_func().new_block("after");
918892

919-
let clzll = self.context.get_builtin_function("__builtin_clzll");
893+
let zero = self.cx.const_uint(arg.get_type(), 0);
894+
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
895+
self.llbb().end_with_conditional(None, cond, then_block, else_block);
920896

921-
let first_elem = self.context.new_array_access(None, result, zero);
922-
let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type);
923-
self.llbb()
924-
.add_assignment(self.location, first_elem, first_value);
897+
let zero_result = self.cx.gcc_uint(self.u32_type, width);
898+
then_block.add_assignment(None, result, zero_result);
899+
then_block.end_with_jump(None, after_block);
900+
self.switch_to_block(else_block);
925901

926-
let second_elem = self.context.new_array_access(self.location, result, one);
927-
let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type);
928-
let second_value = self.add(cast, sixty_four);
929-
self.llbb()
930-
.add_assignment(self.location, second_elem, second_value);
931-
932-
let third_elem = self.context.new_array_access(self.location, result, two);
933-
let third_value = self.const_uint(arg_type, 128);
934-
self.llbb()
935-
.add_assignment(self.location, third_elem, third_value);
936-
937-
let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high);
938-
let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low);
939-
let not_low_and_not_high = not_low & not_high;
940-
let index = not_high + not_low_and_not_high;
941-
// NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
942-
// gcc.
943-
// TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
944-
// compilation stage.
945-
let index = self.context.new_cast(self.location, index, self.i32_type);
946-
947-
let res = self.context.new_array_access(self.location, result, index);
902+
self.count_leading_zeroes_nonzero_impl(width, arg, Some((result, else_block, after_block)));
903+
self.switch_to_block(after_block);
904+
result.to_rvalue()
905+
}
948906

949-
return self.gcc_int_cast(res.to_rvalue(), result_type);
907+
fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
908+
self.count_leading_zeroes_nonzero_impl(width, arg, None)
909+
}
910+
fn count_leading_zeroes_nonzero_impl(
911+
&mut self,
912+
width: u64,
913+
arg: RValue<'gcc>,
914+
block: Option<(LValue<'gcc>, Block<'gcc>, Block<'gcc>)>,
915+
) -> RValue<'gcc> {
916+
// TODO(antoyo): use width?
917+
let arg_type = arg.get_type();
918+
let result_type = self.u32_type;
919+
// TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
920+
// instead of using is_uint().
921+
let func_name = if arg_type.is_uint(self.cx) {
922+
Some("__builtin_clz")
923+
} else if arg_type.is_ulong(self.cx) {
924+
Some("__builtin_clzl")
925+
} else if arg_type.is_ulonglong(self.cx) {
926+
Some("__builtin_clzll")
927+
} else {
928+
None
929+
};
930+
if let Some(func_name) = func_name {
931+
let count_leading_zeroes = self.context.get_builtin_function(func_name);
932+
let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]);
933+
let res = self.context.new_cast(self.location, res, result_type);
934+
if let Some((result, else_block, after_block)) = block {
935+
else_block.add_assignment(None, result, res);
936+
else_block.end_with_jump(None, after_block);
950937
}
951-
else {
952-
let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
953-
let arg = self.context.new_cast(self.location, arg, self.ulonglong_type);
954-
let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
955-
let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
956-
let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff;
957-
return self.context.new_cast(self.location, res, result_type);
938+
res
939+
} else if width == 128 {
940+
// if arg is 0 return 128
941+
// else if the 64 high bits of arg are not 0, return clzll(64 high bits of arg)
942+
// else return 64 + clzll(64 low bits of arg)
943+
944+
let ctlz_then_block = self.current_func().new_block("ctlz_then");
945+
let ctlz_else_block = self.current_func().new_block("ctlz_else");
946+
let (result, block, after_block) = if let Some(block) = block {
947+
block
948+
} else {
949+
(
950+
self.current_func().new_local(None, result_type, "zeros"),
951+
self.llbb(),
952+
self.current_func().new_block("ctlz_after"),
953+
)
958954
};
959-
let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes);
960-
let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]);
961-
self.context.new_cast(self.location, res, result_type)
955+
let sixty_four = self.const_uint(arg_type, 64);
956+
let shift = self.lshr(arg, sixty_four);
957+
let high = self.gcc_int_cast(shift, self.u64_type);
958+
959+
let clzll = self.context.get_builtin_function("__builtin_clzll");
960+
961+
let zero_hi = self.const_uint(high.get_type(), 0);
962+
let cond = self.gcc_icmp(IntPredicate::IntNE, high, zero_hi);
963+
block.end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block);
964+
965+
let leading_zeroes =
966+
self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type);
967+
968+
ctlz_then_block.add_assignment(None, result, leading_zeroes);
969+
ctlz_then_block.end_with_jump(None, after_block);
970+
self.switch_to_block(ctlz_else_block);
971+
972+
let low = self.gcc_int_cast(arg, self.u64_type);
973+
let low_leading_zeroes =
974+
self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type);
975+
let sixty_four_result_type = self.const_uint(result_type, 64);
976+
let leading_zeroes = self.add(low_leading_zeroes, sixty_four_result_type);
977+
ctlz_else_block.add_assignment(None, result, leading_zeroes);
978+
ctlz_else_block.end_with_jump(None, after_block);
979+
self.switch_to_block(after_block);
980+
981+
result.to_rvalue()
982+
} else {
983+
let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
984+
let arg = self.context.new_cast(self.location, arg, self.ulonglong_type);
985+
let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
986+
let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
987+
let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff;
988+
let res = self.context.new_cast(self.location, res, result_type);
989+
if let Some((result, else_block, after_block)) = block {
990+
else_block.add_assignment(None, result, res);
991+
else_block.end_with_jump(None, after_block);
992+
}
993+
res
994+
}
962995
}
963996

964997
fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {

0 commit comments

Comments
 (0)