Merge pull request #18 from antoyo/fix-u128-clz

antoyo · web-flow · commit 6af0e57d19fa · 2021-06-02T07:51:47.000-04:00
Fix 128-bit integers and count leading zeroes intrinsic
diff --git a/gcc-test-backend/src/main.rs b/gcc-test-backend/src/main.rs
@@ -1,21 +1,6 @@
-#![feature(core_intrinsics)]
-
-fn i128_to_u64(u: i128) -> Option<u64> {
-    let min = u64::MIN as i128;
-    //let max = u64::MAX as i128;
-    let max = 18446744073709551612_i128;
-    //println!("{} < {} => {}", u, min, u < min);
-    //println!("max: {:b}", u64::MAX);
-    println!("max: {:b}", max);
-    println!("max: {}", max);
-    //println!("{}", u < min);
-    //println!("{}", u > max);
-    if u < min || u > max {
-        None
-    } else {
-        Some(u as u64)
-    }
-}
+#![feature(const_option)]
+
+use std::num::{NonZeroU8, NonZeroI8, NonZeroU16, NonZeroI16, NonZeroU32, NonZeroI32, NonZeroU64, NonZeroI64, NonZeroU128, NonZeroI128, NonZeroUsize, NonZeroIsize};
 
 fn main() {
     /*test_float!(f64, f64, f64::INFINITY, f64::NEG_INFINITY, f64::NAN);
@@ -58,21 +43,6 @@ fn main() {
     }
     println!("{}", 9.4f64);*/
 
-    let mut value = 0;
-    let res = unsafe { std::intrinsics::atomic_cxchg(&mut value, 0, 1) };
-    println!("{:?}", res);
-    let res = unsafe { std::intrinsics::atomic_cxchg(&mut value, 0, 1) };
-    println!("{:?}", res);
-
-    use std::sync::atomic::{AtomicBool, Ordering};
-
-    let a = AtomicBool::new(false);
-    assert_eq!(a.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst), Ok(false));
-    assert_eq!(a.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst), Err(true));
-
-    a.store(false, Ordering::SeqCst);
-    assert_eq!(a.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst), Ok(false));
-
     // FIXME: the code seems to be the same when using an integer, but somehow, it doesn't work for
     // a float. Could it be related to the fact that floating-points use different registers?
 
@@ -160,4 +130,71 @@ fn main() {
         t_min as u64
     );*/
     */
+
+    assert_eq!(NonZeroU8::new(1).unwrap().leading_zeros(), 7);
+    assert_eq!(NonZeroI8::new(1).unwrap().leading_zeros(), 7);
+    assert_eq!(NonZeroU16::new(1).unwrap().leading_zeros(), 15);
+    assert_eq!(NonZeroI16::new(1).unwrap().leading_zeros(), 15);
+    assert_eq!(NonZeroU32::new(1).unwrap().leading_zeros(), 31);
+    assert_eq!(NonZeroI32::new(1).unwrap().leading_zeros(), 31);
+    assert_eq!(NonZeroU64::new(1).unwrap().leading_zeros(), 63);
+    assert_eq!(NonZeroI64::new(1).unwrap().leading_zeros(), 63);
+    assert_eq!(NonZeroU128::new(1).unwrap().leading_zeros(), 127);
+    assert_eq!(NonZeroI128::new(1).unwrap().leading_zeros(), 127);
+    assert_eq!(NonZeroUsize::new(1).unwrap().leading_zeros(), usize::BITS - 1);
+    assert_eq!(NonZeroIsize::new(1).unwrap().leading_zeros(), usize::BITS - 1);
+
+    assert_eq!(NonZeroU8::new(u8::MAX >> 2).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroI8::new((u8::MAX >> 2) as i8).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroU16::new(u16::MAX >> 2).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroI16::new((u16::MAX >> 2) as i16).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroU32::new(u32::MAX >> 2).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroI32::new((u32::MAX >> 2) as i32).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroU64::new(u64::MAX >> 2).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroI64::new((u64::MAX >> 2) as i64).unwrap().leading_zeros(), 2);
+
+        /*
+    //let mut num = u128::MAX >> 20;
+    //let mut num = u128::MAX;
+    #[inline(never)]
+    fn two() -> u128 {
+        2
+    }
+
+    //let mut num = 340282366920938463463374607431768211455_u128 >> two();
+    let mut num = 340282366920938463463374607431768211455_u128 >> 2;
+    //let mut num = 340282366920938463463374607431768211455_u128;
+    //let mut num = 10_u128 >> 2;
+    const MASK: u128 = 0x80000000000000000000000000000000;
+    for _ in 0..128 {
+        if num & MASK == MASK {
+            print!("1");
+        }
+        else {
+            print!("0");
+        }
+        num <<= 1;
+    }
+    println!();
+*/
+    assert_eq!(NonZeroU128::new(u128::MAX >> 2).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroI128::new((u128::MAX >> 2) as i128).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroUsize::new(usize::MAX >> 2).unwrap().leading_zeros(), 2);
+    assert_eq!(NonZeroIsize::new((usize::MAX >> 2) as isize).unwrap().leading_zeros(), 2);
+
+    assert_eq!(NonZeroU8::new(u8::MAX).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroI8::new(-1i8).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroU16::new(u16::MAX).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroI16::new(-1i16).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroU32::new(u32::MAX).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroI32::new(-1i32).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroU64::new(u64::MAX).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroI64::new(-1i64).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroU128::new(u128::MAX).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroI128::new(-1i128).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroUsize::new(usize::MAX).unwrap().leading_zeros(), 0);
+    assert_eq!(NonZeroIsize::new(-1isize).unwrap().leading_zeros(), 0);
+
+    const LEADING_ZEROS: u32 = NonZeroU16::new(1).unwrap().leading_zeros();
+    assert_eq!(LEADING_ZEROS, 15);
 }
diff --git a/src/common.rs b/src/common.rs
@@ -136,12 +136,12 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         }
 
         // FIXME: use a new function new_rvalue_from_unsigned_long()?
-        let low = self.context.new_rvalue_from_long(typ, num as u64 as i64);
+        let low = self.context.new_rvalue_from_long(self.u64_type, num as u64 as i64);
         let high = self.context.new_rvalue_from_long(typ, (num >> 64) as u64 as i64);
 
         let sixty_four = self.context.new_rvalue_from_long(typ, 64);
 
-        (high << sixty_four) | low
+        (high << sixty_four) | self.context.new_cast(None, low, typ)
 
         /*unsafe {
             let words = [u as u64, (u >> 64) as u64];
diff --git a/src/intrinsic.rs b/src/intrinsic.rs
@@ -233,6 +233,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                                     then_block.add_assignment(None, result, zero_result);
                                     then_block.end_with_jump(None, after_block);
 
+                                    // NOTE: since jumps were added in a place
+                                    // count_leading_zeroes() does not expect, the current blocks
+                                    // in the state need to be updated.
+                                    *self.current_block.borrow_mut() = Some(else_block);
+                                    self.block = Some(else_block);
+
                                     let zeros =
                                         match name {
                                             sym::ctlz => self.count_leading_zeroes(width, arg),
@@ -732,6 +738,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     }
 
     fn count_leading_zeroes(&self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
+        // TODO: use width?
         let arg_type = arg.get_type();
         let count_leading_zeroes =
             if arg_type.is_uint(&self.cx) {
@@ -743,8 +750,47 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             else if arg_type.is_ulonglong(&self.cx) {
                 "__builtin_clzll"
             }
+            else if width == 128 {
+                // Algorithm from: https://stackoverflow.com/a/28433850/389119
+                let array_type = self.context.new_array_type(None, arg_type, 3);
+                let result = self.current_func()
+                    .new_local(None, array_type, "count_loading_zeroes_results");
+
+                let sixty_four = self.context.new_rvalue_from_long(arg_type, 64);
+                let high = self.context.new_cast(None, arg >> sixty_four, self.u64_type);
+                let low = self.context.new_cast(None, arg, self.u64_type);
+
+                let zero = self.context.new_rvalue_zero(self.usize_type);
+                let one = self.context.new_rvalue_one(self.usize_type);
+                let two = self.context.new_rvalue_from_long(self.usize_type, 2);
+
+                let clzll = self.context.get_builtin_function("__builtin_clzll");
+
+                let first_elem = self.context.new_array_access(None, result, zero);
+                let first_value = self.context.new_cast(None, self.context.new_call(None, clzll, &[high]), arg_type);
+                self.llbb()
+                    .add_assignment(None, first_elem, first_value);
+
+                let second_elem = self.context.new_array_access(None, result, one);
+                let second_value = self.context.new_cast(None, self.context.new_call(None, clzll, &[low]), arg_type) + sixty_four;
+                self.llbb()
+                    .add_assignment(None, second_elem, second_value);
+
+                let third_elem = self.context.new_array_access(None, result, two);
+                let third_value = self.context.new_rvalue_from_long(arg_type, 128);
+                self.llbb()
+                    .add_assignment(None, third_elem, third_value);
+
+                let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high);
+                let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low);
+                let not_low_and_not_high = not_low & not_high;
+                let index = not_high + not_low_and_not_high;
+
+                let res = self.context.new_array_access(None, result, index);
+
+                return self.context.new_cast(None, res, arg_type);
+            }
             else {
-                // TODO: implement for 128-bit integers.
                 let count_leading_zeroes = self.context.get_builtin_function("__builtin_clz");
                 let arg = self.context.new_cast(None, arg, self.uint_type);
                 let diff = self.int_width(self.uint_type) - self.int_width(arg_type);