[CVP] Implement type narrowing for LShr

adam-bzowski · adam-bzowski · commit 2e44ffdbbe82 · 2024-12-13T05:30:07.000-08:00
Implements type narrowing for LShr. The treatment is analogous to the type narrowing of UDiv. Since LShr is a relatively cheap instruction, the narrowing occurs only if the following conditions hold: i) all the users of the LShr instruction are already TruncInst; ii) the narrowing is carried out to the largest TruncInst following the LShr instruction. Additionally, the function optimizes the cases where the result of the LShr instruction is guaranteed to vanish or be equal to poison.
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -1134,6 +1134,19 @@ static bool narrowLShr(BinaryOperator *LShr, LazyValueInfo *LVI) {
     return true;
   }
 
+  // Since LShr returns poison if the shift is larger than or equal to the bit
+  // width of the argument, we must make sure that the maximal possible value
+  // for the shift is smaller than the new width after narrowing. Otherwise some
+  // shifts that originally vanish would result in poison after the narrowing.
+  uint64_t MaxShiftValue64 = ShiftRange.getUnsignedMax().getZExtValue();
+  unsigned MaxShiftValue =
+      MaxShiftValue64 < std::numeric_limits<unsigned>::max()
+          ? static_cast<unsigned>(MaxShiftValue64)
+          : std::numeric_limits<unsigned>::max();
+
+  if (OrigWidth <= MaxShiftValue)
+    return false;
+
   // That's how many bits we need.
   unsigned MaxActiveBits =
       std::max(MaxActiveBitsInArg, ShiftRange.getActiveBits());
@@ -1165,6 +1178,10 @@ static bool narrowLShr(BinaryOperator *LShr, LazyValueInfo *LVI) {
     }
   }
 
+  // See comment above MaxShiftValue.
+  if (NewWidth <= MaxShiftValue)
+    return false;
+
   // We are ready to truncate.
   IRBuilder<> B(LShr);
   Type *TruncTy = RetTy->getWithNewBitWidth(NewWidth);
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/lshr-plus-instcombine.ll b/llvm/test/Transforms/CorrelatedValuePropagation/lshr-plus-instcombine.ll
@@ -7,13 +7,14 @@
 ; CHECK-LABEL: @trunc_test1
 ; CHECK-NEXT: [[A1:%.*]] = lshr i32 [[A:%.*]], 16
 ; CHECK-NEXT: [[CARG:%.*]] = trunc nuw i32 [[A1]] to i16
-; CHECK-NEXT: [[CSHIFT:%.*]] = trunc i32 [[B:%.*]] to i16
+; CHECK-NEXT: [[B1:%.*]] = trunc i32 [[B:%.*]] to i16
+; CHECK-NEXT: [[CSHIFT:%.*]] = and i16 [[B1]], 15
 ; CHECK-NEXT: [[C1:%.*]] = lshr i16 [[CARG]], [[CSHIFT]]
 ; CHECK-NEXT: ret i16 [[C1]]
 
 define i16 @trunc_test1(i32 %a, i32 %b) {
   %a.eff.trunc = lshr i32 %a, 16
-  %b.eff.trunc = and i32 %b, 65535
+  %b.eff.trunc = and i32 %b, 15
   %c = lshr i32 %a.eff.trunc, %b.eff.trunc
   %c.trunc = trunc i32 %c to i16
   ret i16 %c.trunc
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/lshr.ll b/llvm/test/Transforms/CorrelatedValuePropagation/lshr.ll
@@ -50,19 +50,19 @@ entry:
   br i1 %cmp, label %bb, label %exit
 
 bb:
-; CHECK: lshr i16
+; CHECK: lshr i32
   %shr = lshr i32 %m, %n
   br label %exit
 
 exit:
   ret void
 }
 
-; CHECK-LABEL: @test4(
-define void @test4(i32 %m, i32 %n) {
+; CHECK-LABEL: @test3a(
+define void @test3a(i32 %m, i32 %n) {
 entry:
   %cmp1 = icmp ult i32 %m, 65535
-  %cmp2 = icmp ule i32 %n, 65536
+  %cmp2 = icmp ult i32 %n, 17
   %cmp = and i1 %cmp1, %cmp2
   br i1 %cmp, label %bb, label %exit
 
@@ -75,6 +75,23 @@ exit:
   ret void
 }
 
+; CHECK-LABEL: @test3b(
+define void @test3b(i32 %m, i32 %n) {
+entry:
+  %cmp1 = icmp ult i32 %m, 65535
+  %cmp2 = icmp ult i32 %n, 16
+  %cmp = and i1 %cmp1, %cmp2
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: lshr i16
+  %shr = lshr i32 %m, %n
+  br label %exit
+
+exit:
+  ret void
+}
+
 ; CHECK-LABEL: @test5
 define void @test5(i32 %n) {
   %trunc = and i32 %n, 65535
@@ -83,6 +100,14 @@ define void @test5(i32 %n) {
   ret void
 }
 
+; CHECK-LABEL: @test5a
+define void @test5a(i32 %n) {
+  %trunc = and i32 %n, 65535
+  ; CHECK: lshr i16
+  %shr = lshr i32 %trunc, 15
+  ret void
+}
+
 ; CHECK-LABEL: @test6
 define void @test6(i32 %n) {
 entry:
@@ -107,7 +132,7 @@ exit:
 
 ; CHECK-LABEL: @trunc_test1
 ; CHECK-NEXT: [[A1:%.*]] = lshr i32 [[A:%.*]], 16
-; CHECK-NEXT: [[B1:%.*]] = and i32 [[B:%.*]], 65535
+; CHECK-NEXT: [[B1:%.*]] = and i32 [[B:%.*]], 15
 ; CHECK-NEXT: [[A2:%.*]] = trunc i32 [[A1]] to i16
 ; CHECK-NEXT: [[B2:%.*]] = trunc i32 [[B1]] to i16
 ; CHECK-NEXT: [[C1:%.*]] = lshr i16 [[A2]], [[B2]]
@@ -117,7 +142,7 @@ exit:
 
 define i16 @trunc_test1(i32 %a, i32 %b) {
   %a.eff.trunc = lshr i32 %a, 16
-  %b.eff.trunc = and i32 %b, 65535
+  %b.eff.trunc = and i32 %b, 15
   %c = lshr i32 %a.eff.trunc, %b.eff.trunc
   %c.trunc = trunc i32 %c to i16
   ret i16 %c.trunc