Skip to content

Conversation

@LU-JOHN
Copy link
Contributor

@LU-JOHN LU-JOHN commented Mar 10, 2025

Changes: Add guard to ensure truncation is strictly smaller than original size.

@llvmbot llvmbot added backend:AMDGPU llvm:SelectionDAG SelectionDAGISel as well labels Mar 10, 2025
@llvmbot
Copy link
Member

llvmbot commented Mar 10, 2025

@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-selectiondag

Author: None (LU-JOHN)

Changes

Changes: Add guard to ensure truncation is strictly smaller than original size.


Full diff: https://github.com/llvm/llvm-project/pull/130609.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+29-6)
  • (modified) llvm/test/CodeGen/AMDGPU/shl64_reduce.ll (+56-8)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ef5f2210573e0..f7d4fa1e5f9b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14951,12 +14951,35 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
   AddToWorklist(NewPtr.getNode());
 
   SDValue Load;
-  if (ExtType == ISD::NON_EXTLOAD)
-    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
-                       LN0->getPointerInfo().getWithOffset(PtrOff),
-                       LN0->getOriginalAlign(),
-                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
-  else
+  if (ExtType == ISD::NON_EXTLOAD) {
+    const MDNode *OldRanges = LN0->getRanges();
+    const MDNode *NewRanges = nullptr;
+    // If LSBs are loaded and the truncated ConstantRange for the OldRanges
+    // metadata is not the full-set for the NewWidth then create a NewRanges
+    // metadata for the truncated load
+    if (ShAmt == 0 && OldRanges) {
+      ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
+
+      // FIXME: OldRanges should match the width of the old load, so
+      // CR.getBitWidth() should be wider than the new narrower load.  This
+      // check should be unnecessary.
+      if (CR.getBitWidth() > VT.getScalarSizeInBits()) {
+        ConstantRange TruncatedCR = CR.truncate(VT.getScalarSizeInBits());
+        if (!TruncatedCR.isFullSet()) {
+          Metadata *Bounds[2] = {
+              ConstantAsMetadata::get(
+                  ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
+              ConstantAsMetadata::get(
+                  ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
+          NewRanges = MDNode::get(*DAG.getContext(), Bounds);
+        }
+      }
+    }
+    Load = DAG.getLoad(
+        VT, DL, LN0->getChain(), NewPtr,
+        LN0->getPointerInfo().getWithOffset(PtrOff), LN0->getOriginalAlign(),
+        LN0->getMemOperand()->getFlags(), LN0->getAAInfo(), NewRanges);
+  } else
     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                           LN0->getOriginalAlign(),
diff --git a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
index 05430213c17d2..69242f4e44840 100644
--- a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
@@ -13,23 +13,66 @@
 ; Test range with metadata
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-; FIXME: This case should be reduced, but SelectionDAG::computeKnownBits() cannot
-;        determine the minimum from metadata in this case.  Match current results
-;        for now.
-
 define i64 @shl_metadata(i64 %arg0, ptr %arg1.ptr) {
 ; CHECK-LABEL: shl_metadata:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    flat_load_dword v1, v[2:3]
+; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_lshlrev_b32_e32 v1, v1, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shift.amt = load i64, ptr %arg1.ptr, !range !0, !noundef !{}
+  %shl = shl i64 %arg0, %shift.amt
+  ret i64 %shl
+}
+
+define i64 @shl_metadata_two_ranges(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_two_ranges:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    flat_load_dword v1, v[2:3]
+; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_lshlrev_b32_e32 v1, v1, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shift.amt = load i64, ptr %arg1.ptr, !range !1, !noundef !{}
+  %shl = shl i64 %arg0, %shift.amt
+  ret i64 %shl
+}
+
+; Known minimum is too low.  Reduction must not be done.
+define i64 @shl_metadata_out_of_range(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_out_of_range:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    flat_load_dword v2, v[2:3]
+; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shift.amt = load i64, ptr %arg1.ptr, !range !2, !noundef !{}
+  %shl = shl i64 %arg0, %shift.amt
+  ret i64 %shl
+}
+
+; Bounds cannot be truncated to i32 when load is narrowed to i32.
+; Reduction must not be done.
+; Bounds were chosen so that if bounds were truncated to i32 the
+; known minimum would be 32 and the shl would be erroneously reduced.
+define i64 @shl_metadata_cant_be_narrowed_to_i32(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_cant_be_narrowed_to_i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    flat_load_dword v2, v[2:3]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %shift.amt = load i64, ptr %arg1.ptr, !range !0
+  %shift.amt = load i64, ptr %arg1.ptr, !range !3, !noundef !{}
   %shl = shl i64 %arg0, %shift.amt
   ret i64 %shl
 }
 
+; FIXME: This case should be reduced
 define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-LABEL: shl_v2_metadata:
 ; CHECK:       ; %bb.0:
@@ -39,11 +82,12 @@ define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
 ; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v6, v[2:3]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0
+  %shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
   %shl = shl <2 x i64> %arg0, %shift.amt
   ret <2 x i64> %shl
 }
 
+; FIXME: This case should be reduced
 define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-LABEL: shl_v3_metadata:
 ; CHECK:       ; %bb.0:
@@ -55,11 +99,12 @@ define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
 ; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v10, v[2:3]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0
+  %shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
   %shl = shl <3 x i64> %arg0, %shift.amt
   ret <3 x i64> %shl
 }
 
+; FIXME: This case should be reduced
 define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-LABEL: shl_v4_metadata:
 ; CHECK:       ; %bb.0:
@@ -74,12 +119,15 @@ define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-NEXT:    v_lshlrev_b64 v[4:5], v13, v[4:5]
 ; CHECK-NEXT:    v_lshlrev_b64 v[6:7], v15, v[6:7]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
-  %shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0
+  %shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
   %shl = shl <4 x i64> %arg0, %shift.amt
   ret <4 x i64> %shl
 }
 
 !0 = !{i64 32, i64 64}
+!1 = !{i64 32, i64 38, i64 42, i64 48}
+!2 = !{i64 31, i64 38, i64 42, i64 48}
+!3 = !{i64 32, i64 38, i64 2147483680, i64 2147483681}
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; Test range with an "or X, 16"

@LU-JOHN
Copy link
Contributor Author

LU-JOHN commented Mar 10, 2025

Guard added to prevent build failures in ASan build and 2-stage Fuchsia reported when building with #128144.

@arsenm
Copy link
Contributor

arsenm commented Mar 10, 2025

Does one of the tests covered the regression case that caused the revert?

@LU-JOHN
Copy link
Contributor Author

LU-JOHN commented Mar 10, 2025

Does one of the tests covered the regression case that caused the revert?

No, I have not been able to re-create the regression. I'm looking into it.

LU-JOHN added 3 commits March 11, 2025 15:04
)

Changes: Add guard to ensure truncation is strictly smaller than original size.
Signed-off-by: John Lu <[email protected]>
Signed-off-by: John Lu <[email protected]>
@LU-JOHN LU-JOHN requested a review from arsenm March 12, 2025 12:54
@arsenm arsenm merged commit 95e186c into llvm:main Mar 13, 2025
11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants