Skip to content

Conversation

@apeskov
Copy link
Contributor

@apeskov apeskov commented May 15, 2025

Original commit: bbc5221

Previously reverted due to a conflict in the LIT test: mainline changed the default form of the load instruction to the untyped version in #137698. The updated test uses ld.param.b64 instead of ld.param.u64.

Original commit: bbc5221

Previously reverted due to a conflict in the lit test: mainline changed the default form of the load
instruction to the untyped version, but the test still used the previous one.
@llvmbot llvmbot added backend:NVPTX llvm:SelectionDAG SelectionDAGISel as well labels May 15, 2025
@llvmbot
Copy link
Member

llvmbot commented May 15, 2025

@llvm/pr-subscribers-backend-nvptx

@llvm/pr-subscribers-llvm-selectiondag

Author: Alexander Peskov (apeskov)

Changes

Original commit: bbc5221

Previously reverted due to a conflict in the LIT test: mainline changed the default form of the load instruction to the untyped version in #137698. The updated test uses ld.param.b64 instead of ld.param.u64.


Full diff: https://github.com/llvm/llvm-project/pull/140038.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+16)
  • (modified) llvm/test/CodeGen/NVPTX/shift-opt.ll (+16-26)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d6e288a59b2ee..8671efcfd2fb1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10972,6 +10972,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
   }
 
+  // fold (srl (logic_op x, (shl (zext y), c1)), c1)
+  //   -> (logic_op (srl x, c1), (zext y))
+  // c1 <= leadingzeros(zext(y))
+  SDValue X, ZExtY;
+  if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
+                              m_Value(X),
+                              m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
+                                                     m_Opc(ISD::ZERO_EXTEND)),
+                                             m_Specific(N1))))))) {
+    unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
+                               ZExtY.getOperand(0).getScalarValueSizeInBits();
+    if (N1C->getZExtValue() <= NumLeadingZeros)
+      return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+                         DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
+  }
+
   // fold operands of srl based on knowledge that the low bits are not
   // demanded.
   if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 5f5ad831cb148..65bcbb8e67156 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -6,15 +6,13 @@
 define i64 @test_or(i64 %x, i32 %y) {
 ; CHECK-LABEL: test_or(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<2>;
 ; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_or_param_0];
-; CHECK-NEXT:    ld.param.b32 %r1, [test_or_param_1];
-; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
-; CHECK-NEXT:    or.b64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    ld.param.b32 %rd2, [test_or_param_1];
+; CHECK-NEXT:    shr.u64 %rd3, %rd1, 5;
+; CHECK-NEXT:    or.b64 %rd4, %rd3, %rd2;
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %ext = zext i32 %y to i64
@@ -29,15 +27,13 @@ define i64 @test_or(i64 %x, i32 %y) {
 define i64 @test_xor(i64 %x, i32 %y) {
 ; CHECK-LABEL: test_xor(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<2>;
 ; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_xor_param_0];
-; CHECK-NEXT:    ld.param.b32 %r1, [test_xor_param_1];
-; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
-; CHECK-NEXT:    xor.b64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    ld.param.b32 %rd2, [test_xor_param_1];
+; CHECK-NEXT:    shr.u64 %rd3, %rd1, 5;
+; CHECK-NEXT:    xor.b64 %rd4, %rd3, %rd2;
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %ext = zext i32 %y to i64
@@ -52,15 +48,13 @@ define i64 @test_xor(i64 %x, i32 %y) {
 define i64 @test_and(i64 %x, i32 %y) {
 ; CHECK-LABEL: test_and(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<2>;
 ; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_and_param_0];
-; CHECK-NEXT:    ld.param.b32 %r1, [test_and_param_1];
-; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
-; CHECK-NEXT:    and.b64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    ld.param.b32 %rd2, [test_and_param_1];
+; CHECK-NEXT:    shr.u64 %rd3, %rd1, 5;
+; CHECK-NEXT:    and.b64 %rd4, %rd3, %rd2;
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %ext = zext i32 %y to i64
@@ -76,23 +70,19 @@ define i64 @test_and(i64 %x, i32 %y) {
 define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
 ; CHECK-LABEL: test_vec(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b16 %rs<9>;
-; CHECK-NEXT:    .reg .b32 %r<7>;
+; CHECK-NEXT:    .reg .b16 %rs<5>;
+; CHECK-NEXT:    .reg .b32 %r<6>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_vec_param_0];
 ; CHECK-NEXT:    ld.param.b32 %r2, [test_vec_param_1];
 ; CHECK-NEXT:    and.b32 %r3, %r2, 16711935;
-; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
-; CHECK-NEXT:    shl.b16 %rs3, %rs2, 5;
-; CHECK-NEXT:    shl.b16 %rs4, %rs1, 5;
+; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
+; CHECK-NEXT:    shr.u16 %rs3, %rs2, 5;
+; CHECK-NEXT:    shr.u16 %rs4, %rs1, 5;
 ; CHECK-NEXT:    mov.b32 %r4, {%rs4, %rs3};
-; CHECK-NEXT:    or.b32 %r5, %r1, %r4;
-; CHECK-NEXT:    mov.b32 {%rs5, %rs6}, %r5;
-; CHECK-NEXT:    shr.u16 %rs7, %rs6, 5;
-; CHECK-NEXT:    shr.u16 %rs8, %rs5, 5;
-; CHECK-NEXT:    mov.b32 %r6, {%rs8, %rs7};
-; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT:    or.b32 %r5, %r4, %r3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NEXT:    ret;
   %ext = zext <2 x i8> %y to <2 x i16>
   %shl = shl <2 x i16> %ext, splat(i16 5)

@apeskov
Copy link
Contributor Author

apeskov commented May 15, 2025

@RKSimon This is a reapply of #138290.

@RKSimon RKSimon self-requested a review May 15, 2025 10:33
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@RKSimon RKSimon merged commit 2bc9f43 into llvm:main May 15, 2025
14 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:NVPTX llvm:SelectionDAG SelectionDAGISel as well

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants