Merged
48 changes: 26 additions & 22 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53733,36 +53733,42 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
}

// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
// to avoid generating code with MOVABS and large constants in certain cases.
static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
const SDLoc &DL) {
// Attempt to fold some (truncate (srl (binop X, C1), C2)) patterns to
// (binop (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are
Contributor: We can use add/or/xor to replace binop.
// able to avoid generating code with MOVABS and large constants in certain
// cases.
static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG,
const SDLoc &DL) {
using namespace llvm::SDPatternMatch;

SDValue AddLhs;
APInt AddConst, SrlConst;
SDValue BinopLhs;
APInt BinopConst, SrlConst;
if (VT != MVT::i32 ||
!sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
m_ConstInt(AddConst))),
m_ConstInt(SrlConst)))))
!sd_match(
N,
Contributor: No need to use a switch, since we have limited this to ADD/OR/XOR in combineTruncatedArithmetic.
m_AllOf(m_SpecificVT(MVT::i64),
m_Srl(m_OneUse(m_AnyOf(
m_Add(m_Value(BinopLhs), m_ConstInt(BinopConst)),
m_Or(m_Value(BinopLhs), m_ConstInt(BinopConst)),
m_Xor(m_Value(BinopLhs), m_ConstInt(BinopConst)))),
Contributor: Use m_Binary once we have limited the binop to add/sub/and/or/xor and/or mul.

Contributor: I think we can get rid of SDPatternMatch by checking VT == MVT::i32 && SrcVT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1)) && isa<ConstantSDNode>(Src.getOperand(1)) before calling combinei64TruncSrlBinop.

Contributor (author): Wouldn't it be neater to move those checks inside combinei64TruncSrlBinop? We could use m_Binary to match the pattern and then switch on the opcode to perform the different checks required for AND/OR/XOR and ADD/SUB.

Contributor: My assumption is that when we have only one exact match, either m_Add or m_Binary would be fine. If neither matches, we don't bother with it, because the check for OR/XOR/ADD/SUB is enough.

Contributor (author): I've changed the function according to your suggestions. It could probably be simplified if we always clean up using getZeroExtendInReg, as it seems that for OR and XOR the movz is not generated, regardless of whether we're zero-extending or any-extending.

m_ConstInt(SrlConst)))))
return SDValue();

if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
if (SrlConst.ule(32) || BinopConst.countr_zero() < SrlConst.getZExtValue())
Contributor: For or/xor, the second condition isn't needed, and without that limitation (https://alive2.llvm.org/ce/z/uEAYxG) the and case I showed in https://godbolt.org/z/1za939PKc should also work.

Contributor (author): I've added test cases that cover the scenario you mentioned for AND, as well as equivalent cases for OR and XOR. It seems the fold works as-is for these cases, though I'm not entirely sure why.

Contributor: You are right. I mistook it for add.
return SDValue();

SDValue AddLHSSrl =
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
SDValue BinopLHSSrl =
DAG.getNode(ISD::SRL, DL, MVT::i64, BinopLhs, N.getOperand(1));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl);

APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits());
SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT);
SDValue NewBinopNode =
Collaborator: APInt::extractBits?

Contributor (author): If I'm not mistaken, with APInt::extractBits the constant transformation becomes APInt NewOpConstVal = OpConst.extractBits(64 - SrlConst.getZExtValue(), SrlConst.getZExtValue()).zext(VT.getSizeInBits());. I believe the current implementation is simpler and easier to understand.
DAG.getNode(N.getOperand(0).getOpcode(), DL, VT, Trunc, NewBinopConst);

EVT CleanUpVT =
EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
return DAG.getZeroExtendInReg(NewBinopNode, DL, CleanUpVT);
}
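The transformation this function performs can be sanity-checked with plain 64-bit arithmetic. In this sketch (hypothetical helper names, not part of the patch), `C2 > 32` and the low `C2` bits of `C1` being zero are the preconditions the code above enforces for ADD:

```cpp
#include <cassert>
#include <cstdint>

// Model of the fold: trunc32((X + C1) >> C2)  ==>  zext-in-reg of
// (trunc32(X >> C2) + trunc32(C1 >> C2)). When the low C2 bits of C1 are
// all zero, the addition cannot carry across bit C2, so both forms agree.
uint32_t foldedAdd(uint64_t X, uint64_t C1, unsigned C2) {
  uint32_t Trunc = (uint32_t)(X >> C2);           // (truncate (srl X, C2))
  uint32_t NewConst = (uint32_t)(C1 >> C2);       // C1' = C1 >> C2, truncated
  uint32_t Sum = Trunc + NewConst;                // the new 32-bit add
  uint64_t Mask = ((uint64_t)1 << (64 - C2)) - 1; // getZeroExtendInReg cleanup
  return (uint32_t)(Sum & Mask);
}

uint32_t original(uint64_t X, uint64_t C1, unsigned C2) {
  return (uint32_t)((X + C1) >> C2); // (truncate (srl (add X, C1), C2))
}
```

With C2 = 48 the cleanup mask is 16 bits wide, which is why the X64 checks in the tests expect a movzwl. When C1's low bits are not clear (say C1 = 1), a carry can propagate into the kept bits and the two forms diverge.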

/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53810,11 +53816,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
if (!Src.hasOneUse())
return SDValue();

if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL))
Contributor: Does it work for MUL? Add a test case if yes, and exclude it explicitly in code if not.

Contributor: We also have other binops like min/max etc., so we should add a switch check like below.

Contributor (author): This exact fold doesn't work for MUL: https://alive2.llvm.org/ce/z/Xoz8hb (I had to reduce the size of the ints because Alive was timing out).
return R;

// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();

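The thread above notes that the fold is unsound for MUL. A small numeric sketch (illustrative helper names) makes the reason concrete: low bits of X that the rewritten form shifts away can still reach the kept high bits through the multiply.

```cpp
#include <cassert>
#include <cstdint>

uint32_t originalMul(uint64_t X, uint64_t C1, unsigned C2) {
  return (uint32_t)((X * C1) >> C2); // trunc32((X * C1) >> C2)
}

// MUL variant of the rewrite; this is NOT a valid fold.
uint32_t foldedMul(uint64_t X, uint64_t C1, unsigned C2) {
  uint64_t Mask = ((uint64_t)1 << (64 - C2)) - 1;
  return (uint32_t)((((uint32_t)(X >> C2)) * ((uint32_t)(C1 >> C2))) & Mask);
}
```

With X = 2 and C1 = 1 << 48 the original computes (2 << 48) >> 48 = 2, while the rewritten form sees X >> 48 == 0 and returns 0 — even though C1's low 48 bits are zero, so the ADD-style precondition alone would have allowed the fold.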
59 changes: 57 additions & 2 deletions llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -128,6 +128,61 @@ define i32 @test_trunc_add(i64 %x) {
ret i32 %conv
}

define i32 @test_trunc_sub(i64 %x) {
; X64-LABEL: test_trunc_sub:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $65522, %edi # imm = 0xFFF2
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: retq
%sub = sub i64 %x, 3940649673949184
%shr = lshr i64 %sub, 48
Contributor: Let's avoid using 48, to test for the problem #128353 is solving.

Contributor (author): Sure. Is changing one test enough, or should I change more than one?

Contributor: I'd like to change them all.
%conv = trunc i64 %shr to i32
ret i32 %conv
}
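The constants in test_trunc_sub line up with the fold: subtracting 3940649673949184 (0x000E000000000000) is the same as adding 0xFFF2 << 48 modulo 2^64, and 0xFFF2 is the 65522 in the expected addl. A small check (hypothetical names, modeling the IR before and the asm after):

```cpp
#include <cassert>
#include <cstdint>

// @test_trunc_sub before the fold: sub, lshr 48, trunc to i32.
uint32_t subThenShift(uint64_t X) {
  return (uint32_t)((X - 0x000E000000000000ULL) >> 48);
}

// After the fold: shrq $48, addl $65522, movzwl (16-bit zext-in-reg).
uint32_t shiftThenAdd(uint64_t X) {
  return (((uint32_t)(X >> 48)) + 65522u) & 0xFFFFu;
}
```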

define i32 @test_trunc_and(i64 %x) {
; X64-LABEL: test_trunc_and:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
; X64-NEXT: andl $14, %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%and = and i64 %x, 3940649673949184
%shr = lshr i64 %and, 48
%conv = trunc i64 %shr to i32
ret i32 %conv
}

define i32 @test_trunc_or(i64 %x) {
; X64-LABEL: test_trunc_or:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
; X64-NEXT: orl $14, %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%or = or i64 %x, 3940649673949184
%shr = lshr i64 %or, 48
%conv = trunc i64 %shr to i32
ret i32 %conv
}

define i32 @test_trunc_xor(i64 %x) {
; X64-LABEL: test_trunc_xor:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
; X64-NEXT: xorl $14, %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%xor = xor i64 %x, 3940649673949184
%shr = lshr i64 %xor, 48
%conv = trunc i64 %shr to i32
ret i32 %conv
}

; Make sure we don't crash on this test case.

define i32 @pr128158(i64 %x) {
@@ -137,10 +192,10 @@ define i32 @pr128158(i64 %x) {
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: shrq $32, %rax
; X64-NEXT: .p2align 4
; X64-NEXT: .LBB9_1: # %for.body
; X64-NEXT: .LBB13_1: # %for.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: cmpl $9, %eax
; X64-NEXT: jb .LBB9_1
; X64-NEXT: jb .LBB13_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq