diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c146e1e6c0334..e544c165e9afe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53733,36 +53733,35 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
 }
 
-// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
-// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
-// to avoid generating code with MOVABS and large constants in certain cases.
-static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
-                                     const SDLoc &DL) {
-  using namespace llvm::SDPatternMatch;
+// Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to
+// (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we
+// are able to avoid generating code with MOVABS and large constants in certain
+// cases.
+static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
+                                          const SDLoc &DL) {
 
-  SDValue AddLhs;
-  APInt AddConst, SrlConst;
-  if (VT != MVT::i32 ||
-      !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
-                           m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
-                                                m_ConstInt(AddConst))),
-                                 m_ConstInt(SrlConst)))))
-    return SDValue();
+  SDValue Op = N.getOperand(0);
+  APInt OpConst = Op.getConstantOperandAPInt(1);
+  APInt SrlConst = N.getConstantOperandAPInt(1);
+  uint64_t SrlConstVal = SrlConst.getZExtValue();
+  unsigned Opcode = Op.getOpcode();
 
-  if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
+  if (SrlConst.ule(32) ||
+      (Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal))
     return SDValue();
 
-  SDValue AddLHSSrl =
-      DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
-  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+  SDValue OpLhsSrl =
+      DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1));
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl);
 
-  APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
-  SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
-  SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+  APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits());
+  SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT);
+  SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst);
+  EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal);
 
-  EVT CleanUpVT =
-      EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
-  return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
+  if (Opcode == ISD::ADD)
+    return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);
+  return NewOpNode;
 }
 
 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53810,11 +53809,21 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
   if (!Src.hasOneUse())
     return SDValue();
 
-  if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
-    return R;
+  if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL &&
+      isa<ConstantSDNode>(Src.getOperand(1))) {
+
+    unsigned SrcOpOpcode = Src.getOperand(0).getOpcode();
+    if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR &&
+         SrcOpOpcode != ISD::XOR) ||
+        !isa<ConstantSDNode>(Src.getOperand(0).getOperand(1)))
+      return SDValue();
+
+    if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL))
+      return R;
+
+    return SDValue();
+  }
 
-  // Only support vector truncation for now.
-  // TODO: i64 scalar math would benefit as well.
   if (!VT.isVector())
     return SDValue();
 
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index 14992ca5bf488..f7906e5a009ae 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -128,6 +128,103 @@ define i32 @test_trunc_add(i64 %x) {
   ret i32 %conv
 }
 
+define i32 @test_trunc_sub(i64 %x) {
+; X64-LABEL: test_trunc_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    shrq $49, %rdi
+; X64-NEXT:    leal 32762(%rdi), %eax
+; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
+; X64-NEXT:    retq
+  %sub = sub i64 %x, 3377699720527872
+  %shr = lshr i64 %sub, 49
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_and_1(i64 %x) {
+; X64-LABEL: test_trunc_and_1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $50, %rax
+; X64-NEXT:    andl $3, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %and = and i64 %x, 3940649673949184
+  %shr = lshr i64 %and, 50
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_or_1(i64 %x) {
+; X64-LABEL: test_trunc_or_1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $50, %rax
+; X64-NEXT:    orl $3, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %or = or i64 %x, 3940649673949184
+  %shr = lshr i64 %or, 50
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_xor_1(i64 %x) {
+; X64-LABEL: test_trunc_xor_1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $50, %rax
+; X64-NEXT:    xorl $3, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %xor = xor i64 %x, 3940649673949184
+  %shr = lshr i64 %xor, 50
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_and_2(i64 %x) {
+; X64-LABEL: test_trunc_and_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $45, %rax
+; X64-NEXT:    andl $111, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %and = and i64 %x, 3940649673949183
+  %shr = lshr i64 %and, 45
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_or_2(i64 %x) {
+; X64-LABEL: test_trunc_or_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $45, %rax
+; X64-NEXT:    orl $111, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %or = or i64 %x, 3940649673949183
+  %shr = lshr i64 %or, 45
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_xor_2(i64 %x) {
+; X64-LABEL: test_trunc_xor_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $45, %rax
+; X64-NEXT:    xorl $111, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %xor = xor i64 %x, 3940649673949183
+  %shr = lshr i64 %xor, 45
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
 ; Make sure we don't crash on this test case.
 
 define i32 @pr128158(i64 %x) {
@@ -137,10 +234,10 @@ define i32 @pr128158(i64 %x) {
 ; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    shrq $32, %rax
 ; X64-NEXT:    .p2align 4
-; X64-NEXT:  .LBB9_1: # %for.body
+; X64-NEXT:  .LBB16_1: # %for.body
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    cmpl $9, %eax
-; X64-NEXT:    jb .LBB9_1
+; X64-NEXT:    jb .LBB16_1
 ; X64-NEXT:    # %bb.2: # %exit
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    retq
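As an aside, below is a minimal standalone C++ sketch of the arithmetic identity combinei64TruncSrlConstant relies on, written with plain integers rather than SelectionDAG nodes. The helper names (referenceForm, foldedForm) and the sample constants are purely illustrative and are not part of the patch or of any LLVM API; the assumptions are the same ones the combine checks (SrlConst > 32, and for ADD a constant whose low SrlConst bits are zero).

// Sketch only: checks that trunc((op X, C1) >> C2) equals
// op(trunc(X >> C2), C1 >> C2), masked to 64 - C2 bits in the ADD case.
#include <cassert>
#include <cstdint>

// Reference form: apply the op in 64 bits, shift, then truncate.
static uint32_t referenceForm(uint64_t X, uint64_t C1, unsigned C2, char Op) {
  uint64_t Full = (Op == '+') ? (X + C1) : (Op == '|') ? (X | C1) : (X ^ C1);
  return static_cast<uint32_t>(Full >> C2);
}

// Folded form: shift and truncate first, then apply the op with the now-small
// constant C1 >> C2. ADD also needs a zero-extend-in-reg style mask because
// the 32-bit add can carry into bits at or above (64 - C2).
static uint32_t foldedForm(uint64_t X, uint64_t C1, unsigned C2, char Op) {
  uint32_t Lhs = static_cast<uint32_t>(X >> C2);
  uint32_t SmallC = static_cast<uint32_t>(C1 >> C2); // fits in 64 - C2 bits
  if (Op == '|')
    return Lhs | SmallC;
  if (Op == '^')
    return Lhs ^ SmallC;
  uint32_t Mask = (1u << (64 - C2)) - 1; // well-defined since C2 > 32
  return (Lhs + SmallC) & Mask;
}

int main() {
  const uint64_t X = 0x123456789ABCDEF0ULL;
  const unsigned C2 = 49;                            // SrlConst > 32
  const uint64_t AddC = 0x7FF4ULL << 49;             // countr_zero(AddC) >= C2
  const uint64_t BitC = (0x6ABCULL << 49) | 0x12345; // OR/XOR: low bits may be set

  assert(referenceForm(X, AddC, C2, '+') == foldedForm(X, AddC, C2, '+'));
  assert(referenceForm(X, BitC, C2, '|') == foldedForm(X, BitC, C2, '|'));
  assert(referenceForm(X, BitC, C2, '^') == foldedForm(X, BitC, C2, '^'));
  return 0;
}

Under these assumptions the OR/XOR results can never set bits at or above 64 - C2, which is why the combine only emits the getZeroExtendInReg clean-up on the ADD path.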