From fe8151f38832e8f6861d0e3783a34ca0357ee3c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Sun, 23 Feb 2025 16:52:43 +0000 Subject: [PATCH 1/8] [X86] Add pre-commit tests. --- .../CodeGen/X86/combine-i64-trunc-srl-add.ll | 59 ++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll index 14992ca5bf488..bd3c05a2c9302 100644 --- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll +++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll @@ -128,6 +128,61 @@ define i32 @test_trunc_add(i64 %x) { ret i32 %conv } +define i32 @test_trunc_sub(i64 %x) { +; X64-LABEL: test_trunc_sub: +; X64: # %bb.0: +; X64-NEXT: shrq $48, %rdi +; X64-NEXT: addl $65522, %edi # imm = 0xFFF2 +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: retq + %sub = sub i64 %x, 3940649673949184 + %shr = lshr i64 %sub, 48 + %conv = trunc i64 %shr to i32 + ret i32 %conv +} + +define i32 @test_trunc_and(i64 %x) { +; X64-LABEL: test_trunc_and: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $48, %rax +; X64-NEXT: andl $14, %eax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq + %and = and i64 %x, 3940649673949184 + %shr = lshr i64 %and, 48 + %conv = trunc i64 %shr to i32 + ret i32 %conv +} + +define i32 @test_trunc_or(i64 %x) { +; X64-LABEL: test_trunc_or: +; X64: # %bb.0: +; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: shrq $48, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq + %or = or i64 %x, 3940649673949184 + %shr = lshr i64 %or, 48 + %conv = trunc i64 %shr to i32 + ret i32 %conv +} + +define i32 @test_trunc_xor(i64 %x) { +; X64-LABEL: test_trunc_xor: +; X64: # %bb.0: +; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000 +; X64-NEXT: xorq %rdi, %rax +; X64-NEXT: shrq $48, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq + %xor = xor i64 %x, 3940649673949184 + %shr = lshr i64 %xor, 48 + %conv = trunc i64 %shr to i32 + ret i32 %conv +} + ; Make sure we don't crash on this test case. define i32 @pr128158(i64 %x) { @@ -137,10 +192,10 @@ define i32 @pr128158(i64 %x) { ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: shrq $32, %rax ; X64-NEXT: .p2align 4 -; X64-NEXT: .LBB9_1: # %for.body +; X64-NEXT: .LBB13_1: # %for.body ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: cmpl $9, %eax -; X64-NEXT: jb .LBB9_1 +; X64-NEXT: jb .LBB13_1 ; X64-NEXT: # %bb.2: # %exit ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq From 5574dce692ba8c5218106950410655165a9977f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Sun, 23 Feb 2025 19:18:11 +0000 Subject: [PATCH 2/8] [X86] Extend `combinei64TruncSrlAdd` to handle patterns with `or` and `xor` --- llvm/lib/Target/X86/X86ISelLowering.cpp | 48 ++++++++++--------- .../CodeGen/X86/combine-i64-trunc-srl-add.ll | 8 ++-- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c146e1e6c0334..47dc9ffb4b24d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53733,36 +53733,42 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src); } -// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to -// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able -// to avoid generating code with MOVABS and large constants in certain cases. -static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG, - const SDLoc &DL) { +// Attempt to fold some (truncate (srl (binop X, C1), C2)) patterns to +// (binop (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are +// able to avoid generating code with MOVABS and large constants in certain +// cases. +static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG, + const SDLoc &DL) { using namespace llvm::SDPatternMatch; - SDValue AddLhs; - APInt AddConst, SrlConst; + SDValue BinopLhs; + APInt BinopConst, SrlConst; if (VT != MVT::i32 || - !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64), - m_Srl(m_OneUse(m_Add(m_Value(AddLhs), - m_ConstInt(AddConst))), - m_ConstInt(SrlConst))))) + !sd_match( + N, + m_AllOf(m_SpecificVT(MVT::i64), + m_Srl(m_OneUse(m_AnyOf( + m_Add(m_Value(BinopLhs), m_ConstInt(BinopConst)), + m_Or(m_Value(BinopLhs), m_ConstInt(BinopConst)), + m_Xor(m_Value(BinopLhs), m_ConstInt(BinopConst)))), + m_ConstInt(SrlConst))))) return SDValue(); - if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue()) + if (SrlConst.ule(32) || BinopConst.countr_zero() < SrlConst.getZExtValue()) return SDValue(); - SDValue AddLHSSrl = - DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1)); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl); + SDValue BinopLHSSrl = + DAG.getNode(ISD::SRL, DL, MVT::i64, BinopLhs, N.getOperand(1)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl); - APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits()); - SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT); - SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst); + APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits()); + SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT); + SDValue NewBinopNode = + DAG.getNode(N.getOperand(0).getOpcode(), DL, VT, Trunc, NewBinopConst); EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue()); - return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT); + return DAG.getZeroExtendInReg(NewBinopNode, DL, CleanUpVT); } /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify @@ -53810,11 +53816,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, if (!Src.hasOneUse()) return SDValue(); - if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL)) + if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL)) return R; - // Only support vector truncation for now. - // TODO: i64 scalar math would benefit as well. if (!VT.isVector()) return SDValue(); diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll index bd3c05a2c9302..ec29cf9d56c29 100644 --- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll +++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll @@ -158,9 +158,9 @@ define i32 @test_trunc_and(i64 %x) { define i32 @test_trunc_or(i64 %x) { ; X64-LABEL: test_trunc_or: ; X64: # %bb.0: -; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000 -; X64-NEXT: orq %rdi, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $48, %rax +; X64-NEXT: orl $14, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %or = or i64 %x, 3940649673949184 @@ -172,9 +172,9 @@ define i32 @test_trunc_or(i64 %x) { define i32 @test_trunc_xor(i64 %x) { ; X64-LABEL: test_trunc_xor: ; X64: # %bb.0: -; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000 -; X64-NEXT: xorq %rdi, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $48, %rax +; X64-NEXT: xorl $14, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %xor = xor i64 %x, 3940649673949184 From 55189cc558a626155225e519866675c37c394fad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Mon, 24 Feb 2025 17:06:58 +0000 Subject: [PATCH 3/8] [X86] Add extra test cases --- .../CodeGen/X86/combine-i64-trunc-srl-add.ll | 58 ++++++++++++++++--- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll index ec29cf9d56c29..785865bf06a74 100644 --- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll +++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll @@ -141,8 +141,8 @@ define i32 @test_trunc_sub(i64 %x) { ret i32 %conv } -define i32 @test_trunc_and(i64 %x) { -; X64-LABEL: test_trunc_and: +define i32 @test_trunc_and_1(i64 %x) { +; X64-LABEL: test_trunc_and_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $48, %rax @@ -155,8 +155,8 @@ define i32 @test_trunc_and(i64 %x) { ret i32 %conv } -define i32 @test_trunc_or(i64 %x) { -; X64-LABEL: test_trunc_or: +define i32 @test_trunc_or_1(i64 %x) { +; X64-LABEL: test_trunc_or_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $48, %rax @@ -169,8 +169,8 @@ define i32 @test_trunc_or(i64 %x) { ret i32 %conv } -define i32 @test_trunc_xor(i64 %x) { -; X64-LABEL: test_trunc_xor: +define i32 @test_trunc_xor_1(i64 %x) { +; X64-LABEL: test_trunc_xor_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $48, %rax @@ -183,6 +183,48 @@ define i32 @test_trunc_xor(i64 %x) { ret i32 %conv } +define i32 @test_trunc_and_2(i64 %x) { +; X64-LABEL: test_trunc_and_2: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $48, %rax +; X64-NEXT: andl $13, %eax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq + %and = and i64 %x, 3940649673949183 + %shr = lshr i64 %and, 48 + %conv = trunc i64 %shr to i32 + ret i32 %conv +} + +define i32 @test_trunc_or_2(i64 %x) { +; X64-LABEL: test_trunc_or_2: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $48, %rax +; X64-NEXT: orl $13, %eax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq + %or = or i64 %x, 3940649673949183 + %shr = lshr i64 %or, 48 + %conv = trunc i64 %shr to i32 + ret i32 %conv +} + +define i32 @test_trunc_xor_2(i64 %x) { +; X64-LABEL: test_trunc_xor_2: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $48, %rax +; X64-NEXT: xorl $13, %eax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq + %xor = xor i64 %x, 3940649673949183 + %shr = lshr i64 %xor, 48 + %conv = trunc i64 %shr to i32 + ret i32 %conv +} + ; Make sure we don't crash on this test case. define i32 @pr128158(i64 %x) { @@ -192,10 +234,10 @@ define i32 @pr128158(i64 %x) { ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: shrq $32, %rax ; X64-NEXT: .p2align 4 -; X64-NEXT: .LBB13_1: # %for.body +; X64-NEXT: .LBB16_1: # %for.body ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: cmpl $9, %eax -; X64-NEXT: jb .LBB13_1 +; X64-NEXT: jb .LBB16_1 ; X64-NEXT: # %bb.2: # %exit ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq From d2f27bf7a76c6c07e723983bd023c5bc88eec920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Wed, 26 Feb 2025 21:39:22 +0000 Subject: [PATCH 4/8] [X86] Address reviews --- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 ++++++++++++++++--------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 47dc9ffb4b24d..6dfcb7e3fb4fb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53739,36 +53739,47 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, // cases. static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG, const SDLoc &DL) { - using namespace llvm::SDPatternMatch; - SDValue BinopLhs; - APInt BinopConst, SrlConst; - if (VT != MVT::i32 || - !sd_match( - N, - m_AllOf(m_SpecificVT(MVT::i64), - m_Srl(m_OneUse(m_AnyOf( - m_Add(m_Value(BinopLhs), m_ConstInt(BinopConst)), - m_Or(m_Value(BinopLhs), m_ConstInt(BinopConst)), - m_Xor(m_Value(BinopLhs), m_ConstInt(BinopConst)))), - m_ConstInt(SrlConst))))) - return SDValue(); + SDValue Binop = N.getOperand(0); + APInt BinopConst = Binop.getConstantOperandAPInt(1); + APInt SrlConst = N.getConstantOperandAPInt(1); + unsigned Opcode = Binop.getOpcode(); + + auto CleanUpFn = +[](SDValue Op, EVT CleanUpVT, EVT VT, SelectionDAG &DAG, + const SDLoc &DL) { + SDValue CleanUp = DAG.getAnyExtOrTrunc(Op, DL, CleanUpVT); + return DAG.getAnyExtOrTrunc(CleanUp, DL, VT); + }; + auto ZeroExtCleanUp = +[](SDValue Op, EVT CleanUpVT, EVT VT, + SelectionDAG &DAG, const SDLoc &DL) { + return DAG.getZeroExtendInReg(Op, DL, CleanUpVT); + }; - if (SrlConst.ule(32) || BinopConst.countr_zero() < SrlConst.getZExtValue()) + switch (Opcode) { + default: return SDValue(); + case ISD::ADD: + if (BinopConst.countr_zero() < SrlConst.getZExtValue()) + return SDValue(); + CleanUpFn = ZeroExtCleanUp; + [[fallthrough]]; + case ISD::OR: + case ISD::XOR: + if (SrlConst.ule(32)) + return SDValue(); + break; + } SDValue BinopLHSSrl = - DAG.getNode(ISD::SRL, DL, MVT::i64, BinopLhs, N.getOperand(1)); + DAG.getNode(ISD::SRL, DL, MVT::i64, Binop.getOperand(0), N.getOperand(1)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl); APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits()); SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT); - SDValue NewBinopNode = - DAG.getNode(N.getOperand(0).getOpcode(), DL, VT, Trunc, NewBinopConst); - + SDValue NewBinopNode = DAG.getNode(Opcode, DL, VT, Trunc, NewBinopConst); EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue()); - return DAG.getZeroExtendInReg(NewBinopNode, DL, CleanUpVT); + return CleanUpFn(NewBinopNode, CleanUpVT, VT, DAG, DL); } /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify @@ -53816,8 +53827,14 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, if (!Src.hasOneUse()) return SDValue(); - if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL)) - return R; + if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL && + Src.getOperand(0).getNumOperands() == 2 && + isa(Src.getOperand(1)) && + isa(Src.getOperand(0).getOperand(1))) { + if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL)) + return R; + return SDValue(); + } if (!VT.isVector()) return SDValue(); From 1ba8e326100ffdaa6fbee4990d26a706913d3f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Thu, 27 Feb 2025 10:12:47 +0000 Subject: [PATCH 5/8] [X86] Improve readability --- llvm/lib/Target/X86/X86ISelLowering.cpp | 50 +++++++++++-------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6dfcb7e3fb4fb..1c9e8211281f5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53733,35 +53733,24 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src); } -// Attempt to fold some (truncate (srl (binop X, C1), C2)) patterns to -// (binop (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are -// able to avoid generating code with MOVABS and large constants in certain +// Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to +// (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we +// are able to avoid generating code with MOVABS and large constants in certain // cases. -static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG, - const SDLoc &DL) { +static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG, + const SDLoc &DL) { - SDValue Binop = N.getOperand(0); - APInt BinopConst = Binop.getConstantOperandAPInt(1); + SDValue Op = N.getOperand(0); + APInt OpConst = Op.getConstantOperandAPInt(1); APInt SrlConst = N.getConstantOperandAPInt(1); - unsigned Opcode = Binop.getOpcode(); - - auto CleanUpFn = +[](SDValue Op, EVT CleanUpVT, EVT VT, SelectionDAG &DAG, - const SDLoc &DL) { - SDValue CleanUp = DAG.getAnyExtOrTrunc(Op, DL, CleanUpVT); - return DAG.getAnyExtOrTrunc(CleanUp, DL, VT); - }; - auto ZeroExtCleanUp = +[](SDValue Op, EVT CleanUpVT, EVT VT, - SelectionDAG &DAG, const SDLoc &DL) { - return DAG.getZeroExtendInReg(Op, DL, CleanUpVT); - }; + unsigned Opcode = Op.getOpcode(); switch (Opcode) { default: return SDValue(); case ISD::ADD: - if (BinopConst.countr_zero() < SrlConst.getZExtValue()) + if (OpConst.countr_zero() < SrlConst.getZExtValue()) return SDValue(); - CleanUpFn = ZeroExtCleanUp; [[fallthrough]]; case ISD::OR: case ISD::XOR: @@ -53770,16 +53759,21 @@ static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG, break; } - SDValue BinopLHSSrl = - DAG.getNode(ISD::SRL, DL, MVT::i64, Binop.getOperand(0), N.getOperand(1)); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl); + SDValue OpLhsSrl = + DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl); - APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits()); - SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT); - SDValue NewBinopNode = DAG.getNode(Opcode, DL, VT, Trunc, NewBinopConst); + APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits()); + SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT); + SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst); EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue()); - return CleanUpFn(NewBinopNode, CleanUpVT, VT, DAG, DL); + + if (Opcode == ISD::ADD) + return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT); + + SDValue CleanUp = DAG.getAnyExtOrTrunc(NewOpNode, DL, CleanUpVT); + return DAG.getAnyExtOrTrunc(CleanUp, DL, VT); } /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify @@ -53831,7 +53825,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, Src.getOperand(0).getNumOperands() == 2 && isa(Src.getOperand(1)) && isa(Src.getOperand(0).getOperand(1))) { - if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL)) + if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL)) return R; return SDValue(); } From 696f3a52f29430eed2e2e657e3eb1086864bad03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Thu, 27 Feb 2025 17:06:00 +0000 Subject: [PATCH 6/8] [X86] Remove getNumOperands --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1c9e8211281f5..7cd6ff5a2c4e5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53822,11 +53822,17 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, return SDValue(); if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL && - Src.getOperand(0).getNumOperands() == 2 && - isa(Src.getOperand(1)) && - isa(Src.getOperand(0).getOperand(1))) { + isa(Src.getOperand(1))) { + + unsigned SrcOpOpcode = Src.getOperand(0).getOpcode(); + if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR && + SrcOpOpcode != ISD::XOR) || + !isa(Src.getOperand(0).getOperand(1))) + return SDValue(); + if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL)) return R; + return SDValue(); } From 7caceb190ecf22bd1cee3fe688c937207872f7ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Fri, 28 Feb 2025 12:28:30 +0000 Subject: [PATCH 7/8] [X86] Remove unnecessary switch case --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7cd6ff5a2c4e5..9f56edd226b26 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53743,21 +53743,12 @@ static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG, SDValue Op = N.getOperand(0); APInt OpConst = Op.getConstantOperandAPInt(1); APInt SrlConst = N.getConstantOperandAPInt(1); + uint64_t SrlConstVal = SrlConst.getZExtValue(); unsigned Opcode = Op.getOpcode(); - switch (Opcode) { - default: + if (SrlConst.ule(32) || + (Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal)) return SDValue(); - case ISD::ADD: - if (OpConst.countr_zero() < SrlConst.getZExtValue()) - return SDValue(); - [[fallthrough]]; - case ISD::OR: - case ISD::XOR: - if (SrlConst.ule(32)) - return SDValue(); - break; - } SDValue OpLhsSrl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1)); @@ -53766,8 +53757,7 @@ static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG, APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits()); SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT); SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst); - EVT CleanUpVT = - EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue()); + EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal); if (Opcode == ISD::ADD) return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT); From 4388e736a0b64ba60aef3ed2ec16cf7f6ee867ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= Date: Sat, 1 Mar 2025 03:32:22 +0000 Subject: [PATCH 8/8] [X86] Address reviews --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +- .../CodeGen/X86/combine-i64-trunc-srl-add.ll | 46 +++++++++---------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9f56edd226b26..e544c165e9afe 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53761,9 +53761,7 @@ static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG, if (Opcode == ISD::ADD) return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT); - - SDValue CleanUp = DAG.getAnyExtOrTrunc(NewOpNode, DL, CleanUpVT); - return DAG.getAnyExtOrTrunc(CleanUp, DL, VT); + return NewOpNode; } /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll index 785865bf06a74..f7906e5a009ae 100644 --- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll +++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll @@ -131,12 +131,12 @@ define i32 @test_trunc_add(i64 %x) { define i32 @test_trunc_sub(i64 %x) { ; X64-LABEL: test_trunc_sub: ; X64: # %bb.0: -; X64-NEXT: shrq $48, %rdi -; X64-NEXT: addl $65522, %edi # imm = 0xFFF2 -; X64-NEXT: movzwl %di, %eax +; X64-NEXT: shrq $49, %rdi +; X64-NEXT: leal 32762(%rdi), %eax +; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: retq - %sub = sub i64 %x, 3940649673949184 - %shr = lshr i64 %sub, 48 + %sub = sub i64 %x, 3377699720527872 + %shr = lshr i64 %sub, 49 %conv = trunc i64 %shr to i32 ret i32 %conv } @@ -145,12 +145,12 @@ define i32 @test_trunc_and_1(i64 %x) { ; X64-LABEL: test_trunc_and_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $48, %rax -; X64-NEXT: andl $14, %eax +; X64-NEXT: shrq $50, %rax +; X64-NEXT: andl $3, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %and = and i64 %x, 3940649673949184 - %shr = lshr i64 %and, 48 + %shr = lshr i64 %and, 50 %conv = trunc i64 %shr to i32 ret i32 %conv } @@ -159,12 +159,12 @@ define i32 @test_trunc_or_1(i64 %x) { ; X64-LABEL: test_trunc_or_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $48, %rax -; X64-NEXT: orl $14, %eax +; X64-NEXT: shrq $50, %rax +; X64-NEXT: orl $3, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %or = or i64 %x, 3940649673949184 - %shr = lshr i64 %or, 48 + %shr = lshr i64 %or, 50 %conv = trunc i64 %shr to i32 ret i32 %conv } @@ -173,12 +173,12 @@ define i32 @test_trunc_xor_1(i64 %x) { ; X64-LABEL: test_trunc_xor_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $48, %rax -; X64-NEXT: xorl $14, %eax +; X64-NEXT: shrq $50, %rax +; X64-NEXT: xorl $3, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %xor = xor i64 %x, 3940649673949184 - %shr = lshr i64 %xor, 48 + %shr = lshr i64 %xor, 50 %conv = trunc i64 %shr to i32 ret i32 %conv } @@ -187,12 +187,12 @@ define i32 @test_trunc_and_2(i64 %x) { ; X64-LABEL: test_trunc_and_2: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $48, %rax -; X64-NEXT: andl $13, %eax +; X64-NEXT: shrq $45, %rax +; X64-NEXT: andl $111, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %and = and i64 %x, 3940649673949183 - %shr = lshr i64 %and, 48 + %shr = lshr i64 %and, 45 %conv = trunc i64 %shr to i32 ret i32 %conv } @@ -201,12 +201,12 @@ define i32 @test_trunc_or_2(i64 %x) { ; X64-LABEL: test_trunc_or_2: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $48, %rax -; X64-NEXT: orl $13, %eax +; X64-NEXT: shrq $45, %rax +; X64-NEXT: orl $111, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %or = or i64 %x, 3940649673949183 - %shr = lshr i64 %or, 48 + %shr = lshr i64 %or, 45 %conv = trunc i64 %shr to i32 ret i32 %conv } @@ -215,12 +215,12 @@ define i32 @test_trunc_xor_2(i64 %x) { ; X64-LABEL: test_trunc_xor_2: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shrq $48, %rax -; X64-NEXT: xorl $13, %eax +; X64-NEXT: shrq $45, %rax +; X64-NEXT: xorl $111, %eax ; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %xor = xor i64 %x, 3940649673949183 - %shr = lshr i64 %xor, 48 + %shr = lshr i64 %xor, 45 %conv = trunc i64 %shr to i32 ret i32 %conv }