From c41f6655d5c3e8fcf476fbb3c9181ae72dfda84f Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Fri, 6 Dec 2024 13:46:58 +0000 Subject: [PATCH 1/6] [AArch64] Combine and and lsl into ubfiz Fixes #118132. --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 +++++++++ .../test/CodeGen/AArch64/aarch64-fold-lslfast.ll | 10 ++++------ llvm/test/CodeGen/AArch64/xbfiz.ll | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c6f5cdcd1d5fe..6acac914dbbba 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8968,6 +8968,15 @@ def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i32 imm0_63:$imm)))>; +def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xff)), (i64 imm0_63:$imm)), + (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), + (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i8 imm0_63:$imm)))>; +def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xffff)), (i64 imm0_63:$imm)), + (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), + (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i16 imm0_63:$imm)))>; + // sra patterns have an AddedComplexity of 10, so make sure we have a higher // AddedComplexity for the following patterns since we want to match sext + sra // patterns before we attempt to match a single sra node. diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll index 63dcafed2320a..abc5c0876e80b 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -13,11 +13,10 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind { ; CHECK0-SDAG-LABEL: halfword: ; CHECK0-SDAG: // %bb.0: ; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8 +; CHECK0-SDAG-NEXT: lsr w8, w1, #9 ; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK0-SDAG-NEXT: mov x19, x0 -; CHECK0-SDAG-NEXT: lsl x21, x8, #1 +; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8 ; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21] ; CHECK0-SDAG-NEXT: bl foo ; CHECK0-SDAG-NEXT: mov w0, w20 @@ -231,10 +230,9 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) { ; CHECK0-SDAG-NEXT: .cfi_offset w21, -24 ; CHECK0-SDAG-NEXT: .cfi_offset w22, -32 ; CHECK0-SDAG-NEXT: .cfi_offset w30, -48 -; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8 +; CHECK0-SDAG-NEXT: lsr w8, w1, #9 ; CHECK0-SDAG-NEXT: mov x19, x0 -; CHECK0-SDAG-NEXT: lsl x21, x8, #1 +; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8 ; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21] ; CHECK0-SDAG-NEXT: add w22, w20, #1 ; CHECK0-SDAG-NEXT: bl foo diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll index b777ddcb7efcc..05567e3425840 100644 --- a/llvm/test/CodeGen/AArch64/xbfiz.ll +++ b/llvm/test/CodeGen/AArch64/xbfiz.ll @@ -69,3 +69,19 @@ define i64 @lsl32_not_ubfiz64(i64 %v) { %and = and i64 %shl, 4294967295 ret i64 %and } + +define i64 @lsl_zext_i8_i64(i8 %b) { +; CHECK-LABEL: lsl_zext_i8_i64: +; CHECK: ubfiz x0, x0, #1, #8 + %1 = zext i8 %b to i64 + %2 = shl i64 %1, 1 + ret i64 %2 +} + +define i64 @lsl_zext_i16_i64(i16 %b) { +; CHECK-LABEL: lsl_zext_i16_i64: +; CHECK: ubfiz x0, x0, #1, #16 + %1 = zext i16 %b to i64 + %2 = shl i64 %1, 1 + ret i64 %2 +} From 50993210203ac2070a3dbea27258c10629ef71e7 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Fri, 13 Dec 2024 12:30:25 +0000 Subject: [PATCH 2/6] Move to target DAG-combine --- .../Target/AArch64/AArch64ISelLowering.cpp | 34 ++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 -- .../CodeGen/AArch64/aarch64-fold-lslfast.ll | 10 +- .../AArch64/const-shift-of-constmasked.ll | 101 ++++++++---------- llvm/test/CodeGen/AArch64/extract-bits.ll | 16 +-- llvm/test/CodeGen/AArch64/fpenv.ll | 6 +- .../CodeGen/AArch64/swap-compare-operands.ll | 42 +++++--- 7 files changed, 123 insertions(+), 95 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 3ad2905ce5207..5c6b04d637b5c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1140,6 +1140,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); + setTargetDAGCombine(ISD::SHL); + // In case of strict alignment, avoid an excessive number of byte wide stores. MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemset = @@ -26365,6 +26367,36 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return NVCAST; } +static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT VT = N->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + // If the operand is a bitwise AND with a constant RHS, and the shift is the + // only use, we can pull it out of the shift. 
+ // + // (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) + if (!Op0.hasOneUse() || Op0.getOpcode() != ISD::AND) + return SDValue(); + + ConstantSDNode *C1 = dyn_cast(Op0.getOperand(1)); + ConstantSDNode *C2 = dyn_cast(Op1); + if (!C1 || !C2) + return SDValue(); + + // Might be folded into shifted add/sub, do not lower. + if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD || + N->use_begin()->getOpcode() == ISD::SUB)) + return SDValue(); + + SDLoc DL(N); + SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, Op0.getOperand(1), Op1); + SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, Op0->getOperand(0), Op1); + return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -26710,6 +26742,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performCTLZCombine(N, DAG, Subtarget); case ISD::SCALAR_TO_VECTOR: return performScalarToVectorCombine(N, DCI, DAG); + case ISD::SHL: + return performSHLCombine(N, DAG); } return SDValue(); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6acac914dbbba..c6f5cdcd1d5fe 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8968,15 +8968,6 @@ def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i32 imm0_63:$imm)))>; -def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xff)), (i64 imm0_63:$imm)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i8 imm0_63:$imm)))>; -def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xffff)), (i64 imm0_63:$imm)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i16 imm0_63:$imm)))>; - // sra patterns have an AddedComplexity of 10, so make sure we have a higher // AddedComplexity for the following patterns since we want to match sext + sra // patterns before we attempt to match a single sra node. diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll index abc5c0876e80b..63dcafed2320a 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -13,10 +13,11 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind { ; CHECK0-SDAG-LABEL: halfword: ; CHECK0-SDAG: // %bb.0: ; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK0-SDAG-NEXT: lsr w8, w1, #9 +; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8 ; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK0-SDAG-NEXT: mov x19, x0 -; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8 +; CHECK0-SDAG-NEXT: lsl x21, x8, #1 ; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21] ; CHECK0-SDAG-NEXT: bl foo ; CHECK0-SDAG-NEXT: mov w0, w20 @@ -230,9 +231,10 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) { ; CHECK0-SDAG-NEXT: .cfi_offset w21, -24 ; CHECK0-SDAG-NEXT: .cfi_offset w22, -32 ; CHECK0-SDAG-NEXT: .cfi_offset w30, -48 -; CHECK0-SDAG-NEXT: lsr w8, w1, #9 +; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8 ; CHECK0-SDAG-NEXT: mov x19, x0 -; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8 +; CHECK0-SDAG-NEXT: lsl x21, x8, #1 ; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21] ; CHECK0-SDAG-NEXT: add w22, w20, #1 ; CHECK0-SDAG-NEXT: bl foo diff --git a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll index 66a6745cda8f7..1fffcdda4b416 100644 --- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll @@ -190,8 +190,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) { define i8 @test_i8_7_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: ubfiz w0, w0, #1, #3 ; CHECK-NEXT: ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 1 @@ -200,8 +199,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) { define i8 @test_i8_7_mask_shl_4(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7 -; CHECK-NEXT: lsl w0, w8, #4 +; CHECK-NEXT: ubfiz w0, w0, #4, #3 ; CHECK-NEXT: ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 4 @@ -229,8 +227,8 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) { define i8 @test_i8_28_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1c -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: and w0, w8, #0x38 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 1 @@ -239,8 +237,8 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) { define i8 @test_i8_28_mask_shl_2(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_2: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1c -; CHECK-NEXT: lsl w0, w8, #2 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: and w0, w8, #0x70 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 2 @@ -249,8 +247,8 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) { define i8 @test_i8_28_mask_shl_3(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_3: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1c -; CHECK-NEXT: lsl w0, w8, #3 +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: and w0, w8, #0xe0 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 3 @@ -259,8 +257,8 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) { define i8 @test_i8_28_mask_shl_4(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xc -; CHECK-NEXT: lsl w0, w8, #4 +; CHECK-NEXT: lsl w8, w0, #4 +; CHECK-NEXT: and w0, w8, #0xc0 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 4 @@ -270,8 +268,8 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) { define i8 @test_i8_224_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_224_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x60 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl 
w8, w0, #1 +; CHECK-NEXT: and w0, w8, #0xc0 ; CHECK-NEXT: ret %t0 = and i8 %a0, 224 %t1 = shl i8 %t0, 1 @@ -465,8 +463,7 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) { define i16 @test_i16_127_mask_shl_1(i16 %a0) { ; CHECK-LABEL: test_i16_127_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: ubfiz w0, w0, #1, #7 ; CHECK-NEXT: ret %t0 = and i16 %a0, 127 %t1 = shl i16 %t0, 1 @@ -475,8 +472,7 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) { define i16 @test_i16_127_mask_shl_8(i16 %a0) { ; CHECK-LABEL: test_i16_127_mask_shl_8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f -; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: ubfiz w0, w0, #8, #7 ; CHECK-NEXT: ret %t0 = and i16 %a0, 127 %t1 = shl i16 %t0, 8 @@ -504,8 +500,8 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) { define i16 @test_i16_2032_mask_shl_3(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_3: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f0 -; CHECK-NEXT: lsl w0, w8, #3 +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: and w0, w8, #0x3f80 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 3 @@ -514,8 +510,8 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) { define i16 @test_i16_2032_mask_shl_4(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f0 -; CHECK-NEXT: lsl w0, w8, #4 +; CHECK-NEXT: lsl w8, w0, #4 +; CHECK-NEXT: and w0, w8, #0x7f00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 4 @@ -524,8 +520,8 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) { define i16 @test_i16_2032_mask_shl_5(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_5: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f0 -; CHECK-NEXT: lsl w0, w8, #5 +; CHECK-NEXT: lsl w8, w0, #5 +; CHECK-NEXT: and w0, w8, #0xfe00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 5 @@ -534,8 +530,8 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) { define i16 @test_i16_2032_mask_shl_6(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_6: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x3f0 -; CHECK-NEXT: lsl w0, w8, #6 +; CHECK-NEXT: lsl w8, w0, #6 +; CHECK-NEXT: and w0, w8, #0xfc00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 6 @@ -545,8 +541,8 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) { define i16 @test_i16_65024_mask_shl_1(i16 %a0) { ; CHECK-LABEL: test_i16_65024_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7e00 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: and w0, w8, #0xfc00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 65024 %t1 = shl i16 %t0, 1 @@ -740,8 +736,7 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) { define i32 @test_i32_32767_mask_shl_1(i32 %a0) { ; CHECK-LABEL: test_i32_32767_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: ubfiz w0, w0, #1, #15 ; CHECK-NEXT: ret %t0 = and i32 %a0, 32767 %t1 = shl i32 %t0, 1 @@ -750,8 +745,7 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) { define i32 @test_i32_32767_mask_shl_16(i32 %a0) { ; CHECK-LABEL: test_i32_32767_mask_shl_16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff -; CHECK-NEXT: lsl w0, w8, #16 +; CHECK-NEXT: ubfiz w0, w0, #16, #15 ; CHECK-NEXT: ret %t0 = and i32 %a0, 32767 %t1 = shl i32 %t0, 16 @@ -779,8 +773,8 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) { define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_7: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff00 -; 
CHECK-NEXT: lsl w0, w8, #7 +; CHECK-NEXT: lsl w8, w0, #7 +; CHECK-NEXT: and w0, w8, #0x3fff8000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 7 @@ -789,8 +783,8 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff00 -; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: lsl w8, w0, #8 +; CHECK-NEXT: and w0, w8, #0x7fff0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 8 @@ -799,8 +793,8 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_9: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff00 -; CHECK-NEXT: lsl w0, w8, #9 +; CHECK-NEXT: lsl w8, w0, #9 +; CHECK-NEXT: and w0, w8, #0xfffe0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 9 @@ -809,8 +803,8 @@ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_10: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x3fff00 -; CHECK-NEXT: lsl w0, w8, #10 +; CHECK-NEXT: lsl w8, w0, #10 +; CHECK-NEXT: and w0, w8, #0xfffc0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 10 @@ -820,8 +814,8 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7ffe0000 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: and w0, w8, #0xfffc0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 4294836224 %t1 = shl i32 %t0, 1 @@ -1015,8 +1009,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) { define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff -; CHECK-NEXT: lsl x0, x8, #1 +; CHECK-NEXT: lsl w0, w0, #1 ; CHECK-NEXT: ret %t0 = and i64 %a0, 2147483647 %t1 = shl i64 %t0, 1 @@ -1054,8 +1047,8 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff0000 -; CHECK-NEXT: lsl x0, x8, #15 +; CHECK-NEXT: lsl x8, x0, #15 +; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 15 @@ -1064,8 +1057,8 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff0000 -; CHECK-NEXT: lsl x0, x8, #16 +; CHECK-NEXT: lsl x8, x0, #16 +; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 16 @@ -1074,8 +1067,8 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff0000 -; CHECK-NEXT: lsl x0, x8, #17 +; CHECK-NEXT: lsl x8, x0, #17 +; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 17 @@ -1084,8 +1077,8 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { define i64 
@test_i64_140737488289792_mask_shl_18(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x3fffffff0000 -; CHECK-NEXT: lsl x0, x8, #18 +; CHECK-NEXT: lsl x8, x0, #18 +; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 18 @@ -1095,8 +1088,8 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 -; CHECK-NEXT: lsl x0, x8, #1 +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 18446744065119617024 %t1 = shl i64 %t0, 1 diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll index b87157a183835..aaa6c7eb4a30f 100644 --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -1013,8 +1013,8 @@ define i32 @c1_i32(i32 %arg) nounwind { define i32 @c2_i32(i32 %arg) nounwind { ; CHECK-LABEL: c2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx w8, w0, #19, #10 -; CHECK-NEXT: lsl w0, w8, #2 +; CHECK-NEXT: lsr w8, w0, #17 +; CHECK-NEXT: and w0, w8, #0xffc ; CHECK-NEXT: ret %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 @@ -1063,8 +1063,8 @@ define i64 @c1_i64(i64 %arg) nounwind { define i64 @c2_i64(i64 %arg) nounwind { ; CHECK-LABEL: c2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx x8, x0, #51, #10 -; CHECK-NEXT: lsl x0, x8, #2 +; CHECK-NEXT: lsr x8, x0, #49 +; CHECK-NEXT: and x0, x8, #0xffc ; CHECK-NEXT: ret %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 @@ -1120,8 +1120,8 @@ define void @c6_i32(i32 %arg, ptr %ptr) nounwind { define void @c7_i32(i32 %arg, ptr %ptr) nounwind { ; CHECK-LABEL: c7_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx w8, w0, #19, #10 -; CHECK-NEXT: lsl w8, w8, #2 +; CHECK-NEXT: lsr w8, w0, #17 +; CHECK-NEXT: and w8, w8, #0xffc ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %tmp0 = lshr i32 %arg, 19 @@ -1163,8 +1163,8 @@ define void @c6_i64(i64 %arg, ptr %ptr) nounwind { define void @c7_i64(i64 %arg, ptr %ptr) nounwind { ; CHECK-LABEL: c7_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx x8, x0, #51, #10 -; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: lsr x8, x0, #49 +; CHECK-NEXT: and x8, x8, #0xffc ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: ret %tmp0 = lshr i64 %arg, 51 diff --git a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll index 3a307f7731037..3351565d8dd89 100644 --- a/llvm/test/CodeGen/AArch64/fpenv.ll +++ b/llvm/test/CodeGen/AArch64/fpenv.ll @@ -4,11 +4,11 @@ define void @func_set_rounding_dyn(i32 %rm) { ; CHECK-LABEL: func_set_rounding_dyn: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w9, w0, #1 +; CHECK-NEXT: lsl w9, w0, #22 ; CHECK-NEXT: mrs x8, FPCR -; CHECK-NEXT: and w9, w9, #0x3 ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff -; CHECK-NEXT: lsl w9, w9, #22 +; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304 +; CHECK-NEXT: and w9, w9, #0xc00000 ; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: msr FPCR, x8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll index b106e15c23e30..a45881f2034b2 100644 --- a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll +++ b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll @@ -133,8 +133,9 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) { ; CHECK-LABEL: 
testSwapCmpWithShiftedZeroExtend16_64 -; CHECK: cmp x1, w0, uxth #2 -; CHECK-NEXT: cset w0, lo +; CHECK: ubfiz x8, x0, #2, #16 +; CHECK: cmp x8, x1 +; CHECK-NEXT: cset w0, hi entry: %a64 = zext i16 %a to i64 %shl.0 = shl i64 %a64, 2 @@ -144,8 +145,9 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) { ; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_64 -; CHECK: cmp x1, w0, uxtb #4 -; CHECK-NEXT: cset w0, lo +; CHECK: ubfiz x8, x0, #4, #8 +; CHECK: cmp x8, x1 +; CHECK-NEXT: cset w0, hi entry: %a64 = zext i8 %a to i64 %shl.2 = shl i64 %a64, 4 @@ -155,8 +157,9 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) { ; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_32 -; CHECK: cmp w1, w0, uxth #3 -; CHECK-NEXT: cset w0, lo +; CHECK: ubfiz w8, w0, #3, #16 +; CHECK: cmp w8, w1 +; CHECK-NEXT: cset w0, hi entry: %a32 = zext i16 %a to i32 %shl = shl i32 %a32, 3 @@ -166,8 +169,9 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) { ; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_32 -; CHECK: cmp w1, w0, uxtb #4 -; CHECK-NEXT: cset w0, lo +; CHECK: ubfiz w8, w0, #4, #8 +; CHECK: cmp w8, w1 +; CHECK-NEXT: cset w0, hi entry: %a32 = zext i8 %a to i32 %shl = shl i32 %a32, 4 @@ -177,9 +181,9 @@ entry: define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) { ; CHECK-LABEL: testSwapCmpWithTooLargeShiftedZeroExtend8_32 -; CHECK: and [[REG:w[0-9]+]], w0, #0xff -; CHECK: cmp w1, [[REG]], lsl #5 -; CHECK-NEXT: cset w0, lo +; CHECK: ubfiz w8, w0, #5, #8 +; CHECK: cmp w8, w1 +; CHECK-NEXT: cset w0, hi entry: %a32 = zext i8 %a to i32 %shl = shl i32 %a32, 5 @@ -517,7 +521,8 @@ t1: %shl1 = shl i64 %conv1, 4 %na1 = sub i64 0, %shl1 %cmp1 = icmp ne i64 %na1, %b64 -; CHECK: cmn x3, w1, uxth #4 +; CHECK: ubfiz x8, x1, #4, #16 +; CHECK: cmn x3, x8 br i1 %cmp1, label %t2, label %end t2: @@ -525,7 +530,8 @@ t2: %shl2 = shl i64 %conv2, 3 %na2 = sub i64 0, %shl2 %cmp2 = icmp ne i64 %na2, %b64 -; CHECK: cmn x3, w2, uxtb #3 +; CHECK: ubfiz x8, x2, #3, #8 +; CHECK: cmn x3, x8 br i1 %cmp2, label %t3, label %end t3: @@ -533,7 +539,8 @@ t3: %shl3 = shl i32 %conv3, 2 %na3 = sub i32 0, %shl3 %cmp3 = icmp ne i32 %na3, %b32 -; CHECK: cmn w4, w1, uxth #2 +; CHECK: ubfiz w8, w1, #2, #16 +; CHECK: cmn w4, w8 br i1 %cmp3, label %t4, label %end t4: @@ -541,7 +548,8 @@ t4: %shl4 = shl i32 %conv4, 1 %na4 = sub i32 0, %shl4 %cmp4 = icmp ne i32 %na4, %b32 -; CHECK: cmn w4, w2, uxtb #1 +; CHECK: ubfiz w8, w2, #1, #8 +; CHECK: cmn w4, w8 br i1 %cmp4, label %t5, label %end t5: @@ -549,8 +557,8 @@ t5: %shl5 = shl i32 %conv5, 5 %na5 = sub i32 0, %shl5 %cmp5 = icmp ne i32 %na5, %b32 -; CHECK: and [[REG:w[0-9]+]], w2, #0xff -; CHECK: cmn w4, [[REG]], lsl #5 +; CHECK: ubfiz w8, w2, #5, #8 +; CHECK: cmn w4, w8 br i1 %cmp5, label %t6, label %end t6: From 69be8118952f76baf2accd695aee14121ab67b59 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Fri, 13 Dec 2024 14:44:37 +0000 Subject: [PATCH 3/6] Exclude more uses of SHL that might be combined --- .../Target/AArch64/AArch64ISelLowering.cpp | 8 ++-- .../CodeGen/AArch64/swap-compare-operands.ll | 42 ++++++++----------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 5c6b04d637b5c..0cae5a536b6f3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26386,9 +26386,11 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) { if (!C1 || 
!C2) return SDValue(); - // Might be folded into shifted add/sub, do not lower. - if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD || - N->use_begin()->getOpcode() == ISD::SUB)) + // Might be folded into shifted op, do not lower. + unsigned UseOpc = N->use_begin()->getOpcode(); + if (N->hasOneUse() && + (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || + UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)) return SDValue(); SDLoc DL(N); diff --git a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll index a45881f2034b2..b106e15c23e30 100644 --- a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll +++ b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll @@ -133,9 +133,8 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) { ; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_64 -; CHECK: ubfiz x8, x0, #2, #16 -; CHECK: cmp x8, x1 -; CHECK-NEXT: cset w0, hi +; CHECK: cmp x1, w0, uxth #2 +; CHECK-NEXT: cset w0, lo entry: %a64 = zext i16 %a to i64 %shl.0 = shl i64 %a64, 2 @@ -145,9 +144,8 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) { ; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_64 -; CHECK: ubfiz x8, x0, #4, #8 -; CHECK: cmp x8, x1 -; CHECK-NEXT: cset w0, hi +; CHECK: cmp x1, w0, uxtb #4 +; CHECK-NEXT: cset w0, lo entry: %a64 = zext i8 %a to i64 %shl.2 = shl i64 %a64, 4 @@ -157,9 +155,8 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) { ; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_32 -; CHECK: ubfiz w8, w0, #3, #16 -; CHECK: cmp w8, w1 -; CHECK-NEXT: cset w0, hi +; CHECK: cmp w1, w0, uxth #3 +; CHECK-NEXT: cset w0, lo entry: %a32 = zext i16 %a to i32 %shl = shl i32 %a32, 3 @@ -169,9 +166,8 @@ entry: define i1 @testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) { ; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_32 -; CHECK: ubfiz w8, w0, #4, #8 -; CHECK: cmp w8, w1 -; CHECK-NEXT: cset w0, hi +; CHECK: cmp w1, w0, uxtb #4 +; CHECK-NEXT: cset w0, lo entry: %a32 = zext i8 %a to i32 %shl = shl i32 %a32, 4 @@ -181,9 +177,9 @@ entry: define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) { ; CHECK-LABEL: testSwapCmpWithTooLargeShiftedZeroExtend8_32 -; CHECK: ubfiz w8, w0, #5, #8 -; CHECK: cmp w8, w1 -; CHECK-NEXT: cset w0, hi +; CHECK: and [[REG:w[0-9]+]], w0, #0xff +; CHECK: cmp w1, [[REG]], lsl #5 +; CHECK-NEXT: cset w0, lo entry: %a32 = zext i8 %a to i32 %shl = shl i32 %a32, 5 @@ -521,8 +517,7 @@ t1: %shl1 = shl i64 %conv1, 4 %na1 = sub i64 0, %shl1 %cmp1 = icmp ne i64 %na1, %b64 -; CHECK: ubfiz x8, x1, #4, #16 -; CHECK: cmn x3, x8 +; CHECK: cmn x3, w1, uxth #4 br i1 %cmp1, label %t2, label %end t2: @@ -530,8 +525,7 @@ t2: %shl2 = shl i64 %conv2, 3 %na2 = sub i64 0, %shl2 %cmp2 = icmp ne i64 %na2, %b64 -; CHECK: ubfiz x8, x2, #3, #8 -; CHECK: cmn x3, x8 +; CHECK: cmn x3, w2, uxtb #3 br i1 %cmp2, label %t3, label %end t3: @@ -539,8 +533,7 @@ t3: %shl3 = shl i32 %conv3, 2 %na3 = sub i32 0, %shl3 %cmp3 = icmp ne i32 %na3, %b32 -; CHECK: ubfiz w8, w1, #2, #16 -; CHECK: cmn w4, w8 +; CHECK: cmn w4, w1, uxth #2 br i1 %cmp3, label %t4, label %end t4: @@ -548,8 +541,7 @@ t4: %shl4 = shl i32 %conv4, 1 %na4 = sub i32 0, %shl4 %cmp4 = icmp ne i32 %na4, %b32 -; CHECK: ubfiz w8, w2, #1, #8 -; CHECK: cmn w4, w8 +; CHECK: cmn w4, w2, uxtb #1 br i1 %cmp4, label %t5, label %end t5: @@ -557,8 +549,8 @@ t5: %shl5 = shl i32 %conv5, 5 %na5 = sub i32 0, %shl5 %cmp5 = icmp ne i32 %na5, %b32 -; CHECK: ubfiz w8, w2, #5, #8 -; CHECK: 
cmn w4, w8 +; CHECK: and [[REG:w[0-9]+]], w2, #0xff +; CHECK: cmn w4, [[REG]], lsl #5 br i1 %cmp5, label %t6, label %end t6: From fdbe823e14a1cf12a2358bcb141de5c469bc3b01 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Fri, 13 Dec 2024 16:50:25 +0000 Subject: [PATCH 4/6] address comments --- .../Target/AArch64/AArch64ISelLowering.cpp | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0cae5a536b6f3..13871b149c2b9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26367,35 +26367,38 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return NVCAST; } +/// If the operand is a bitwise AND with a constant RHS, and the shift has a +/// constant RHS and is the only use, we can pull it out of the shift, i.e. +/// +/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) +/// +/// We prefer this canonical form to match existing isel patterns. static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) { - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); - // If the operand is a bitwise AND with a constant RHS, and the shift is the - // only use, we can pull it out of the shift. - // - // (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) + SDValue Op0 = N->getOperand(0); if (!Op0.hasOneUse() || Op0.getOpcode() != ISD::AND) return SDValue(); - ConstantSDNode *C1 = dyn_cast(Op0.getOperand(1)); - ConstantSDNode *C2 = dyn_cast(Op1); - if (!C1 || !C2) + SDValue C1 = Op0->getOperand(1); + SDValue C2 = N->getOperand(1); + if (!isa(C1) || !isa(C2)) return SDValue(); // Might be folded into shifted op, do not lower. - unsigned UseOpc = N->use_begin()->getOpcode(); - if (N->hasOneUse() && - (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || - UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)) - return SDValue(); + if (N->hasOneUse()) { + unsigned UseOpc = N->use_begin()->getOpcode(); + if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || + UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) + return SDValue(); + } SDLoc DL(N); - SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, Op0.getOperand(1), Op1); - SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, Op0->getOperand(0), Op1); + SDValue X = Op0->getOperand(0); + SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); + SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); } From f0cbfdda1a8a0c09904969767078fcf7c5c9a9e5 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Wed, 8 Jan 2025 14:17:43 +0000 Subject: [PATCH 5/6] canonicalize after legalization --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 13871b149c2b9..fa13f8c6d513e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26373,9 +26373,10 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, /// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) /// /// We prefer this canonical form to match existing isel patterns. 
-static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - if (VT != MVT::i32 && VT != MVT::i64) +static SDValue performSHLCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue Op0 = N->getOperand(0); @@ -26389,13 +26390,14 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) { // Might be folded into shifted op, do not lower. if (N->hasOneUse()) { - unsigned UseOpc = N->use_begin()->getOpcode(); + unsigned UseOpc = N->user_begin()->getOpcode(); if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) return SDValue(); } SDLoc DL(N); + EVT VT = N->getValueType(0); SDValue X = Op0->getOperand(0); SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); @@ -26748,7 +26750,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SCALAR_TO_VECTOR: return performScalarToVectorCombine(N, DCI, DAG); case ISD::SHL: - return performSHLCombine(N, DAG); + return performSHLCombine(N, DCI, DAG); } return SDValue(); } From ed9338a48b28a7e9cd8040354473be0c3ec7d675 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Wed, 8 Jan 2025 14:56:37 +0000 Subject: [PATCH 6/6] address comments --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fa13f8c6d513e..23671c9ffcf19 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26380,7 +26380,7 @@ static SDValue performSHLCombine(SDNode *N, return SDValue(); SDValue Op0 = N->getOperand(0); - if (!Op0.hasOneUse() || Op0.getOpcode() != ISD::AND) + if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) return SDValue(); SDValue C1 = Op0->getOperand(1);
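
Note: the six patches above modify performSHLCombine() incrementally, and the
angle-bracketed template arguments of the dyn_cast/isa calls appear to have been
stripped by formatting in the quoted hunks. For reference, below is a sketch of
the combine as it should read in llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
once all six patches are applied, reconstructed from the hunks above with the
presumed <ConstantSDNode> template arguments restored. Treat it as a reading aid,
not an authoritative copy of the committed source.

    /// If the operand is a bitwise AND with a constant RHS, and the shift has a
    /// constant RHS and is the only use, we can pull it out of the shift, i.e.
    ///
    ///   (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
    ///
    /// We prefer this canonical form to match existing isel patterns.
    static SDValue performSHLCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     SelectionDAG &DAG) {
      // Only canonicalize after legalization (patch 5).
      if (DCI.isBeforeLegalizeOps())
        return SDValue();

      SDValue Op0 = N->getOperand(0);
      if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse())
        return SDValue();

      SDValue C1 = Op0->getOperand(1);
      SDValue C2 = N->getOperand(1);
      // <ConstantSDNode> restored here; the template arguments look stripped in
      // the diff text above.
      if (!isa<ConstantSDNode>(C1) || !isa<ConstantSDNode>(C2))
        return SDValue();

      // Might be folded into shifted op, do not lower.
      if (N->hasOneUse()) {
        unsigned UseOpc = N->user_begin()->getOpcode();
        if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC ||
            UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)
          return SDValue();
      }

      SDLoc DL(N);
      EVT VT = N->getValueType(0);
      SDValue X = Op0->getOperand(0);
      SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2);
      SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2);
      return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS);
    }

The net effect, exercised by the xbfiz.ll tests added in patch 1, is that a
zero-extend-and-shift such as (shl (zext i8 %b to i64), 1) reaches the existing
UBFM isel patterns in canonical form and selects to a single
"ubfiz x0, x0, #1, #8" rather than an and followed by lsl, while shifts feeding
add/sub/compare users are left alone so they can still fold into shifted or
extended operands.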