From 9f292d73cc11993499347eefce99bf2e02741faf Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 20 Jan 2025 11:54:32 +0000 Subject: [PATCH 1/3] Reapply "[AArch64] Combine and and lsl into ubfiz" (#123356) Patch was reverted due to test case (added) exposing an infinite loop in combiner, where (shl C1, C2) created by performSHLCombine isn't constant-folded: Combining: t14: i64 = shl t12, Constant:i64<1> Creating new node: t36: i64 = shl OpaqueConstant:i64<-2401053089408754003>, Constant:i64<1> Creating new node: t37: i64 = shl t6, Constant:i64<1> Creating new node: t38: i64 = and t37, t36 ... into: t38: i64 = and t37, t36 ... Combining: t38: i64 = and t37, t36 Creating new node: t39: i64 = and t6, OpaqueConstant:i64<-2401053089408754003> Creating new node: t40: i64 = shl t39, Constant:i64<1> ... into: t40: i64 = shl t39, Constant:i64<1> and subsequently gets simplified by DAGCombiner::visitAND: // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; before being folded by performSHLCombine once again and so on. The combine in performSHLCombine should only be done if (shl C1, C2) can be constant-folded, it may otherwise be unsafe and generally have a worse end result. Thanks to Dave Sherwood for his insight on this one. This reverts commit f719771f251d7c30eca448133fe85730f19a6bd1. 
--- .../Target/AArch64/AArch64ISelLowering.cpp | 41 +++++++ .../AArch64/const-shift-of-constmasked.ll | 101 ++++++++---------- llvm/test/CodeGen/AArch64/extract-bits.ll | 16 +-- llvm/test/CodeGen/AArch64/fpenv.ll | 6 +- llvm/test/CodeGen/AArch64/xbfiz.ll | 16 +++ 5 files changed, 115 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ede1fb93fe5f..f209c186c0542 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1140,6 +1140,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); + setTargetDAGCombine(ISD::SHL); + // In case of strict alignment, avoid an excessive number of byte wide stores. MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemset = @@ -26339,6 +26341,43 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return NVCAST; } +/// If the operand is a bitwise AND with a constant RHS, and the shift has a +/// constant RHS and is the only use, we can pull it out of the shift, i.e. +/// +/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) +/// +/// We prefer this canonical form to match existing isel patterns. +static SDValue performSHLCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) + return SDValue(); + + SDValue C1 = Op0->getOperand(1); + SDValue C2 = N->getOperand(1); + if (!isa<ConstantSDNode>(C1) || !isa<ConstantSDNode>(C2)) + return SDValue(); + + // Might be folded into shifted op, do not lower. 
+ if (N->hasOneUse()) { + unsigned UseOpc = N->user_begin()->getOpcode(); + if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || + UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) + return SDValue(); + } + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue X = Op0->getOperand(0); + SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); + SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); + return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -26684,6 +26723,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performCTLZCombine(N, DAG, Subtarget); case ISD::SCALAR_TO_VECTOR: return performScalarToVectorCombine(N, DCI, DAG); + case ISD::SHL: + return performSHLCombine(N, DCI, DAG); } return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll index 66a6745cda8f7..1fffcdda4b416 100644 --- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll @@ -190,8 +190,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) { define i8 @test_i8_7_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: ubfiz w0, w0, #1, #3 ; CHECK-NEXT: ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 1 @@ -200,8 +199,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) { define i8 @test_i8_7_mask_shl_4(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7 -; CHECK-NEXT: lsl w0, w8, #4 +; CHECK-NEXT: ubfiz w0, w0, #4, #3 ; CHECK-NEXT: ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 4 @@ -229,8 +227,8 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) { define i8 @test_i8_28_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_1: ; CHECK: // %bb.0: -; 
CHECK-NEXT: and w8, w0, #0x1c -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: and w0, w8, #0x38 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 1 @@ -239,8 +237,8 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) { define i8 @test_i8_28_mask_shl_2(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_2: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1c -; CHECK-NEXT: lsl w0, w8, #2 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: and w0, w8, #0x70 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 2 @@ -249,8 +247,8 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) { define i8 @test_i8_28_mask_shl_3(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_3: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1c -; CHECK-NEXT: lsl w0, w8, #3 +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: and w0, w8, #0xe0 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 3 @@ -259,8 +257,8 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) { define i8 @test_i8_28_mask_shl_4(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xc -; CHECK-NEXT: lsl w0, w8, #4 +; CHECK-NEXT: lsl w8, w0, #4 +; CHECK-NEXT: and w0, w8, #0xc0 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 4 @@ -270,8 +268,8 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) { define i8 @test_i8_224_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_224_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x60 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: and w0, w8, #0xc0 ; CHECK-NEXT: ret %t0 = and i8 %a0, 224 %t1 = shl i8 %t0, 1 @@ -465,8 +463,7 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) { define i16 @test_i16_127_mask_shl_1(i16 %a0) { ; CHECK-LABEL: test_i16_127_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: ubfiz w0, w0, #1, #7 ; CHECK-NEXT: ret %t0 = and i16 %a0, 127 %t1 = shl i16 %t0, 1 @@ -475,8 +472,7 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) { define i16 
@test_i16_127_mask_shl_8(i16 %a0) { ; CHECK-LABEL: test_i16_127_mask_shl_8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f -; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: ubfiz w0, w0, #8, #7 ; CHECK-NEXT: ret %t0 = and i16 %a0, 127 %t1 = shl i16 %t0, 8 @@ -504,8 +500,8 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) { define i16 @test_i16_2032_mask_shl_3(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_3: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f0 -; CHECK-NEXT: lsl w0, w8, #3 +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: and w0, w8, #0x3f80 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 3 @@ -514,8 +510,8 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) { define i16 @test_i16_2032_mask_shl_4(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f0 -; CHECK-NEXT: lsl w0, w8, #4 +; CHECK-NEXT: lsl w8, w0, #4 +; CHECK-NEXT: and w0, w8, #0x7f00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 4 @@ -524,8 +520,8 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) { define i16 @test_i16_2032_mask_shl_5(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_5: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7f0 -; CHECK-NEXT: lsl w0, w8, #5 +; CHECK-NEXT: lsl w8, w0, #5 +; CHECK-NEXT: and w0, w8, #0xfe00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 5 @@ -534,8 +530,8 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) { define i16 @test_i16_2032_mask_shl_6(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_6: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x3f0 -; CHECK-NEXT: lsl w0, w8, #6 +; CHECK-NEXT: lsl w8, w0, #6 +; CHECK-NEXT: and w0, w8, #0xfc00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 6 @@ -545,8 +541,8 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) { define i16 @test_i16_65024_mask_shl_1(i16 %a0) { ; CHECK-LABEL: test_i16_65024_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7e00 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, 
#1 +; CHECK-NEXT: and w0, w8, #0xfc00 ; CHECK-NEXT: ret %t0 = and i16 %a0, 65024 %t1 = shl i16 %t0, 1 @@ -740,8 +736,7 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) { define i32 @test_i32_32767_mask_shl_1(i32 %a0) { ; CHECK-LABEL: test_i32_32767_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: ubfiz w0, w0, #1, #15 ; CHECK-NEXT: ret %t0 = and i32 %a0, 32767 %t1 = shl i32 %t0, 1 @@ -750,8 +745,7 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) { define i32 @test_i32_32767_mask_shl_16(i32 %a0) { ; CHECK-LABEL: test_i32_32767_mask_shl_16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff -; CHECK-NEXT: lsl w0, w8, #16 +; CHECK-NEXT: ubfiz w0, w0, #16, #15 ; CHECK-NEXT: ret %t0 = and i32 %a0, 32767 %t1 = shl i32 %t0, 16 @@ -779,8 +773,8 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) { define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_7: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff00 -; CHECK-NEXT: lsl w0, w8, #7 +; CHECK-NEXT: lsl w8, w0, #7 +; CHECK-NEXT: and w0, w8, #0x3fff8000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 7 @@ -789,8 +783,8 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff00 -; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: lsl w8, w0, #8 +; CHECK-NEXT: and w0, w8, #0x7fff0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 8 @@ -799,8 +793,8 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_9: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7fff00 -; CHECK-NEXT: lsl w0, w8, #9 +; CHECK-NEXT: lsl w8, w0, #9 +; CHECK-NEXT: and w0, w8, #0xfffe0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 9 @@ -809,8 +803,8 @@ define i32 
@test_i32_8388352_mask_shl_9(i32 %a0) { define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_10: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x3fff00 -; CHECK-NEXT: lsl w0, w8, #10 +; CHECK-NEXT: lsl w8, w0, #10 +; CHECK-NEXT: and w0, w8, #0xfffc0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 10 @@ -820,8 +814,8 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x7ffe0000 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: and w0, w8, #0xfffc0000 ; CHECK-NEXT: ret %t0 = and i32 %a0, 4294836224 %t1 = shl i32 %t0, 1 @@ -1015,8 +1009,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) { define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff -; CHECK-NEXT: lsl x0, x8, #1 +; CHECK-NEXT: lsl w0, w0, #1 ; CHECK-NEXT: ret %t0 = and i64 %a0, 2147483647 %t1 = shl i64 %t0, 1 @@ -1054,8 +1047,8 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff0000 -; CHECK-NEXT: lsl x0, x8, #15 +; CHECK-NEXT: lsl x8, x0, #15 +; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 15 @@ -1064,8 +1057,8 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff0000 -; CHECK-NEXT: lsl x0, x8, #16 +; CHECK-NEXT: lsl x8, x0, #16 +; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = 
shl i64 %t0, 16 @@ -1074,8 +1067,8 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffff0000 -; CHECK-NEXT: lsl x0, x8, #17 +; CHECK-NEXT: lsl x8, x0, #17 +; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 17 @@ -1084,8 +1077,8 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x3fffffff0000 -; CHECK-NEXT: lsl x0, x8, #18 +; CHECK-NEXT: lsl x8, x0, #18 +; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 18 @@ -1095,8 +1088,8 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 -; CHECK-NEXT: lsl x0, x8, #1 +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ; CHECK-NEXT: ret %t0 = and i64 %a0, 18446744065119617024 %t1 = shl i64 %t0, 1 diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll index b87157a183835..aaa6c7eb4a30f 100644 --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -1013,8 +1013,8 @@ define i32 @c1_i32(i32 %arg) nounwind { define i32 @c2_i32(i32 %arg) nounwind { ; CHECK-LABEL: c2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx w8, w0, #19, #10 -; CHECK-NEXT: lsl w0, w8, #2 +; CHECK-NEXT: lsr w8, w0, #17 +; CHECK-NEXT: and w0, w8, #0xffc ; CHECK-NEXT: ret %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 @@ -1063,8 +1063,8 @@ define i64 @c1_i64(i64 %arg) nounwind { 
define i64 @c2_i64(i64 %arg) nounwind { ; CHECK-LABEL: c2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx x8, x0, #51, #10 -; CHECK-NEXT: lsl x0, x8, #2 +; CHECK-NEXT: lsr x8, x0, #49 +; CHECK-NEXT: and x0, x8, #0xffc ; CHECK-NEXT: ret %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 @@ -1120,8 +1120,8 @@ define void @c6_i32(i32 %arg, ptr %ptr) nounwind { define void @c7_i32(i32 %arg, ptr %ptr) nounwind { ; CHECK-LABEL: c7_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx w8, w0, #19, #10 -; CHECK-NEXT: lsl w8, w8, #2 +; CHECK-NEXT: lsr w8, w0, #17 +; CHECK-NEXT: and w8, w8, #0xffc ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %tmp0 = lshr i32 %arg, 19 @@ -1163,8 +1163,8 @@ define void @c6_i64(i64 %arg, ptr %ptr) nounwind { define void @c7_i64(i64 %arg, ptr %ptr) nounwind { ; CHECK-LABEL: c7_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfx x8, x0, #51, #10 -; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: lsr x8, x0, #49 +; CHECK-NEXT: and x8, x8, #0xffc ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: ret %tmp0 = lshr i64 %arg, 51 diff --git a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll index 3a307f7731037..3351565d8dd89 100644 --- a/llvm/test/CodeGen/AArch64/fpenv.ll +++ b/llvm/test/CodeGen/AArch64/fpenv.ll @@ -4,11 +4,11 @@ define void @func_set_rounding_dyn(i32 %rm) { ; CHECK-LABEL: func_set_rounding_dyn: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w9, w0, #1 +; CHECK-NEXT: lsl w9, w0, #22 ; CHECK-NEXT: mrs x8, FPCR -; CHECK-NEXT: and w9, w9, #0x3 ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff -; CHECK-NEXT: lsl w9, w9, #22 +; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304 +; CHECK-NEXT: and w9, w9, #0xc00000 ; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: msr FPCR, x8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll index b777ddcb7efcc..05567e3425840 100644 --- a/llvm/test/CodeGen/AArch64/xbfiz.ll +++ b/llvm/test/CodeGen/AArch64/xbfiz.ll @@ -69,3 +69,19 @@ define i64 @lsl32_not_ubfiz64(i64 %v) { %and = and i64 
%shl, 4294967295 ret i64 %and } + +define i64 @lsl_zext_i8_i64(i8 %b) { +; CHECK-LABEL: lsl_zext_i8_i64: +; CHECK: ubfiz x0, x0, #1, #8 + %1 = zext i8 %b to i64 + %2 = shl i64 %1, 1 + ret i64 %2 +} + +define i64 @lsl_zext_i16_i64(i16 %b) { +; CHECK-LABEL: lsl_zext_i16_i64: +; CHECK: ubfiz x0, x0, #1, #16 + %1 = zext i16 %b to i64 + %2 = shl i64 %1, 1 + ret i64 %2 +} From 48359416355cfb78cf57f3ca5475a6b77784b8b5 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 27 Jan 2025 15:12:51 +0000 Subject: [PATCH 2/3] fix --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 9 ++++++++- llvm/test/CodeGen/AArch64/xbfiz.ll | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f209c186c0542..bd9994bcb669c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26372,8 +26372,15 @@ static SDValue performSHLCombine(SDNode *N, SDLoc DL(N); EVT VT = N->getValueType(0); - SDValue X = Op0->getOperand(0); + + // Don't combine unless (shl C1, C2) can be constant folded. Otherwise, + // DAGCombiner will simplify (and (op x...), (op y...)) -> (op (and x, y)) + // causing infinite loop. Result may also be worse. 
SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); + if (!isa<ConstantSDNode>(NewRHS)) + return SDValue(); + + SDValue X = Op0->getOperand(0); SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); } diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll index 05567e3425840..bb71f5f1f7f94 100644 --- a/llvm/test/CodeGen/AArch64/xbfiz.ll +++ b/llvm/test/CodeGen/AArch64/xbfiz.ll @@ -85,3 +85,20 @@ define i64 @lsl_zext_i16_i64(i16 %b) { %2 = shl i64 %1, 1 ret i64 %2 } + +; Regression test for: +; https://github.com/llvm/llvm-project/pull/118974#issuecomment-2598521878 +; that exposed infinite loop in DAGCombiner. +define void @_f(ptr %0, ptr %1, i64 %2) { +; CHECK-LABEL: @_f + store i64 -2401053089408754003, ptr %1, align 8 + %4 = and i64 %2, -2401053089408754003 + %5 = shl i64 %4, 1 + store i64 %5, ptr %0, align 1 + %6 = lshr i64 %4, 54 + %7 = shl i64 %2, 10 + %8 = and i64 %7, 131072 + %9 = or i64 %8, %6 + store i64 %9, ptr %1, align 1 + ret void +} From 3df21b63c7b05de718a6141960a927ecbd118cc0 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 27 Jan 2025 16:11:29 +0000 Subject: [PATCH 3/3] address comments --- llvm/test/CodeGen/AArch64/xbfiz.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll index bb71f5f1f7f94..d3bebf7c6637c 100644 --- a/llvm/test/CodeGen/AArch64/xbfiz.ll +++ b/llvm/test/CodeGen/AArch64/xbfiz.ll @@ -91,6 +91,7 @@ define i64 @lsl_zext_i16_i64(i16 %b) { ; that exposed infinite loop in DAGCombiner. define void @_f(ptr %0, ptr %1, i64 %2) { ; CHECK-LABEL: @_f +; CHECK-NOT: ubfiz store i64 -2401053089408754003, ptr %1, align 8 %4 = and i64 %2, -2401053089408754003 %5 = shl i64 %4, 1