Skip to content

Commit 9032a45

Browse files
committed
Revert "[AArch64][Codegen] Improve small shufflevector/concat lowering for SME"
This reverts commit 2947c37.
1 parent 66598fe commit 9032a45

File tree

3 files changed: 34 additions and 67 deletions

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -24722,49 +24722,6 @@ static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
2472224722
Op0ExtV, Op1ExtV, Op->getOperand(2));
2472324723
}
2472424724

24725-
static SDValue skipElementSizePreservingCast(SDValue Op, EVT VT) {
24726-
if (Op->getOpcode() == ISD::BITCAST)
24727-
Op = Op->getOperand(0);
24728-
EVT OpVT = Op.getValueType();
24729-
if (OpVT.isVector() && OpVT.getVectorElementType().getSizeInBits() ==
24730-
VT.getVectorElementType().getSizeInBits())
24731-
return Op;
24732-
return SDValue();
24733-
}
24734-
24735-
static SDValue performZIP1Combine(SDNode *N, SelectionDAG &DAG) {
24736-
SDLoc DL(N);
24737-
EVT VT = N->getValueType(0);
24738-
24739-
// zip1(insert_vector_elt(undef, extract_vector_elt(vec, 0), 0),
24740-
// insert_vector_elt(undef, extract_vector_elt(vec, 1), 0))
24741-
// -> vec
24742-
SDValue Op0 = skipElementSizePreservingCast(N->getOperand(0), VT);
24743-
SDValue Op1 = skipElementSizePreservingCast(N->getOperand(1), VT);
24744-
if (Op0 && Op1 && Op0->getOpcode() == ISD::INSERT_VECTOR_ELT &&
24745-
Op1->getOpcode() == ISD::INSERT_VECTOR_ELT) {
24746-
SDValue Op00 = Op0->getOperand(0);
24747-
SDValue Op10 = Op1->getOperand(0);
24748-
if (Op00.isUndef() && Op10.isUndef() &&
24749-
Op0->getConstantOperandVal(2) == 0 &&
24750-
Op1->getConstantOperandVal(2) == 0) {
24751-
SDValue Op01 = Op0->getOperand(1);
24752-
SDValue Op11 = Op1->getOperand(1);
24753-
if (Op01->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24754-
Op11->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24755-
Op01->getConstantOperandVal(1) == 0 &&
24756-
Op11->getConstantOperandVal(1) == 1) {
24757-
SDValue Op010 = skipElementSizePreservingCast(Op01->getOperand(0), VT);
24758-
SDValue Op110 = skipElementSizePreservingCast(Op11->getOperand(0), VT);
24759-
if (Op010 && Op010 == Op110)
24760-
return DAG.getBitcast(VT, Op010);
24761-
}
24762-
}
24763-
}
24764-
24765-
return SDValue();
24766-
}
24767-
2476824725
static SDValue
2476924726
performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
2477024727
SelectionDAG &DAG) {
@@ -26206,8 +26163,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2620626163

2620726164
break;
2620826165
}
26209-
case AArch64ISD::ZIP1:
26210-
return performZIP1Combine(N, DAG);
2621126166
case ISD::XOR:
2621226167
return performXorCombine(N, DAG, DCI, Subtarget);
2621326168
case ISD::MUL:
@@ -29077,14 +29032,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
2907729032
if (!IsSingleOp && !Subtarget.hasSVE2())
2907829033
return SDValue();
2907929034

29080-
// Small vectors (with few extracts) can be lowered more efficiently as a
29081-
// sequence of ZIPs.
2908229035
EVT VTOp1 = Op.getOperand(0).getValueType();
29083-
unsigned NumElts = VT.getVectorNumElements();
29084-
if (VT.isPow2VectorType() && VT.getFixedSizeInBits() <= 128 &&
29085-
(NumElts <= 2 || (NumElts <= 4 && !Op2.isUndef())))
29086-
return SDValue();
29087-
2908829036
unsigned BitsPerElt = VTOp1.getVectorElementType().getSizeInBits();
2908929037
unsigned IndexLen = MinSVESize / BitsPerElt;
2909029038
unsigned ElementsPerVectorReg = VTOp1.getVectorNumElements();

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK
3-
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
2+
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
3+
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME
44
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
55

66
target triple = "aarch64-unknown-linux-gnu"
@@ -406,13 +406,33 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
406406
;
407407

408408
define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
409-
; CHECK-LABEL: concat_v4f16:
410-
; CHECK: // %bb.0:
411-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
412-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
413-
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
414-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
415-
; CHECK-NEXT: ret
409+
; SVE2-LABEL: concat_v4f16:
410+
; SVE2: // %bb.0:
411+
; SVE2-NEXT: cnth x8
412+
; SVE2-NEXT: adrp x9, .LCPI15_0
413+
; SVE2-NEXT: adrp x10, .LCPI15_1
414+
; SVE2-NEXT: mov z2.h, w8
415+
; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0]
416+
; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1]
417+
; SVE2-NEXT: ptrue p0.h, vl8
418+
; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
419+
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
420+
; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h
421+
; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h
422+
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
423+
; SVE2-NEXT: ret
424+
;
425+
; SME-LABEL: concat_v4f16:
426+
; SME: // %bb.0:
427+
; SME-NEXT: // kill: def $d1 killed $d1 def $z1
428+
; SME-NEXT: // kill: def $d0 killed $d0 def $z0
429+
; SME-NEXT: mov z2.h, z1.h[1]
430+
; SME-NEXT: mov z3.h, z0.h[1]
431+
; SME-NEXT: zip1 z1.h, z1.h, z2.h
432+
; SME-NEXT: zip1 z0.h, z0.h, z3.h
433+
; SME-NEXT: zip1 z0.s, z0.s, z1.s
434+
; SME-NEXT: // kill: def $d0 killed $d0 killed $z0
435+
; SME-NEXT: ret
416436
;
417437
; NONEON-NOSVE-LABEL: concat_v4f16:
418438
; NONEON-NOSVE: // %bb.0:

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -643,12 +643,11 @@ define void @test_revhv32i16(ptr %a) {
643643
define void @test_rev_elts_fail(ptr %a) {
644644
; CHECK-LABEL: test_rev_elts_fail:
645645
; CHECK: // %bb.0:
646-
; CHECK-NEXT: ldp q0, q1, [x0]
647-
; CHECK-NEXT: mov z2.d, z0.d[1]
648-
; CHECK-NEXT: mov z3.d, z1.d[1]
649-
; CHECK-NEXT: zip1 z0.d, z2.d, z0.d
650-
; CHECK-NEXT: zip1 z1.d, z3.d, z1.d
651-
; CHECK-NEXT: stp q0, q1, [x0]
646+
; CHECK-NEXT: index z0.d, #1, #-1
647+
; CHECK-NEXT: ldp q1, q2, [x0]
648+
; CHECK-NEXT: tbl z1.d, { z1.d }, z0.d
649+
; CHECK-NEXT: tbl z0.d, { z2.d }, z0.d
650+
; CHECK-NEXT: stp q1, q0, [x0]
652651
; CHECK-NEXT: ret
653652
;
654653
; NONEON-NOSVE-LABEL: test_rev_elts_fail:

0 commit comments

Comments
 (0)