Skip to content

Commit a6cb0ce

Browse files
committed
Remove splitting from lowering
1 parent 5e9e637 commit a6cb0ce

File tree

2 files changed: 53 additions and 98 deletions.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 26 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -5248,94 +5248,49 @@ AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
52485248
SelectionDAG &DAG) const {
52495249
SDLoc DL(Op);
52505250
uint64_t EltSize = Op.getConstantOperandVal(2);
5251-
EVT FullVT = Op.getValueType();
5252-
unsigned NumElements = FullVT.getVectorMinNumElements();
5253-
unsigned NumSplits = 0;
5254-
EVT EltVT;
5251+
EVT VT = Op.getValueType();
52555252
switch (EltSize) {
52565253
case 1:
5257-
EltVT = MVT::i8;
5254+
if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
5255+
return SDValue();
52585256
break;
52595257
case 2:
5260-
if (NumElements >= 16)
5261-
NumSplits = NumElements / 16;
5262-
EltVT = MVT::i16;
5258+
if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
5259+
return SDValue();
52635260
break;
52645261
case 4:
5265-
if (NumElements >= 8)
5266-
NumSplits = NumElements / 8;
5267-
EltVT = MVT::i32;
5262+
if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
5263+
return SDValue();
52685264
break;
52695265
case 8:
5270-
if (NumElements >= 4)
5271-
NumSplits = NumElements / 4;
5272-
EltVT = MVT::i64;
5266+
if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
5267+
return SDValue();
52735268
break;
52745269
default:
52755270
// Other element sizes are incompatible with whilewr/whilerw, so expand instead
52765271
return SDValue();
52775272
}
52785273

5279-
auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
5280-
SDValue PtrA = Op.getOperand(0);
5281-
SDValue PtrB = Op.getOperand(1);
5274+
SDValue PtrA = Op.getOperand(0);
5275+
SDValue PtrB = Op.getOperand(1);
52825276

5283-
EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
5284-
VT.getVectorMinNumElements(), false);
5285-
if (AddrScale > 0) {
5286-
unsigned Offset = StoreVT.getStoreSizeInBits() / 8 * AddrScale;
5287-
SDValue Addend;
5288-
5289-
if (VT.isScalableVT())
5290-
Addend = DAG.getVScale(DL, MVT::i64, APInt(64, Offset));
5291-
else
5292-
Addend = DAG.getConstant(Offset, DL, MVT::i64);
5293-
5294-
PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
5295-
PtrB = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrB, Addend);
5296-
}
5277+
if (VT.isScalableVT())
5278+
return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
52975279

5298-
if (VT.isScalableVT())
5299-
return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
5300-
5301-
// We can use the SVE whilewr/whilerw instruction to lower this
5302-
// intrinsic by creating the appropriate sequence of scalable vector
5303-
// operations and then extracting a fixed-width subvector from the scalable
5304-
// vector. Scalable vector variants are already legal.
5305-
EVT ContainerVT =
5306-
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
5307-
VT.getVectorNumElements(), true);
5308-
EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
5309-
5310-
SDValue Mask =
5311-
DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
5312-
SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
5313-
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
5314-
DAG.getVectorIdxConstant(0, DL));
5315-
};
5280+
// We can use the SVE whilewr/whilerw instruction to lower this
5281+
// intrinsic by creating the appropriate sequence of scalable vector
5282+
// operations and then extracting a fixed-width subvector from the scalable
5283+
// vector. Scalable vector variants are already legal.
5284+
EVT ContainerVT =
5285+
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
5286+
VT.getVectorNumElements(), true);
5287+
EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
53165288

5317-
if (NumSplits == 0)
5318-
return LowerToWhile(FullVT, 0);
5319-
5320-
SDValue FullVec = DAG.getUNDEF(FullVT);
5321-
5322-
unsigned NumElementsPerSplit = NumElements / (2 * NumSplits);
5323-
EVT PartVT =
5324-
EVT::getVectorVT(*DAG.getContext(), FullVT.getVectorElementType(),
5325-
NumElementsPerSplit, FullVT.isScalableVT());
5326-
for (unsigned Split = 0, InsertIdx = 0; Split < NumSplits;
5327-
Split++, InsertIdx += 2) {
5328-
SDValue Low = LowerToWhile(PartVT, InsertIdx);
5329-
SDValue High = LowerToWhile(PartVT, InsertIdx + 1);
5330-
unsigned InsertIdxLow = InsertIdx * NumElementsPerSplit;
5331-
unsigned InsertIdxHigh = (InsertIdx + 1) * NumElementsPerSplit;
5332-
SDValue Insert =
5333-
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FullVT, FullVec, Low,
5334-
DAG.getVectorIdxConstant(InsertIdxLow, DL));
5335-
FullVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FullVT, Insert, High,
5336-
DAG.getVectorIdxConstant(InsertIdxHigh, DL));
5337-
}
5338-
return FullVec;
5289+
SDValue Mask =
5290+
DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
5291+
SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
5292+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
5293+
DAG.getVectorIdxConstant(0, DL));
53395294
}
53405295

53415296
SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,

llvm/test/CodeGen/AArch64/alias_mask_scalable.ll

Lines changed: 27 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -588,8 +588,8 @@ entry:
588588
ret <vscale x 64 x i1> %0
589589
}
590590

591-
define <vscale x 16 x i1> @whilewr_16_split(ptr %a, ptr %b) {
592-
; CHECK-SVE2-LABEL: whilewr_16_split:
591+
define <vscale x 16 x i1> @whilewr_16_expand(ptr %a, ptr %b) {
592+
; CHECK-SVE2-LABEL: whilewr_16_expand:
593593
; CHECK-SVE2: // %bb.0: // %entry
594594
; CHECK-SVE2-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
595595
; CHECK-SVE2-NEXT: addvl sp, sp, #-1
@@ -645,7 +645,7 @@ define <vscale x 16 x i1> @whilewr_16_split(ptr %a, ptr %b) {
645645
; CHECK-SVE2-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
646646
; CHECK-SVE2-NEXT: ret
647647
;
648-
; CHECK-SVE-LABEL: whilewr_16_split:
648+
; CHECK-SVE-LABEL: whilewr_16_expand:
649649
; CHECK-SVE: // %bb.0: // %entry
650650
; CHECK-SVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
651651
; CHECK-SVE-NEXT: addvl sp, sp, #-1
@@ -705,8 +705,8 @@ entry:
705705
ret <vscale x 16 x i1> %0
706706
}
707707

708-
define <vscale x 32 x i1> @whilewr_16_split2(ptr %a, ptr %b) {
709-
; CHECK-SVE2-LABEL: whilewr_16_split2:
708+
define <vscale x 32 x i1> @whilewr_16_expand2(ptr %a, ptr %b) {
709+
; CHECK-SVE2-LABEL: whilewr_16_expand2:
710710
; CHECK-SVE2: // %bb.0: // %entry
711711
; CHECK-SVE2-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
712712
; CHECK-SVE2-NEXT: addvl sp, sp, #-1
@@ -794,7 +794,7 @@ define <vscale x 32 x i1> @whilewr_16_split2(ptr %a, ptr %b) {
794794
; CHECK-SVE2-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
795795
; CHECK-SVE2-NEXT: ret
796796
;
797-
; CHECK-SVE-LABEL: whilewr_16_split2:
797+
; CHECK-SVE-LABEL: whilewr_16_expand2:
798798
; CHECK-SVE: // %bb.0: // %entry
799799
; CHECK-SVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
800800
; CHECK-SVE-NEXT: addvl sp, sp, #-1
@@ -887,8 +887,8 @@ entry:
887887
ret <vscale x 32 x i1> %0
888888
}
889889

890-
define <vscale x 8 x i1> @whilewr_32_split(ptr %a, ptr %b) {
891-
; CHECK-SVE2-LABEL: whilewr_32_split:
890+
define <vscale x 8 x i1> @whilewr_32_expand(ptr %a, ptr %b) {
891+
; CHECK-SVE2-LABEL: whilewr_32_expand:
892892
; CHECK-SVE2: // %bb.0: // %entry
893893
; CHECK-SVE2-NEXT: index z0.d, #0, #1
894894
; CHECK-SVE2-NEXT: sub x8, x1, x0
@@ -918,7 +918,7 @@ define <vscale x 8 x i1> @whilewr_32_split(ptr %a, ptr %b) {
918918
; CHECK-SVE2-NEXT: sel p0.b, p0, p0.b, p1.b
919919
; CHECK-SVE2-NEXT: ret
920920
;
921-
; CHECK-SVE-LABEL: whilewr_32_split:
921+
; CHECK-SVE-LABEL: whilewr_32_expand:
922922
; CHECK-SVE: // %bb.0: // %entry
923923
; CHECK-SVE-NEXT: index z0.d, #0, #1
924924
; CHECK-SVE-NEXT: sub x8, x1, x0
@@ -952,8 +952,8 @@ entry:
952952
ret <vscale x 8 x i1> %0
953953
}
954954

955-
define <vscale x 16 x i1> @whilewr_32_split2(ptr %a, ptr %b) {
956-
; CHECK-SVE2-LABEL: whilewr_32_split2:
955+
define <vscale x 16 x i1> @whilewr_32_expand2(ptr %a, ptr %b) {
956+
; CHECK-SVE2-LABEL: whilewr_32_expand2:
957957
; CHECK-SVE2: // %bb.0: // %entry
958958
; CHECK-SVE2-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
959959
; CHECK-SVE2-NEXT: addvl sp, sp, #-1
@@ -1011,7 +1011,7 @@ define <vscale x 16 x i1> @whilewr_32_split2(ptr %a, ptr %b) {
10111011
; CHECK-SVE2-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
10121012
; CHECK-SVE2-NEXT: ret
10131013
;
1014-
; CHECK-SVE-LABEL: whilewr_32_split2:
1014+
; CHECK-SVE-LABEL: whilewr_32_expand2:
10151015
; CHECK-SVE: // %bb.0: // %entry
10161016
; CHECK-SVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
10171017
; CHECK-SVE-NEXT: addvl sp, sp, #-1
@@ -1073,8 +1073,8 @@ entry:
10731073
ret <vscale x 16 x i1> %0
10741074
}
10751075

1076-
define <vscale x 32 x i1> @whilewr_32_split3(ptr %a, ptr %b) {
1077-
; CHECK-SVE2-LABEL: whilewr_32_split3:
1076+
define <vscale x 32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
1077+
; CHECK-SVE2-LABEL: whilewr_32_expand3:
10781078
; CHECK-SVE2: // %bb.0: // %entry
10791079
; CHECK-SVE2-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
10801080
; CHECK-SVE2-NEXT: addvl sp, sp, #-1
@@ -1168,7 +1168,7 @@ define <vscale x 32 x i1> @whilewr_32_split3(ptr %a, ptr %b) {
11681168
; CHECK-SVE2-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
11691169
; CHECK-SVE2-NEXT: ret
11701170
;
1171-
; CHECK-SVE-LABEL: whilewr_32_split3:
1171+
; CHECK-SVE-LABEL: whilewr_32_expand3:
11721172
; CHECK-SVE: // %bb.0: // %entry
11731173
; CHECK-SVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
11741174
; CHECK-SVE-NEXT: addvl sp, sp, #-1
@@ -1267,8 +1267,8 @@ entry:
12671267
ret <vscale x 32 x i1> %0
12681268
}
12691269

1270-
define <vscale x 4 x i1> @whilewr_64_split(ptr %a, ptr %b) {
1271-
; CHECK-SVE2-LABEL: whilewr_64_split:
1270+
define <vscale x 4 x i1> @whilewr_64_expand(ptr %a, ptr %b) {
1271+
; CHECK-SVE2-LABEL: whilewr_64_expand:
12721272
; CHECK-SVE2: // %bb.0: // %entry
12731273
; CHECK-SVE2-NEXT: index z0.d, #0, #1
12741274
; CHECK-SVE2-NEXT: sub x8, x1, x0
@@ -1290,7 +1290,7 @@ define <vscale x 4 x i1> @whilewr_64_split(ptr %a, ptr %b) {
12901290
; CHECK-SVE2-NEXT: sel p0.b, p0, p0.b, p1.b
12911291
; CHECK-SVE2-NEXT: ret
12921292
;
1293-
; CHECK-SVE-LABEL: whilewr_64_split:
1293+
; CHECK-SVE-LABEL: whilewr_64_expand:
12941294
; CHECK-SVE: // %bb.0: // %entry
12951295
; CHECK-SVE-NEXT: index z0.d, #0, #1
12961296
; CHECK-SVE-NEXT: sub x8, x1, x0
@@ -1316,8 +1316,8 @@ entry:
13161316
ret <vscale x 4 x i1> %0
13171317
}
13181318

1319-
define <vscale x 8 x i1> @whilewr_64_split2(ptr %a, ptr %b) {
1320-
; CHECK-SVE2-LABEL: whilewr_64_split2:
1319+
define <vscale x 8 x i1> @whilewr_64_expand2(ptr %a, ptr %b) {
1320+
; CHECK-SVE2-LABEL: whilewr_64_expand2:
13211321
; CHECK-SVE2: // %bb.0: // %entry
13221322
; CHECK-SVE2-NEXT: index z0.d, #0, #1
13231323
; CHECK-SVE2-NEXT: sub x8, x1, x0
@@ -1347,7 +1347,7 @@ define <vscale x 8 x i1> @whilewr_64_split2(ptr %a, ptr %b) {
13471347
; CHECK-SVE2-NEXT: sel p0.b, p0, p0.b, p1.b
13481348
; CHECK-SVE2-NEXT: ret
13491349
;
1350-
; CHECK-SVE-LABEL: whilewr_64_split2:
1350+
; CHECK-SVE-LABEL: whilewr_64_expand2:
13511351
; CHECK-SVE: // %bb.0: // %entry
13521352
; CHECK-SVE-NEXT: index z0.d, #0, #1
13531353
; CHECK-SVE-NEXT: sub x8, x1, x0
@@ -1381,8 +1381,8 @@ entry:
13811381
ret <vscale x 8 x i1> %0
13821382
}
13831383

1384-
define <vscale x 16 x i1> @whilewr_64_split3(ptr %a, ptr %b) {
1385-
; CHECK-SVE2-LABEL: whilewr_64_split3:
1384+
define <vscale x 16 x i1> @whilewr_64_expand3(ptr %a, ptr %b) {
1385+
; CHECK-SVE2-LABEL: whilewr_64_expand3:
13861386
; CHECK-SVE2: // %bb.0: // %entry
13871387
; CHECK-SVE2-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
13881388
; CHECK-SVE2-NEXT: addvl sp, sp, #-1
@@ -1440,7 +1440,7 @@ define <vscale x 16 x i1> @whilewr_64_split3(ptr %a, ptr %b) {
14401440
; CHECK-SVE2-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
14411441
; CHECK-SVE2-NEXT: ret
14421442
;
1443-
; CHECK-SVE-LABEL: whilewr_64_split3:
1443+
; CHECK-SVE-LABEL: whilewr_64_expand3:
14441444
; CHECK-SVE: // %bb.0: // %entry
14451445
; CHECK-SVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
14461446
; CHECK-SVE-NEXT: addvl sp, sp, #-1
@@ -1502,8 +1502,8 @@ entry:
15021502
ret <vscale x 16 x i1> %0
15031503
}
15041504

1505-
define <vscale x 32 x i1> @whilewr_64_split4(ptr %a, ptr %b) {
1506-
; CHECK-SVE2-LABEL: whilewr_64_split4:
1505+
define <vscale x 32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
1506+
; CHECK-SVE2-LABEL: whilewr_64_expand4:
15071507
; CHECK-SVE2: // %bb.0: // %entry
15081508
; CHECK-SVE2-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
15091509
; CHECK-SVE2-NEXT: addvl sp, sp, #-1
@@ -1597,7 +1597,7 @@ define <vscale x 32 x i1> @whilewr_64_split4(ptr %a, ptr %b) {
15971597
; CHECK-SVE2-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
15981598
; CHECK-SVE2-NEXT: ret
15991599
;
1600-
; CHECK-SVE-LABEL: whilewr_64_split4:
1600+
; CHECK-SVE-LABEL: whilewr_64_expand4:
16011601
; CHECK-SVE: // %bb.0: // %entry
16021602
; CHECK-SVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
16031603
; CHECK-SVE-NEXT: addvl sp, sp, #-1

0 commit comments

Comments
 (0)