@@ -1391,6 +1391,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
 
+    if (Subtarget->forceStreamingCompatibleSVE()) {
+      for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+                     MVT::v4i32, MVT::v2i64})
+        addTypeForStreamingSVE(VT);
+
+      for (MVT VT :
+           {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
+        addTypeForStreamingSVE(VT);
+    }
+
     // NOTE: Currently this has to happen after computeRegisterProperties rather
     // than the preferred option of combining it with the addRegisterClass call.
     if (Subtarget->useSVEForFixedLengthVectors()) {
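Note: the loops above register the common 64- and 128-bit NEON types for streaming-compatible lowering, keyed on the Subtarget->forceStreamingCompatibleSVE() query that the rest of this patch tests. The hook itself is not part of this diff; as a rough sketch only (the option name and details below are assumptions, not taken from the commit), it amounts to a backend override that requests SVE-based codegen for fixed-length vectors even though NEON would normally handle them:

// Sketch, not from this diff: an approximation of the subtarget hook used
// throughout this patch. ForceStreamingCompatibleSVE stands in for whatever
// backend option drives the override; the real implementation may differ.
bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
  // Only meaningful when SVE is available to replace the NEON lowering.
  return ForceStreamingCompatibleSVE && hasSVE();
}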
@@ -1597,6 +1607,14 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
   return false;
 }
 
+void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
+  setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+  setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+  setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+  setOperationAction(ISD::AND, VT, Custom);
+}
+
 void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
 
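Note: each type passed to addTypeForStreamingSVE() gets these operations marked Custom, so the generic legalizer routes them through LowerOperation instead of selecting AdvSIMD instructions, which are not legal in streaming SVE mode. A sketch of the dispatch this relies on, assuming the extends end up in the existing LowerFixedLengthVectorIntExtendToSVE helper (the exact LowerOperation cases are not shown in this excerpt):

  // Sketch, not part of this diff: how a Custom-marked extend would typically
  // be dispatched inside AArch64TargetLowering::LowerOperation.
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    if (useSVEForFixedLengthVectorVT(Op.getValueType(),
                                     Subtarget->forceStreamingCompatibleSVE()))
      // Widen into an SVE container and perform the extend there.
      return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
    break;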
@@ -5773,8 +5791,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::MLOAD:
     return LowerMLOAD(Op, DAG);
   case ISD::LOAD:
-    if (useSVEForFixedLengthVectorVT(Op.getValueType(),
-                                     Subtarget->forceStreamingCompatibleSVE()))
+    if (useSVEForFixedLengthVectorVT(Op.getValueType()))
       return LowerFixedLengthVectorLoadToSVE(Op, DAG);
     return LowerLOAD(Op, DAG);
   case ISD::ADD:
@@ -11400,9 +11417,13 @@ static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits,
                                   const SDValue *LHS = nullptr) {
+  EVT VT = Op.getValueType();
+  if (VT.isFixedLengthVector() &&
+      DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+    return SDValue();
+
   if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
     uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
     MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
     bool isAdvSIMDModImm = false;
     uint64_t Shift;
@@ -11448,9 +11469,13 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
 static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits,
                                   const SDValue *LHS = nullptr) {
+  EVT VT = Op.getValueType();
+  if (VT.isFixedLengthVector() &&
+      DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+    return SDValue();
+
   if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
     uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
     MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
     bool isAdvSIMDModImm = false;
     uint64_t Shift;
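Note: tryAdvSIMDModImm32 and tryAdvSIMDModImm16 (and presumably their sibling helpers) get the same early return because the MOVI/MVNI modified-immediate forms they produce are AdvSIMD instructions, which cannot be used when streaming-compatible code is required, so fixed-length constants must be materialized another way. Purely as an illustration (this refactoring is not in the diff), the repeated guard could be written once:

// Illustrative only: a single predicate capturing the bail-out duplicated at
// the top of each tryAdvSIMDModImm* helper.
static bool shouldAvoidAdvSIMDModImm(EVT VT, SelectionDAG &DAG) {
  // AdvSIMD MOVI/MVNI are invalid in streaming SVE mode, so refuse them for
  // fixed-length vectors when streaming-compatible codegen is forced.
  return VT.isFixedLengthVector() &&
         DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE();
}

Each helper would then simply begin with: if (shouldAvoidAdvSIMDModImm(Op.getValueType(), DAG)) return SDValue();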
@@ -12128,7 +12153,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
 SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                    SelectionDAG &DAG) const {
-  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+  if (useSVEForFixedLengthVectorVT(Op.getValueType(),
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
 
   assert(Op.getValueType().isScalableVector() &&
@@ -12234,7 +12260,8 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
   }
 
-  if (useSVEForFixedLengthVectorVT(VT))
+  if (useSVEForFixedLengthVectorVT(VT,
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return LowerFixedLengthExtractVectorElt(Op, DAG);
 
   // Check for non-constant or out of range lane.
@@ -12296,10 +12323,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
   // If this is extracting the upper 64-bits of a 128-bit vector, we match
   // that directly.
   if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
-      InVT.getSizeInBits() == 128)
+      InVT.getSizeInBits() == 128 && !Subtarget->forceStreamingCompatibleSVE())
     return Op;
 
-  if (useSVEForFixedLengthVectorVT(InVT)) {
+  if (useSVEForFixedLengthVectorVT(InVT,
+                                   Subtarget->forceStreamingCompatibleSVE())) {
     SDLoc DL(Op);
 
     EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
@@ -12487,7 +12515,8 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
 
 bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
   // Currently no fixed length shuffles that require SVE are legal.
-  if (useSVEForFixedLengthVectorVT(VT))
+  if (useSVEForFixedLengthVectorVT(VT,
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return false;
 
   if (VT.getVectorNumElements() == 4 &&
@@ -12597,7 +12626,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
 
   switch (Op.getOpcode()) {
   case ISD::SHL:
-    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
+    if (VT.isScalableVector() ||
+        useSVEForFixedLengthVectorVT(VT,
+                                     Subtarget->forceStreamingCompatibleSVE()))
       return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
 
     if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
@@ -12609,7 +12640,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
                        Op.getOperand(0), Op.getOperand(1));
   case ISD::SRA:
   case ISD::SRL:
-    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
+    if (VT.isScalableVector() ||
+        useSVEForFixedLengthVectorVT(
+            VT, Subtarget->forceStreamingCompatibleSVE())) {
       unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
                                                 : AArch64ISD::SRL_PRED;
       return LowerToPredicatedOp(Op, DAG, Opc);
@@ -14008,6 +14041,11 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
 bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                   ShuffleVectorInst *SVI,
                                                   unsigned Factor) const {
+  // Skip if streaming compatible SVE is enabled, because it generates invalid
+  // code in streaming mode when SVE length is not specified.
+  if (Subtarget->forceStreamingCompatibleSVE())
+    return false;
+
   assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
          "Invalid interleave factor");
 
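Note: lowerInterleavedStore() is called from the InterleavedAccess pass to turn a shufflevector-plus-store into NEON st2/st3/st4 intrinsics; returning false leaves the IR untouched so the regular, SVE-friendly lowering handles it, since those NEON stores are not usable in streaming mode. A hypothetical counterpart for the load path, not shown in this diff (whether it received the same treatment is not visible here):

// Hypothetical sketch only: lowerInterleavedLoad emits NEON ld2/ld3/ld4
// intrinsics in the same way, so it would need the same guard to stay
// streaming compatible.
bool AArch64TargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  if (Subtarget->forceStreamingCompatibleSVE())
    return false;
  // ... existing NEON ldN-based lowering ...
}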
@@ -22489,7 +22527,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
 SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  assert(useSVEForFixedLengthVectorVT(VT) &&
+  assert(VT.isFixedLengthVector() && isTypeLegal(VT) &&
          "Only expected to lower fixed length vector operation!");
   EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
 
@@ -22505,7 +22543,8 @@ SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
     }
 
     // "cast" fixed length vector to a scalable vector.
-    assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
+    assert(V.getValueType().isFixedLengthVector() &&
+           isTypeLegal(V.getValueType()) &&
            "Only fixed length vectors are supported!");
     Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
   }
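Note: the relaxed asserts reflect that, with addTypeForStreamingSVE() in play, LowerToScalableOp() can now be reached for types that are legal fixed-length vectors without useSVEForFixedLengthVectorVT() necessarily holding, so "legal fixed-length vector" is the condition that actually matters. The "cast" mentioned in the comment is the usual fixed-to-scalable bridge; a sketch of it, assuming the existing convertToScalableVector helper keeps its current shape:

// Sketch of the conversion used above: place the fixed-length value in the
// low lanes of an undef SVE container of the matching element type.
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT ContainerVT,
                                       SDValue V) {
  SDLoc DL(V);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                     DAG.getUNDEF(ContainerVT), V,
                     DAG.getVectorIdxConstant(0, DL));
}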