Skip to content

Commit aaa37af

Browse files
[LLVM][CodeGen][SVE] Add lowering for ISD::[ANY,SIGN,ZERO]_EXTEND_VECTOR_INREG. (#169847)
1 parent c12dd59 commit aaa37af

File tree

3 files changed

+176
-1
lines changed

3 files changed

+176
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
15901590
setOperationAction(ISD::AVGCEILS, VT, Custom);
15911591
setOperationAction(ISD::AVGCEILU, VT, Custom);
15921592

1593+
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Custom);
1594+
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1595+
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1596+
15931597
if (!Subtarget->isLittleEndian())
15941598
setOperationAction(ISD::BITCAST, VT, Custom);
15951599

@@ -7858,6 +7862,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
78587862
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
78597863
case ISD::BUILD_VECTOR:
78607864
return LowerBUILD_VECTOR(Op, DAG);
7865+
case ISD::ANY_EXTEND_VECTOR_INREG:
7866+
case ISD::SIGN_EXTEND_VECTOR_INREG:
7867+
return LowerEXTEND_VECTOR_INREG(Op, DAG);
78617868
case ISD::ZERO_EXTEND_VECTOR_INREG:
78627869
return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
78637870
case ISD::VECTOR_SHUFFLE:
@@ -14690,6 +14697,40 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
1469014697
Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
1469114698
}
1469214699

14700+
SDValue
14701+
AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
14702+
SelectionDAG &DAG) const {
14703+
SDLoc DL(Op);
14704+
EVT VT = Op.getValueType();
14705+
assert(VT.isScalableVector() && "Unexpected result type!");
14706+
14707+
bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
14708+
unsigned UnpackOpcode = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
14709+
14710+
// Repeatedly unpack Val until the result is of the desired type.
14711+
SDValue Val = Op.getOperand(0);
14712+
switch (Val.getSimpleValueType().SimpleTy) {
14713+
default:
14714+
return SDValue();
14715+
case MVT::nxv16i8:
14716+
Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv8i16, Val);
14717+
if (VT == MVT::nxv8i16)
14718+
break;
14719+
[[fallthrough]];
14720+
case MVT::nxv8i16:
14721+
Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv4i32, Val);
14722+
if (VT == MVT::nxv4i32)
14723+
break;
14724+
[[fallthrough]];
14725+
case MVT::nxv4i32:
14726+
Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv2i64, Val);
14727+
assert(VT == MVT::nxv2i64 && "Unexpected result type!");
14728+
break;
14729+
}
14730+
14731+
return Val;
14732+
}
14733+
1469314734
// Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
1469414735
// but we don't have an appropriate instruction,
1469514736
// so custom-lower it as ZIP1-with-zeros.
@@ -14698,6 +14739,10 @@ AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
1469814739
SelectionDAG &DAG) const {
1469914740
SDLoc DL(Op);
1470014741
EVT VT = Op.getValueType();
14742+
14743+
if (VT.isScalableVector())
14744+
return LowerEXTEND_VECTOR_INREG(Op, DAG);
14745+
1470114746
SDValue SrcOp = Op.getOperand(0);
1470214747
EVT SrcVT = SrcOp.getValueType();
1470314748
assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
@@ -28876,7 +28921,8 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
2887628921
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
2887728922
return;
2887828923

28879-
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
28924+
unsigned Opcode = (Index == 0) ? (unsigned)ISD::ANY_EXTEND_VECTOR_INREG
28925+
: (unsigned)AArch64ISD::UUNPKHI;
2888028926
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
2888128927

2888228928
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,7 @@ class AArch64TargetLowering : public TargetLowering {
714714
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
715715
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
716716
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
717+
SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
717718
SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
718719
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
719720
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/AArch64/sve-sext-zext.ll

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,3 +456,131 @@ define <vscale x 2 x i64> @zext_i18_i64(<vscale x 2 x i18> %a) {
456456
%r = zext <vscale x 2 x i18> %a to <vscale x 2 x i64>
457457
ret <vscale x 2 x i64> %r
458458
}
459+
460+
define <vscale x 8 x i16> @sext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
461+
; CHECK-LABEL: sext_inreg_i16_from_i8:
462+
; CHECK: // %bb.0:
463+
; CHECK-NEXT: sunpklo z0.h, z0.b
464+
; CHECK-NEXT: ret
465+
%subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
466+
%sext = sext <vscale x 8 x i8> %subvec to <vscale x 8 x i16>
467+
ret <vscale x 8 x i16> %sext
468+
}
469+
470+
define <vscale x 4 x i32> @sext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
471+
; CHECK-LABEL: sext_inreg_i32_from_i8:
472+
; CHECK: // %bb.0:
473+
; CHECK-NEXT: sunpklo z0.h, z0.b
474+
; CHECK-NEXT: sunpklo z0.s, z0.h
475+
; CHECK-NEXT: ret
476+
%subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
477+
%sext = sext <vscale x 4 x i8> %subvec to <vscale x 4 x i32>
478+
ret <vscale x 4 x i32> %sext
479+
}
480+
481+
define <vscale x 4 x i32> @sext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
482+
; CHECK-LABEL: sext_inreg_i32_from_i16:
483+
; CHECK: // %bb.0:
484+
; CHECK-NEXT: sunpklo z0.s, z0.h
485+
; CHECK-NEXT: ret
486+
%subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
487+
%sext = sext <vscale x 4 x i16> %subvec to <vscale x 4 x i32>
488+
ret <vscale x 4 x i32> %sext
489+
}
490+
491+
define <vscale x 2 x i64> @sext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
492+
; CHECK-LABEL: sext_inreg_i64_from_i8:
493+
; CHECK: // %bb.0:
494+
; CHECK-NEXT: sunpklo z0.h, z0.b
495+
; CHECK-NEXT: sunpklo z0.s, z0.h
496+
; CHECK-NEXT: sunpklo z0.d, z0.s
497+
; CHECK-NEXT: ret
498+
%subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
499+
%sext = sext <vscale x 2 x i8> %subvec to <vscale x 2 x i64>
500+
ret <vscale x 2 x i64> %sext
501+
}
502+
503+
define <vscale x 2 x i64> @sext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
504+
; CHECK-LABEL: sext_inreg_i64_from_i16:
505+
; CHECK: // %bb.0:
506+
; CHECK-NEXT: sunpklo z0.s, z0.h
507+
; CHECK-NEXT: sunpklo z0.d, z0.s
508+
; CHECK-NEXT: ret
509+
%subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
510+
%sext = sext <vscale x 2 x i16> %subvec to <vscale x 2 x i64>
511+
ret <vscale x 2 x i64> %sext
512+
}
513+
514+
define <vscale x 2 x i64> @sext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
515+
; CHECK-LABEL: sext_inreg_i64_from_i32:
516+
; CHECK: // %bb.0:
517+
; CHECK-NEXT: sunpklo z0.d, z0.s
518+
; CHECK-NEXT: ret
519+
%subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0)
520+
%sext = sext <vscale x 2 x i32> %subvec to <vscale x 2 x i64>
521+
ret <vscale x 2 x i64> %sext
522+
}
523+
524+
define <vscale x 8 x i16> @zext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
525+
; CHECK-LABEL: zext_inreg_i16_from_i8:
526+
; CHECK: // %bb.0:
527+
; CHECK-NEXT: uunpklo z0.h, z0.b
528+
; CHECK-NEXT: ret
529+
%subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
530+
%zext = zext <vscale x 8 x i8> %subvec to <vscale x 8 x i16>
531+
ret <vscale x 8 x i16> %zext
532+
}
533+
534+
define <vscale x 4 x i32> @zext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
535+
; CHECK-LABEL: zext_inreg_i32_from_i8:
536+
; CHECK: // %bb.0:
537+
; CHECK-NEXT: uunpklo z0.h, z0.b
538+
; CHECK-NEXT: uunpklo z0.s, z0.h
539+
; CHECK-NEXT: ret
540+
%subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
541+
%zext = zext <vscale x 4 x i8> %subvec to <vscale x 4 x i32>
542+
ret <vscale x 4 x i32> %zext
543+
}
544+
545+
define <vscale x 4 x i32> @zext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
546+
; CHECK-LABEL: zext_inreg_i32_from_i16:
547+
; CHECK: // %bb.0:
548+
; CHECK-NEXT: uunpklo z0.s, z0.h
549+
; CHECK-NEXT: ret
550+
%subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
551+
%zext = zext <vscale x 4 x i16> %subvec to <vscale x 4 x i32>
552+
ret <vscale x 4 x i32> %zext
553+
}
554+
555+
define <vscale x 2 x i64> @zext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
556+
; CHECK-LABEL: zext_inreg_i64_from_i8:
557+
; CHECK: // %bb.0:
558+
; CHECK-NEXT: uunpklo z0.h, z0.b
559+
; CHECK-NEXT: uunpklo z0.s, z0.h
560+
; CHECK-NEXT: uunpklo z0.d, z0.s
561+
; CHECK-NEXT: ret
562+
%subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
563+
%zext = zext <vscale x 2 x i8> %subvec to <vscale x 2 x i64>
564+
ret <vscale x 2 x i64> %zext
565+
}
566+
567+
define <vscale x 2 x i64> @zext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
568+
; CHECK-LABEL: zext_inreg_i64_from_i16:
569+
; CHECK: // %bb.0:
570+
; CHECK-NEXT: uunpklo z0.s, z0.h
571+
; CHECK-NEXT: uunpklo z0.d, z0.s
572+
; CHECK-NEXT: ret
573+
%subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
574+
%zext = zext <vscale x 2 x i16> %subvec to <vscale x 2 x i64>
575+
ret <vscale x 2 x i64> %zext
576+
}
577+
578+
define <vscale x 2 x i64> @zext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
579+
; CHECK-LABEL: zext_inreg_i64_from_i32:
580+
; CHECK: // %bb.0:
581+
; CHECK-NEXT: uunpklo z0.d, z0.s
582+
; CHECK-NEXT: ret
583+
%subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0)
584+
%zext = zext <vscale x 2 x i32> %subvec to <vscale x 2 x i64>
585+
ret <vscale x 2 x i64> %zext
586+
}

0 commit comments

Comments (0)