48 changes: 47 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1591,6 +1591,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::AVGCEILS, VT, Custom);
setOperationAction(ISD::AVGCEILU, VT, Custom);

setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Custom);

Contributor:
Do you know if the VT refers to src or dest VT here? If it's the src, does that mean we also need to test extends from something like nxv2i64 -> nxv2i128?

Collaborator (Author):
For ###_EXTEND_VECTOR_INREG the result type is used. The operations require the source and result types to be the same size, so it would be nxv2i64 -> nxv1i128, which is not a legal type, so the current lowering code wouldn't apply.

I'm pretty sure that means this case will fail, but that's no worse than today.
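
For reference, a hypothetical IR test for the case discussed above might look like the following sketch (illustrative only, not part of this patch; the function name is made up). Because the nxv1i128 result type is not legal on AArch64, the custom lowering added here would not apply to it.

define <vscale x 1 x i128> @sext_inreg_i128_from_i64(<vscale x 2 x i64> %a) {
  ; Hypothetical example: the in-register extend of the i64 elements needs an
  ; nxv1i128 result, which is not a legal AArch64 type, so the new custom
  ; lowering does not apply. Per the discussion above, this case likely still
  ; fails, but no worse than before.
  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(<vscale x 2 x i64> %a, i64 0)
  %sext = sext <vscale x 1 x i64> %subvec to <vscale x 1 x i128>
  ret <vscale x 1 x i128> %sext
}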

setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

if (!Subtarget->isLittleEndian())
setOperationAction(ISD::BITCAST, VT, Custom);

@@ -7851,6 +7855,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::VECTOR_SHUFFLE:
@@ -14688,6 +14695,40 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
}

SDValue
AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,

Contributor:
nit: It's a minor thing, but we already have a LowerZERO_EXTEND_VECTOR_INREG, so at first it confused me a little why this was named as if it handles all extends. Then I realised it does handle all extends, but only for scalable vectors. I realise it's a bit verbose, but is it worth making it clear this is for scalable vectors only? For example, something like LowerSCALABLE_EXTEND_VECTOR_INREG?

Collaborator (Author):
Not sure, because I'm likely to extend this function for SVE VLS as well. Also, looking at the current implementation of LowerZERO_EXTEND_VECTOR_INREG, I think this is another of those cases where we can do better for NEON vectors when SVE is available. What do you think?

Contributor:
Fair enough, that makes sense.

SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
assert(VT.isScalableVector() && "Unexpected result type!");

bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
unsigned UnpackOpcode = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;

// Repeatedly unpack Val until the result is of the desired type.
SDValue Val = Op.getOperand(0);
switch (Val.getSimpleValueType().SimpleTy) {
default:
return SDValue();
case MVT::nxv16i8:
Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv8i16, Val);
if (VT == MVT::nxv8i16)
break;
[[fallthrough]];
case MVT::nxv8i16:
Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv4i32, Val);
if (VT == MVT::nxv4i32)
break;
[[fallthrough]];
case MVT::nxv4i32:
Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv2i64, Val);
assert(VT == MVT::nxv2i64 && "Unexpected result type!");
break;
}

return Val;
}

// Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
// but we don't have an appropriate instruction,
// so custom-lower it as ZIP1-with-zeros.
@@ -14696,6 +14737,10 @@ AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();

if (VT.isScalableVector())
return LowerEXTEND_VECTOR_INREG(Op, DAG);

SDValue SrcOp = Op.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
@@ -28872,7 +28917,8 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;

unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
unsigned Opcode = (Index == 0) ? (unsigned)ISD::ANY_EXTEND_VECTOR_INREG
: (unsigned)AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());

SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -714,6 +714,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
128 changes: 128 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-sext-zext.ll
@@ -456,3 +456,131 @@ define <vscale x 2 x i64> @zext_i18_i64(<vscale x 2 x i18> %a) {
%r = zext <vscale x 2 x i18> %a to <vscale x 2 x i64>
ret <vscale x 2 x i64> %r
}

define <vscale x 8 x i16> @sext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sext_inreg_i16_from_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: ret
%subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
%sext = sext <vscale x 8 x i8> %subvec to <vscale x 8 x i16>
ret <vscale x 8 x i16> %sext
}

define <vscale x 4 x i32> @sext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sext_inreg_i32_from_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ret
%subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
%sext = sext <vscale x 4 x i8> %subvec to <vscale x 4 x i32>
ret <vscale x 4 x i32> %sext
}

define <vscale x 4 x i32> @sext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sext_inreg_i32_from_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ret
%subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
%sext = sext <vscale x 4 x i16> %subvec to <vscale x 4 x i32>
ret <vscale x 4 x i32> %sext
}

define <vscale x 2 x i64> @sext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sext_inreg_i64_from_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: ret
%subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
%sext = sext <vscale x 2 x i8> %subvec to <vscale x 2 x i64>
ret <vscale x 2 x i64> %sext
}

define <vscale x 2 x i64> @sext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sext_inreg_i64_from_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: ret
%subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
%sext = sext <vscale x 2 x i16> %subvec to <vscale x 2 x i64>
ret <vscale x 2 x i64> %sext
}

define <vscale x 2 x i64> @sext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sext_inreg_i64_from_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: ret
%subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0)
%sext = sext <vscale x 2 x i32> %subvec to <vscale x 2 x i64>
ret <vscale x 2 x i64> %sext
}

define <vscale x 8 x i16> @zext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: zext_inreg_i16_from_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ret
%subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
%zext = zext <vscale x 8 x i8> %subvec to <vscale x 8 x i16>
ret <vscale x 8 x i16> %zext
}

define <vscale x 4 x i32> @zext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: zext_inreg_i32_from_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
%subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
%zext = zext <vscale x 4 x i8> %subvec to <vscale x 4 x i32>
ret <vscale x 4 x i32> %zext
}

define <vscale x 4 x i32> @zext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: zext_inreg_i32_from_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
%subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
%zext = zext <vscale x 4 x i16> %subvec to <vscale x 4 x i32>
ret <vscale x 4 x i32> %zext
}

define <vscale x 2 x i64> @zext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: zext_inreg_i64_from_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0)
%zext = zext <vscale x 2 x i8> %subvec to <vscale x 2 x i64>
ret <vscale x 2 x i64> %zext
}

define <vscale x 2 x i64> @zext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: zext_inreg_i64_from_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
%zext = zext <vscale x 2 x i16> %subvec to <vscale x 2 x i64>
ret <vscale x 2 x i64> %zext
}

define <vscale x 2 x i64> @zext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: zext_inreg_i64_from_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0)
%zext = zext <vscale x 2 x i32> %subvec to <vscale x 2 x i64>
ret <vscale x 2 x i64> %zext
}