Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13430,6 +13430,30 @@ static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return true;
}

/// isDUPQMask - matches a splat of equivalent lanes within 128b segments in
/// the first vector operand.
///
/// \param M  Shuffle mask to inspect. Negative entries denote undef/poison
///           lanes and are treated as "don't care" (except for M[0], which
///           selects the lane and therefore must be defined).
/// \param VT Fixed-length vector type being shuffled. Its size must be a
///           multiple of 128 bits (asserted).
/// \returns the index, relative to the start of a segment, of the lane that
///          every 128b segment splats, or std::nullopt if the mask does not
///          have that form.
static std::optional<unsigned> isDUPQMask(ArrayRef<int> M, EVT VT) {
  assert(VT.getFixedSizeInBits() % 128 == 0 && "Unsupported SVE vector size");
  const unsigned Segments = VT.getFixedSizeInBits() / 128;
  const unsigned SegmentElts = VT.getVectorNumElements() / Segments;

  // Make sure there's no size changes. Doing this (and the emptiness check)
  // before touching the mask guarantees that M[0] below is a valid access.
  if (M.empty() || SegmentElts * Segments != M.size())
    return std::nullopt;

  // Check the first index corresponds to one of the lanes in the first
  // segment. An undef first index (negative) converts to a huge unsigned
  // value and is rejected by the same comparison.
  const unsigned Lane = static_cast<unsigned>(M[0]);
  if (Lane >= SegmentElts)
    return std::nullopt;

  // Check that all defined lanes match the first, adjusted for segment.
  // Undef/poison lanes may take any value, so they never break the pattern.
  for (unsigned I = 0, E = M.size(); I != E; ++I) {
    if (M[I] < 0)
      continue;
    const unsigned SegmentBase = (I / SegmentElts) * SegmentElts;
    if (static_cast<unsigned>(M[I]) != Lane + SegmentBase)
      return std::nullopt;
  }

  return Lane;
}

/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
Expand Down Expand Up @@ -30013,6 +30037,19 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
}

if (Subtarget->hasSVE2p1()) {
if (std::optional<unsigned> Lane = isDUPQMask(ShuffleMask, VT)) {
SDValue IID =
DAG.getConstant(Intrinsic::aarch64_sve_dup_laneq, DL, MVT::i64);
return convertFromScalableVector(
DAG, VT,
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
{IID, Op1,
DAG.getConstant(*Lane, DL, MVT::i64,
/*isTarget=*/true)}));
}
}
}

// Try to widen the shuffle before generating a possibly expensive SVE TBL.
Expand Down
115 changes: 115 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s

; <32 x i8> (256 bits): every 16-element half of the mask repeats lane 15 of
; that half (indices 15 and 31). The CHECK lines expect a single
; "dupq z0.b, z0.b[15]" between the SVE load and store.
define void @dupq_i8_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_i8_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: dupq z0.b, z0.b[15]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%load = load <32 x i8>, ptr %addr
%splat.lanes = shufflevector <32 x i8> %load, <32 x i8> poison, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15,
i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
store <32 x i8> %splat.lanes, ptr %addr
ret void
}

; <16 x i16> (256 bits): every 8-element half of the mask repeats lane 2 of
; that half (indices 2 and 10). The CHECK lines expect
; "dupq z0.h, z0.h[2]".
define void @dupq_i16_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_i16_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: dupq z0.h, z0.h[2]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%load = load <16 x i16>, ptr %addr
%splat.lanes = shufflevector <16 x i16> %load, <16 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
store <16 x i16> %splat.lanes, ptr %addr
ret void
}

; <8 x i32> (256 bits): every 4-element half of the mask repeats lane 3 of
; that half (indices 3 and 7). The CHECK lines expect
; "dupq z0.s, z0.s[3]".
define void @dupq_i32_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_i32_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: dupq z0.s, z0.s[3]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%load = load <8 x i32>, ptr %addr
%splat.lanes = shufflevector <8 x i32> %load, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3,
i32 7, i32 7, i32 7, i32 7>
store <8 x i32> %splat.lanes, ptr %addr
ret void
}

; <4 x i64> (256 bits): every 2-element half of the mask repeats lane 0 of
; that half (indices 0 and 2). Note that the CHECK lines expect
; "trn1 z0.d, z0.d, z0.d" here rather than a DUPQ instruction.
define void @dupq_i64_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_i64_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: trn1 z0.d, z0.d, z0.d
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%load = load <4 x i64>, ptr %addr
%splat.lanes = shufflevector <4 x i64> %load, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
store <4 x i64> %splat.lanes, ptr %addr
ret void
}

; <16 x half> (256 bits): same mask as the i16 test (lane 2 of each
; 8-element half, indices 2 and 10). The CHECK lines expect
; "dupq z0.h, z0.h[2]".
define void @dupq_f16_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_f16_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: dupq z0.h, z0.h[2]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%load = load <16 x half>, ptr %addr
%splat.lanes = shufflevector <16 x half> %load, <16 x half> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
store <16 x half> %splat.lanes, ptr %addr
ret void
}

; <16 x bfloat> (256 bits): same mask as the f16 test (lane 2 of each
; 8-element half, indices 2 and 10). Unlike the f16 case, the CHECK lines do
; not expect DUPQ: they expect the vector to be handled as two q registers
; (ldp/stp) with one "dup vN.8h, vN.h[2]" per register.
; NOTE(review): presumably bfloat fixed-length vectors do not reach the SVE
; shuffle lowering in this configuration -- confirm before relying on it.
define void @dupq_bf16_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_bf16_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: dup v0.8h, v0.h[2]
; CHECK-NEXT: dup v1.8h, v1.h[2]
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%load = load <16 x bfloat>, ptr %addr
%splat.lanes = shufflevector <16 x bfloat> %load, <16 x bfloat> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
store <16 x bfloat> %splat.lanes, ptr %addr
ret void
}

; <8 x float> (256 bits): same mask as the i32 test (lane 3 of each
; 4-element half, indices 3 and 7). The CHECK lines expect
; "dupq z0.s, z0.s[3]".
define void @dupq_f32_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_f32_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: dupq z0.s, z0.s[3]
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%load = load <8 x float>, ptr %addr
%splat.lanes = shufflevector <8 x float> %load, <8 x float> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3,
i32 7, i32 7, i32 7, i32 7>
store <8 x float> %splat.lanes, ptr %addr
ret void
}

; <4 x double> (256 bits): same mask as the i64 test (lane 0 of each
; 2-element half, indices 0 and 2). As in the i64 case, the CHECK lines
; expect "trn1 z0.d, z0.d, z0.d" rather than a DUPQ instruction.
define void @dupq_f64_256b(ptr %addr) #0 {
; CHECK-LABEL: dupq_f64_256b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: trn1 z0.d, z0.d, z0.d
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%load = load <4 x double>, ptr %addr
%splat.lanes = shufflevector <4 x double> %load, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
store <4 x double> %splat.lanes, ptr %addr
ret void
}

attributes #0 = { noinline vscale_range(2,2) "target-features"="+sve2p1,+bf16" }
Loading