
Commit b0e77d5

[RISCV] Lower the shufflevector equivalent of vector.splice
We can lower a vector splice to a vslidedown and a vslideup. The majority of the matching code here came from X86's code for matching PALIGNR and VPALIGND/Q. The slidedown and slideup lowering doesn't really require the shuffle to be a concatenation, but concatenation happened to be an interesting pattern with existing analysis code I could reuse.

This helps with cases where the scalar loop optimizer forwarded a load result from a previous loop iteration. For example, this happens if the loop uses x[i] and x[i+1] on the same iteration: the scalar optimizer forwards the x[i+1] load from the previous iteration to satisfy x[i] on the current one. When this gets vectorized, one element of the vector is forwarded from the previous iteration and concatenated with the elements loaded on this iteration.

Whether that's more efficient than doing a shifted load, or reloading the single scalar and using vslide1up, is an interesting question. But that's not something the backend can help with.

Reviewed By: khchen

Differential Revision: https://reviews.llvm.org/D119039
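As a rough illustration of the scalar source pattern described above (a hypothetical sketch; the function and array names are invented, and whether the vectorizer forms exactly this shuffle depends on the rest of the pipeline):

// Hypothetical C++ loop: each iteration reads x[i] and x[i + 1]. The scalar
// optimizer can forward the x[i + 1] load to satisfy x[i] on the next
// iteration; once vectorized, that forwarded element from the previous
// vector iteration gets concatenated with the freshly loaded elements,
// which shows up as the splice-like shufflevector this patch lowers.
void sum_adjacent(const float *x, float *y, int n) {
  for (int i = 0; i + 1 < n; ++i)
    y[i] = x[i] + x[i + 1];
}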

3 files changed, 194 insertions(+), 0 deletions(-)


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 93 additions & 0 deletions
@@ -2514,6 +2514,72 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
   return true;
 }
 
+static int isElementRotate(SDValue &V1, SDValue &V2, ArrayRef<int> Mask) {
+  int Size = Mask.size();
+
+  // We need to detect various ways of spelling a rotation:
+  //   [11, 12, 13, 14, 15,  0,  1,  2]
+  //   [-1, 12, 13, 14, -1, -1,  1, -1]
+  //   [-1, -1, -1, -1, -1, -1,  1,  2]
+  //   [ 3,  4,  5,  6,  7,  8,  9, 10]
+  //   [-1,  4,  5,  6, -1, -1,  9, -1]
+  //   [-1,  4,  5,  6, -1, -1, -1, -1]
+  int Rotation = 0;
+  SDValue Lo, Hi;
+  for (int i = 0; i != Size; ++i) {
+    int M = Mask[i];
+    if (M < 0)
+      continue;
+
+    // Determine where a rotate vector would have started.
+    int StartIdx = i - (M % Size);
+    // The identity rotation isn't interesting, stop.
+    if (StartIdx == 0)
+      return -1;
+
+    // If we found the tail of a vector the rotation must be the missing
+    // front. If we found the head of a vector, it must be how much of the
+    // head.
+    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+
+    if (Rotation == 0)
+      Rotation = CandidateRotation;
+    else if (Rotation != CandidateRotation)
+      // The rotations don't match, so we can't match this mask.
+      return -1;
+
+    // Compute which value this mask is pointing at.
+    SDValue MaskV = M < Size ? V1 : V2;
+
+    // Compute which of the two target values this index should be assigned to.
+    // This reflects whether the high elements are remaining or the low
+    // elements are remaining.
+    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+    // Either set up this value if we've not encountered it before, or check
+    // that it remains consistent.
+    if (!TargetV)
+      TargetV = MaskV;
+    else if (TargetV != MaskV)
+      // This may be a rotation, but it pulls from the inputs in some
+      // unsupported interleaving.
+      return -1;
+  }
+
+  // Check that we successfully analyzed the mask, and normalize the results.
+  assert(Rotation != 0 && "Failed to locate a viable rotation!");
+  assert((Lo || Hi) && "Failed to find a rotated input vector!");
+
+  // Make sure we've found a value for both halves.
+  if (!Lo || !Hi)
+    return -1;
+
+  V1 = Lo;
+  V2 = Hi;
+
+  return Rotation;
+}
+
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
   SDValue V1 = Op.getOperand(0);
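For reference, a small standalone C++ sketch of the same mask analysis, with the two shuffle sources modeled as plain integer tags instead of SDValues (the names and the driver below are illustrative only, not part of the patch):

#include <cassert>
#include <vector>

// Standalone model of isElementRotate above: sources are tagged 0 (first
// input) and 1 (second input). Returns the rotation amount, or -1 if the
// mask is not a single consistent rotation of a two-source concatenation.
static int modelElementRotate(const std::vector<int> &Mask, int &LoSrc,
                              int &HiSrc) {
  int Size = (int)Mask.size();
  int Rotation = 0;
  LoSrc = HiSrc = -1;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;
    int StartIdx = i - (M % Size);
    if (StartIdx == 0)
      return -1; // identity rotation, not interesting
    int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // inconsistent rotation amounts
    int Src = M < Size ? 0 : 1;                 // which input this element reads
    int &Target = StartIdx < 0 ? HiSrc : LoSrc; // which slide it must feed
    if (Target < 0)
      Target = Src;
    else if (Target != Src)
      return -1; // one contiguous region would mix both inputs
  }
  return (LoSrc >= 0 && HiSrc >= 0) ? Rotation : -1;
}

int main() {
  int Lo = -1, Hi = -1;
  // Mask from the splice_binary2 test in the fp test file below:
  // tail of the second source followed by the head of the first.
  std::vector<int> Mask = {13, 14, 15, 0, 1, 2, 3, 4};
  int Rot = modelElementRotate(Mask, Lo, Hi);
  // Expect a rotation of 5: the second source feeds the slidedown (low half
  // of the result) and the first source feeds the slideup (high half).
  assert(Rot == 5 && Hi == 1 && Lo == 0);
  (void)Rot;
  return 0;
}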
@@ -2619,6 +2685,33 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
   }
 
+  // Match shuffles that concatenate two vectors, rotate the concatenation,
+  // and then extract the original number of elements from the rotated result.
+  // This is equivalent to vector.splice or X86's PALIGNR instruction. Lower
+  // it to a SLIDEDOWN and a SLIDEUP.
+  // FIXME: We don't really need it to be a concatenation. We just need two
+  // regions with contiguous elements that need to be shifted down and up.
+  int Rotation = isElementRotate(V1, V2, Mask);
+  if (Rotation > 0) {
+    // We found a rotation. We need to slide V2 down by Rotation, using
+    // (NumElts - Rotation) for VL. Then we need to slide V1 up by
+    // (NumElts - Rotation) using NumElts for VL.
+    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+
+    unsigned InvRotate = NumElts - Rotation;
+    SDValue SlideDown =
+        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+                    DAG.getUNDEF(ContainerVT), V2,
+                    DAG.getConstant(Rotation, DL, XLenVT),
+                    TrueMask, DAG.getConstant(InvRotate, DL, XLenVT));
+    SDValue SlideUp =
+        DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, SlideDown, V1,
+                    DAG.getConstant(InvRotate, DL, XLenVT),
+                    TrueMask, VL);
+    return convertFromScalableVector(VT, SlideUp, DAG, Subtarget);
+  }
+
   // Detect an interleave shuffle and lower to
   // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
   bool SwapSources;
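To make the slide amounts concrete, a worked example using the splice_binary2 mask from the fp test below (this arithmetic is the editor's illustration, not text from the patch): for <8 x double> with mask <13, 14, 15, 0, 1, 2, 3, 4>, isElementRotate returns Rotation = 5, so InvRotate = NumElts - Rotation = 8 - 5 = 3. The lowering slides the second shuffle source down by 5 with VL = 3, placing its tail in result elements 0..2, then slides the first source up by 3 with VL = 8 (tail undisturbed), filling elements 3..7 — which matches the vslidedown.vi/vslideup.vi pair in the generated checks.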

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll

Lines changed: 51 additions & 0 deletions
@@ -255,3 +255,54 @@ define <8 x float> @slidedown_v8f32(<8 x float> %x) {
   %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x float> %s
 }
+
+define <8 x float> @splice_unary(<8 x float> %x) {
+; CHECK-LABEL: splice_unary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v10, v8, 1
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT:    vslideup.vi v10, v8, 7
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
+  ret <8 x float> %s
+}
+
+define <8 x double> @splice_unary2(<8 x double> %x) {
+; CHECK-LABEL: splice_unary2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m4, ta, mu
+; CHECK-NEXT:    vslidedown.vi v12, v8, 6
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, mu
+; CHECK-NEXT:    vslideup.vi v12, v8, 2
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x double> %x, <8 x double> poison, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+  ret <8 x double> %s
+}
+
+define <8 x float> @splice_binary(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: splice_binary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v10, 6
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 9>
+  ret <8 x float> %s
+}
+
+define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
+; CHECK-LABEL: splice_binary2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e64, m4, ta, mu
+; CHECK-NEXT:    vslidedown.vi v12, v12, 5
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, mu
+; CHECK-NEXT:    vslideup.vi v12, v8, 3
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+  ret <8 x double> %s
+}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 50 additions & 0 deletions
@@ -554,3 +554,53 @@ define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
   %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i32> %s
 }
+
+define <8 x i16> @splice_unary(<8 x i16> %x) {
+; CHECK-LABEL: splice_unary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vi v9, v8, 2
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v9, v8, 6
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
+  ret <8 x i16> %s
+}
+
+define <8 x i32> @splice_unary2(<8 x i32> %x) {
+; CHECK-LABEL: splice_unary2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v10, v8, 5
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT:    vslideup.vi v10, v8, 3
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
+  ret <8 x i32> %s
+}
+
+define <8 x i16> @splice_binary(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: splice_binary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v9, 6
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 9>
+  ret <8 x i16> %s
+}
+
+define <8 x i32> @splice_binary2(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: splice_binary2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v10, 3
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+  ret <8 x i32> %s
+}
