-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[RISCV] Allow undef prefix for local repeating VLA shuffle lowering #126097
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Allow undef prefix for local repeating VLA shuffle lowering #126097
Conversation
Implement the first TODO from llvm#125735, and do some minor code cleanup in the same style as the recently landed strict prefix case.
|
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
Changes: Implement the first TODO from #125735, and do some minor code cleanup in the same style as the recently landed strict prefix case.
Full diff: https://github.com/llvm/llvm-project/pull/126097.diff
2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 165c71d8e03f16..ec74523a31227b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5338,13 +5338,19 @@ static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
/// Is this mask local (i.e. elements only move within their local span), and
/// repeating (that is, the same rearrangement is being done within each span)?
static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
- // TODO: Could improve the case where undef elements exist in the first span.
+ SmallVector<int> LowSpan(Span, -1);
for (auto [I, M] : enumerate(Mask)) {
if (M == -1)
continue;
- int ChunkLo = I - (I % Span);
+ int SpanIdx = I % Span;
+ int ChunkLo = I - SpanIdx;
int ChunkHi = ChunkLo + Span;
- if (M < ChunkLo || M >= ChunkHi || M - ChunkLo != Mask[I % Span])
+ if (M < ChunkLo || M >= ChunkHi)
+ return false;
+ int Expected = M - ChunkLo;
+ if (LowSpan[SpanIdx] == -1)
+ LowSpan[SpanIdx] = Expected;
+ if (LowSpan[SpanIdx] != Expected)
return false;
}
return true;
@@ -5742,9 +5748,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// If we have a locally repeating mask, then we can reuse the first register
// in the index register group for all registers within the source register
// group. TODO: This generalizes to m2, and m4.
- const MVT M1VT = getLMUL1VT(ContainerVT);
- auto VLMAX = RISCVTargetLowering::computeVLMAXBounds(M1VT, Subtarget).first;
- if (ContainerVT.bitsGT(M1VT) && isLocalRepeatingShuffle(Mask, VLMAX)) {
+ if (NumElts > MinVLMAX && isLocalRepeatingShuffle(Mask, MinVLMAX)) {
+ const MVT M1VT = getLMUL1VT(ContainerVT);
EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
SDValue SubIndex =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index 86d8a275a90550..c9fe39685fbc6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -969,11 +969,44 @@ define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) {
ret <8 x i64> %shuffle
}
-define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
-; CHECK-LABEL: shuffle_v8i64_as_i256:
+; Test case where first span has undefs
+define <8 x i64> @shuffle_v8i64_as_i128_2(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i128_2:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v13, v9, v16
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT: vrgatherei16.vv v14, v10, v16
+; CHECK-NEXT: vrgatherei16.vv v15, v11, v16
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+;
+; ZVKB-V-LABEL: shuffle_v8i64_as_i128_2:
+; ZVKB-V: # %bb.0:
+; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0)
+; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0)
+; ZVKB-V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVKB-V-NEXT: vle16.v v16, (a0)
+; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVKB-V-NEXT: vrgatherei16.vv v13, v9, v16
+; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16
+; ZVKB-V-NEXT: vrgatherei16.vv v14, v10, v16
+; ZVKB-V-NEXT: vrgatherei16.vv v15, v11, v16
+; ZVKB-V-NEXT: vmv4r.v v8, v12
+; ZVKB-V-NEXT: ret
+ %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 undef, i32 undef, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
@@ -982,8 +1015,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
;
; ZVKB-V-LABEL: shuffle_v8i64_as_i256:
; ZVKB-V: # %bb.0:
-; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0)
-; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0)
+; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0)
+; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0)
; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; ZVKB-V-NEXT: vle16.v v16, (a0)
; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16
@@ -996,8 +1029,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0) {
; CHECK-LABEL: shuffle_v8i64_as_i256_zvl256b:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI32_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0)
; CHECK-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
@@ -1008,8 +1041,8 @@ define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0)
;
; ZVKB-V-LABEL: shuffle_v8i64_as_i256_zvl256b:
; ZVKB-V: # %bb.0:
-; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0)
-; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0)
+; ZVKB-V-NEXT: lui a0, %hi(.LCPI32_0)
+; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI32_0)
; ZVKB-V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVKB-V-NEXT: vle16.v v12, (a0)
; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma
|
You can test this locally with the following command:
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' e225677b1f6fe9f8e928836276f1d43b0591e9de 842f79815d06459441ed410bc18d01477910ce77 llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. In tests, avoid using `undef`. For example, this is considered a bad practice:
define void @fn() {
...
br i1 undef, ...
}
Please use the following instead:
define void @fn(i1 %cond) {
...
br i1 %cond, ...
}
Please refer to the Undefined Behavior Manual for more information. |
| int ChunkLo = I - SpanIdx; | ||
| int ChunkHi = ChunkLo + Span; | ||
| if (M < ChunkLo || M >= ChunkHi || M - ChunkLo != Mask[I % Span]) | ||
| if (M < ChunkLo || M >= ChunkHi) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be simplified to
if ((M / Span) != (I / Span))
return false;
int Expected = M % Span;
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/13197 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/13008 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/11207 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/18443 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/19384 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/13628 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/92/builds/13522 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/22597 Here is the relevant piece of the build log for the reference |
…lvm#126097) Implement the first TODO from llvm#125735, and minorly cleanup code using same style as the recently landed strict prefix case.
…wering (llvm#126097)" This reverts commit ab0006d. It appears to have rebased badly during web merge.
…owering (llvm#126097)" (With a fix to recently added code.) Implement the first TODO from llvm#125735, and minorly cleanup code using same style as the recently landed strict prefix case.
…lvm#126097) Implement the first TODO from llvm#125735, and minorly cleanup code using same style as the recently landed strict prefix case.
…wering (llvm#126097)" This reverts commit ab0006d. It appears to have rebased badly during web merge.
…owering (llvm#126097)" (With a fix to recently added code.) Implement the first TODO from llvm#125735, and minorly cleanup code using same style as the recently landed strict prefix case.
…lvm#126097) Implement the first TODO from llvm#125735, and minorly cleanup code using same style as the recently landed strict prefix case.
…wering (llvm#126097)" This reverts commit ab0006d. It appears to have rebased badly during web merge.
…owering (llvm#126097)" (With a fix to recently added code.) Implement the first TODO from llvm#125735, and minorly cleanup code using same style as the recently landed strict prefix case.
Implement the first TODO from #125735, and do some minor code cleanup in the same style as the recently landed strict prefix case.