Skip to content

Commit d298e8c

Browse files
committed
[RISCV] Fix incorrect slide offset when using vnsrl to de-interleave
Fix #132071
1 parent be9b7a1 commit d298e8c

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5545,12 +5545,25 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5545 5545
})) {
5546 5546
// Narrow each source and concatenate them.
5547 5547
// FIXME: For small LMUL it is better to concatenate first.
5548-
MVT HalfVT = VT.getHalfNumVectorElementsVT();
5548+
MVT EltVT = VT.getVectorElementType();
5549+
auto EltCnt = VT.getVectorElementCount();
5550+
MVT SubVT =
5551+
MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5552+
5549 5553
SDValue Lo =
5550-
getDeinterleaveShiftAndTrunc(DL, HalfVT, V1, Factor, Index, DAG);
5554+
getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5551 5555
SDValue Hi =
5552-
getDeinterleaveShiftAndTrunc(DL, HalfVT, V2, Factor, Index, DAG);
5553-
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5556+
getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5557+
5558+
MVT NewVT = SubVT.getDoubleNumVectorElementsVT();
5559+
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, Lo, Hi);
5560+
for (unsigned F = Factor; F > 2; F >>= 1) {
5561+
SDValue Undef = DAG.getUNDEF(NewVT);
5562+
NewVT = NewVT.getDoubleNumVectorElementsVT();
5563+
Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, Concat, Undef);
5564+
}
5565+
5566+
return Concat;
5554 5567
}
5555 5568
}
5556 5569
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -378,8 +378,9 @@ define void @deinterleave4_0_i8_two_source(ptr %in0, ptr %in1, ptr %out) {
378 378
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
379 379
; CHECK-NEXT: vnsrl.wi v8, v8, 0
380 380
; CHECK-NEXT: vnsrl.wi v9, v9, 0
381+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
382+
; CHECK-NEXT: vslideup.vi v9, v8, 2
381 383
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
382-
; CHECK-NEXT: vslideup.vi v9, v8, 4
383 384
; CHECK-NEXT: vse8.v v9, (a2)
384 385
; CHECK-NEXT: ret
385 386
entry:
@@ -402,8 +403,9 @@ define void @deinterleave4_8_i8_two_source(ptr %in0, ptr %in1, ptr %out) {
402 403
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
403 404
; CHECK-NEXT: vnsrl.wi v8, v8, 0
404 405
; CHECK-NEXT: vnsrl.wi v9, v9, 0
406+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
407+
; CHECK-NEXT: vslideup.vi v9, v8, 2
405 408
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
406-
; CHECK-NEXT: vslideup.vi v9, v8, 4
407 409
; CHECK-NEXT: vse8.v v9, (a2)
408 410
; CHECK-NEXT: ret
409 411
entry:

0 commit comments

Comments
 (0)