Skip to content

Commit ce66473

Browse files
MacDue authored and kcloudy0717 committed
[SDAG] Disable illegal extract_subvector splitting for scalable vectors (llvm#170315)
The "half spanning" legalization of extract_subvector is only valid for fixed-length vectors. This patch disables it for scalable vectors and makes more careful use of ElementCount in the lowering. Fixes a regression from llvm#154101, which was encountered here: llvm#166748 (comment). Note: We could optimize this case given the known vscale, but this patch only attempts to fix the miscompile.
1 parent f53ed8f commit ce66473

File tree

2 files changed

+54
-19
lines changed

2 files changed

+54
-19
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3938,43 +3938,55 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
39383938

39393939
GetSplitVector(N->getOperand(0), Lo, Hi);
39403940

3941-
uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
3942-
uint64_t IdxVal = Idx->getAsZExtVal();
3941+
ElementCount LoElts = Lo.getValueType().getVectorElementCount();
3942+
// Note: For scalable vectors, the index is scaled by vscale.
3943+
ElementCount IdxVal =
3944+
ElementCount::get(Idx->getAsZExtVal(), SubVT.isScalableVector());
3945+
uint64_t IdxValMin = IdxVal.getKnownMinValue();
39433946

3944-
unsigned NumResultElts = SubVT.getVectorMinNumElements();
3947+
EVT SrcVT = N->getOperand(0).getValueType();
3948+
ElementCount NumResultElts = SubVT.getVectorElementCount();
39453949

3946-
if (IdxVal < LoEltsMin) {
3947-
// If the extracted elements are all in the low half, do a simple extract.
3948-
if (IdxVal + NumResultElts <= LoEltsMin)
3949-
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
3950+
// If the extracted elements are all in the low half, do a simple extract.
3951+
if (ElementCount::isKnownLE(IdxVal + NumResultElts, LoElts))
3952+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
39503953

3954+
unsigned LoEltsMin = LoElts.getKnownMinValue();
3955+
if (IdxValMin < LoEltsMin && SubVT.isFixedLengthVector() &&
3956+
SrcVT.isFixedLengthVector()) {
39513957
// Extracted subvector crosses vector split, so we need to blend the two
39523958
// halves.
39533959
// TODO: May be able to emit partial extract_subvector.
39543960
SmallVector<SDValue, 8> Elts;
3955-
Elts.reserve(NumResultElts);
3961+
Elts.reserve(NumResultElts.getFixedValue());
39563962

3957-
DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxVal,
3958-
/*Count=*/LoEltsMin - IdxVal);
3963+
// This is not valid for scalable vectors. If SubVT is scalable, this is the
3964+
// same as unrolling a scalable dimension (invalid). If SrcVT is scalable,
3965+
// `Lo[LoEltsMin - 1]` may not be the last element of `Lo`.
3966+
DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxValMin,
3967+
/*Count=*/LoEltsMin - IdxValMin);
39593968
DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0,
39603969
/*Count=*/SubVT.getVectorNumElements() -
39613970
Elts.size());
39623971
return DAG.getBuildVector(SubVT, dl, Elts);
39633972
}
39643973

3965-
EVT SrcVT = N->getOperand(0).getValueType();
39663974
if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
3967-
uint64_t ExtractIdx = IdxVal - LoEltsMin;
3968-
if (ExtractIdx % NumResultElts == 0)
3969-
return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx);
3975+
ElementCount ExtractIdx = IdxVal - LoElts;
3976+
if (ExtractIdx.isKnownMultipleOf(NumResultElts))
3977+
return DAG.getExtractSubvector(dl, SubVT, Hi,
3978+
ExtractIdx.getKnownMinValue());
39703979

3971-
// We cannot create an extract_subvector that isn't a multiple of the result
3972-
// size, which may go out of bounds for the last elements. Shuffle the
3973-
// desired elements down to 0 and do a simple 0 extract.
39743980
EVT HiVT = Hi.getValueType();
3981+
assert(HiVT.isFixedLengthVector() &&
3982+
"Only fixed-vector extracts are supported in this case");
3983+
3984+
// We cannot create an extract_subvector that isn't a multiple of the
3985+
// result size, which may go out of bounds for the last elements. Shuffle
3986+
// the desired elements down to 0 and do a simple 0 extract.
39753987
SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
3976-
for (int I = 0; I != static_cast<int>(NumResultElts); ++I)
3977-
Mask[I] = ExtractIdx + I;
3988+
for (int I = 0; I != int(NumResultElts.getFixedValue()); ++I)
3989+
Mask[I] = int(ExtractIdx.getFixedValue()) + I;
39783990

39793991
SDValue Shuffle =
39803992
DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask);
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s -o - | FileCheck %s
3+
4+
; Note: This test case is reduced from: https://github.com/llvm/llvm-project/pull/166748#issuecomment-3600498185
5+
6+
define i32 @test_extract_v8i32_from_nxv8i32(<vscale x 8 x i32> %vec) nounwind {
7+
; CHECK-LABEL: test_extract_v8i32_from_nxv8i32:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
10+
; CHECK-NEXT: addvl sp, sp, #-2
11+
; CHECK-NEXT: str z0, [sp]
12+
; CHECK-NEXT: ptrue p0.s
13+
; CHECK-NEXT: ldr z0, [sp]
14+
; CHECK-NEXT: str z1, [sp, #1, mul vl]
15+
; CHECK-NEXT: uaddv d0, p0, z0.s
16+
; CHECK-NEXT: fmov w0, s0
17+
; CHECK-NEXT: addvl sp, sp, #2
18+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
19+
; CHECK-NEXT: ret
20+
%1 = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
21+
%2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %1)
22+
ret i32 %2
23+
}

0 commit comments

Comments
 (0)