Skip to content

Commit 44cd502

Browse files
[LLVM][CodeGen][SVE] List MVTs that are desirable for extending loads. (#149153)
Extend AArch64TargetLowering::isVectorLoadExtDesirable to specify the set of MVTs for which load extension is desirable. Fixes #148939.
1 parent 2207664 commit 44cd502

File tree

3 files changed

+29
-3
lines changed

3 files changed

+29
-3
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7641,7 +7641,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
76417641

76427642
if (SDValue(GN0, 0).hasOneUse() &&
76437643
isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7644-
TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
7644+
TLI.isVectorLoadExtDesirable(SDValue(N, 0))) {
76457645
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
76467646
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
76477647

@@ -15745,7 +15745,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
1574515745
// fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
1574615746
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
1574715747
if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15748-
TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
15748+
TLI.isVectorLoadExtDesirable(SDValue(N, 0))) {
1574915749
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
1575015750
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
1575115751

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6439,7 +6439,9 @@ bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
64396439
}
64406440
}
64416441

6442-
return true;
6442+
EVT PreExtScalarVT = ExtVal->getOperand(0).getValueType().getScalarType();
6443+
return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
6444+
PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
64436445
}
64446446

64456447
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {

llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,27 @@ define <vscale x 8 x i64> @sload_8i8_8i64(ptr %a) {
231231
%aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
232232
ret <vscale x 8 x i64> %aext
233233
}
234+
235+
; Ensure we don't try to promote a predicate load to a sign-extended load.
236+
define <vscale x 16 x i8> @sload_16i1_16i8(ptr %addr) {
237+
; CHECK-LABEL: sload_16i1_16i8:
238+
; CHECK: // %bb.0:
239+
; CHECK-NEXT: ldr p0, [x0]
240+
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
241+
; CHECK-NEXT: ret
242+
%load = load <vscale x 16 x i1>, ptr %addr
243+
%zext = sext <vscale x 16 x i1> %load to <vscale x 16 x i8>
244+
ret <vscale x 16 x i8> %zext
245+
}
246+
247+
; Ensure we don't try to promote a predicate load to a zero-extended load.
248+
define <vscale x 16 x i8> @zload_16i1_16i8(ptr %addr) {
249+
; CHECK-LABEL: zload_16i1_16i8:
250+
; CHECK: // %bb.0:
251+
; CHECK-NEXT: ldr p0, [x0]
252+
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
253+
; CHECK-NEXT: ret
254+
%load = load <vscale x 16 x i1>, ptr %addr
255+
%zext = zext <vscale x 16 x i1> %load to <vscale x 16 x i8>
256+
ret <vscale x 16 x i8> %zext
257+
}

0 commit comments

Comments
 (0)