Skip to content

Commit 055e4ff

Browse files
committed
[VPlan] Don't narrow op multiple times in narrowInterleaveGroups.
Track which ops have already been narrowed, to avoid narrowing the same operation multiple times. Repeated narrowing will lead to incorrect results, because we could first narrow from an interleave group -> wide load, and then narrow the wide load -> single-scalar load. Fixes https://github.com/llvm/llvm-project/issues/156190.
1 parent 9fdf2c7 commit 055e4ff

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4017,9 +4017,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
40174017
return;
40184018

40194019
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
4020-
auto NarrowOp = [](VPValue *V) -> VPValue * {
4020+
SmallPtrSet<VPValue *, 4> NarrowedOps;
4021+
auto NarrowOp = [&NarrowedOps](VPValue *V) -> VPValue * {
40214022
auto *R = V->getDefiningRecipe();
4022-
if (!R)
4023+
if (!R || NarrowedOps.contains(V))
40234024
return V;
40244025
if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
40254026
// Narrow interleave group to wide load, as transformed VPlan will only
@@ -4029,13 +4030,15 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
40294030
LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
40304031
/*Reverse=*/false, {}, LoadGroup->getDebugLoc());
40314032
L->insertBefore(LoadGroup);
4033+
NarrowedOps.insert(L);
40324034
return L;
40334035
}
40344036

40354037
if (auto *RepR = dyn_cast<VPReplicateRecipe>(R)) {
40364038
assert(RepR->isSingleScalar() &&
40374039
isa<LoadInst>(RepR->getUnderlyingInstr()) &&
40384040
"must be a single scalar load");
4041+
NarrowedOps.insert(RepR);
40394042
return RepR;
40404043
}
40414044
auto *WideLoad = cast<VPWidenLoadRecipe>(R);
@@ -4049,6 +4052,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
40494052
/*IsUniform*/ true,
40504053
/*Mask*/ nullptr, *WideLoad);
40514054
N->insertBefore(WideLoad);
4055+
NarrowedOps.insert(N);
40524056
return N;
40534057
};
40544058

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,7 +1169,7 @@ exit:
11691169
ret void
11701170
}
11711171

1172-
; FIXME: Currently incorrectly narrows the load,
1172+
; Make sure multiple uses of a narrowed op are handled correctly,
11731173
; https://github.com/llvm/llvm-project/issues/156190.
11741174
define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
11751175
; VF2-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
@@ -1181,9 +1181,7 @@ define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr
11811181
; VF2: [[VECTOR_BODY]]:
11821182
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
11831183
; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
1184-
; VF2-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8
1185-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
1186-
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
1184+
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
11871185
; VF2-NEXT: [[TMP2:%.*]] = fadd contract <2 x double> [[BROADCAST_SPLAT]], splat (double 2.000000e+01)
11881186
; VF2-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
11891187
; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP3]], align 8

0 commit comments

Comments
 (0)