Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3252,9 +3252,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
return;

// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
auto NarrowOp = [](VPValue *V) -> VPValue * {
SmallPtrSet<VPValue *, 4> NarrowedOps;
auto NarrowOp = [&NarrowedOps](VPValue *V) -> VPValue * {
auto *R = V->getDefiningRecipe();
if (!R)
if (!R || NarrowedOps.contains(V))
return V;
if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
// Narrow interleave group to wide load, as transformed VPlan will only
Expand All @@ -3264,13 +3265,15 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
/*Reverse=*/false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
NarrowedOps.insert(L);
return L;
}

if (auto *RepR = dyn_cast<VPReplicateRecipe>(R)) {
assert(RepR->isSingleScalar() &&
isa<LoadInst>(RepR->getUnderlyingInstr()) &&
"must be a single scalar load");
NarrowedOps.insert(RepR);
return RepR;
}
auto *WideLoad = cast<VPWidenLoadRecipe>(R);
Expand All @@ -3281,6 +3284,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
WideLoad->operands(), /*IsUniform*/ true,
/*Mask*/ nullptr, *WideLoad);
N->insertBefore(WideLoad);
NarrowedOps.insert(N);
return N;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1203,3 +1203,82 @@ loop:
exit:
ret void
}

; Make sure multiple uses of a narrowed op are handled correctly,
; https://github.com/llvm/llvm-project/issues/156190.
define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; VF2-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; VF2-NEXT: [[ENTRY:.*:]]
; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF2: [[VECTOR_PH]]:
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF2: [[VECTOR_BODY]]:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
; VF2-NEXT: [[TMP2:%.*]] = fadd contract <2 x double> [[BROADCAST_SPLAT]], splat (double 2.000000e+01)
; VF2-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP3]], align 8
; VF2-NEXT: [[TMP4:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP4]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; VF2: [[MIDDLE_BLOCK]]:
; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
; VF2: [[SCALAR_PH]]:
;
; VF4-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; VF4-NEXT: [[ENTRY:.*:]]
; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF4: [[VECTOR_PH]]:
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; VF4: [[VECTOR_BODY]]:
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP0]], align 8
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; VF4-NEXT: [[TMP1:%.*]] = fadd contract <4 x double> [[STRIDED_VEC]], splat (double 2.000000e+01)
; VF4-NEXT: [[TMP2:%.*]] = fadd contract <4 x double> [[STRIDED_VEC1]], splat (double 2.000000e+01)
; VF4-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
; VF4-NEXT: [[TMP5:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; VF4: [[MIDDLE_BLOCK]]:
; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
; VF4: [[SCALAR_PH]]:
;
entry:
br label %loop

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr { double, double }, ptr %A, i64 %iv
%l.A.0 = load double, ptr %gep.A, align 8
%gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A, i64 8
%l.A.1 = load double, ptr %gep.A.1, align 8
%add.0 = fadd contract double %l.A.0, 20.0
%add.1 = fadd contract double %l.A.1, 20.0
%gep.B = getelementptr { double, double }, ptr %B, i64 %iv
store double %add.0, ptr %gep.B, align 8
%gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B, i64 8
store double %add.1, ptr %gep.B.1, align 8
%gep.C = getelementptr { double, double }, ptr %C, i64 %iv
%gep.C.1 = getelementptr inbounds nuw i8, ptr %gep.C, i64 8
store double %add.0, ptr %gep.C, align 8
store double %add.1, ptr %gep.C.1, align 8
%iv.next = add nuw nsw i64 %iv, 1
%.not = icmp eq i64 %iv.next, 1000
br i1 %.not, label %exit, label %loop

exit:
ret void
}
Original file line number Diff line number Diff line change
Expand Up @@ -587,3 +587,76 @@ loop:
exit:
ret void
}

define void @multiple_store_groups_storing_same_load_group(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; VF2-LABEL: define void @multiple_store_groups_storing_same_load_group(
; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; VF2-NEXT: [[ENTRY:.*:]]
; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF2: [[VECTOR_PH]]:
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF2: [[VECTOR_BODY]]:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
; VF2-NEXT: [[TMP1:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP1]], align 8
; VF2-NEXT: [[TMP2:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
; VF2-NEXT: store <2 x double> [[WIDE_LOAD1]], ptr [[TMP2]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; VF2-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; VF2: [[MIDDLE_BLOCK]]:
; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
; VF2: [[SCALAR_PH]]:
;
; VF4-LABEL: define void @multiple_store_groups_storing_same_load_group(
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; VF4-NEXT: [[ENTRY:.*:]]
; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VF4: [[VECTOR_PH]]:
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; VF4: [[VECTOR_BODY]]:
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP0]], align 8
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; VF4-NEXT: [[TMP1:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
; VF4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[STRIDED_VEC]], <4 x double> [[STRIDED_VEC1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; VF4-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF4-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; VF4: [[MIDDLE_BLOCK]]:
; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
; VF4: [[SCALAR_PH]]:
;
entry:
br label %loop

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr { double, double }, ptr %A, i64 %iv
%gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A, i64 8
%l.A.0 = load double, ptr %gep.A, align 8
%l.A.1 = load double, ptr %gep.A.1, align 8
%gep.B = getelementptr { double, double }, ptr %B, i64 %iv
%gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B, i64 8
store double %l.A.0, ptr %gep.B, align 8
store double %l.A.1, ptr %gep.B.1, align 8
%gep.C = getelementptr { double, double }, ptr %C, i64 %iv
%gep.C.1 = getelementptr inbounds nuw i8, ptr %gep.C, i64 8
store double %l.A.0, ptr %gep.C, align 8
store double %l.A.1, ptr %gep.C.1, align 8
%iv.next = add nuw nsw i64 %iv, 1
%.not = icmp eq i64 %iv.next, 1000
br i1 %.not, label %exit, label %loop

exit:
ret void
}
Loading