Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9842,7 +9842,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {

// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
unsigned UserIC = LVL.isSafeForAnyVectorWidth() ? Hints.getInterleave() : 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the user didn't specify an interleave hint, then Hints.getInterleave() actually returns 0. So if LVL.isSafeForAnyVectorWidth() returns false and there is no hint, you'd be setting UserIC to 1 instead of 0. I wonder if it's better to rewrite this as:

  unsigned UserIC = Hints.getInterleave();
  if (UserIC > 1 && !LVL.isSafeForAnyVectorWidth())
    UserIC = 1;


// Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
Expand Down Expand Up @@ -9907,7 +9907,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}

if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
if (UserIC == 1 && Hints.getInterleave() > 1) {
LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
IntDiagMsg = {"InterleavingUnsafe",
"Ignoring user-specified interleave count due to possibly "
"unsafe dependencies in the loop."};
InterleaveLoop = false;
} else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,21 +417,17 @@ for.end: ; preds = %for.body, %entry

; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies.
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
; CHECK-REMARK: Ignoring user-specified interleave count due to possibly unsafe dependencies in the loop.
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 1)
define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @memory_dependence
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <4 x i32>
; CHECK: %[[LOAD2:.*]] = load <4 x i32>
; CHECK: %[[LOAD3:.*]] = load <4 x i32>
; CHECK: %[[LOAD4:.*]] = load <4 x i32>
; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD3]], %[[LOAD1]]
; CHECK: %[[ADD2:.*]] = add nsw <4 x i32> %[[LOAD4]], %[[LOAD2]]
; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD3]]
; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD4]]
; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD2]], %[[LOAD1]]
; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]]
; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]])
; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[MUL1]])
entry:
br label %for.body

Expand Down
31 changes: 31 additions & 0 deletions llvm/test/Transforms/LoopVectorize/unsafe-ic-hint-remark.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
; RUN: opt -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -S < %s 2>&1 | FileCheck %s

; Make sure the unsafe user-specified interleave count is ignored.

; CHECK: remark: <unknown>:0:0: Ignoring user-specified interleave count due to possibly unsafe dependencies in the loop.
; CHECK-LABEL: @loop_distance_4
define void @loop_distance_4(i64 %N, ptr %a, ptr %b) {
entry:
%cmp10 = icmp sgt i64 %N, 4
br i1 %cmp10, label %for.body, label %for.end
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shouldn't be needed, you could probably just use a constant %N below?


for.body:
%indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
%iv = phi i64 [ 4, %entry ], [ %iv.next, %loop ]

for consistency with other tests

%0 = getelementptr i32, ptr %b, i64 %indvars.iv
%arrayidx = getelementptr i8, ptr %0, i64 -16
%1 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
%2 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %2, %1
store i32 %add, ptr %0, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1

for.end:
ret void
}

!1 = !{!1, !2, !3}
!2 = !{!"llvm.loop.interleave.count", i32 4}
!3 = !{!"llvm.loop.vectorize.width", i32 4}