Skip to content

Commit 3d4eb1b

Browse files
committed
!fixup address comments, thanks!
1 parent 6f781f2 commit 3d4eb1b

File tree

2 files changed

+84
-10
lines changed

2 files changed

+84
-10
lines changed

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ getRecipesForUncountableExit(VPlan &Plan,
7474
SmallVectorImpl<VPRecipeBase *> &GEPs);
7575

7676
/// Return a MemoryLocation for \p R with noalias metadata populated from
77-
/// \p R. The pointer of the location is conservatively set to nullptr.
77+
/// \p R, if the recipe is supported, and std::nullopt otherwise. The pointer of
78+
/// the location is conservatively set to nullptr.
7879
std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R);
7980
} // namespace vputils
8081

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -329,24 +329,96 @@ for.end:
329329
define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
330330
; CHECK-LABEL: @multi_exit(
331331
; CHECK-NEXT: entry:
332+
; CHECK-NEXT: [[UMAX9:%.*]] = call i64 @llvm.umax.i64(i64 [[B:%.*]], i64 1)
333+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX9]], -1
334+
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
335+
; CHECK-NEXT: [[UMIN10:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]])
336+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN10]], 1
337+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 24
338+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
339+
; CHECK: vector.scevcheck:
340+
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
341+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -1
342+
; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
343+
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[A]])
344+
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i32
345+
; CHECK-NEXT: [[TMP6:%.*]] = add i32 1, [[TMP5]]
346+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 1
347+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
348+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
349+
; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[UMIN]] to i32
350+
; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], 0
351+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
352+
; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
353+
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP9]], [[TMP13]]
354+
; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
355+
; CHECK: vector.memcheck:
356+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 1
357+
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 8
358+
; CHECK-NEXT: [[UMAX3:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
359+
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[UMAX3]], -1
360+
; CHECK-NEXT: [[TMP16:%.*]] = freeze i64 [[TMP15]]
361+
; CHECK-NEXT: [[UMIN4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP16]], i64 [[A]])
362+
; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[UMIN4]], 3
363+
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8
364+
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC_3:%.*]], i64 [[TMP18]]
365+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
366+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_2]], [[SCEVGEP]]
367+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
368+
; CHECK-NEXT: [[BOUND06:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP5]]
369+
; CHECK-NEXT: [[BOUND17:%.*]] = icmp ult ptr [[SRC_3]], [[SCEVGEP]]
370+
; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
371+
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT8]]
372+
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
373+
; CHECK: vector.ph:
374+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
375+
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
376+
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 4, i64 [[N_MOD_VF]]
377+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP20]]
378+
; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META6:![0-9]+]]
379+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP21]], i64 0
380+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
381+
; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[N_VEC]] to i32
382+
; CHECK-NEXT: [[TMP23:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
332383
; CHECK-NEXT: br label [[LOOP:%.*]]
384+
; CHECK: vector.body:
385+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
386+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
387+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[SRC_3]], i32 [[OFFSET_IDX]]
388+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i32 2
389+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8, !alias.scope [[META9:![0-9]+]]
390+
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer
391+
; CHECK-NEXT: [[TMP27:%.*]] = and <2 x i1> [[TMP23]], [[TMP26]]
392+
; CHECK-NEXT: [[TMP28:%.*]] = zext <2 x i1> [[TMP27]] to <2 x i8>
393+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i8> [[TMP28]], i32 1
394+
; CHECK-NEXT: store i8 [[TMP29]], ptr [[DST]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
395+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
396+
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
397+
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
398+
; CHECK: middle.block:
399+
; CHECK-NEXT: br label [[SCALAR_PH]]
400+
; CHECK: scalar.ph:
401+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
402+
; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
403+
; CHECK-NEXT: br label [[LOOP1:%.*]]
333404
; CHECK: loop:
334-
; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
335-
; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
336-
; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A:%.*]]
405+
; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
406+
; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL11]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
407+
; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A]]
337408
; CHECK-NEXT: br i1 [[EC_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
338409
; CHECK: loop.latch:
339-
; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1:%.*]], align 8
340-
; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2:%.*]], align 8
410+
; CHECK-NEXT: [[SRC_1:%.*]] = getelementptr inbounds i64, ptr [[SRC_3]], i32 [[IV_1]]
411+
; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1]], align 8
412+
; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2]], align 8
341413
; CHECK-NEXT: [[CMP55_US:%.*]] = icmp eq i64 [[L_1]], 0
342414
; CHECK-NEXT: [[CMP_I_US:%.*]] = icmp ne i64 [[L_2]], 0
343415
; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP_I_US]], [[CMP55_US]]
344416
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[AND]] to i8
345-
; CHECK-NEXT: store i8 [[EXT]], ptr [[DST:%.*]], align 1
417+
; CHECK-NEXT: store i8 [[EXT]], ptr [[DST]], align 1
346418
; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
347419
; CHECK-NEXT: [[IV_1_NEXT_WIDE]] = zext i32 [[IV_1_NEXT]] to i64
348-
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B:%.*]]
349-
; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]]
420+
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B]]
421+
; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP1]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+]]
350422
; CHECK: exit:
351423
; CHECK-NEXT: ret void
352424
;
@@ -360,7 +432,8 @@ loop:
360432
br i1 %ec.1, label %loop.latch, label %exit
361433

362434
loop.latch:
363-
%l.1 = load i64, ptr %src.1, align 8
435+
%gep.src.1 = getelementptr inbounds i64, ptr %src.1, i32 %iv.1
436+
%l.1 = load i64, ptr %gep.src.1, align 8
364437
%l.2 = load i64, ptr %src.2, align 8
365438
%cmp55.us = icmp eq i64 %l.1, 0
366439
%cmp.i.us = icmp ne i64 %l.2, 0

0 commit comments

Comments
 (0)