Skip to content

Commit a3070b4

Browse files
committed
!fixup add missed comment changes, update remaining tests.
1 parent 284c905 commit a3070b4

File tree

3 files changed

+21
-34
lines changed

3 files changed

+21
-34
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,12 +1516,12 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
15161516
if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
15171517
return Stride;
15181518

1519-
// An inbounds getelementptr that is a AddRec with a unit stride
1520-
// cannot wrap per definition. If it did, the result would be poison
1521-
// and any memory access dependent on it would be immediate UB
1522-
// when executed.
1523-
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1524-
GEP && GEP->isInBounds())
1519+
// An inbounds getelementptr that is an AddRec cannot wrap. If it would wrap,
1520+
// the distance between the previously accessed location and the wrapped
1521+
// location will be larger than half the pointer index type space. In that
1522+
// case, the GEP would be poison and any memory access dependent on it would
1523+
// be immediate UB when executed.
1524+
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); GEP && GEP->isInBounds())
15251525
return Stride;
15261526

15271527
// If the null pointer is undefined, then an access sequence which would

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
9595
}
9696

9797
; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2'
98-
; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
98+
; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
9999
define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
100100
entry:
101101
br label %for.body

llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt
2929
; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
3030
; CHECK: [[FOR_BODY_PREHEADER]]:
3131
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i8 [[Y]] to i64
32-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i8 [[Y]], 4
32+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i8 [[Y]], 5
3333
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
3434
; CHECK: [[VECTOR_SCEVCHECK]]:
3535
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
@@ -41,49 +41,36 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt
4141
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
4242
; CHECK-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
4343
; CHECK: [[VECTOR_PH]]:
44-
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 252
45-
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw i64 [[N_VEC]] to i8
44+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
45+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
46+
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP8]], i64 4, i64 [[N_MOD_VF]]
47+
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[TMP7]]
48+
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i8
4649
; CHECK-NEXT: [[TMP6:%.*]] = shl i8 [[DOTCAST]], 1
4750
; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[X]], [[TMP6]]
48-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i64 0
49-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
50-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[DOTSPLAT]], <i8 0, i8 2, i8 4, i8 6>
5151
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
5252
; CHECK: [[VECTOR_BODY]]:
5353
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
54-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
55-
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64>
56-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i64 0
57-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
58-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i64 1
59-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
60-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i64 2
61-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]]
62-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i64 3
54+
; CHECK-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i8
55+
; CHECK-NEXT: [[TMP9:%.*]] = shl i8 [[DOTCAST3]], 1
56+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[X]], [[TMP9]]
57+
; CHECK-NEXT: [[TMP14:%.*]] = zext i8 [[OFFSET_IDX]] to i64
6358
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
64-
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP9]], align 4
65-
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP11]], align 4
66-
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP13]], align 4
67-
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP15]], align 4
68-
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> poison, i32 [[TMP16]], i64 0
69-
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i64 1
70-
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i64 2
71-
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP19]], i64 3
59+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4
60+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
7261
; CHECK-NEXT: [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], <i32 1, i32 1, i32 1, i32 1>
7362
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
7463
; CHECK-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP25]], align 4
7564
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
76-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], <i8 8, i8 8, i8 8, i8 8>
7765
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
7866
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
7967
; CHECK: [[MIDDLE_BLOCK]]:
80-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
81-
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
68+
; CHECK-NEXT: br label %[[SCALAR_PH]]
8269
; CHECK: [[SCALAR_PH]]:
8370
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
8471
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[FOR_BODY_PREHEADER]] ], [ [[X]], %[[VECTOR_SCEVCHECK]] ]
8572
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
86-
; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT]]:
73+
; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
8774
; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
8875
; CHECK: [[FOR_COND_CLEANUP]]:
8976
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)