Skip to content

Commit 1ad2c5b

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.5
1 parent 29ed600 commit 1ad2c5b

File tree

4 files changed

+12
-58
lines changed

4 files changed

+12
-58
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3943,11 +3943,13 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
39433943
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
39443944
// the memory access that is most restrictive (involved in the smallest
39453945
// dependence distance).
3946-
unsigned MaxSafeElements =
3947-
llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
3946+
unsigned MaxSafeElements = Legal->getMaxSafeVectorWidthInBits() / WidestType;
3947+
if (Legal->isSafeForAnyVectorWidth())
3948+
MaxSafeElements = bit_ceil(MaxSafeElements);
3949+
unsigned MaxSafeElementsPowerOf2 = 1ULL << countr_zero(MaxSafeElements);
3950+
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
3951+
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
39483952

3949-
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
3950-
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
39513953
if (!Legal->isSafeForAnyVectorWidth())
39523954
this->MaxSafeElements = MaxSafeElements;
39533955

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
2121
; CHECK-NEXT: LV: Found trip count: 0
2222
; CHECK-NEXT: LV: Found maximum trip count: 4294967295
2323
; CHECK-NEXT: LV: Scalable vectorization is available
24-
; CHECK-NEXT: LV: The max safe fixed VF is: 67108864.
24+
; CHECK-NEXT: LV: The max safe fixed VF is: 134217728.
2525
; CHECK-NEXT: LV: The max safe scalable VF is: vscale x 4294967295.
2626
; CHECK-NEXT: LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
2727
; CHECK-NEXT: LV: Found uniform instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
@@ -271,7 +271,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
271271
; CHECK-NEXT: LV: Found trip count: 0
272272
; CHECK-NEXT: LV: Found maximum trip count: 4294967295
273273
; CHECK-NEXT: LV: Scalable vectorization is available
274-
; CHECK-NEXT: LV: The max safe fixed VF is: 67108864.
274+
; CHECK-NEXT: LV: The max safe fixed VF is: 134217728.
275275
; CHECK-NEXT: LV: The max safe scalable VF is: vscale x 4294967295.
276276
; CHECK-NEXT: LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
277277
; CHECK-NEXT: LV: Found uniform instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom

llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -24,65 +24,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
2424
define void @maxvf3() {
2525
; CHECK-LABEL: @maxvf3(
2626
; CHECK-NEXT: entry:
27-
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
28-
; CHECK: vector.ph:
29-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
30-
; CHECK: vector.body:
31-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
32-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
33-
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 14)
34-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
35-
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
36-
; CHECK: pred.store.if:
37-
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
38-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP2]]
39-
; CHECK-NEXT: store i8 69, ptr [[TMP3]], align 8
40-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
41-
; CHECK: pred.store.continue:
42-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
43-
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
44-
; CHECK: pred.store.if1:
45-
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
46-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP5]]
47-
; CHECK-NEXT: store i8 69, ptr [[TMP6]], align 8
48-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
49-
; CHECK: pred.store.continue2:
50-
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]]
51-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
52-
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
53-
; CHECK: pred.store.if3:
54-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
55-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP9]]
56-
; CHECK-NEXT: store i8 7, ptr [[TMP10]], align 8
57-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
58-
; CHECK: pred.store.continue4:
59-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
60-
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
61-
; CHECK: pred.store.if5:
62-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
63-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP12]]
64-
; CHECK-NEXT: store i8 7, ptr [[TMP13]], align 8
65-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
66-
; CHECK: pred.store.continue6:
67-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
68-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
69-
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
70-
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
71-
; CHECK: middle.block:
72-
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
73-
; CHECK: scalar.ph:
74-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
7527
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
7628
; CHECK: for.body:
77-
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ]
29+
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ]
7830
; CHECK-NEXT: [[AJ:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[J]]
7931
; CHECK-NEXT: store i8 69, ptr [[AJ]], align 8
8032
; CHECK-NEXT: [[JP3:%.*]] = add nuw nsw i32 3, [[J]]
8133
; CHECK-NEXT: [[AJP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[JP3]]
8234
; CHECK-NEXT: store i8 7, ptr [[AJP3]], align 8
8335
; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
8436
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15
85-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
37+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8638
; CHECK: for.end:
8739
; CHECK-NEXT: ret void
8840
;

llvm/test/Transforms/LoopVectorize/memdep.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ for.end:
226226

227227
;Check the new calculation of the maximum safe distance in bits which can be vectorized.
228228
;The previous behavior did not take into account that the stride was 2.
229-
;Therefore the maxVF was computed as 8 instead of 4, as the dependence distance here is 6 iterations, given by |N-(N-12)|/2.
229+
;Therefore the maxVF was computed as 8 instead of 2, as the dependence distance here is 6 iterations, given by |N-(N-12)|/2.
230230

231231
;#define M 32
232232
;#define N 2 * M
@@ -242,7 +242,7 @@ for.end:
242242
;}
243243

244244
; RIGHTVF-LABEL: @pr34283
245-
; RIGHTVF: <4 x i64>
245+
; RIGHTVF: <2 x i64>
246246

247247
; WRONGVF-LABLE: @pr34283
248248
; WRONGVF-NOT: <8 x i64>

0 commit comments

Comments
 (0)