@@ -25,37 +25,9 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
25
25
; NO-ZVFBFMIN-PREDICATED-LABEL: define void @fadd(
26
26
; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
27
27
; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
28
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
29
- ; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]:
30
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 15
31
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
32
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
33
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
34
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
35
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
36
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]]
37
- ; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]:
38
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
39
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0
40
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
41
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
42
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP0:%.*]] = icmp ule <16 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
43
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[INDEX]]
44
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[INDEX]]
45
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x bfloat> @llvm.masked.load.v16bf16.p0(ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]], <16 x bfloat> poison)
46
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <16 x bfloat> @llvm.masked.load.v16bf16.p0(ptr [[TMP2]], i32 2, <16 x i1> [[TMP0]], <16 x bfloat> poison)
47
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP3:%.*]] = fadd <16 x bfloat> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD3]]
48
- ; NO-ZVFBFMIN-PREDICATED-NEXT: call void @llvm.masked.store.v16bf16.p0(<16 x bfloat> [[TMP3]], ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]])
49
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
50
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
51
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
52
- ; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]:
53
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[EXIT:.*]]
54
- ; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]:
55
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
56
28
; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
57
29
; NO-ZVFBFMIN-PREDICATED: [[LOOP]]:
58
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
30
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
59
31
; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
60
32
; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
61
33
; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
@@ -64,7 +36,7 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
64
36
; NO-ZVFBFMIN-PREDICATED-NEXT: store bfloat [[Z]], ptr [[A_GEP]], align 2
65
37
; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1
66
38
; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
67
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
39
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
68
40
; NO-ZVFBFMIN-PREDICATED: [[EXIT]]:
69
41
; NO-ZVFBFMIN-PREDICATED-NEXT: ret void
70
42
;
@@ -183,38 +155,32 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
183
155
; NO-ZVFBFMIN-PREDICATED-LABEL: define void @vfwmaccbf16.vv(
184
156
; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
185
157
; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
186
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
158
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
159
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
187
160
; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]:
188
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3
189
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
190
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
191
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
192
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
193
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
161
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
162
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
194
163
; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]]
195
164
; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]:
196
165
; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
197
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[I]], i64 0
198
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
199
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
200
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
201
166
; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
202
167
; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
203
168
; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]]
204
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x bfloat> @llvm.masked.load.v4bf16.p0(ptr [[A_GEP]], i32 2, <4 x i1> [[TMP0]], <4 x bfloat> poison)
205
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x bfloat> @llvm.masked.load.v4bf16.p0(ptr [[B_GEP]], i32 2, <4 x i1> [[TMP0]], <4 x bfloat> poison)
206
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[C_GEP]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison)
169
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <4 x bfloat>, ptr [[A_GEP]], align 2
170
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = load <4 x bfloat>, ptr [[B_GEP]], align 2
171
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = load <4 x float>, ptr [[C_GEP]], align 4
207
172
; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD]] to <4 x float>
208
173
; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP5:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD3]] to <4 x float>
209
174
; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[WIDE_MASKED_LOAD4]])
210
- ; NO-ZVFBFMIN-PREDICATED-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[TMP6]], ptr [[C_GEP]], i32 4, <4 x i1> [[TMP0]])
211
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add i64 [[I]], 4
175
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: store <4 x float> [[TMP6]], ptr [[C_GEP]], align 4
176
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[I]], 4
212
177
; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
213
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
178
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
214
179
; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]:
215
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[EXIT:.*]]
180
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
181
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
216
182
; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]:
217
- ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
183
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
218
184
; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
219
185
; NO-ZVFBFMIN-PREDICATED: [[LOOP]]:
220
186
; NO-ZVFBFMIN-PREDICATED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
@@ -230,7 +196,7 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
230
196
; NO-ZVFBFMIN-PREDICATED-NEXT: store float [[FMULADD]], ptr [[C_GEP1]], align 4
231
197
; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I1]], 1
232
198
; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
233
- ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
199
+ ; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
234
200
; NO-ZVFBFMIN-PREDICATED: [[EXIT]]:
235
201
; NO-ZVFBFMIN-PREDICATED-NEXT: ret void
236
202
;
@@ -318,8 +284,6 @@ exit:
318
284
; NO-ZVFBFMIN-PREDICATED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
319
285
; NO-ZVFBFMIN-PREDICATED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
320
286
; NO-ZVFBFMIN-PREDICATED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
321
- ; NO-ZVFBFMIN-PREDICATED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
322
- ; NO-ZVFBFMIN-PREDICATED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
323
287
;.
324
288
; ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
325
289
; ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
0 commit comments