@@ -165,6 +165,204 @@ exit:
165
165
ret void
166
166
}
167
167
168
; Test input: a single-block counted loop. Every iteration loads a float from
; %src at index %iv * %scale, adds %k to it, and stores the sum to %dst at
; index %iv. %iv starts at 0 and the loop exits once %iv + 1 equals %N.
; Both the APPLE and OTHER check prefixes below expect the same output: the
; loop body duplicated once (unrolled by 2, with NITER advancing by 2 per
; trip) plus one straight-line epilogue copy that runs when N & 1 is non-zero.
; The unrolled loop is bypassed entirely when N - 1 is unsigned-less-than 1.
; NOTE(review): the RUN lines that define the APPLE / OTHER prefixes are not
; visible in this chunk -- confirm which configurations they correspond to.
+ define void @load_op_store_loop (ptr %src , ptr %dst , i64 %N , i64 %scale , float %k ) {
169
+ ; APPLE-LABEL: define void @load_op_store_loop(
170
+ ; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
171
+ ; APPLE-NEXT: [[ENTRY:.*]]:
172
+ ; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
173
+ ; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
174
+ ; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
175
+ ; APPLE-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
176
+ ; APPLE: [[ENTRY_NEW]]:
177
+ ; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
178
+ ; APPLE-NEXT: br label %[[LOOP:.*]]
179
+ ; APPLE: [[LOOP]]:
180
+ ; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
181
+ ; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
182
+ ; APPLE-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
183
+ ; APPLE-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
184
+ ; APPLE-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
185
+ ; APPLE-NEXT: [[O:%.*]] = fadd float [[L]], [[K]]
186
+ ; APPLE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
187
+ ; APPLE-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
188
+ ; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
189
+ ; APPLE-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
190
+ ; APPLE-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
191
+ ; APPLE-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
192
+ ; APPLE-NEXT: [[O_1:%.*]] = fadd float [[L_1]], [[K]]
193
+ ; APPLE-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
194
+ ; APPLE-NEXT: store float [[O_1]], ptr [[GEP_DST_1]], align 4
195
+ ; APPLE-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
196
+ ; APPLE-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
197
+ ; APPLE-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
198
+ ; APPLE-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
199
+ ; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
200
+ ; APPLE-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
201
+ ; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
202
+ ; APPLE: [[EXIT_UNR_LCSSA]]:
203
+ ; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
204
+ ; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
205
+ ; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
206
+ ; APPLE: [[LOOP_EPIL_PREHEADER]]:
207
+ ; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
208
+ ; APPLE: [[LOOP_EPIL]]:
209
+ ; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
210
+ ; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
211
+ ; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
212
+ ; APPLE-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
213
+ ; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]]
214
+ ; APPLE-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
215
+ ; APPLE-NEXT: br label %[[EXIT]]
216
+ ; APPLE: [[EXIT]]:
217
+ ; APPLE-NEXT: ret void
218
+ ;
219
+ ; OTHER-LABEL: define void @load_op_store_loop(
220
+ ; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
221
+ ; OTHER-NEXT: [[ENTRY:.*]]:
222
+ ; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
223
+ ; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
224
+ ; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
225
+ ; OTHER-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
226
+ ; OTHER: [[ENTRY_NEW]]:
227
+ ; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
228
+ ; OTHER-NEXT: br label %[[LOOP:.*]]
229
+ ; OTHER: [[LOOP]]:
230
+ ; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
231
+ ; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
232
+ ; OTHER-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
233
+ ; OTHER-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
234
+ ; OTHER-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
235
+ ; OTHER-NEXT: [[O:%.*]] = fadd float [[L]], [[K]]
236
+ ; OTHER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
237
+ ; OTHER-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
238
+ ; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
239
+ ; OTHER-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
240
+ ; OTHER-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
241
+ ; OTHER-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
242
+ ; OTHER-NEXT: [[O_1:%.*]] = fadd float [[L_1]], [[K]]
243
+ ; OTHER-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
244
+ ; OTHER-NEXT: store float [[O_1]], ptr [[GEP_DST_1]], align 4
245
+ ; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
246
+ ; OTHER-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
247
+ ; OTHER-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
248
+ ; OTHER-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
249
+ ; OTHER: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
250
+ ; OTHER-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
251
+ ; OTHER-NEXT: br label %[[EXIT_UNR_LCSSA]]
252
+ ; OTHER: [[EXIT_UNR_LCSSA]]:
253
+ ; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
254
+ ; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
255
+ ; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
256
+ ; OTHER: [[LOOP_EPIL_PREHEADER]]:
257
+ ; OTHER-NEXT: br label %[[LOOP_EPIL:.*]]
258
+ ; OTHER: [[LOOP_EPIL]]:
259
+ ; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
260
+ ; OTHER-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
261
+ ; OTHER-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
262
+ ; OTHER-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
263
+ ; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]]
264
+ ; OTHER-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
265
+ ; OTHER-NEXT: br label %[[EXIT]]
266
+ ; OTHER: [[EXIT]]:
267
+ ; OTHER-NEXT: ret void
268
+ ;
269
+ entry:
270
+ br label %loop
271
+
272
+ loop:
273
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ] ; induction variable, starts at 0 and steps by 1
274
+ %scaled.iv = mul nuw nsw i64 %iv , %scale ; source index is %iv strided by %scale
275
+ %gep.src = getelementptr inbounds float , ptr %src , i64 %scaled.iv
276
+ %l = load float , ptr %gep.src , align 4
277
+ %o = fadd float %l , %k
278
+ %gep.dst = getelementptr inbounds float , ptr %dst , i64 %iv ; destination index is the unscaled %iv
279
+ store float %o , ptr %gep.dst , align 4
280
+ %iv.next = add nuw nsw i64 %iv , 1
281
+ %ec = icmp eq i64 %iv.next , %N ; leave the loop once %iv.next reaches %N
282
+ br i1 %ec , label %exit , label %loop
283
+
284
+ exit:
285
+ ret void
286
+ }
287
+
288
; Test input: a load / add %k / store loop whose body is split across three
; blocks. %loop loads a float from %src at index %iv * %scale and branches on
; the low bit of %iv. For odd %iv, %loopodd negates the loaded value.
; %loopcont merges the two values with a phi, adds %k, stores to %dst at
; index %iv, and compares %iv + 1 against %N to decide whether to exit.
; Both the APPLE and OTHER check prefixes below expect the loop to come out
; with a single copy of the body and a +1 induction step, i.e. not unrolled.
; NOTE(review): the checks bind the load to [[L]] although the IR names it
; %l1. The pattern matches any value name, so the test still works, but the
; assertions may predate a rename -- consider regenerating them.
+ define void @load_op_store_loop_multiblock (ptr %src , ptr %dst , i64 %N , i64 %scale , float %k ) {
289
+ ; APPLE-LABEL: define void @load_op_store_loop_multiblock(
290
+ ; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
291
+ ; APPLE-NEXT: [[ENTRY:.*]]:
292
+ ; APPLE-NEXT: br label %[[LOOP:.*]]
293
+ ; APPLE: [[LOOP]]:
294
+ ; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOPCONT:.*]] ]
295
+ ; APPLE-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
296
+ ; APPLE-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
297
+ ; APPLE-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
298
+ ; APPLE-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
299
+ ; APPLE-NEXT: [[ODD:%.*]] = icmp eq i64 [[AND]], 1
300
+ ; APPLE-NEXT: br i1 [[ODD]], label %[[LOOPODD:.*]], label %[[LOOPCONT]]
301
+ ; APPLE: [[LOOPCONT]]:
302
+ ; APPLE-NEXT: [[D:%.*]] = phi float [ [[L2:%.*]], %[[LOOPODD]] ], [ [[L]], %[[LOOP]] ]
303
+ ; APPLE-NEXT: [[O:%.*]] = fadd float [[D]], [[K]]
304
+ ; APPLE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
305
+ ; APPLE-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
306
+ ; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
307
+ ; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
308
+ ; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
309
+ ; APPLE: [[LOOPODD]]:
310
+ ; APPLE-NEXT: [[L2]] = fneg float [[L]]
311
+ ; APPLE-NEXT: br label %[[LOOPCONT]]
312
+ ; APPLE: [[EXIT]]:
313
+ ; APPLE-NEXT: ret void
314
+ ;
315
+ ; OTHER-LABEL: define void @load_op_store_loop_multiblock(
316
+ ; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
317
+ ; OTHER-NEXT: [[ENTRY:.*]]:
318
+ ; OTHER-NEXT: br label %[[LOOP:.*]]
319
+ ; OTHER: [[LOOP]]:
320
+ ; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOPCONT:.*]] ]
321
+ ; OTHER-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
322
+ ; OTHER-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
323
+ ; OTHER-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
324
+ ; OTHER-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
325
+ ; OTHER-NEXT: [[ODD:%.*]] = icmp eq i64 [[AND]], 1
326
+ ; OTHER-NEXT: br i1 [[ODD]], label %[[LOOPODD:.*]], label %[[LOOPCONT]]
327
+ ; OTHER: [[LOOPCONT]]:
328
+ ; OTHER-NEXT: [[D:%.*]] = phi float [ [[L2:%.*]], %[[LOOPODD]] ], [ [[L]], %[[LOOP]] ]
329
+ ; OTHER-NEXT: [[O:%.*]] = fadd float [[D]], [[K]]
330
+ ; OTHER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
331
+ ; OTHER-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
332
+ ; OTHER-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
333
+ ; OTHER-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
334
+ ; OTHER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
335
+ ; OTHER: [[LOOPODD]]:
336
+ ; OTHER-NEXT: [[L2]] = fneg float [[L]]
337
+ ; OTHER-NEXT: br label %[[LOOPCONT]]
338
+ ; OTHER: [[EXIT]]:
339
+ ; OTHER-NEXT: ret void
340
+ ;
341
+ entry:
342
+ br label %loop
343
+ loop:
344
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loopcont ] ; back-edge comes from %loopcont, not from %loop itself
345
+ %scaled.iv = mul nuw nsw i64 %iv , %scale ; source index is %iv strided by %scale
346
+ %gep.src = getelementptr inbounds float , ptr %src , i64 %scaled.iv
347
+ %l1 = load float , ptr %gep.src , align 4
348
+ %and = and i64 %iv , 1
349
+ %odd = icmp eq i64 %and , 1 ; true when the low bit of %iv is set
350
+ br i1 %odd , label %loopodd , label %loopcont
351
+ loopcont:
352
+ %d = phi float [ %l2 , %loopodd ], [ %l1 , %loop ] ; negated value via %loopodd, raw load otherwise
353
+ %o = fadd float %d , %k
354
+ %gep.dst = getelementptr inbounds float , ptr %dst , i64 %iv ; destination index is the unscaled %iv
355
+ store float %o , ptr %gep.dst , align 4
356
+ %iv.next = add nuw nsw i64 %iv , 1
357
+ %ec = icmp eq i64 %iv.next , %N ; leave the loop once %iv.next reaches %N
358
+ br i1 %ec , label %exit , label %loop
359
+ loopodd:
360
+ %l2 = fneg float %l1 ; only reached when %odd is true
361
+ br label %loopcont
362
+ exit:
363
+ ret void
364
+ }
365
+
168
366
@A = external constant [9 x i8 ], align 1
169
367
@B = external constant [8 x i32 ], align 4
170
368
@C = external constant [8 x i32 ], align 4
0 commit comments