Skip to content

Commit 510ffd4

Browse files
juliannagele authored and augusto2112 committed
[LoopUnroll] Introduce parallel accumulators when unrolling FP reductions. (llvm#166630)
This builds on top of llvm#149470 by also introducing parallel accumulator PHIs for floating-point reductions, provided the reduction carries the reassoc fast-math flag. See also llvm#166353, which aims to introduce parallel accumulators for reductions with vector instructions.
1 parent ec4a500 commit 510ffd4

File tree

3 files changed

+218
-11
lines changed

3 files changed

+218
-11
lines changed

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
10931093
if (!RdxResult) {
10941094
RdxResult = PartialReductions.front();
10951095
IRBuilder Builder(ExitBlock, ExitBlock->getFirstNonPHIIt());
1096+
Builder.setFastMathFlags(Reductions.begin()->second.getFastMathFlags());
10961097
RecurKind RK = Reductions.begin()->second.getRecurrenceKind();
10971098
for (Instruction *RdxPart : drop_begin(PartialReductions)) {
10981099
RdxResult = Builder.CreateBinOp(
@@ -1255,14 +1256,15 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L,
12551256
return std::nullopt;
12561257
RecurKind RK = RdxDesc.getRecurrenceKind();
12571258
// Skip unsupported reductions.
1258-
// TODO: Handle additional reductions, including FP and min-max
1259-
// reductions.
1260-
if (!RecurrenceDescriptor::isIntegerRecurrenceKind(RK) ||
1261-
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
1259+
// TODO: Handle additional reductions, including min-max reductions.
1260+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
12621261
RecurrenceDescriptor::isFindIVRecurrenceKind(RK) ||
12631262
RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
12641263
return std::nullopt;
12651264

1265+
if (RdxDesc.hasExactFPMath())
1266+
return std::nullopt;
1267+
12661268
if (RdxDesc.IntermediateStore)
12671269
return std::nullopt;
12681270

llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -319,27 +319,33 @@ define float @test_fadd_with_ressaoc(ptr %src, i64 %n, float %start) {
319319
; CHECK-NEXT: br label %[[LOOP:.*]]
320320
; CHECK: [[LOOP]]:
321321
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
322-
; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
322+
; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
323+
; CHECK-NEXT: [[RDX_2:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
324+
; CHECK-NEXT: [[RDX_3:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
325+
; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
323326
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
324327
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]]
325328
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1
326-
; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]]
329+
; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[L]]
327330
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
328331
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]]
329332
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1
330-
; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]]
333+
; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[L_1]]
331334
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
332335
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]]
333336
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1
334-
; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]]
337+
; CHECK-NEXT: [[RDX_NEXT_2]] = fadd reassoc float [[RDX_2]], [[L_2]]
335338
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
336339
; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]]
337340
; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1
338-
; CHECK-NEXT: [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]]
341+
; CHECK-NEXT: [[RDX_NEXT_3]] = fadd reassoc float [[RDX_3]], [[L_24]]
339342
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
340343
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
341344
; CHECK: [[EXIT]]:
342-
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ]
345+
; CHECK-NEXT: [[RDX_NEXT_LCSSA1:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ]
346+
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]]
347+
; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd reassoc float [[RDX_NEXT_2]], [[BIN_RDX]]
348+
; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = fadd reassoc float [[RDX_NEXT_3]], [[BIN_RDX1]]
343349
; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]]
344350
;
345351
entry:
@@ -351,7 +357,7 @@ loop:
351357
%iv.next = add i64 %iv, 1
352358
%gep.src = getelementptr float, ptr %src, i64 %iv
353359
%l = load float, ptr %gep.src, align 1
354-
%rdx.next = fadd float %rdx, %l
360+
%rdx.next = fadd reassoc float %rdx, %l
355361
%ec = icmp ne i64 %iv.next, 1000
356362
br i1 %ec, label %loop, label %exit
357363

llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,202 @@ exit:
287287
ret <4 x i32> %res
288288
}
289289

290+
define float @test_fadd_reduction(ptr %a, i64 %n) {
291+
; CHECK-LABEL: define float @test_fadd_reduction(
292+
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
293+
; CHECK-NEXT: [[ENTRY:.*]]:
294+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
295+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
296+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
297+
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
298+
; CHECK: [[ENTRY_NEW]]:
299+
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
300+
; CHECK-NEXT: br label %[[LOOP:.*]]
301+
; CHECK: [[LOOP]]:
302+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
303+
; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
304+
; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
305+
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
306+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
307+
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16
308+
; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[TMP2]]
309+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
310+
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
311+
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16
312+
; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[TMP3]]
313+
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
314+
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
315+
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
316+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
317+
; CHECK: [[EXIT_UNR_LCSSA]]:
318+
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
319+
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
320+
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
321+
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]]
322+
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
323+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
324+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
325+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
326+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ]
327+
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
328+
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
329+
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
330+
; CHECK: [[LOOP_EPIL]]:
331+
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
332+
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
333+
; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd reassoc float [[RDX_EPIL_INIT]], [[TMP4]]
334+
; CHECK-NEXT: br label %[[EXIT]]
335+
; CHECK: [[EXIT]]:
336+
; CHECK-NEXT: [[RES:%.*]] = phi float [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
337+
; CHECK-NEXT: ret float [[RES]]
338+
;
339+
entry:
340+
br label %loop
341+
342+
loop:
343+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
344+
%rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ]
345+
%gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv
346+
%1 = load float, ptr %gep.a, align 16
347+
%rdx.next = fadd reassoc float %rdx, %1
348+
%iv.next = add nuw nsw i64 %iv, 1
349+
%ec = icmp eq i64 %iv.next, %n
350+
br i1 %ec, label %exit, label %loop, !llvm.loop !0
351+
352+
exit:
353+
%res = phi float [ %rdx.next, %loop ]
354+
ret float %res
355+
}
356+
357+
define float @test_fadd_no_reassoc(ptr %a, i64 %n) {
358+
; CHECK-LABEL: define float @test_fadd_no_reassoc(
359+
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
360+
; CHECK-NEXT: [[ENTRY:.*]]:
361+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
362+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
363+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
364+
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
365+
; CHECK: [[ENTRY_NEW]]:
366+
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
367+
; CHECK-NEXT: br label %[[LOOP:.*]]
368+
; CHECK: [[LOOP]]:
369+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
370+
; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
371+
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
372+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
373+
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16
374+
; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[TMP2]]
375+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
376+
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
377+
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16
378+
; CHECK-NEXT: [[RDX_NEXT_1]] = fadd float [[RDX_NEXT]], [[TMP3]]
379+
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
380+
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
381+
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
382+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
383+
; CHECK: [[EXIT_UNR_LCSSA]]:
384+
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
385+
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
386+
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
387+
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
388+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
389+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
390+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
391+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
392+
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
393+
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
394+
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
395+
; CHECK: [[LOOP_EPIL]]:
396+
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
397+
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
398+
; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd float [[RDX_EPIL_INIT]], [[TMP4]]
399+
; CHECK-NEXT: br label %[[EXIT]]
400+
; CHECK: [[EXIT]]:
401+
; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
402+
; CHECK-NEXT: ret float [[RES]]
403+
;
404+
entry:
405+
br label %loop
406+
407+
loop:
408+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
409+
%rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ]
410+
%gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv
411+
%1 = load float, ptr %gep.a, align 16
412+
%rdx.next = fadd float %rdx, %1
413+
%iv.next = add nuw nsw i64 %iv, 1
414+
%ec = icmp eq i64 %iv.next, %n
415+
br i1 %ec, label %exit, label %loop, !llvm.loop !0
416+
417+
exit:
418+
%res = phi float [ %rdx.next, %loop ]
419+
ret float %res
420+
}
421+
422+
define float @test_fadd_other_fastmath(ptr %a, i64 %n) {
423+
; CHECK-LABEL: define float @test_fadd_other_fastmath(
424+
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
425+
; CHECK-NEXT: [[ENTRY:.*]]:
426+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
427+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
428+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
429+
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
430+
; CHECK: [[ENTRY_NEW]]:
431+
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
432+
; CHECK-NEXT: br label %[[LOOP:.*]]
433+
; CHECK: [[LOOP]]:
434+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
435+
; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
436+
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
437+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
438+
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16
439+
; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd contract float [[RDX]], [[TMP2]]
440+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
441+
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
442+
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16
443+
; CHECK-NEXT: [[RDX_NEXT_1]] = fadd contract float [[RDX_NEXT]], [[TMP3]]
444+
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
445+
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
446+
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
447+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
448+
; CHECK: [[EXIT_UNR_LCSSA]]:
449+
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
450+
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
451+
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
452+
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
453+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
454+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
455+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
456+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
457+
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
458+
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
459+
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
460+
; CHECK: [[LOOP_EPIL]]:
461+
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
462+
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
463+
; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd contract float [[RDX_EPIL_INIT]], [[TMP4]]
464+
; CHECK-NEXT: br label %[[EXIT]]
465+
; CHECK: [[EXIT]]:
466+
; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
467+
; CHECK-NEXT: ret float [[RES]]
468+
;
469+
entry:
470+
br label %loop
471+
472+
loop:
473+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
474+
%rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ]
475+
%gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv
476+
%1 = load float, ptr %gep.a, align 16
477+
%rdx.next = fadd contract float %rdx, %1
478+
%iv.next = add nuw nsw i64 %iv, 1
479+
%ec = icmp eq i64 %iv.next, %n
480+
br i1 %ec, label %exit, label %loop, !llvm.loop !0
481+
482+
exit:
483+
%res = phi float [ %rdx.next, %loop ]
484+
ret float %res
485+
}
290486

291487
!0 = distinct !{!0, !1}
292488
!1 = !{!"llvm.loop.unroll.count", i32 2}
@@ -301,4 +497,7 @@ exit:
301497
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
302498
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
303499
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
500+
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
501+
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
502+
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]}
304503
;.

0 commit comments

Comments
 (0)