@@ -287,6 +287,202 @@ exit:
287287 ret <4 x i32 > %res
288288}
289289
; Float sum reduction whose update is `fadd reassoc`, on a loop tagged with
; `!llvm.loop !0`. The expected output below has the loop body duplicated
; once, with two independent accumulator phis: [[RDX]] keeps the original 0.0
; start value and [[RDX_1]] starts at -0.0 (adding -0.0 leaves a float
; unchanged). The two partial sums are combined by [[BIN_RDX]] in the exit
; block of the unrolled loop, ahead of the single-iteration epilogue.
; NOTE(review): `align 16` on a 4-byte float element indexed by %iv cannot hold
; for consecutive indices; presumably irrelevant to what is checked here --
; confirm, or relax to `align 4` and regenerate the CHECK lines.
290+ define float @test_fadd_reduction (ptr %a , i64 %n ) {
291+ ; CHECK-LABEL: define float @test_fadd_reduction(
292+ ; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
293+ ; CHECK-NEXT: [[ENTRY:.*]]:
294+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
295+ ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
296+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
297+ ; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
298+ ; CHECK: [[ENTRY_NEW]]:
299+ ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
300+ ; CHECK-NEXT: br label %[[LOOP:.*]]
301+ ; CHECK: [[LOOP]]:
302+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
303+ ; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
304+ ; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
305+ ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
306+ ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
307+ ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16
308+ ; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[TMP2]]
309+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
310+ ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
311+ ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16
312+ ; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[TMP3]]
313+ ; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
314+ ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
315+ ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
316+ ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
317+ ; CHECK: [[EXIT_UNR_LCSSA]]:
318+ ; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
319+ ; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
320+ ; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
321+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]]
322+ ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
323+ ; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
324+ ; CHECK: [[LOOP_EPIL_PREHEADER]]:
325+ ; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
326+ ; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ]
327+ ; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
328+ ; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
329+ ; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
330+ ; CHECK: [[LOOP_EPIL]]:
331+ ; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
332+ ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
333+ ; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd reassoc float [[RDX_EPIL_INIT]], [[TMP4]]
334+ ; CHECK-NEXT: br label %[[EXIT]]
335+ ; CHECK: [[EXIT]]:
336+ ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
337+ ; CHECK-NEXT: ret float [[RES]]
338+ ;
339+ entry:
340+ br label %loop
341+
342+ loop:
343+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
344+ %rdx = phi float [ 0 .0 , %entry ], [ %rdx.next , %loop ]
345+ %gep.a = getelementptr inbounds nuw float , ptr %a , i64 %iv
346+ %1 = load float , ptr %gep.a , align 16
; The `reassoc` flag on this add is the only difference from the other two
; fadd tests in this file, which expect a single accumulator chain instead.
347+ %rdx.next = fadd reassoc float %rdx , %1
348+ %iv.next = add nuw nsw i64 %iv , 1
349+ %ec = icmp eq i64 %iv.next , %n
350+ br i1 %ec , label %exit , label %loop , !llvm.loop !0
351+
352+ exit:
353+ %res = phi float [ %rdx.next , %loop ]
354+ ret float %res
355+ }
356+
; Float sum loop whose update is a plain `fadd` with no fast-math flags, on a
; loop tagged with `!llvm.loop !0`. The expected output below still has the
; loop body duplicated once, but keeps a single accumulator phi: the second
; copy of the add consumes [[RDX_NEXT]] directly, and no extra combining add
; appears in the exit block of the unrolled loop.
; NOTE(review): `align 16` on a 4-byte float element indexed by %iv cannot hold
; for consecutive indices; presumably irrelevant to what is checked here --
; confirm, or relax to `align 4` and regenerate the CHECK lines.
357+ define float @test_fadd_no_reassoc (ptr %a , i64 %n ) {
358+ ; CHECK-LABEL: define float @test_fadd_no_reassoc(
359+ ; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
360+ ; CHECK-NEXT: [[ENTRY:.*]]:
361+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
362+ ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
363+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
364+ ; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
365+ ; CHECK: [[ENTRY_NEW]]:
366+ ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
367+ ; CHECK-NEXT: br label %[[LOOP:.*]]
368+ ; CHECK: [[LOOP]]:
369+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
370+ ; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
371+ ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
372+ ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
373+ ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16
374+ ; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[TMP2]]
375+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
376+ ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
377+ ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16
378+ ; CHECK-NEXT: [[RDX_NEXT_1]] = fadd float [[RDX_NEXT]], [[TMP3]]
379+ ; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
380+ ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
381+ ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
382+ ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
383+ ; CHECK: [[EXIT_UNR_LCSSA]]:
384+ ; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
385+ ; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
386+ ; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
387+ ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
388+ ; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
389+ ; CHECK: [[LOOP_EPIL_PREHEADER]]:
390+ ; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
391+ ; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
392+ ; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
393+ ; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
394+ ; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
395+ ; CHECK: [[LOOP_EPIL]]:
396+ ; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
397+ ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
398+ ; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd float [[RDX_EPIL_INIT]], [[TMP4]]
399+ ; CHECK-NEXT: br label %[[EXIT]]
400+ ; CHECK: [[EXIT]]:
401+ ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
402+ ; CHECK-NEXT: ret float [[RES]]
403+ ;
404+ entry:
405+ br label %loop
406+
407+ loop:
408+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
409+ %rdx = phi float [ 0 .0 , %entry ], [ %rdx.next , %loop ]
410+ %gep.a = getelementptr inbounds nuw float , ptr %a , i64 %iv
411+ %1 = load float , ptr %gep.a , align 16
; No fast-math flags on this add: the expected output chains the two unrolled
; adds through one accumulator rather than splitting the sum.
412+ %rdx.next = fadd float %rdx , %1
413+ %iv.next = add nuw nsw i64 %iv , 1
414+ %ec = icmp eq i64 %iv.next , %n
415+ br i1 %ec , label %exit , label %loop , !llvm.loop !0
416+
417+ exit:
418+ %res = phi float [ %rdx.next , %loop ]
419+ ret float %res
420+ }
421+
; Float sum loop whose update carries a fast-math flag other than `reassoc`
; (`contract`), on a loop tagged with `!llvm.loop !0`. The expected output
; below has the loop body duplicated once but keeps a single accumulator phi:
; the second copy of the add consumes [[RDX_NEXT]] directly, and no extra
; combining add appears in the exit block of the unrolled loop.
; NOTE(review): `align 16` on a 4-byte float element indexed by %iv cannot hold
; for consecutive indices; presumably irrelevant to what is checked here --
; confirm, or relax to `align 4` and regenerate the CHECK lines.
422+ define float @test_fadd_other_fastmath (ptr %a , i64 %n ) {
423+ ; CHECK-LABEL: define float @test_fadd_other_fastmath(
424+ ; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
425+ ; CHECK-NEXT: [[ENTRY:.*]]:
426+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
427+ ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
428+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
429+ ; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
430+ ; CHECK: [[ENTRY_NEW]]:
431+ ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
432+ ; CHECK-NEXT: br label %[[LOOP:.*]]
433+ ; CHECK: [[LOOP]]:
434+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
435+ ; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
436+ ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
437+ ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
438+ ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16
439+ ; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd contract float [[RDX]], [[TMP2]]
440+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
441+ ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
442+ ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16
443+ ; CHECK-NEXT: [[RDX_NEXT_1]] = fadd contract float [[RDX_NEXT]], [[TMP3]]
444+ ; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
445+ ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
446+ ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
447+ ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
448+ ; CHECK: [[EXIT_UNR_LCSSA]]:
449+ ; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
450+ ; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
451+ ; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
452+ ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
453+ ; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
454+ ; CHECK: [[LOOP_EPIL_PREHEADER]]:
455+ ; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
456+ ; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
457+ ; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
458+ ; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
459+ ; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
460+ ; CHECK: [[LOOP_EPIL]]:
461+ ; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
462+ ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
463+ ; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd contract float [[RDX_EPIL_INIT]], [[TMP4]]
464+ ; CHECK-NEXT: br label %[[EXIT]]
465+ ; CHECK: [[EXIT]]:
466+ ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
467+ ; CHECK-NEXT: ret float [[RES]]
468+ ;
469+ entry:
470+ br label %loop
471+
472+ loop:
473+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
474+ %rdx = phi float [ 0 .0 , %entry ], [ %rdx.next , %loop ]
475+ %gep.a = getelementptr inbounds nuw float , ptr %a , i64 %iv
476+ %1 = load float , ptr %gep.a , align 16
; `contract` is the only flag on this add; the expected output chains the two
; unrolled adds through one accumulator rather than splitting the sum.
477+ %rdx.next = fadd contract float %rdx , %1
478+ %iv.next = add nuw nsw i64 %iv , 1
479+ %ec = icmp eq i64 %iv.next , %n
480+ br i1 %ec , label %exit , label %loop , !llvm.loop !0
481+
482+ exit:
483+ %res = phi float [ %rdx.next , %loop ]
484+ ret float %res
485+ }
290486
; Loop metadata referenced as `!llvm.loop !0` on the loop back-edge branches
; above: !0 is the self-referential loop ID and !1 requests an unroll count
; of 2.
291487!0 = distinct !{!0 , !1 }
292488!1 = !{!"llvm.loop.unroll.count" , i32 2 }
@@ -301,4 +497,7 @@ exit:
301497; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
302498; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
303499; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
500+ ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
501+ ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
502+ ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]}
304503;.
0 commit comments