@@ -444,3 +444,158 @@ loop:
444444exit:
445445 ret i64 %rdx.next
446446}
447+
; Sum reduction over 10000 i32 elements of %src whose running value is written
; back to %sum on every iteration, not only after the loop.
; The CHECK lines below expect four copies of the loop body in a single block
; (induction step of 4), each copy keeping its own store to %sum, with the adds
; forming one serial chain RED -> RED_NEXT -> RED_NEXT_1 -> RED_NEXT_2 -> RED_NEXT_3.
448+ define void @reduction_with_intermediate_store (ptr %src , ptr %sum ) {
449+ ; CHECK-LABEL: define void @reduction_with_intermediate_store(
450+ ; CHECK-SAME: ptr [[SRC:%.*]], ptr [[SUM:%.*]]) {
451+ ; CHECK-NEXT: [[ENTRY:.*]]:
452+ ; CHECK-NEXT: [[SUM_PROMOTED:%.*]] = load i32, ptr [[SUM]], align 4
453+ ; CHECK-NEXT: br label %[[LOOP:.*]]
454+ ; CHECK: [[LOOP]]:
455+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
456+ ; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[SUM_PROMOTED]], %[[ENTRY]] ], [ [[RED_NEXT_3:%.*]], %[[LOOP]] ]
457+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV]]
458+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
459+ ; CHECK-NEXT: [[RED_NEXT:%.*]] = add nsw i32 [[RED]], [[L]]
460+ ; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[SUM]], align 4
461+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
462+ ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT]]
463+ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
464+ ; CHECK-NEXT: [[RED_NEXT_1:%.*]] = add nsw i32 [[RED_NEXT]], [[L_1]]
465+ ; CHECK-NEXT: store i32 [[RED_NEXT_1]], ptr [[SUM]], align 4
466+ ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
467+ ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
468+ ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
469+ ; CHECK-NEXT: [[RED_NEXT_2:%.*]] = add nsw i32 [[RED_NEXT_1]], [[L_2]]
470+ ; CHECK-NEXT: store i32 [[RED_NEXT_2]], ptr [[SUM]], align 4
471+ ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
472+ ; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
473+ ; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_SRC_3]], align 4
474+ ; CHECK-NEXT: [[RED_NEXT_3]] = add nsw i32 [[RED_NEXT_2]], [[L_3]]
475+ ; CHECK-NEXT: store i32 [[RED_NEXT_3]], ptr [[SUM]], align 4
476+ ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
477+ ; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 10000
478+ ; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
479+ ; CHECK: [[EXIT]]:
480+ ; CHECK-NEXT: ret void
481+ ;
482+ entry:
; The reduction starts from the value already stored at %sum.
483+ %sum.promoted = load i32 , ptr %sum , align 4
484+ br label %loop
485+
486+ loop:
487+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
488+ %red = phi i32 [ %sum.promoted , %entry ], [ %red.next , %loop ]
489+ %gep.src = getelementptr inbounds nuw i32 , ptr %src , i64 %iv
490+ %l = load i32 , ptr %gep.src , align 4
491+ %red.next = add nsw i32 %red , %l
; Intermediate store: the partial sum is written to %sum in every iteration.
492+ store i32 %red.next , ptr %sum , align 4
493+ %iv.next = add nuw nsw i64 %iv , 1
; Constant trip count: the loop exits once %iv.next reaches 10000.
494+ %ec = icmp eq i64 %iv.next , 10000
495+ br i1 %ec , label %exit , label %loop
496+
497+ exit:
498+ ret void
499+ }
500+
; External function, declared here without a body or attributes; takes no
; arguments and returns i32. Called once per iteration by @test_add_with_call.
501+ declare i32 @foo ()
502+
503+ ; Loop with a call cannot be handled by LoopVectorize, introducing additional
504+ ; accumulators when unrolling increases throughput.
505+ define i32 @test_add_with_call (i64 %n , i32 %start ) {
506+ ; CHECK-LABEL: define i32 @test_add_with_call(
507+ ; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) {
508+ ; CHECK-NEXT: [[ENTRY:.*]]:
509+ ; CHECK-NEXT: br label %[[LOOP:.*]]
510+ ; CHECK: [[LOOP]]:
511+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
512+ ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
513+ ; CHECK-NEXT: [[L:%.*]] = call i32 @foo()
514+ ; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
515+ ; CHECK-NEXT: [[L_1:%.*]] = call i32 @foo()
516+ ; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
517+ ; CHECK-NEXT: [[L_2:%.*]] = call i32 @foo()
518+ ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
519+ ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
520+ ; CHECK-NEXT: [[L_3:%.*]] = call i32 @foo()
521+ ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
522+ ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
523+ ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
524+ ; CHECK: [[EXIT]]:
525+ ; CHECK-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
526+ ; CHECK-NEXT: ret i32 [[BIN_RDX2]]
527+ ;
; Input IR: adds the result of one @foo call per iteration to a running sum
; that starts at %start. The CHECK lines above expect four calls per loop
; iteration feeding a single serial chain of adds (one reduction phi).
; NOTE(review): %n is not referenced in the body; the trip count is the
; constant 1000.
528+ entry:
529+ br label %loop
530+
531+ loop:
532+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
533+ %rdx = phi i32 [ %start , %entry ], [ %rdx.next , %loop ]
; The increment carries no wrap flags in the input; the expected output shows
; it with nuw nsw.
534+ %iv.next = add i64 %iv , 1
535+ %l = call i32 @foo ()
536+ %rdx.next = add i32 %rdx , %l
; Loop continues while %iv.next differs from 1000.
537+ %ec = icmp ne i64 %iv.next , 1000
538+ br i1 %ec , label %loop , label %exit
539+
540+ exit:
; The final reduction value is the function result.
541+ ret i32 %rdx.next
542+ }
543+
544+ ; Loop with backward dependence cannot be handled by LoopVectorize, introducing additional
545+ ; accumulators when unrolling increases throughput.
546+ define i32 @test_add_with_backward_dep (ptr %p , i64 %n , i32 %start ) {
547+ ; CHECK-LABEL: define i32 @test_add_with_backward_dep(
548+ ; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
549+ ; CHECK-NEXT: [[ENTRY:.*]]:
550+ ; CHECK-NEXT: br label %[[LOOP:.*]]
551+ ; CHECK: [[LOOP]]:
552+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
553+ ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
554+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
555+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]]
556+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
557+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
558+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4
559+ ; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
560+ ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
561+ ; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
562+ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4
563+ ; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
564+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1_1]], align 4
565+ ; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
566+ ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
567+ ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
568+ ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4
569+ ; CHECK-NEXT: [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
570+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1_2]], align 4
571+ ; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
572+ ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
573+ ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
574+ ; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4
575+ ; CHECK-NEXT: [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_3]]
576+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1_3]], align 4
577+ ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
578+ ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
579+ ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
580+ ; CHECK: [[EXIT]]:
581+ ; CHECK-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
582+ ; CHECK-NEXT: ret i32 [[BIN_RDX3]]
583+ ;
; Input IR: each iteration loads p[iv], stores 0 to p[iv + 1], and adds the
; loaded value to a running sum that starts at %start. The element written in
; one iteration is the element loaded in the next one. The CHECK lines above
; expect four copies of the body per loop iteration with the adds kept as a
; single serial chain (one reduction phi).
; NOTE(review): %n is not referenced in the body; the trip count is the
; constant 1000.
584+ entry:
585+ br label %loop
586+
587+ loop:
588+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
589+ %rdx = phi i32 [ %start , %entry ], [ %rdx.next , %loop ]
590+ %iv.next = add i64 %iv , 1
591+ %gep = getelementptr inbounds nuw i32 , ptr %p , i64 %iv
; No explicit alignment on this load or the store below; the expected output
; prints both with align 4.
592+ %l = load i32 , ptr %gep
; Address of the element the next iteration will load.
593+ %gep.1 = getelementptr inbounds nuw i32 , ptr %p , i64 %iv.next
594+ store i32 0 , ptr %gep.1
595+ %rdx.next = add i32 %rdx , %l
; Loop continues while %iv.next differs from 1000.
596+ %ec = icmp ne i64 %iv.next , 1000
597+ br i1 %ec , label %loop , label %exit
598+
599+ exit:
600+ ret i32 %rdx.next
601+ }
0 commit comments