Skip to content

Commit 0250eb4

Browse files
igogo-x86tru
authored and committed
[LoopVectorize] Pre-commit tests for D157631
Differential Revision: https://reviews.llvm.org/D157630 (cherry picked from commit 2df9ed1)
1 parent b5d3a64 commit 0250eb4

File tree

1 file changed

+121
-0
lines changed

1 file changed

+121
-0
lines changed

llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,3 +559,124 @@ exit: ; preds = %for.body
559559
%add.lcssa = phi i32 [ %add, %for.body ]
560560
ret i32 %add.lcssa
561561
}
562+
563+
; Make sure that if there are several reductions in the loop, the order of the invariant stores sunk out of the loop is preserved
564+
; FIXME: This test currently shows incorrect behavior and it will be fixed in the following patch
565+
; See https://github.com/llvm/llvm-project/issues/64047
566+
; Loop under test: 1000 iterations (%iv runs from 0 until %iv.next == 1000).
; Each iteration loads src[iv], updates an add reduction (%sum, initial value 0)
; and a mul reduction (%mul, initial value 1), and stores both running values
; to the same pointer %dst: the sum first, then the product.
; NOTE: the FileCheck expectations below list the reduce.mul store ahead of the
; reduce.add store in middle.block, i.e. the reverse of the loop's store order.
define void @reduc_add_mul_store_same_ptr(ptr %dst, ptr readonly %src) {
567+
; CHECK-LABEL: define void @reduc_add_mul_store_same_ptr
568+
; CHECK: middle.block:
569+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
570+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst, align 4
571+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
572+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst, align 4
573+
;
574+
entry:
575+
br label %for.body
576+
577+
for.body:
578+
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
579+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
580+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
581+
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
582+
%0 = load i32, ptr %gep.src, align 4
583+
; First store of the iteration: the running sum.
%sum.next = add nsw i32 %sum, %0
584+
store i32 %sum.next, ptr %dst, align 4
585+
; Second store of the iteration, to the same address: the running product.
%mul.next = mul nsw i32 %mul, %0
586+
store i32 %mul.next, ptr %dst, align 4
587+
%iv.next = add nuw nsw i64 %iv, 1
588+
%exitcond = icmp eq i64 %iv.next, 1000
589+
br i1 %exitcond, label %exit, label %for.body
590+
591+
exit:
592+
ret void
593+
}
594+
595+
; Loop under test: 1000 iterations (%iv runs from 0 until %iv.next == 1000).
; Each iteration loads src[iv], updates a mul reduction (%mul, initial value 1)
; and an add reduction (%sum, initial value 0), and stores both running values
; to the same pointer %dst: the product first, then the sum.
; The FileCheck expectations below list the reduce.mul store ahead of the
; reduce.add store in middle.block, which matches the loop's store order.
define void @reduc_mul_add_store_same_ptr(ptr %dst, ptr readonly %src) {
596+
; CHECK-LABEL: define void @reduc_mul_add_store_same_ptr
597+
; CHECK: middle.block:
598+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
599+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst, align 4
600+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
601+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst, align 4
602+
;
603+
entry:
604+
br label %for.body
605+
606+
for.body:
607+
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
608+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
609+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
610+
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
611+
%0 = load i32, ptr %gep.src, align 4
612+
; First store of the iteration: the running product.
%mul.next = mul nsw i32 %mul, %0
613+
store i32 %mul.next, ptr %dst, align 4
614+
; Second store of the iteration, to the same address: the running sum.
%sum.next = add nsw i32 %sum, %0
615+
store i32 %sum.next, ptr %dst, align 4
616+
%iv.next = add nuw nsw i64 %iv, 1
617+
%exitcond = icmp eq i64 %iv.next, 1000
618+
br i1 %exitcond, label %exit, label %for.body
619+
620+
exit:
621+
ret void
622+
}
623+
624+
; Same as above, but the stores go to two different pointers that may alias
625+
; FIXME: This test currently shows incorrect behavior and it will be fixed in the following patch
626+
; Loop under test: 1000 iterations (%iv runs from 0 until %iv.next == 1000).
; Each iteration loads src[iv], updates an add reduction (%sum, initial value 0)
; and a mul reduction (%mul, initial value 1), then stores the sum to %dst1 and
; afterwards the product to %dst2. Neither pointer argument is marked noalias.
; NOTE: the FileCheck expectations below list the %dst2 (reduce.mul) store ahead
; of the %dst1 (reduce.add) store in middle.block, i.e. the reverse of the
; loop's store order.
define void @reduc_add_mul_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
627+
; CHECK-LABEL: define void @reduc_add_mul_store_different_ptr
628+
; CHECK: middle.block:
629+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
630+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst2, align 4
631+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
632+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst1, align 4
633+
;
634+
entry:
635+
br label %for.body
636+
637+
for.body:
638+
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
639+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
640+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
641+
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
642+
%0 = load i32, ptr %gep.src, align 4
643+
; First store of the iteration: the running sum goes to %dst1.
%sum.next = add nsw i32 %sum, %0
644+
store i32 %sum.next, ptr %dst1, align 4
645+
; Second store of the iteration: the running product goes to %dst2.
%mul.next = mul nsw i32 %mul, %0
646+
store i32 %mul.next, ptr %dst2, align 4
647+
%iv.next = add nuw nsw i64 %iv, 1
648+
%exitcond = icmp eq i64 %iv.next, 1000
649+
br i1 %exitcond, label %exit, label %for.body
650+
651+
exit:
652+
ret void
653+
}
654+
655+
; Loop under test: 1000 iterations (%iv runs from 0 until %iv.next == 1000).
; Each iteration loads src[iv], updates a mul reduction (%mul, initial value 1)
; and an add reduction (%sum, initial value 0), then stores the product to %dst1
; and afterwards the sum to %dst2. Neither pointer argument is marked noalias.
; The FileCheck expectations below list the %dst1 (reduce.mul) store ahead of
; the %dst2 (reduce.add) store in middle.block, which matches the loop's store
; order.
define void @reduc_mul_add_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
656+
; CHECK-LABEL: define void @reduc_mul_add_store_different_ptr
657+
; CHECK: middle.block:
658+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
659+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst1, align 4
660+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
661+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst2, align 4
662+
;
663+
entry:
664+
br label %for.body
665+
666+
for.body:
667+
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
668+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
669+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
670+
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
671+
%0 = load i32, ptr %gep.src, align 4
672+
; First store of the iteration: the running product goes to %dst1.
%mul.next = mul nsw i32 %mul, %0
673+
store i32 %mul.next, ptr %dst1, align 4
674+
; Second store of the iteration: the running sum goes to %dst2.
%sum.next = add nsw i32 %sum, %0
675+
store i32 %sum.next, ptr %dst2, align 4
676+
%iv.next = add nuw nsw i64 %iv, 1
677+
%exitcond = icmp eq i64 %iv.next, 1000
678+
br i1 %exitcond, label %exit, label %for.body
679+
680+
exit:
681+
ret void
682+
}

0 commit comments

Comments
 (0)