@@ -586,6 +586,184 @@ exit:
586
586
ret void
587
587
}
588
588
589
+ ; Test case for https://github.com/llvm/llvm-project/issues/112922.
+ ;
+ ; Each iteration writes both fields of one { double, i64 } element of %dst:
+ ; first the i64 field (index 1) with the induction variable, then the double
+ ; at the start of the element (field 0) with 0.0. The loop exits once
+ ; %iv == 1, i.e. after two iterations. The checks expect a VF=2 vector loop in
+ ; which the <2 x i64> induction vector is bitcast to <2 x double> and both
+ ; fields are written by a single interleaved <4 x double> store.
590
+ define void @interleave_store_double_i64(ptr %dst) {
591
+ ; CHECK-LABEL: define void @interleave_store_double_i64(
592
+ ; CHECK-SAME: ptr [[DST:%.*]]) {
593
+ ; CHECK-NEXT: [[ENTRY:.*]]:
594
+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
595
+ ; CHECK: [[VECTOR_PH]]:
596
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
597
+ ; CHECK: [[VECTOR_BODY]]:
598
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
599
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
600
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
601
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[TMP0]]
602
+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
603
+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
604
+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
605
+ ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
606
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
607
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
608
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
609
+ ; CHECK: [[MIDDLE_BLOCK]]:
610
+ ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
611
+ ; CHECK: [[SCALAR_PH]]:
612
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
613
+ ; CHECK-NEXT: br label %[[LOOP:.*]]
614
+ ; CHECK: [[LOOP]]:
615
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
616
+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
617
+ ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8
618
+ ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
619
+ ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8
620
+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
621
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
622
+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
623
+ ; CHECK: [[EXIT]]:
624
+ ; CHECK-NEXT: ret void
625
+ ;
626
+ entry:
627
+ br label %loop
628
+
629
+ loop:
630
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
631
+ %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
632
+ store i64 %iv, ptr %gep.1, align 8
633
+ %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
634
+ store double 0.000000e+00, ptr %gep.0, align 8
635
+ %iv.next = add i64 %iv, 1
636
+ %ec = icmp eq i64 %iv, 1
637
+ br i1 %ec, label %exit, label %loop
638
+
639
+ exit:
640
+ ret void
641
+ }
642
+
643
+ ; Same element type as @interleave_store_double_i64, { double, i64 }, but the
+ ; two stores are issued in the opposite order: the double at the start of the
+ ; element (field 0) is written first, then the i64 field (index 1). The loop
+ ; exits once %iv == 1. The checks expect the loop to stay scalar: no vector
+ ; preheader or vector body is matched.
+ define void @interleave_store_i64_double(ptr %dst) {
644
+ ; CHECK-LABEL: define void @interleave_store_i64_double(
645
+ ; CHECK-SAME: ptr [[DST:%.*]]) {
646
+ ; CHECK-NEXT: [[ENTRY:.*]]:
647
+ ; CHECK-NEXT: br label %[[LOOP:.*]]
648
+ ; CHECK: [[LOOP]]:
649
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
650
+ ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]]
651
+ ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8
652
+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1
653
+ ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8
654
+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
655
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
656
+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
657
+ ; CHECK: [[EXIT]]:
658
+ ; CHECK-NEXT: ret void
659
+ ;
660
+ entry:
661
+ br label %loop
662
+
663
+ loop:
664
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
665
+ %gep.0 = getelementptr { double, i64 }, ptr %dst, i64 %iv
666
+ store double 0.000000e+00, ptr %gep.0, align 8
667
+ %gep.1 = getelementptr { double, i64 }, ptr %dst, i64 %iv, i32 1
668
+ store i64 %iv, ptr %gep.1, align 8
669
+ %iv.next = add i64 %iv, 1
670
+ %ec = icmp eq i64 %iv, 1
671
+ br i1 %ec, label %exit, label %loop
672
+
673
+ exit:
674
+ ret void
675
+ }
676
+
677
+ ; The element type is { i64, double } here. The double field (index 1) is
+ ; written first with 0.0, then the i64 at the start of the element (field 0)
+ ; with the induction variable. The loop exits once %iv == 1. The checks expect
+ ; the loop to stay scalar: no vector preheader or vector body is matched.
+ ; TODO: The interleave group should likely have the same cost as @interleave_store_double_i64.
678
+ define void @interleave_store_double_i64_2(ptr %dst) {
679
+ ; CHECK-LABEL: define void @interleave_store_double_i64_2(
680
+ ; CHECK-SAME: ptr [[DST:%.*]]) {
681
+ ; CHECK-NEXT: [[ENTRY:.*]]:
682
+ ; CHECK-NEXT: br label %[[LOOP:.*]]
683
+ ; CHECK: [[LOOP]]:
684
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
685
+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
686
+ ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8
687
+ ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
688
+ ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_0]], align 8
689
+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
690
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
691
+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
692
+ ; CHECK: [[EXIT]]:
693
+ ; CHECK-NEXT: ret void
694
+ ;
695
+ entry:
696
+ br label %loop
697
+
698
+ loop:
699
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
700
+ %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
701
+ store double 0.000000e+00, ptr %gep.1, align 8
702
+ %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
703
+ store i64 %iv, ptr %gep.0, align 8
704
+ %iv.next = add i64 %iv, 1
705
+ %ec = icmp eq i64 %iv, 1
706
+ br i1 %ec, label %exit, label %loop
707
+
708
+ exit:
709
+ ret void
710
+ }
711
+
712
+ ; The element type is { i64, double }. The i64 at the start of the element
+ ; (field 0) is written first with the induction variable, then the double
+ ; field (index 1) with 0.0. The loop exits once %iv == 1. The checks expect a
+ ; VF=2 vector loop in which the <2 x i64> induction vector is bitcast to
+ ; <2 x double> and both fields are written by a single interleaved
+ ; <4 x double> store.
+ define void @interleave_store_i64_double_2(ptr %dst) {
713
+ ; CHECK-LABEL: define void @interleave_store_i64_double_2(
714
+ ; CHECK-SAME: ptr [[DST:%.*]]) {
715
+ ; CHECK-NEXT: [[ENTRY:.*]]:
716
+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
717
+ ; CHECK: [[VECTOR_PH]]:
718
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
719
+ ; CHECK: [[VECTOR_BODY]]:
720
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
721
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
722
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
723
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[TMP0]]
724
+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VEC_IND]] to <2 x double>
725
+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
726
+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
727
+ ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
728
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
729
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
730
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
731
+ ; CHECK: [[MIDDLE_BLOCK]]:
732
+ ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
733
+ ; CHECK: [[SCALAR_PH]]:
734
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
735
+ ; CHECK-NEXT: br label %[[LOOP:.*]]
736
+ ; CHECK: [[LOOP]]:
737
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
738
+ ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]]
739
+ ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_0]], align 8
740
+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1
741
+ ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8
742
+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
743
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
744
+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
745
+ ; CHECK: [[EXIT]]:
746
+ ; CHECK-NEXT: ret void
747
+ ;
748
+ entry:
749
+ br label %loop
750
+
751
+ loop:
752
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
753
+ %gep.0 = getelementptr { i64, double }, ptr %dst, i64 %iv
754
+ store i64 %iv, ptr %gep.0, align 8
755
+ %gep.1 = getelementptr { i64, double }, ptr %dst, i64 %iv, i32 1
756
+ store double 0.000000e+00, ptr %gep.1, align 8
757
+ %iv.next = add i64 %iv, 1
758
+ %ec = icmp eq i64 %iv, 1
759
+ br i1 %ec, label %exit, label %loop
760
+
761
+ exit:
762
+ ret void
763
+ }
764
+
765
+
766
+
589
767
attributes #0 = { "target-features" ="+sse4.2" }
590
768
attributes #1 = { "min-legal-vector-width" ="0" "target-cpu" ="cascadelake" }
591
769
@@ -601,4 +779,8 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
601
779
; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
602
780
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
603
781
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
782
+ ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
783
+ ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
784
+ ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
785
+ ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
604
786
;.
0 commit comments