@@ -659,6 +659,112 @@ exit:
659
659
ret void
660
660
}
661
661
662
; Loop whose store address depends on a value loaded inside the loop: every
; iteration reads an i32 from %src[%iv], sign-extends it to i64 and uses the
; result as the index of the i32 element of %dst that is overwritten with 0.
; The exit test compares the pre-increment %iv against 8. The expected output
; below mirrors the input loop instruction for instruction.
define void @cost_scalar_load_of_address(ptr noalias %src, ptr %dst) {
; CHECK-LABEL: define void @cost_scalar_load_of_address(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[L_EXT:%.*]] = sext i32 [[L]] to i64
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[L_EXT]]
; CHECK-NEXT: store i32 0, ptr [[GEP_DST]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 8
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Load the index that selects the destination element.
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 4
  %l.ext = sext i32 %l to i64
  ; The store address is derived from the loaded value, not from %iv.
  %gep.dst = getelementptr i32, ptr %dst, i64 %l.ext
  store i32 0, ptr %gep.dst, align 4
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 8
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}
697
+
698
; Aggregate types used only to give the getelementptrs below their element
; strides: %t wraps 3 doubles, %t.2 wraps 64 doubles.
%t = type { [3 x double] }
%t.2 = type { [64 x double] }

; Floating-point reduction whose final operand is loaded through a pointer
; that is itself loaded inside the loop.
;
; Per iteration:
;   * three doubles are read at byte offsets 0, 8 and 16 of the %t element
;     %src.0[%iv], each is multiplied by 3.0 and the products are summed;
;   * the sum is multiplied by the double at %src.1[%iv];
;   * a pointer is loaded from byte offset 72 of the %t.2 element
;     %src.2[%iv], and a double is loaded through that pointer;
;   * llvm.fmuladd folds both values into the running reduction %red, which
;     starts at 3.0.
; The exit test compares the pre-increment %iv against 1. The expected output
; below keeps the loop as written and adds a single-entry phi for %red.next
; in the exit block.
define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.1, ptr %src.2) {
; CHECK-LABEL: define double @test_scalarization_cost_for_load_of_address(
; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], ptr [[SRC_2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi double [ 3.000000e+00, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]]
; CHECK-NEXT: [[L_0:%.*]] = load double, ptr [[GEP_0]], align 8
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_0]], i64 8
; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[GEP_8]], align 8
; CHECK-NEXT: [[GEP_16:%.*]] = getelementptr i8, ptr [[GEP_0]], i64 16
; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[GEP_16]], align 8
; CHECK-NEXT: [[MUL_0:%.*]] = fmul double [[L_0]], 3.000000e+00
; CHECK-NEXT: [[MUL_1:%.*]] = fmul double [[L_1]], 3.000000e+00
; CHECK-NEXT: [[MUL_2:%.*]] = fmul double [[L_2]], 3.000000e+00
; CHECK-NEXT: [[ADD_0:%.*]] = fadd double [[MUL_0]], [[MUL_1]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd double [[ADD_0]], [[MUL_2]]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]]
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
; CHECK-NEXT: [[MUL256_US:%.*]] = fmul double [[ADD_1]], [[L]]
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]]
; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
; CHECK-NEXT: [[LV:%.*]] = load double, ptr [[L_P_2]], align 8
; CHECK-NEXT: [[RED_NEXT]] = tail call double @llvm.fmuladd.f64(double [[MUL256_US]], double [[LV]], double [[RED]])
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi double [ [[RED_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: ret double [[RED_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %red = phi double [ 3.000000e+00, %entry ], [ %red.next, %loop ]

  ; Three consecutive doubles of the %t element selected by %iv.
  %gep.0 = getelementptr %t, ptr %src.0, i64 %iv
  %l.0 = load double, ptr %gep.0, align 8
  %gep.8 = getelementptr i8, ptr %gep.0, i64 8
  %l.1 = load double, ptr %gep.8, align 8
  %gep.16 = getelementptr i8, ptr %gep.0, i64 16
  %l.2 = load double, ptr %gep.16, align 8

  ; Scale each element by 3.0 and sum the products.
  %mul.0 = fmul double %l.0, 3.000000e+00
  %mul.1 = fmul double %l.1, 3.000000e+00
  %mul.2 = fmul double %l.2, 3.000000e+00
  %add.0 = fadd double %mul.0, %mul.1
  %add.1 = fadd double %add.0, %mul.2

  %gep.src = getelementptr double, ptr %src.1, i64 %iv
  %l = load double, ptr %gep.src, align 8
  %mul256.us = fmul double %add.1, %l

  ; Load a pointer from inside the %t.2 element, then load a double through
  ; it: the address of the second load is only known after the first.
  %gep.src.2 = getelementptr %t.2, ptr %src.2, i64 %iv
  %gep.72 = getelementptr i8, ptr %gep.src.2, i64 72
  %l.p.2 = load ptr, ptr %gep.72, align 8
  %lv = load double, ptr %l.p.2, align 8

  %red.next = tail call double @llvm.fmuladd.f64(double %mul256.us, double %lv, double %red)
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret double %red.next
}
767
+
662
768
attributes #0 = { "target-cpu" ="neoverse-512tvb" }
663
769
664
770
!0 = !{!1 , !2 , i64 0 }
0 commit comments