@@ -659,6 +659,112 @@ exit:
659659 ret void
660660}
661661
; The value loaded from %src is sign-extended and used as the index of the
; store into %dst, i.e. a scalar load feeds an address computation.
; The FileCheck assertions below match the same scalar loop that is given as
; input; no vector instructions are expected in the output.
define void @cost_scalar_load_of_address(ptr noalias %src, ptr %dst) {
; CHECK-LABEL: define void @cost_scalar_load_of_address(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; CHECK-NEXT:    [[L_EXT:%.*]] = sext i32 [[L]] to i64
; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[L_EXT]]
; CHECK-NEXT:    store i32 0, ptr [[GEP_DST]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 8
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 4
  ; The loaded i32 becomes the (sign-extended) element index of the store.
  %l.ext = sext i32 %l to i64
  %gep.dst = getelementptr i32, ptr %dst, i64 %l.ext
  store i32 0, ptr %gep.dst, align 4
  %iv.next = add i64 %iv, 1
  ; The exit test compares the pre-increment %iv, so the body runs for
  ; %iv = 0 through 8.
  %ec = icmp eq i64 %iv, 8
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}
697+
; Aggregate types used as getelementptr element types in
; @test_scalarization_cost_for_load_of_address.
%t = type { [3 x double] }
%t.2 = type { [64 x double] }
700+
; Floating-point reduction whose last operand is loaded through a pointer that
; is itself loaded inside the loop (%l.p.2 is loaded, then dereferenced).
; The FileCheck assertions below match the same scalar loop that is given as
; input, plus a single-entry phi for %red.next in the exit block; no vector
; instructions are expected in the output.
define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.1, ptr %src.2) {
; CHECK-LABEL: define double @test_scalarization_cost_for_load_of_address(
; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], ptr [[SRC_2:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RED:%.*]] = phi double [ 3.000000e+00, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]]
; CHECK-NEXT:    [[L_0:%.*]] = load double, ptr [[GEP_0]], align 8
; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_0]], i64 8
; CHECK-NEXT:    [[L_1:%.*]] = load double, ptr [[GEP_8]], align 8
; CHECK-NEXT:    [[GEP_16:%.*]] = getelementptr i8, ptr [[GEP_0]], i64 16
; CHECK-NEXT:    [[L_2:%.*]] = load double, ptr [[GEP_16]], align 8
; CHECK-NEXT:    [[MUL_0:%.*]] = fmul double [[L_0]], 3.000000e+00
; CHECK-NEXT:    [[MUL_1:%.*]] = fmul double [[L_1]], 3.000000e+00
; CHECK-NEXT:    [[MUL_2:%.*]] = fmul double [[L_2]], 3.000000e+00
; CHECK-NEXT:    [[ADD_0:%.*]] = fadd double [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd double [[ADD_0]], [[MUL_2]]
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
; CHECK-NEXT:    [[MUL256_US:%.*]] = fmul double [[ADD_1]], [[L]]
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]]
; CHECK-NEXT:    [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
; CHECK-NEXT:    [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
; CHECK-NEXT:    [[LV:%.*]] = load double, ptr [[L_P_2]], align 8
; CHECK-NEXT:    [[RED_NEXT]] = tail call double @llvm.fmuladd.f64(double [[MUL256_US]], double [[LV]], double [[RED]])
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RED_NEXT_LCSSA:%.*]] = phi double [ [[RED_NEXT]], %[[LOOP]] ]
; CHECK-NEXT:    ret double [[RED_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Reduction accumulator, seeded with 3.0.
  %red = phi double [ 3.000000e+00, %entry ], [ %red.next, %loop ]

  ; Three doubles at byte offsets 0, 8 and 16 of the %iv-th %t element of
  ; %src.0, each scaled by 3.0 and then summed.
  %gep.0 = getelementptr %t, ptr %src.0, i64 %iv
  %l.0 = load double, ptr %gep.0, align 8
  %gep.8 = getelementptr i8, ptr %gep.0, i64 8
  %l.1 = load double, ptr %gep.8, align 8
  %gep.16 = getelementptr i8, ptr %gep.0, i64 16
  %l.2 = load double, ptr %gep.16, align 8
  %mul.0 = fmul double %l.0, 3.000000e+00
  %mul.1 = fmul double %l.1, 3.000000e+00
  %mul.2 = fmul double %l.2, 3.000000e+00
  %add.0 = fadd double %mul.0, %mul.1
  %add.1 = fadd double %add.0, %mul.2

  ; Multiply the sum by the %iv-th double of %src.1.
  %gep.src = getelementptr double, ptr %src.1, i64 %iv
  %l = load double, ptr %gep.src, align 8
  %mul256.us = fmul double %add.1, %l

  ; Load a pointer from byte offset 72 of the %iv-th %t.2 element of %src.2,
  ; then load a double through that pointer: the address of the second load is
  ; itself produced by a load.
  %gep.src.2 = getelementptr %t.2, ptr %src.2, i64 %iv
  %gep.72 = getelementptr i8, ptr %gep.src.2, i64 72
  %l.p.2 = load ptr, ptr %gep.72, align 8
  %lv = load double, ptr %l.p.2, align 8

  %red.next = tail call double @llvm.fmuladd.f64(double %mul256.us, double %lv, double %red)
  %iv.next = add i64 %iv, 1
  ; The exit test compares the pre-increment %iv, so the body runs for
  ; %iv = 0 and %iv = 1.
  %ec = icmp eq i64 %iv, 1
  br i1 %ec, label %exit, label %loop

exit:
  ret double %red.next
}
767+
; Attribute group #0 sets the "target-cpu" string attribute to neoverse-512tvb.
attributes #0 = { "target-cpu"="neoverse-512tvb" }

; Metadata node !0 refers to !1 and !2, which are defined outside this excerpt.
!0 = !{!1, !2, i64 0}
0 commit comments