Skip to content

Commit c506c28

Browse files
committed
[LV] Add additional tests for scalar load costs of addresses.
1 parent 70529df commit c506c28

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,112 @@ exit:
659659
ret void
660660
}
661661

662+
; Test input: a counted loop that loads an i32 from %src[iv], sign-extends it
; to i64, and uses the result as the index of a store of 0 into %dst. The
; loaded value therefore only feeds address computation.
; %src is marked noalias; %dst is not.
; The induction variable starts at 0 and the exit test compares the
; pre-increment %iv against 8, so the body runs for iv = 0..8.
; The autogenerated FileCheck assertions that follow the define line list only
; the entry, loop and exit blocks with the same scalar instructions as the
; input IR.
define void @cost_scalar_load_of_address(ptr noalias %src, ptr %dst) {
663+
; CHECK-LABEL: define void @cost_scalar_load_of_address(
664+
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
665+
; CHECK-NEXT: [[ENTRY:.*]]:
666+
; CHECK-NEXT: br label %[[LOOP:.*]]
667+
; CHECK: [[LOOP]]:
668+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
669+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
670+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
671+
; CHECK-NEXT: [[L_EXT:%.*]] = sext i32 [[L]] to i64
672+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[L_EXT]]
673+
; CHECK-NEXT: store i32 0, ptr [[GEP_DST]], align 4
674+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
675+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 8
676+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
677+
; CHECK: [[EXIT]]:
678+
; CHECK-NEXT: ret void
679+
;
680+
entry:
681+
br label %loop
682+
683+
loop:
684+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
685+
%gep.src = getelementptr i32, ptr %src, i64 %iv
686+
%l = load i32, ptr %gep.src, align 4 ; index value read from %src[iv]
687+
%l.ext = sext i32 %l to i64
688+
%gep.dst = getelementptr i32, ptr %dst, i64 %l.ext ; store address depends on the loaded index
689+
store i32 0, ptr %gep.dst, align 4
690+
%iv.next = add i64 %iv, 1
691+
%ec = icmp eq i64 %iv, 8 ; compares %iv, not %iv.next
692+
br i1 %ec, label %exit, label %loop
693+
694+
exit:
695+
ret void
696+
}
697+
698+
; Named struct types used as getelementptr element types by
; @test_scalarization_cost_for_load_of_address.
; %t wraps an array of 3 doubles.
%t = type { [3 x double] }
699+
; %t.2 wraps an array of 64 doubles.
%t.2 = type { [ 64 x double ] }
700+
701+
; Test input: a floating-point reduction loop, seeded with 3.0, that returns
; the final accumulator value. Each iteration:
;   - loads three doubles starting at %src.0 indexed by %iv as %t (byte
;     offsets 0, 8 and 16 from that element), scales each by 3.0 and sums them;
;   - multiplies the sum by the double at %src.1[iv];
;   - loads a pointer from byte offset 72 past %src.2 indexed by %iv as %t.2,
;     then loads a double through that pointer;
;   - folds both into the accumulator with llvm.fmuladd.
; The exit test compares the pre-increment %iv against 1, so the body runs for
; iv = 0 and iv = 1.
; The autogenerated FileCheck assertions that follow the define line list only
; the entry, loop and exit blocks with scalar instructions; the exit block
; there carries a single-entry phi for the returned value.
define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.1, ptr %src.2) {
702+
; CHECK-LABEL: define double @test_scalarization_cost_for_load_of_address(
703+
; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], ptr [[SRC_2:%.*]]) {
704+
; CHECK-NEXT: [[ENTRY:.*]]:
705+
; CHECK-NEXT: br label %[[LOOP:.*]]
706+
; CHECK: [[LOOP]]:
707+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
708+
; CHECK-NEXT: [[RED:%.*]] = phi double [ 3.000000e+00, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
709+
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]]
710+
; CHECK-NEXT: [[L_0:%.*]] = load double, ptr [[GEP_0]], align 8
711+
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_0]], i64 8
712+
; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[GEP_8]], align 8
713+
; CHECK-NEXT: [[GEP_16:%.*]] = getelementptr i8, ptr [[GEP_0]], i64 16
714+
; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[GEP_16]], align 8
715+
; CHECK-NEXT: [[MUL_0:%.*]] = fmul double [[L_0]], 3.000000e+00
716+
; CHECK-NEXT: [[MUL_1:%.*]] = fmul double [[L_1]], 3.000000e+00
717+
; CHECK-NEXT: [[MUL_2:%.*]] = fmul double [[L_2]], 3.000000e+00
718+
; CHECK-NEXT: [[ADD_0:%.*]] = fadd double [[MUL_0]], [[MUL_1]]
719+
; CHECK-NEXT: [[ADD_1:%.*]] = fadd double [[ADD_0]], [[MUL_2]]
720+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]]
721+
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
722+
; CHECK-NEXT: [[MUL256_US:%.*]] = fmul double [[ADD_1]], [[L]]
723+
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]]
724+
; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
725+
; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
726+
; CHECK-NEXT: [[LV:%.*]] = load double, ptr [[L_P_2]], align 8
727+
; CHECK-NEXT: [[RED_NEXT]] = tail call double @llvm.fmuladd.f64(double [[MUL256_US]], double [[LV]], double [[RED]])
728+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
729+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
730+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
731+
; CHECK: [[EXIT]]:
732+
; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi double [ [[RED_NEXT]], %[[LOOP]] ]
733+
; CHECK-NEXT: ret double [[RED_NEXT_LCSSA]]
734+
;
735+
entry:
736+
br label %loop
737+
738+
loop:
739+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
740+
%red = phi double [ 3.000000e+00, %entry ], [ %red.next, %loop ] ; reduction accumulator, seeded with 3.0
741+
%gep.0 = getelementptr %t, ptr %src.0, i64 %iv ; address stride is one %t per iteration
742+
%l.0 = load double, ptr %gep.0, align 8
743+
%gep.8 = getelementptr i8, ptr %gep.0, i64 8 ; byte offset 8; presumably the second double of the %t element
744+
%l.1 = load double, ptr %gep.8, align 8
745+
%gep.16 = getelementptr i8, ptr %gep.0, i64 16 ; byte offset 16; presumably the third double of the %t element
746+
%l.2 = load double, ptr %gep.16, align 8
747+
%mul.0 = fmul double %l.0, 3.000000e+00
748+
%mul.1 = fmul double %l.1, 3.000000e+00
749+
%mul.2 = fmul double %l.2, 3.000000e+00
750+
%add.0 = fadd double %mul.0, %mul.1
751+
%add.1 = fadd double %add.0, %mul.2
752+
%gep.src = getelementptr double, ptr %src.1, i64 %iv
753+
%l = load double, ptr %gep.src, align 8
754+
%mul256.us = fmul double %add.1, %l
755+
%gep.src.2 = getelementptr %t.2, ptr %src.2, i64 %iv ; address stride is one %t.2 per iteration
756+
%gep.72 = getelementptr i8, ptr %gep.src.2, i64 72
757+
%l.p.2 = load ptr, ptr %gep.72, align 8 ; loaded value is itself a pointer
758+
%lv = load double, ptr %l.p.2, align 8 ; load whose address comes from the previous load
759+
%red.next = tail call double @llvm.fmuladd.f64(double %mul256.us, double %lv, double %red)
760+
%iv.next = add i64 %iv, 1
761+
%ec = icmp eq i64 %iv, 1 ; compares %iv, not %iv.next
762+
br i1 %ec, label %exit, label %loop
763+
764+
exit:
765+
ret double %red.next
766+
}
767+
662768
; Attribute group #0 sets the "target-cpu" function attribute to
; neoverse-512tvb for any function declared with #0.
attributes #0 = { "target-cpu"="neoverse-512tvb" }
663769

664770
!0 = !{!1, !2, i64 0}

0 commit comments

Comments
 (0)