Skip to content

Commit a976843

Browse files
committed
[AArch64] Add a phase-ordering test for a mla reduction sum. NFC
1 parent cfe1909 commit a976843

File tree

2 files changed

+366
-0
lines changed

2 files changed

+366
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt -S -O3 < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
target triple = "aarch64"
6+
7+
; This function (a 16-element sum reduction of a[i] * b[i]) should be vectorized successfully.
8+
9+
; Multiply-accumulate loop in unoptimized, alloca-based form: both pointer
; arguments, the float accumulator and the i32 loop counter each live in a
; stack slot and are reloaded on every use. The loop runs for counter values
; 0..15 and adds %0[i] * %1[i] (fast-math) into the accumulator.
; The autogenerated assertions below expect -O3 to reduce the whole function
; to two <16 x float> loads, one vector fmul and one llvm.vector.reduce.fadd.
define dso_local nofpclass(nan inf) float @vmlaq(ptr noundef %0, ptr noundef %1) #0 {
10+
; CHECK-LABEL: define dso_local nofpclass(nan inf) float @vmlaq
11+
; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
13+
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA4]]
14+
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <16 x float> [[TMP4]], [[TMP3]]
15+
; CHECK-NEXT: [[TMP6:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP5]])
16+
; CHECK-NEXT: ret float [[TMP6]]
17+
;
18+
; %3 and %4 are the stack slots for the two pointer arguments.
%3 = alloca ptr, align 8
19+
%4 = alloca ptr, align 8
20+
; %5 is the float accumulator, %6 the i32 loop counter.
%5 = alloca float, align 4
21+
%6 = alloca i32, align 4
22+
store ptr %0, ptr %3, align 8, !tbaa !4
23+
store ptr %1, ptr %4, align 8, !tbaa !4
24+
call void @llvm.lifetime.start.p0(ptr %5) #2
25+
; The accumulator starts at 0.0.
store float 0.000000e+00, ptr %5, align 4, !tbaa !9
26+
call void @llvm.lifetime.start.p0(ptr %6) #2
27+
; The counter starts at 0.
store i32 0, ptr %6, align 4, !tbaa !11
28+
br label %7
29+
30+
; Loop header: keep iterating while the counter is (signed) less than 16.
7: ; preds = %25, %2
31+
%8 = load i32, ptr %6, align 4, !tbaa !11
32+
%9 = icmp slt i32 %8, 16
33+
br i1 %9, label %11, label %10
34+
35+
; Loop exit: the counter slot is dead from here on.
10: ; preds = %7
36+
call void @llvm.lifetime.end.p0(ptr %6) #2
37+
br label %28
38+
39+
; Loop body: load element i through each pointer, multiply, add to the accumulator.
11: ; preds = %7
40+
%12 = load ptr, ptr %3, align 8, !tbaa !4
41+
%13 = load i32, ptr %6, align 4, !tbaa !11
42+
%14 = sext i32 %13 to i64
43+
%15 = getelementptr inbounds float, ptr %12, i64 %14
44+
%16 = load float, ptr %15, align 4, !tbaa !9
45+
%17 = load ptr, ptr %4, align 8, !tbaa !4
46+
%18 = load i32, ptr %6, align 4, !tbaa !11
47+
%19 = sext i32 %18 to i64
48+
%20 = getelementptr inbounds float, ptr %17, i64 %19
49+
%21 = load float, ptr %20, align 4, !tbaa !9
50+
%22 = fmul fast float %16, %21
51+
%23 = load float, ptr %5, align 4, !tbaa !9
52+
%24 = fadd fast float %23, %22
53+
store float %24, ptr %5, align 4, !tbaa !9
54+
br label %25
55+
56+
; Latch: increment the counter and branch back to the header.
25: ; preds = %11
57+
%26 = load i32, ptr %6, align 4, !tbaa !11
58+
%27 = add nsw i32 %26, 1
59+
store i32 %27, ptr %6, align 4, !tbaa !11
60+
br label %7, !llvm.loop !13
61+
62+
; Return block: read the final sum before ending the accumulator's lifetime.
28: ; preds = %10
63+
%29 = load float, ptr %5, align 4, !tbaa !9
64+
call void @llvm.lifetime.end.p0(ptr %5) #2
65+
ret float %29
66+
}
67+
68+
; Lifetime intrinsics called on the accumulator and counter allocas in @vmlaq.
declare void @llvm.lifetime.start.p0(ptr captures(none)) #1
69+
declare void @llvm.lifetime.end.p0(ptr captures(none)) #1
70+
71+
; #0 is attached to @vmlaq: fast-math string attributes plus a generic target
; with the fp-armv8 and neon features enabled.
attributes #0 = { nounwind uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
72+
; #1 is attached to the lifetime intrinsic declarations above.
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
73+
; #2 is used at the lifetime intrinsic call sites inside @vmlaq.
attributes #2 = { nounwind }
74+
75+
!llvm.module.flags = !{!0, !1, !2}
76+
!llvm.ident = !{!3}
77+
78+
!0 = !{i32 1, !"wchar_size", i32 4}
79+
!1 = !{i32 7, !"uwtable", i32 2}
80+
!2 = !{i32 7, !"frame-pointer", i32 1}
81+
!3 = !{!"clang version 22.0.0git"}
82+
; TBAA access tags: !4 is used on accesses to the pointer slots, !9 on float
; accesses and !11 on i32 accesses.
!4 = !{!5, !5, i64 0}
83+
!5 = !{!"p1 float", !6, i64 0}
84+
!6 = !{!"any pointer", !7, i64 0}
85+
!7 = !{!"omnipotent char", !8, i64 0}
86+
!8 = !{!"Simple C/C++ TBAA"}
87+
!9 = !{!10, !10, i64 0}
88+
!10 = !{!"float", !7, i64 0}
89+
!11 = !{!12, !12, i64 0}
90+
!12 = !{!"int", !7, i64 0}
91+
; !13 is the loop metadata on the loop's back-edge branch; it carries
; llvm.loop.mustprogress.
!13 = distinct !{!13, !14}
92+
!14 = !{!"llvm.loop.mustprogress"}
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt -S -O3 < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
5+
target triple = "aarch64"
6+
7+
; This function (a nested 7x21 reduction summing the squares of (a[i] - b[i])) should be vectorized successfully.
8+
9+
; Entry point (the name demangles to test(float const*, float const*, int, int)).
; It spills its four arguments to stack slots, reloads them and forwards them
; to the internal reduce<7> helper, returning the helper's result.
; The autogenerated assertions below expect -O3 to inline the helper and leave
; straight-line code: seven groups, each loading a <20 x float> vector plus one
; scalar element at byte offset 80 from both pointers, squaring the differences
; and combining them with llvm.vector.reduce.fadd (the scalar square is the
; start value). Between groups the pointers advance by the sign-extended
; third and fourth arguments.
define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) #0 {
10+
; CHECK-LABEL: define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii
11+
; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: .preheader.i:
13+
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
14+
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP2]] to i64
15+
; CHECK-NEXT: [[TMP6:%.*]] = load <20 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
16+
; CHECK-NEXT: [[TMP7:%.*]] = load <20 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA4]]
17+
; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <20 x float> [[TMP6]], [[TMP7]]
18+
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <20 x float> [[TMP8]], [[TMP8]]
19+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 80
20+
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA4]]
21+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
22+
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA4]]
23+
; CHECK-NEXT: [[TMP14:%.*]] = fsub fast float [[TMP11]], [[TMP13]]
24+
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast float [[TMP14]], [[TMP14]]
25+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[TMP5]]
26+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[TMP4]]
27+
; CHECK-NEXT: [[OP_RDX:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP15]], <20 x float> [[TMP9]])
28+
; CHECK-NEXT: [[TMP18:%.*]] = load <20 x float>, ptr [[TMP16]], align 4, !tbaa [[TBAA4]]
29+
; CHECK-NEXT: [[TMP19:%.*]] = load <20 x float>, ptr [[TMP17]], align 4, !tbaa [[TBAA4]]
30+
; CHECK-NEXT: [[TMP20:%.*]] = fsub fast <20 x float> [[TMP18]], [[TMP19]]
31+
; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <20 x float> [[TMP20]], [[TMP20]]
32+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP16]], i64 80
33+
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4, !tbaa [[TBAA4]]
34+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 80
35+
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA4]]
36+
; CHECK-NEXT: [[TMP26:%.*]] = fsub fast float [[TMP23]], [[TMP25]]
37+
; CHECK-NEXT: [[TMP27:%.*]] = fmul fast float [[TMP26]], [[TMP26]]
38+
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP5]]
39+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP4]]
40+
; CHECK-NEXT: [[OP_RDX_1:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP27]], <20 x float> [[TMP21]])
41+
; CHECK-NEXT: [[OP_RDX3_1:%.*]] = fadd fast float [[OP_RDX_1]], [[OP_RDX]]
42+
; CHECK-NEXT: [[TMP30:%.*]] = load <20 x float>, ptr [[TMP28]], align 4, !tbaa [[TBAA4]]
43+
; CHECK-NEXT: [[TMP31:%.*]] = load <20 x float>, ptr [[TMP29]], align 4, !tbaa [[TBAA4]]
44+
; CHECK-NEXT: [[TMP32:%.*]] = fsub fast <20 x float> [[TMP30]], [[TMP31]]
45+
; CHECK-NEXT: [[TMP33:%.*]] = fmul fast <20 x float> [[TMP32]], [[TMP32]]
46+
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP28]], i64 80
47+
; CHECK-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA4]]
48+
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP29]], i64 80
49+
; CHECK-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA4]]
50+
; CHECK-NEXT: [[TMP38:%.*]] = fsub fast float [[TMP35]], [[TMP37]]
51+
; CHECK-NEXT: [[TMP39:%.*]] = fmul fast float [[TMP38]], [[TMP38]]
52+
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP5]]
53+
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[TMP4]]
54+
; CHECK-NEXT: [[OP_RDX_2:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP39]], <20 x float> [[TMP33]])
55+
; CHECK-NEXT: [[OP_RDX3_2:%.*]] = fadd fast float [[OP_RDX_2]], [[OP_RDX3_1]]
56+
; CHECK-NEXT: [[TMP42:%.*]] = load <20 x float>, ptr [[TMP40]], align 4, !tbaa [[TBAA4]]
57+
; CHECK-NEXT: [[TMP43:%.*]] = load <20 x float>, ptr [[TMP41]], align 4, !tbaa [[TBAA4]]
58+
; CHECK-NEXT: [[TMP44:%.*]] = fsub fast <20 x float> [[TMP42]], [[TMP43]]
59+
; CHECK-NEXT: [[TMP45:%.*]] = fmul fast <20 x float> [[TMP44]], [[TMP44]]
60+
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP40]], i64 80
61+
; CHECK-NEXT: [[TMP47:%.*]] = load float, ptr [[TMP46]], align 4, !tbaa [[TBAA4]]
62+
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP41]], i64 80
63+
; CHECK-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP48]], align 4, !tbaa [[TBAA4]]
64+
; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float [[TMP47]], [[TMP49]]
65+
; CHECK-NEXT: [[TMP51:%.*]] = fmul fast float [[TMP50]], [[TMP50]]
66+
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, ptr [[TMP40]], i64 [[TMP5]]
67+
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP41]], i64 [[TMP4]]
68+
; CHECK-NEXT: [[OP_RDX_3:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP51]], <20 x float> [[TMP45]])
69+
; CHECK-NEXT: [[OP_RDX3_3:%.*]] = fadd fast float [[OP_RDX_3]], [[OP_RDX3_2]]
70+
; CHECK-NEXT: [[TMP54:%.*]] = load <20 x float>, ptr [[TMP52]], align 4, !tbaa [[TBAA4]]
71+
; CHECK-NEXT: [[TMP55:%.*]] = load <20 x float>, ptr [[TMP53]], align 4, !tbaa [[TBAA4]]
72+
; CHECK-NEXT: [[TMP56:%.*]] = fsub fast <20 x float> [[TMP54]], [[TMP55]]
73+
; CHECK-NEXT: [[TMP57:%.*]] = fmul fast <20 x float> [[TMP56]], [[TMP56]]
74+
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP52]], i64 80
75+
; CHECK-NEXT: [[TMP59:%.*]] = load float, ptr [[TMP58]], align 4, !tbaa [[TBAA4]]
76+
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP53]], i64 80
77+
; CHECK-NEXT: [[TMP61:%.*]] = load float, ptr [[TMP60]], align 4, !tbaa [[TBAA4]]
78+
; CHECK-NEXT: [[TMP62:%.*]] = fsub fast float [[TMP59]], [[TMP61]]
79+
; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP62]], [[TMP62]]
80+
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP52]], i64 [[TMP5]]
81+
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP53]], i64 [[TMP4]]
82+
; CHECK-NEXT: [[OP_RDX_4:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP63]], <20 x float> [[TMP57]])
83+
; CHECK-NEXT: [[OP_RDX3_4:%.*]] = fadd fast float [[OP_RDX_4]], [[OP_RDX3_3]]
84+
; CHECK-NEXT: [[TMP66:%.*]] = load <20 x float>, ptr [[TMP64]], align 4, !tbaa [[TBAA4]]
85+
; CHECK-NEXT: [[TMP67:%.*]] = load <20 x float>, ptr [[TMP65]], align 4, !tbaa [[TBAA4]]
86+
; CHECK-NEXT: [[TMP68:%.*]] = fsub fast <20 x float> [[TMP66]], [[TMP67]]
87+
; CHECK-NEXT: [[TMP69:%.*]] = fmul fast <20 x float> [[TMP68]], [[TMP68]]
88+
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP64]], i64 80
89+
; CHECK-NEXT: [[TMP71:%.*]] = load float, ptr [[TMP70]], align 4, !tbaa [[TBAA4]]
90+
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP65]], i64 80
91+
; CHECK-NEXT: [[TMP73:%.*]] = load float, ptr [[TMP72]], align 4, !tbaa [[TBAA4]]
92+
; CHECK-NEXT: [[TMP74:%.*]] = fsub fast float [[TMP71]], [[TMP73]]
93+
; CHECK-NEXT: [[TMP75:%.*]] = fmul fast float [[TMP74]], [[TMP74]]
94+
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i64 [[TMP5]]
95+
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, ptr [[TMP65]], i64 [[TMP4]]
96+
; CHECK-NEXT: [[OP_RDX_5:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP75]], <20 x float> [[TMP69]])
97+
; CHECK-NEXT: [[OP_RDX3_5:%.*]] = fadd fast float [[OP_RDX_5]], [[OP_RDX3_4]]
98+
; CHECK-NEXT: [[TMP78:%.*]] = load <20 x float>, ptr [[TMP76]], align 4, !tbaa [[TBAA4]]
99+
; CHECK-NEXT: [[TMP79:%.*]] = load <20 x float>, ptr [[TMP77]], align 4, !tbaa [[TBAA4]]
100+
; CHECK-NEXT: [[TMP80:%.*]] = fsub fast <20 x float> [[TMP78]], [[TMP79]]
101+
; CHECK-NEXT: [[TMP81:%.*]] = fmul fast <20 x float> [[TMP80]], [[TMP80]]
102+
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP76]], i64 80
103+
; CHECK-NEXT: [[TMP83:%.*]] = load float, ptr [[TMP82]], align 4, !tbaa [[TBAA4]]
104+
; CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP77]], i64 80
105+
; CHECK-NEXT: [[TMP85:%.*]] = load float, ptr [[TMP84]], align 4, !tbaa [[TBAA4]]
106+
; CHECK-NEXT: [[TMP86:%.*]] = fsub fast float [[TMP83]], [[TMP85]]
107+
; CHECK-NEXT: [[TMP87:%.*]] = fmul fast float [[TMP86]], [[TMP86]]
108+
; CHECK-NEXT: [[OP_RDX_6:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP87]], <20 x float> [[TMP81]])
109+
; CHECK-NEXT: [[OP_RDX3_6:%.*]] = fadd fast float [[OP_RDX_6]], [[OP_RDX3_5]]
110+
; CHECK-NEXT: ret float [[OP_RDX3_6]]
111+
;
112+
; Stack slots for the four arguments: two pointers, then two i32 values.
%5 = alloca ptr, align 8
113+
%6 = alloca ptr, align 8
114+
%7 = alloca i32, align 4
115+
%8 = alloca i32, align 4
116+
store ptr %0, ptr %5, align 8, !tbaa !4
117+
store ptr %1, ptr %6, align 8, !tbaa !4
118+
store i32 %2, ptr %7, align 4, !tbaa !9
119+
store i32 %3, ptr %8, align 4, !tbaa !9
120+
; Reload every argument from its slot and pass it on unchanged.
%9 = load ptr, ptr %5, align 8, !tbaa !4
121+
%10 = load ptr, ptr %6, align 8, !tbaa !4
122+
%11 = load i32, ptr %7, align 4, !tbaa !9
123+
%12 = load i32, ptr %8, align 4, !tbaa !9
124+
%13 = call fast noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr noundef %9, ptr noundef %10, i32 noundef %11, i32 noundef %12)
125+
ret float %13
126+
}
127+
128+
; Internal helper (the name demangles to reduce<7>(float const*, float const*, int, int))
; in unoptimized, alloca-based form. The outer loop runs 7 times. Each outer
; iteration sums (a[j] - b[j])^2 for j = 0..20 with fast-math flags, advances
; the first pointer by %2 elements and the second by %3 elements, and adds the
; partial sum to a running total. The running total is the return value.
define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) #1 {
129+
; %5/%6: slots for the two pointers; they are rewritten after each outer iteration.
%5 = alloca ptr, align 8
130+
%6 = alloca ptr, align 8
131+
; %7/%8: slots for the two i32 arguments, used as per-iteration element offsets.
%7 = alloca i32, align 4
132+
%8 = alloca i32, align 4
133+
; %9, %10 and %11 are only stored to (3, 3 and 7) and never loaded in this function.
; NOTE(review): presumably named constants from the original source - confirm.
%9 = alloca i32, align 4
134+
%10 = alloca i32, align 4
135+
%11 = alloca i32, align 4
136+
; %12: running total across outer iterations.
%12 = alloca float, align 4
137+
; %13: outer loop counter.
%13 = alloca i32, align 4
138+
; %14 is written with 2, 5 and 1 below but never loaded.
; NOTE(review): looks like a compiler-generated bookkeeping slot - confirm.
%14 = alloca i32, align 4
139+
; %15: partial sum of the inner loop; %16: inner loop counter; %17: the current difference.
%15 = alloca float, align 4
140+
%16 = alloca i32, align 4
141+
%17 = alloca float, align 4
142+
store ptr %0, ptr %5, align 8, !tbaa !4
143+
store ptr %1, ptr %6, align 8, !tbaa !4
144+
store i32 %2, ptr %7, align 4, !tbaa !9
145+
store i32 %3, ptr %8, align 4, !tbaa !9
146+
call void @llvm.lifetime.start.p0(ptr %9) #3
147+
store i32 3, ptr %9, align 4, !tbaa !9
148+
call void @llvm.lifetime.start.p0(ptr %10) #3
149+
store i32 3, ptr %10, align 4, !tbaa !9
150+
call void @llvm.lifetime.start.p0(ptr %11) #3
151+
store i32 7, ptr %11, align 4, !tbaa !9
152+
call void @llvm.lifetime.start.p0(ptr %12) #3
153+
store float 0.000000e+00, ptr %12, align 4, !tbaa !11
154+
call void @llvm.lifetime.start.p0(ptr %13) #3
155+
store i32 0, ptr %13, align 4, !tbaa !9
156+
br label %18
157+
158+
; Outer loop header: continue while the outer counter is (signed) less than 7.
18: ; preds = %59, %4
159+
%19 = load i32, ptr %13, align 4, !tbaa !9
160+
%20 = icmp slt i32 %19, 7
161+
br i1 %20, label %22, label %21
162+
163+
; Outer loop exit.
21: ; preds = %18
164+
store i32 2, ptr %14, align 4
165+
call void @llvm.lifetime.end.p0(ptr %13) #3
166+
br label %62
167+
168+
; Outer loop body entry: reset the partial sum and the inner counter to zero.
22: ; preds = %18
169+
call void @llvm.lifetime.start.p0(ptr %15) #3
170+
store float 0.000000e+00, ptr %15, align 4, !tbaa !11
171+
call void @llvm.lifetime.start.p0(ptr %16) #3
172+
store i32 0, ptr %16, align 4, !tbaa !9
173+
br label %23
174+
175+
; Inner loop header: continue while the inner counter is (signed) less than 21.
23: ; preds = %44, %22
176+
%24 = load i32, ptr %16, align 4, !tbaa !9
177+
%25 = icmp slt i32 %24, 21
178+
br i1 %25, label %27, label %26
179+
180+
; Inner loop exit.
26: ; preds = %23
181+
store i32 5, ptr %14, align 4
182+
call void @llvm.lifetime.end.p0(ptr %16) #3
183+
br label %47
184+
185+
; Inner loop body: d = a[j] - b[j]; partial sum += d * d.
27: ; preds = %23
186+
call void @llvm.lifetime.start.p0(ptr %17) #3
187+
%28 = load ptr, ptr %5, align 8, !tbaa !4
188+
%29 = load i32, ptr %16, align 4, !tbaa !9
189+
%30 = sext i32 %29 to i64
190+
%31 = getelementptr inbounds float, ptr %28, i64 %30
191+
%32 = load float, ptr %31, align 4, !tbaa !11
192+
%33 = load ptr, ptr %6, align 8, !tbaa !4
193+
%34 = load i32, ptr %16, align 4, !tbaa !9
194+
%35 = sext i32 %34 to i64
195+
%36 = getelementptr inbounds float, ptr %33, i64 %35
196+
%37 = load float, ptr %36, align 4, !tbaa !11
197+
%38 = fsub fast float %32, %37
198+
store float %38, ptr %17, align 4, !tbaa !11
199+
; The difference is loaded twice from its slot to form the square.
%39 = load float, ptr %17, align 4, !tbaa !11
200+
%40 = load float, ptr %17, align 4, !tbaa !11
201+
%41 = fmul fast float %39, %40
202+
%42 = load float, ptr %15, align 4, !tbaa !11
203+
%43 = fadd fast float %42, %41
204+
store float %43, ptr %15, align 4, !tbaa !11
205+
call void @llvm.lifetime.end.p0(ptr %17) #3
206+
br label %44
207+
208+
; Inner latch: increment the inner counter.
44: ; preds = %27
209+
%45 = load i32, ptr %16, align 4, !tbaa !9
210+
%46 = add nsw i32 %45, 1
211+
store i32 %46, ptr %16, align 4, !tbaa !9
212+
br label %23, !llvm.loop !13
213+
214+
; After the inner loop: advance the first pointer by %2 elements and the second
; by %3 elements, then fold the partial sum into the running total.
47: ; preds = %26
215+
%48 = load i32, ptr %7, align 4, !tbaa !9
216+
%49 = load ptr, ptr %5, align 8, !tbaa !4
217+
%50 = sext i32 %48 to i64
218+
%51 = getelementptr inbounds float, ptr %49, i64 %50
219+
store ptr %51, ptr %5, align 8, !tbaa !4
220+
%52 = load i32, ptr %8, align 4, !tbaa !9
221+
%53 = load ptr, ptr %6, align 8, !tbaa !4
222+
%54 = sext i32 %52 to i64
223+
%55 = getelementptr inbounds float, ptr %53, i64 %54
224+
store ptr %55, ptr %6, align 8, !tbaa !4
225+
%56 = load float, ptr %15, align 4, !tbaa !11
226+
%57 = load float, ptr %12, align 4, !tbaa !11
227+
%58 = fadd fast float %57, %56
228+
store float %58, ptr %12, align 4, !tbaa !11
229+
call void @llvm.lifetime.end.p0(ptr %15) #3
230+
br label %59
231+
232+
; Outer latch: increment the outer counter.
59: ; preds = %47
233+
%60 = load i32, ptr %13, align 4, !tbaa !9
234+
%61 = add nsw i32 %60, 1
235+
store i32 %61, ptr %13, align 4, !tbaa !9
236+
br label %18, !llvm.loop !15
237+
238+
; Return block: read the running total, then end the remaining lifetimes.
62: ; preds = %21
239+
%63 = load float, ptr %12, align 4, !tbaa !11
240+
store i32 1, ptr %14, align 4
241+
call void @llvm.lifetime.end.p0(ptr %12) #3
242+
call void @llvm.lifetime.end.p0(ptr %11) #3
243+
call void @llvm.lifetime.end.p0(ptr %10) #3
244+
call void @llvm.lifetime.end.p0(ptr %9) #3
245+
ret float %63
246+
}
247+
248+
; Lifetime intrinsics called on the local allocas of the internal reduce helper.
declare void @llvm.lifetime.start.p0(ptr captures(none)) #2
249+
declare void @llvm.lifetime.end.p0(ptr captures(none)) #2
250+
251+
; #0 is attached to @_Z4testPKfS0_ii: fast-math string attributes plus a generic
; target with the fp-armv8 and neon features enabled.
attributes #0 = { mustprogress uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
252+
; #1 is attached to the internal reduce helper; relative to #0 it adds
; inlinehint and nounwind.
attributes #1 = { inlinehint mustprogress nounwind uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
253+
; #2 is attached to the lifetime intrinsic declarations above.
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
254+
; #3 is used at the lifetime intrinsic call sites.
attributes #3 = { nounwind }
255+
256+
!llvm.module.flags = !{!0, !1, !2}
257+
!llvm.ident = !{!3}
258+
259+
!0 = !{i32 1, !"wchar_size", i32 4}
260+
!1 = !{i32 7, !"uwtable", i32 2}
261+
!2 = !{i32 7, !"frame-pointer", i32 1}
262+
!3 = !{!"clang version 22.0.0git"}
263+
; TBAA access tags: !4 is used on accesses to the pointer slots, !9 on i32
; accesses and !11 on float accesses.
!4 = !{!5, !5, i64 0}
264+
!5 = !{!"p1 float", !6, i64 0}
265+
!6 = !{!"any pointer", !7, i64 0}
266+
!7 = !{!"omnipotent char", !8, i64 0}
267+
!8 = !{!"Simple C++ TBAA"}
268+
!9 = !{!10, !10, i64 0}
269+
!10 = !{!"int", !7, i64 0}
270+
!11 = !{!12, !12, i64 0}
271+
!12 = !{!"float", !7, i64 0}
272+
; Loop metadata: !13 is on the inner loop's back-edge and !15 on the outer
; loop's back-edge; both reference !14 (llvm.loop.mustprogress).
!13 = distinct !{!13, !14}
273+
!14 = !{!"llvm.loop.mustprogress"}
274+
!15 = distinct !{!15, !14}

0 commit comments

Comments
 (0)