1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
1
2
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
2
3
3
- ; CHECK-LABEL: stp_int
4
- ; CHECK: stp w0, w1, [x2]
5
4
; Two i32 stores to %p and to the next i32 element are expected to be emitted as one stp.
define void @stp_int (i32 %a , i32 %b , ptr nocapture %p ) nounwind {
5
+ ; CHECK-LABEL: stp_int:
6
+ ; CHECK: // %bb.0:
7
+ ; CHECK-NEXT: stp w0, w1, [x2]
8
+ ; CHECK-NEXT: ret
6
9
store i32 %a , ptr %p , align 4
7
10
%add.ptr = getelementptr inbounds i32 , ptr %p , i64 1
8
11
store i32 %b , ptr %add.ptr , align 4
9
12
ret void
10
13
}
11
14
12
- ; CHECK-LABEL: stp_long
13
- ; CHECK: stp x0, x1, [x2]
14
15
; Two i64 stores to %p and to the next i64 element are expected to be emitted as one stp.
define void @stp_long (i64 %a , i64 %b , ptr nocapture %p ) nounwind {
16
+ ; CHECK-LABEL: stp_long:
17
+ ; CHECK: // %bb.0:
18
+ ; CHECK-NEXT: stp x0, x1, [x2]
19
+ ; CHECK-NEXT: ret
15
20
store i64 %a , ptr %p , align 8
16
21
%add.ptr = getelementptr inbounds i64 , ptr %p , i64 1
17
22
store i64 %b , ptr %add.ptr , align 8
18
23
ret void
19
24
}
20
25
21
- ; CHECK-LABEL: stp_float
22
- ; CHECK: stp s0, s1, [x0]
23
26
; Two float stores to %p and to the next float element are expected to be emitted as one stp.
define void @stp_float (float %a , float %b , ptr nocapture %p ) nounwind {
27
+ ; CHECK-LABEL: stp_float:
28
+ ; CHECK: // %bb.0:
29
+ ; CHECK-NEXT: stp s0, s1, [x0]
30
+ ; CHECK-NEXT: ret
24
31
store float %a , ptr %p , align 4
25
32
%add.ptr = getelementptr inbounds float , ptr %p , i64 1
26
33
store float %b , ptr %add.ptr , align 4
27
34
ret void
28
35
}
29
36
30
- ; CHECK-LABEL: stp_double
31
- ; CHECK: stp d0, d1, [x0]
32
37
; Two double stores to %p and to the next double element are expected to be emitted as one stp.
define void @stp_double (double %a , double %b , ptr nocapture %p ) nounwind {
38
+ ; CHECK-LABEL: stp_double:
39
+ ; CHECK: // %bb.0:
40
+ ; CHECK-NEXT: stp d0, d1, [x0]
41
+ ; CHECK-NEXT: ret
33
42
store double %a , ptr %p , align 8
34
43
%add.ptr = getelementptr inbounds double , ptr %p , i64 1
35
44
store double %b , ptr %add.ptr , align 8
36
45
ret void
37
46
}
38
47
39
- ; CHECK-LABEL: stp_doublex2
40
- ; CHECK: stp q0, q1, [x0]
41
48
define void @stp_doublex2 (<2 x double > %a , <2 x double > %b , ptr nocapture %p ) nounwind {
49
+ ; CHECK-LABEL: stp_doublex2:
50
+ ; CHECK: // %bb.0:
51
+ ; CHECK-NEXT: stp q0, q1, [x0]
52
+ ; CHECK-NEXT: ret
42
53
store <2 x double > %a , ptr %p , align 16
43
54
%add.ptr = getelementptr inbounds <2 x double >, ptr %p , i64 1
44
55
store <2 x double > %b , ptr %add.ptr , align 16
@@ -47,9 +58,10 @@ define void @stp_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) no
47
58
48
59
; Test the load/store optimizer---combine sturs into an stp, if appropriate
49
60
define void @stur_int (i32 %a , i32 %b , ptr nocapture %p ) nounwind {
50
- ; CHECK-LABEL: stur_int
51
- ; CHECK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8]
52
- ; CHECK-NEXT: ret
61
+ ; CHECK-LABEL: stur_int:
62
+ ; CHECK: // %bb.0:
63
+ ; CHECK-NEXT: stp w1, w0, [x2, #-8]
64
+ ; CHECK-NEXT: ret
53
65
%p1 = getelementptr inbounds i32 , ptr %p , i32 -1
54
66
store i32 %a , ptr %p1 , align 2
55
67
%p2 = getelementptr inbounds i32 , ptr %p , i32 -2
@@ -58,9 +70,10 @@ define void @stur_int(i32 %a, i32 %b, ptr nocapture %p) nounwind {
58
70
}
59
71
60
72
define void @stur_long (i64 %a , i64 %b , ptr nocapture %p ) nounwind {
61
- ; CHECK-LABEL: stur_long
62
- ; CHECK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16]
63
- ; CHECK-NEXT: ret
73
+ ; CHECK-LABEL: stur_long:
74
+ ; CHECK: // %bb.0:
75
+ ; CHECK-NEXT: stp x1, x0, [x2, #-16]
76
+ ; CHECK-NEXT: ret
64
77
%p1 = getelementptr inbounds i64 , ptr %p , i32 -1
65
78
store i64 %a , ptr %p1 , align 2
66
79
%p2 = getelementptr inbounds i64 , ptr %p , i32 -2
@@ -69,9 +82,10 @@ define void @stur_long(i64 %a, i64 %b, ptr nocapture %p) nounwind {
69
82
}
70
83
71
84
define void @stur_float (float %a , float %b , ptr nocapture %p ) nounwind {
72
- ; CHECK-LABEL: stur_float
73
- ; CHECK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8]
74
- ; CHECK-NEXT: ret
85
+ ; CHECK-LABEL: stur_float:
86
+ ; CHECK: // %bb.0:
87
+ ; CHECK-NEXT: stp s1, s0, [x0, #-8]
88
+ ; CHECK-NEXT: ret
75
89
%p1 = getelementptr inbounds float , ptr %p , i32 -1
76
90
store float %a , ptr %p1 , align 2
77
91
%p2 = getelementptr inbounds float , ptr %p , i32 -2
@@ -80,9 +94,10 @@ define void @stur_float(float %a, float %b, ptr nocapture %p) nounwind {
80
94
}
81
95
82
96
define void @stur_double (double %a , double %b , ptr nocapture %p ) nounwind {
83
- ; CHECK-LABEL: stur_double
84
- ; CHECK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16]
85
- ; CHECK-NEXT: ret
97
+ ; CHECK-LABEL: stur_double:
98
+ ; CHECK: // %bb.0:
99
+ ; CHECK-NEXT: stp d1, d0, [x0, #-16]
100
+ ; CHECK-NEXT: ret
86
101
%p1 = getelementptr inbounds double , ptr %p , i32 -1
87
102
store double %a , ptr %p1 , align 2
88
103
%p2 = getelementptr inbounds double , ptr %p , i32 -2
@@ -91,9 +106,10 @@ define void @stur_double(double %a, double %b, ptr nocapture %p) nounwind {
91
106
}
92
107
93
108
define void @stur_doublex2 (<2 x double > %a , <2 x double > %b , ptr nocapture %p ) nounwind {
94
- ; CHECK-LABEL: stur_doublex2
95
- ; CHECK: stp q{{[0-9]+}}, q{{[0-9]+}}, [x{{[0-9]+}}, #-32]
96
- ; CHECK-NEXT: ret
109
+ ; CHECK-LABEL: stur_doublex2:
110
+ ; CHECK: // %bb.0:
111
+ ; CHECK-NEXT: stp q1, q0, [x0, #-32]
112
+ ; CHECK-NEXT: ret
97
113
%p1 = getelementptr inbounds <2 x double >, ptr %p , i32 -1
98
114
store <2 x double > %a , ptr %p1 , align 2
99
115
%p2 = getelementptr inbounds <2 x double >, ptr %p , i32 -2
@@ -102,13 +118,12 @@ define void @stur_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) n
102
118
}
103
119
104
120
define void @splat_v4i32 (i32 %v , ptr %p ) {
121
+ ; CHECK-LABEL: splat_v4i32:
122
+ ; CHECK: // %bb.0: // %entry
123
+ ; CHECK-NEXT: dup v0.4s, w0
124
+ ; CHECK-NEXT: str q0, [x1]
125
+ ; CHECK-NEXT: ret
105
126
entry:
106
-
107
- ; CHECK-LABEL: splat_v4i32
108
- ; CHECK-DAG: dup v0.4s, w0
109
- ; CHECK-DAG: str q0, [x1]
110
- ; CHECK: ret
111
-
112
127
%p17 = insertelement <4 x i32 > undef , i32 %v , i32 0
113
128
%p18 = insertelement <4 x i32 > %p17 , i32 %v , i32 1
114
129
%p19 = insertelement <4 x i32 > %p18 , i32 %v , i32 2
@@ -120,17 +135,22 @@ entry:
120
135
; Check that a non-splat store that is storing a vector created by 4
121
136
; insertelements that is not a splat vector does not get split.
122
137
define void @nosplat_v4i32 (i32 %v , ptr %p ) {
123
- entry:
124
-
125
138
; CHECK-LABEL: nosplat_v4i32:
126
- ; CHECK: str w0,
127
- ; CHECK: ldr q[[REG1:[0-9]+]],
128
- ; CHECK-DAG: mov v[[REG1]].s[1], w0
129
- ; CHECK-DAG: mov v[[REG1]].s[2], w0
130
- ; CHECK-DAG: mov v[[REG1]].s[3], w0
131
- ; CHECK: str q[[REG1]], [x1]
132
- ; CHECK: ret
133
-
139
+ ; CHECK: // %bb.0: // %entry
140
+ ; CHECK-NEXT: sub sp, sp, #16
141
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
142
+ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
143
+ ; CHECK-NEXT: mov x8, sp
144
+ ; CHECK-NEXT: bfi x8, x0, #2, #2
145
+ ; CHECK-NEXT: str w0, [x8]
146
+ ; CHECK-NEXT: ldr q0, [sp]
147
+ ; CHECK-NEXT: mov v0.s[1], w0
148
+ ; CHECK-NEXT: mov v0.s[2], w0
149
+ ; CHECK-NEXT: mov v0.s[3], w0
150
+ ; CHECK-NEXT: str q0, [x1]
151
+ ; CHECK-NEXT: add sp, sp, #16
152
+ ; CHECK-NEXT: ret
153
+ entry:
134
154
%p17 = insertelement <4 x i32 > undef , i32 %v , i32 %v
135
155
%p18 = insertelement <4 x i32 > %p17 , i32 %v , i32 1
136
156
%p19 = insertelement <4 x i32 > %p18 , i32 %v , i32 2
@@ -142,15 +162,14 @@ entry:
142
162
; Check that a non-splat store that is storing a vector created by 3
143
163
; insertelements that is not a splat vector does not get split.
144
164
define void @nosplat2_v4i32 (i32 %v , ptr %p , <4 x i32 > %vin ) {
145
- entry:
146
-
147
165
; CHECK-LABEL: nosplat2_v4i32:
148
- ; CHECK: mov v[[REG1]].s[1], w0
149
- ; CHECK-DAG: mov v[[REG1]].s[2], w0
150
- ; CHECK-DAG: mov v[[REG1]].s[3], w0
151
- ; CHECK: str q[[REG1]], [x1]
152
- ; CHECK: ret
153
-
166
+ ; CHECK: // %bb.0: // %entry
167
+ ; CHECK-NEXT: mov v0.s[1], w0
168
+ ; CHECK-NEXT: mov v0.s[2], w0
169
+ ; CHECK-NEXT: mov v0.s[3], w0
170
+ ; CHECK-NEXT: str q0, [x1]
171
+ ; CHECK-NEXT: ret
172
+ entry:
154
173
%p18 = insertelement <4 x i32 > %vin , i32 %v , i32 1
155
174
%p19 = insertelement <4 x i32 > %p18 , i32 %v , i32 2
156
175
%p20 = insertelement <4 x i32 > %p19 , i32 %v , i32 3
@@ -159,12 +178,14 @@ entry:
159
178
}
160
179
161
180
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
162
- ; CHECK-LABEL: stp_int_rar_hazard
163
- ; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
164
- ; CHECK: add w8, [[REG]], w1
165
- ; CHECK: stp w0, w1, [x2]
166
- ; CHECK: ret
167
181
define i32 @stp_int_rar_hazard (i32 %a , i32 %b , ptr nocapture %p ) nounwind {
182
+ ; CHECK-LABEL: stp_int_rar_hazard:
183
+ ; CHECK: // %bb.0:
184
+ ; CHECK-NEXT: ldr w8, [x2, #8]
185
+ ; CHECK-NEXT: add w8, w8, w1
186
+ ; CHECK-NEXT: stp w0, w1, [x2]
187
+ ; CHECK-NEXT: mov x0, x8
188
+ ; CHECK-NEXT: ret
168
189
store i32 %a , ptr %p , align 4
169
190
%ld.ptr = getelementptr inbounds i32 , ptr %p , i64 2
170
191
%tmp = load i32 , ptr %ld.ptr , align 4
@@ -175,12 +196,13 @@ define i32 @stp_int_rar_hazard(i32 %a, i32 %b, ptr nocapture %p) nounwind {
175
196
}
176
197
177
198
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
178
- ; CHECK-LABEL: stp_int_rar_hazard_after
179
- ; CHECK: ldr [[REG:w[0-9]+]], [x3, #4]
180
- ; CHECK: add w0, [[REG]], w2
181
- ; CHECK: stp w1, w2, [x3]
182
- ; CHECK: ret
183
199
define i32 @stp_int_rar_hazard_after (i32 %w0 , i32 %a , i32 %b , ptr nocapture %p ) nounwind {
200
+ ; CHECK-LABEL: stp_int_rar_hazard_after:
201
+ ; CHECK: // %bb.0:
202
+ ; CHECK-NEXT: ldr w8, [x3, #4]
203
+ ; CHECK-NEXT: add w0, w8, w2
204
+ ; CHECK-NEXT: stp w1, w2, [x3]
205
+ ; CHECK-NEXT: ret
184
206
store i32 %a , ptr %p , align 4
185
207
%ld.ptr = getelementptr inbounds i32 , ptr %p , i64 1
186
208
%tmp = load i32 , ptr %ld.ptr , align 4
0 commit comments