You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
+54-94Lines changed: 54 additions & 94 deletions
Original file line number
Diff line number
Diff line change
@@ -9,26 +9,20 @@ define <vscale x 8 x bfloat> @fmla_nxv8bf16(<vscale x 8 x bfloat> %acc, <vscale
9
9
; SVE: // %bb.0:
10
10
; SVE-NEXT: uunpkhi z3.s, z2.h
11
11
; SVE-NEXT: uunpkhi z4.s, z1.h
12
+
; SVE-NEXT: uunpkhi z5.s, z0.h
12
13
; SVE-NEXT: uunpklo z2.s, z2.h
13
14
; SVE-NEXT: uunpklo z1.s, z1.h
15
+
; SVE-NEXT: uunpklo z0.s, z0.h
14
16
; SVE-NEXT: ptrue p0.s
15
17
; SVE-NEXT: lsl z3.s, z3.s, #16
16
18
; SVE-NEXT: lsl z4.s, z4.s, #16
19
+
; SVE-NEXT: lsl z5.s, z5.s, #16
17
20
; SVE-NEXT: lsl z2.s, z2.s, #16
18
21
; SVE-NEXT: lsl z1.s, z1.s, #16
19
-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
20
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
21
-
; SVE-NEXT: bfcvt z2.h, p0/m, z3.s
22
-
; SVE-NEXT: uunpkhi z3.s, z0.h
23
-
; SVE-NEXT: uunpklo z0.s, z0.h
24
-
; SVE-NEXT: bfcvt z1.h, p0/m, z1.s
25
-
; SVE-NEXT: lsl z2.s, z2.s, #16
26
-
; SVE-NEXT: lsl z3.s, z3.s, #16
27
22
; SVE-NEXT: lsl z0.s, z0.s, #16
28
-
; SVE-NEXT: lsl z1.s, z1.s, #16
29
-
; SVE-NEXT: fadd z2.s, z3.s, z2.s
30
-
; SVE-NEXT: fadd z0.s, z0.s, z1.s
31
-
; SVE-NEXT: bfcvt z1.h, p0/m, z2.s
23
+
; SVE-NEXT: fmad z3.s, p0/m, z4.s, z5.s
24
+
; SVE-NEXT: fmla z0.s, p0/m, z1.s, z2.s
25
+
; SVE-NEXT: bfcvt z1.h, p0/m, z3.s
32
26
; SVE-NEXT: bfcvt z0.h, p0/m, z0.s
33
27
; SVE-NEXT: uzp1 z0.h, z0.h, z1.h
34
28
; SVE-NEXT: ret
@@ -48,12 +42,9 @@ define <vscale x 4 x bfloat> @fmla_nxv4bf16(<vscale x 4 x bfloat> %acc, <vscale
48
42
; SVE: // %bb.0:
49
43
; SVE-NEXT: lsl z2.s, z2.s, #16
50
44
; SVE-NEXT: lsl z1.s, z1.s, #16
51
-
; SVE-NEXT: ptrue p0.s
52
45
; SVE-NEXT: lsl z0.s, z0.s, #16
53
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
54
-
; SVE-NEXT: bfcvt z1.h, p0/m, z1.s
55
-
; SVE-NEXT: lsl z1.s, z1.s, #16
56
-
; SVE-NEXT: fadd z0.s, z0.s, z1.s
46
+
; SVE-NEXT: ptrue p0.s
47
+
; SVE-NEXT: fmla z0.s, p0/m, z1.s, z2.s
57
48
; SVE-NEXT: bfcvt z0.h, p0/m, z0.s
58
49
; SVE-NEXT: ret
59
50
;
@@ -72,12 +63,9 @@ define <vscale x 2 x bfloat> @fmla_nxv2bf16(<vscale x 2 x bfloat> %acc, <vscale
72
63
; SVE: // %bb.0:
73
64
; SVE-NEXT: lsl z2.s, z2.s, #16
74
65
; SVE-NEXT: lsl z1.s, z1.s, #16
75
-
; SVE-NEXT: ptrue p0.d
76
66
; SVE-NEXT: lsl z0.s, z0.s, #16
77
-
; SVE-NEXT: fmul z1.s, p0/m, z1.s, z2.s
78
-
; SVE-NEXT: bfcvt z1.h, p0/m, z1.s
79
-
; SVE-NEXT: lsl z1.s, z1.s, #16
80
-
; SVE-NEXT: fadd z0.s, p0/m, z0.s, z1.s
67
+
; SVE-NEXT: ptrue p0.d
68
+
; SVE-NEXT: fmla z0.s, p0/m, z1.s, z2.s
81
69
; SVE-NEXT: bfcvt z0.h, p0/m, z0.s
82
70
; SVE-NEXT: ret
83
71
;
@@ -94,28 +82,24 @@ define <vscale x 2 x bfloat> @fmla_nxv2bf16(<vscale x 2 x bfloat> %acc, <vscale
94
82
define <vscale x 8 x bfloat> @fmls_nxv8bf16(<vscale x 8 x bfloat> %acc, <vscale x 8 x bfloat> %m1, <vscale x 8 x bfloat> %m2) {
95
83
; SVE-LABEL: fmls_nxv8bf16:
96
84
; SVE: // %bb.0:
85
+
; SVE-NEXT: ptrue p0.h
97
86
; SVE-NEXT: uunpkhi z3.s, z2.h
98
-
; SVE-NEXT: uunpkhi z4.s, z1.h
87
+
; SVE-NEXT: uunpkhi z4.s, z0.h
99
88
; SVE-NEXT: uunpklo z2.s, z2.h
100
-
; SVE-NEXT: uunpklo z1.s, z1.h
89
+
; SVE-NEXT: uunpklo z0.s, z0.h
90
+
; SVE-NEXT: fneg z1.h, p0/m, z1.h
101
91
; SVE-NEXT: ptrue p0.s
102
92
; SVE-NEXT: lsl z3.s, z3.s, #16
103
93
; SVE-NEXT: lsl z4.s, z4.s, #16
104
94
; SVE-NEXT: lsl z2.s, z2.s, #16
105
-
; SVE-NEXT: lsl z1.s, z1.s, #16
106
-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
107
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
108
-
; SVE-NEXT: bfcvt z2.h, p0/m, z3.s
109
-
; SVE-NEXT: uunpkhi z3.s, z0.h
110
-
; SVE-NEXT: uunpklo z0.s, z0.h
111
-
; SVE-NEXT: bfcvt z1.h, p0/m, z1.s
112
-
; SVE-NEXT: lsl z2.s, z2.s, #16
113
-
; SVE-NEXT: lsl z3.s, z3.s, #16
114
95
; SVE-NEXT: lsl z0.s, z0.s, #16
96
+
; SVE-NEXT: uunpkhi z5.s, z1.h
97
+
; SVE-NEXT: uunpklo z1.s, z1.h
98
+
; SVE-NEXT: lsl z5.s, z5.s, #16
115
99
; SVE-NEXT: lsl z1.s, z1.s, #16
116
-
; SVE-NEXT: fsub z2.s, z3.s, z2.s
117
-
; SVE-NEXT: fsub z0.s, z0.s, z1.s
118
-
; SVE-NEXT: bfcvt z1.h, p0/m, z2.s
100
+
; SVE-NEXT: fmad z3.s, p0/m, z5.s, z4.s
101
+
; SVE-NEXT: fmla z0.s, p0/m, z1.s, z2.s
102
+
; SVE-NEXT: bfcvt z1.h, p0/m, z3.s
119
103
; SVE-NEXT: bfcvt z0.h, p0/m, z0.s
120
104
; SVE-NEXT: uzp1 z0.h, z0.h, z1.h
121
105
; SVE-NEXT: ret
@@ -133,14 +117,12 @@ define <vscale x 8 x bfloat> @fmls_nxv8bf16(<vscale x 8 x bfloat> %acc, <vscale
133
117
define <vscale x 4 x bfloat> @fmls_nxv4bf16(<vscale x 4 x bfloat> %acc, <vscale x 4 x bfloat> %m1, <vscale x 4 x bfloat> %m2) {
134
118
; SVE-LABEL: fmls_nxv4bf16:
135
119
; SVE: // %bb.0:
136
-
; SVE-NEXT: lsl z2.s, z2.s, #16
137
-
; SVE-NEXT: lsl z1.s, z1.s, #16
138
120
; SVE-NEXT: ptrue p0.s
121
+
; SVE-NEXT: lsl z2.s, z2.s, #16
139
122
; SVE-NEXT: lsl z0.s, z0.s, #16
140
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
141
-
; SVE-NEXT: bfcvt z1.h, p0/m, z1.s
123
+
; SVE-NEXT: fneg z1.h, p0/m, z1.h
142
124
; SVE-NEXT: lsl z1.s, z1.s, #16
143
-
; SVE-NEXT: fsub z0.s, z0.s, z1.s
125
+
; SVE-NEXT: fmla z0.s, p0/m, z1.s, z2.s
144
126
; SVE-NEXT: bfcvt z0.h, p0/m, z0.s
145
127
; SVE-NEXT: ret
146
128
;
@@ -157,14 +139,12 @@ define <vscale x 4 x bfloat> @fmls_nxv4bf16(<vscale x 4 x bfloat> %acc, <vscale
157
139
define <vscale x 2 x bfloat> @fmls_nxv2bf16(<vscale x 2 x bfloat> %acc, <vscale x 2 x bfloat> %m1, <vscale x 2 x bfloat> %m2) {
158
140
; SVE-LABEL: fmls_nxv2bf16:
159
141
; SVE: // %bb.0:
160
-
; SVE-NEXT: lsl z2.s, z2.s, #16
161
-
; SVE-NEXT: lsl z1.s, z1.s, #16
162
142
; SVE-NEXT: ptrue p0.d
143
+
; SVE-NEXT: lsl z2.s, z2.s, #16
163
144
; SVE-NEXT: lsl z0.s, z0.s, #16
164
-
; SVE-NEXT: fmul z1.s, p0/m, z1.s, z2.s
165
-
; SVE-NEXT: bfcvt z1.h, p0/m, z1.s
145
+
; SVE-NEXT: fneg z1.h, p0/m, z1.h
166
146
; SVE-NEXT: lsl z1.s, z1.s, #16
167
-
; SVE-NEXT: fsub z0.s, p0/m, z0.s, z1.s
147
+
; SVE-NEXT: fmla z0.s, p0/m, z1.s, z2.s
168
148
; SVE-NEXT: bfcvt z0.h, p0/m, z0.s
169
149
; SVE-NEXT: ret
170
150
;
@@ -183,26 +163,20 @@ define <vscale x 8 x bfloat> @fmla_sel_nxv8bf16(<vscale x 8 x i1> %pred, <vscale
183
163
; SVE: // %bb.0:
184
164
; SVE-NEXT: uunpkhi z3.s, z2.h
185
165
; SVE-NEXT: uunpkhi z4.s, z1.h
166
+
; SVE-NEXT: uunpkhi z5.s, z0.h
186
167
; SVE-NEXT: uunpklo z2.s, z2.h
187
168
; SVE-NEXT: uunpklo z1.s, z1.h
169
+
; SVE-NEXT: uunpklo z6.s, z0.h
188
170
; SVE-NEXT: ptrue p1.s
189
171
; SVE-NEXT: lsl z3.s, z3.s, #16
190
172
; SVE-NEXT: lsl z4.s, z4.s, #16
173
+
; SVE-NEXT: lsl z5.s, z5.s, #16
191
174
; SVE-NEXT: lsl z2.s, z2.s, #16
192
175
; SVE-NEXT: lsl z1.s, z1.s, #16
193
-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
194
-
; SVE-NEXT: uunpklo z4.s, z0.h
195
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
176
+
; SVE-NEXT: lsl z6.s, z6.s, #16
177
+
; SVE-NEXT: fmad z3.s, p1/m, z4.s, z5.s
178
+
; SVE-NEXT: fmad z1.s, p1/m, z2.s, z6.s
196
179
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
197
-
; SVE-NEXT: uunpkhi z3.s, z0.h
198
-
; SVE-NEXT: lsl z4.s, z4.s, #16
199
-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
200
-
; SVE-NEXT: lsl z2.s, z2.s, #16
201
-
; SVE-NEXT: lsl z3.s, z3.s, #16
202
-
; SVE-NEXT: lsl z1.s, z1.s, #16
203
-
; SVE-NEXT: fadd z2.s, z3.s, z2.s
204
-
; SVE-NEXT: fadd z1.s, z4.s, z1.s
205
-
; SVE-NEXT: bfcvt z2.h, p1/m, z2.s
206
180
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
207
181
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
208
182
; SVE-NEXT: mov z0.h, p0/m, z1.h
@@ -223,12 +197,9 @@ define <vscale x 4 x bfloat> @fmla_sel_nxv4bf16(<vscale x 4 x i1> %pred, <vscale
223
197
; SVE: // %bb.0:
224
198
; SVE-NEXT: lsl z2.s, z2.s, #16
225
199
; SVE-NEXT: lsl z1.s, z1.s, #16
200
+
; SVE-NEXT: lsl z3.s, z0.s, #16
226
201
; SVE-NEXT: ptrue p1.s
227
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
228
-
; SVE-NEXT: lsl z2.s, z0.s, #16
229
-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
230
-
; SVE-NEXT: lsl z1.s, z1.s, #16
231
-
; SVE-NEXT: fadd z1.s, z2.s, z1.s
202
+
; SVE-NEXT: fmad z1.s, p1/m, z2.s, z3.s
232
203
; SVE-NEXT: bfcvt z0.h, p0/m, z1.s
233
204
; SVE-NEXT: ret
234
205
;
@@ -247,12 +218,9 @@ define <vscale x 2 x bfloat> @fmla_sel_nxv2bf16(<vscale x 2 x i1> %pred, <vscale
247
218
; SVE: // %bb.0:
248
219
; SVE-NEXT: lsl z2.s, z2.s, #16
249
220
; SVE-NEXT: lsl z1.s, z1.s, #16
221
+
; SVE-NEXT: lsl z3.s, z0.s, #16
250
222
; SVE-NEXT: ptrue p1.d
251
-
; SVE-NEXT: fmul z1.s, p1/m, z1.s, z2.s
252
-
; SVE-NEXT: lsl z2.s, z0.s, #16
253
-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
254
-
; SVE-NEXT: lsl z1.s, z1.s, #16
255
-
; SVE-NEXT: fadd z1.s, p1/m, z1.s, z2.s
223
+
; SVE-NEXT: fmad z1.s, p1/m, z2.s, z3.s
256
224
; SVE-NEXT: bfcvt z0.h, p0/m, z1.s
257
225
; SVE-NEXT: ret
258
226
;
@@ -269,28 +237,24 @@ define <vscale x 2 x bfloat> @fmla_sel_nxv2bf16(<vscale x 2 x i1> %pred, <vscale
269
237
define <vscale x 8 x bfloat> @fmls_sel_nxv8bf16(<vscale x 8 x i1> %pred, <vscale x 8 x bfloat> %acc, <vscale x 8 x bfloat> %m1, <vscale x 8 x bfloat> %m2) {
270
238
; SVE-LABEL: fmls_sel_nxv8bf16:
271
239
; SVE: // %bb.0:
240
+
; SVE-NEXT: ptrue p1.h
272
241
; SVE-NEXT: uunpkhi z3.s, z2.h
273
-
; SVE-NEXT: uunpkhi z4.s, z1.h
242
+
; SVE-NEXT: uunpkhi z4.s, z0.h
274
243
; SVE-NEXT: uunpklo z2.s, z2.h
275
-
; SVE-NEXT: uunpklo z1.s, z1.h
244
+
; SVE-NEXT: uunpklo z6.s, z0.h
245
+
; SVE-NEXT: fneg z1.h, p1/m, z1.h
276
246
; SVE-NEXT: ptrue p1.s
277
247
; SVE-NEXT: lsl z3.s, z3.s, #16
278
248
; SVE-NEXT: lsl z4.s, z4.s, #16
279
249
; SVE-NEXT: lsl z2.s, z2.s, #16
250
+
; SVE-NEXT: lsl z6.s, z6.s, #16
251
+
; SVE-NEXT: uunpkhi z5.s, z1.h
252
+
; SVE-NEXT: uunpklo z1.s, z1.h
253
+
; SVE-NEXT: lsl z5.s, z5.s, #16
280
254
; SVE-NEXT: lsl z1.s, z1.s, #16
281
-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
282
-
; SVE-NEXT: uunpklo z4.s, z0.h
283
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
255
+
; SVE-NEXT: fmad z3.s, p1/m, z5.s, z4.s
256
+
; SVE-NEXT: fmad z1.s, p1/m, z2.s, z6.s
284
257
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
285
-
; SVE-NEXT: uunpkhi z3.s, z0.h
286
-
; SVE-NEXT: lsl z4.s, z4.s, #16
287
-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
288
-
; SVE-NEXT: lsl z2.s, z2.s, #16
289
-
; SVE-NEXT: lsl z3.s, z3.s, #16
290
-
; SVE-NEXT: lsl z1.s, z1.s, #16
291
-
; SVE-NEXT: fsub z2.s, z3.s, z2.s
292
-
; SVE-NEXT: fsub z1.s, z4.s, z1.s
293
-
; SVE-NEXT: bfcvt z2.h, p1/m, z2.s
294
258
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
295
259
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
296
260
; SVE-NEXT: mov z0.h, p0/m, z1.h
@@ -309,14 +273,12 @@ define <vscale x 8 x bfloat> @fmls_sel_nxv8bf16(<vscale x 8 x i1> %pred, <vscale
309
273
define <vscale x 4 x bfloat> @fmls_sel_nxv4bf16(<vscale x 4 x i1> %pred, <vscale x 4 x bfloat> %acc, <vscale x 4 x bfloat> %m1, <vscale x 4 x bfloat> %m2) {
310
274
; SVE-LABEL: fmls_sel_nxv4bf16:
311
275
; SVE: // %bb.0:
312
-
; SVE-NEXT: lsl z2.s, z2.s, #16
313
-
; SVE-NEXT: lsl z1.s, z1.s, #16
314
276
; SVE-NEXT: ptrue p1.s
315
-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
316
-
; SVE-NEXT: lsl z2.s, z0.s, #16
317
-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
277
+
; SVE-NEXT: lsl z2.s, z2.s, #16
278
+
; SVE-NEXT: lsl z3.s, z0.s, #16
279
+
; SVE-NEXT: fneg z1.h, p1/m, z1.h
318
280
; SVE-NEXT: lsl z1.s, z1.s, #16
319
-
; SVE-NEXT: fsub z1.s, z2.s, z1.s
281
+
; SVE-NEXT: fmad z1.s, p1/m, z2.s, z3.s
320
282
; SVE-NEXT: bfcvt z0.h, p0/m, z1.s
321
283
; SVE-NEXT: ret
322
284
;
@@ -333,14 +295,12 @@ define <vscale x 4 x bfloat> @fmls_sel_nxv4bf16(<vscale x 4 x i1> %pred, <vscale
333
295
define <vscale x 2 x bfloat> @fmls_sel_nxv2bf16(<vscale x 2 x i1> %pred, <vscale x 2 x bfloat> %acc, <vscale x 2 x bfloat> %m1, <vscale x 2 x bfloat> %m2) {
0 commit comments