@@ -36,6 +36,17 @@ define <vscale x 16 x i8> @saba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x
ret <vscale x 16 x i8> %add
}

+ define <vscale x 16 x i8> @saba_b_from_sabd(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+ ; CHECK-LABEL: saba_b_from_sabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: saba z0.b, z1.b, z2.b
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+ %3 = add <vscale x 16 x i8> %2, %a
+ ret <vscale x 16 x i8> %3
+ }
+
define <vscale x 8 x i16> @saba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: saba_h:
; CHECK: // %bb.0:
@@ -66,6 +77,17 @@ define <vscale x 8 x i16> @saba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x
ret <vscale x 8 x i16> %add
}

+ define <vscale x 8 x i16> @saba_h_from_sabd(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+ ; CHECK-LABEL: saba_h_from_sabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: saba z0.h, z1.h, z2.h
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+ %3 = add <vscale x 8 x i16> %2, %a
+ ret <vscale x 8 x i16> %3
+ }
+
define <vscale x 4 x i32> @saba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: saba_s:
; CHECK: // %bb.0:
@@ -96,6 +118,17 @@ define <vscale x 4 x i32> @saba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x
ret <vscale x 4 x i32> %add
}

+ define <vscale x 4 x i32> @saba_s_from_sabd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+ ; CHECK-LABEL: saba_s_from_sabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: saba z0.s, z1.s, z2.s
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+ %3 = add <vscale x 4 x i32> %2, %a
+ ret <vscale x 4 x i32> %3
+ }
+
define <vscale x 2 x i64> @saba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: saba_d:
; CHECK: // %bb.0:
@@ -126,6 +159,17 @@ define <vscale x 2 x i64> @saba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x
ret <vscale x 2 x i64> %add
}

+ define <vscale x 2 x i64> @saba_d_from_sabd(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+ ; CHECK-LABEL: saba_d_from_sabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: saba z0.d, z1.d, z2.d
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+ %3 = add <vscale x 2 x i64> %2, %a
+ ret <vscale x 2 x i64> %3
+ }
+
;
; UABA
;
@@ -159,6 +203,17 @@ define <vscale x 16 x i8> @uaba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x
ret <vscale x 16 x i8> %add
}

+ define <vscale x 16 x i8> @uaba_b_from_uabd(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+ ; CHECK-LABEL: uaba_b_from_uabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: uaba z0.b, z1.b, z2.b
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+ %3 = add <vscale x 16 x i8> %2, %a
+ ret <vscale x 16 x i8> %3
+ }
+
define <vscale x 8 x i16> @uaba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: uaba_h:
; CHECK: // %bb.0:
@@ -188,6 +243,17 @@ define <vscale x 8 x i16> @uaba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x
ret <vscale x 8 x i16> %add
}

+ define <vscale x 8 x i16> @uaba_h_from_uabd(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+ ; CHECK-LABEL: uaba_h_from_uabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: uaba z0.h, z1.h, z2.h
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+ %3 = add <vscale x 8 x i16> %2, %a
+ ret <vscale x 8 x i16> %3
+ }
+
define <vscale x 4 x i32> @uaba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: uaba_s:
; CHECK: // %bb.0:
@@ -217,6 +283,17 @@ define <vscale x 4 x i32> @uaba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x
ret <vscale x 4 x i32> %add
}

+ define <vscale x 4 x i32> @uaba_s_from_uabd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+ ; CHECK-LABEL: uaba_s_from_uabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: uaba z0.s, z1.s, z2.s
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+ %3 = add <vscale x 4 x i32> %2, %a
+ ret <vscale x 4 x i32> %3
+ }
+
define <vscale x 2 x i64> @uaba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: uaba_d:
; CHECK: // %bb.0:
@@ -246,6 +323,17 @@ define <vscale x 2 x i64> @uaba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x
ret <vscale x 2 x i64> %add
}

+ define <vscale x 2 x i64> @uaba_d_from_uabd(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+ ; CHECK-LABEL: uaba_d_from_uabd:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: uaba z0.d, z1.d, z2.d
+ ; CHECK-NEXT: ret
+ %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+ %3 = add <vscale x 2 x i64> %2, %a
+ ret <vscale x 2 x i64> %3
+ }
+
; A variant of uaba_s but with the add operands switched.
define <vscale x 4 x i32> @uaba_s_commutative(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: uaba_s_commutative:
@@ -262,16 +350,27 @@ define <vscale x 4 x i32> @uaba_s_commutative(<vscale x 4 x i32> %a, <vscale x 4
}

declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
-
declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)
-
declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)
-
declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
-
declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1)

+ declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+ declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+ declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+ declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+ declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+ declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+ declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+ declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+ declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+ declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+ declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+ declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
attributes #0 = { "target-features"="+neon,+sve,+sve2" }