@@ -20,7 +20,7 @@ define void @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
2020; CHECK-NEXT: mov z5.b, p0/m, za0h.b[w12, 10]
2121; CHECK-NEXT: mov z6.b, p0/m, za0h.b[w12, 12]
2222; CHECK-NEXT: mov z7.b, p0/m, za0h.b[w12, 14]
23- ; CHECK-NEXT: b dummy_use_8_nxv16i8
23+ ; CHECK-NEXT: b use
2424 %z0 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.horiz.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice )
2525 %tileslice.2 = add i32 %tileslice , 2
2626 %z1 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.horiz.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.2 )
@@ -38,8 +38,8 @@ define void @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
3838 %z7 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.horiz.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.14 )
3939
4040 ; Force retention of z0..z7
41- tail call void @dummy_use_8_nxv16i8 (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
42- <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
41+ tail call void @use (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
42+ <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
4343 ret void
4444}
4545
@@ -62,7 +62,7 @@ define void @extract_col_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
6262; CHECK-NEXT: mov z5.b, p0/m, za0v.b[w12, 11]
6363; CHECK-NEXT: mov z6.b, p0/m, za0v.b[w12, 13]
6464; CHECK-NEXT: mov z7.b, p0/m, za0v.b[w12, 15]
65- ; CHECK-NEXT: b dummy_use_8_nxv16i8
65+ ; CHECK-NEXT: b use
6666 %tileslice.1 = add i32 %tileslice , 1
6767 %z0 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.vert.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.1 )
6868 %tileslice.3 = add i32 %tileslice , 3
@@ -80,8 +80,8 @@ define void @extract_col_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
8080 %tileslice.15 = add i32 %tileslice , 15
8181 %z7 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.vert.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.15 )
8282
83- tail call void @dummy_use_8_nxv16i8 (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
84- <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
83+ tail call void @use (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
84+ <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
8585 ret void
8686}
8787
@@ -96,7 +96,7 @@ define void @extract_row_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
9696; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 2]
9797; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4]
9898; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 6]
99- ; CHECK-NEXT: b dummy_use_4_nxv8i16
99+ ; CHECK-NEXT: b use
100100 %z0 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.horiz.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice )
101101 %tileslice.2 = add i32 %tileslice , 2
102102 %z1 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.horiz.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.2 )
@@ -105,7 +105,7 @@ define void @extract_row_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
105105 %tileslice.6 = add i32 %tileslice , 6
106106 %z3 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.horiz.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.6 )
107107
108- tail call void @dummy_use_4_nxv8i16 (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
108+ tail call void @use (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
109109 ret void
110110}
111111
@@ -120,7 +120,7 @@ define void @extract_col_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
120120; CHECK-NEXT: mov z1.h, p0/m, za1v.h[w12, 3]
121121; CHECK-NEXT: mov z2.h, p0/m, za1v.h[w12, 5]
122122; CHECK-NEXT: mov z3.h, p0/m, za1v.h[w12, 7]
123- ; CHECK-NEXT: b dummy_use_4_nxv8i16
123+ ; CHECK-NEXT: b use
124124 %tileslice.1 = add i32 %tileslice , 1
125125 %z0 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.vert.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 1 , i32 %tileslice.1 )
126126 %tileslice.3 = add i32 %tileslice , 3
@@ -130,7 +130,7 @@ define void @extract_col_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
130130 %tileslice.7 = add i32 %tileslice , 7
131131 %z3 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.vert.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 1 , i32 %tileslice.7 )
132132
133- tail call void @dummy_use_4_nxv8i16 (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
133+ tail call void @use (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
134134 ret void
135135}
136136
@@ -153,7 +153,7 @@ define void @extract_f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 %ti
153153; CHECK-NEXT: mov z5.h, p0/m, za0h.h[w12, 5]
154154; CHECK-NEXT: mov z6.h, p0/m, za0v.h[w12, 6]
155155; CHECK-NEXT: mov z7.h, p0/m, za0v.h[w12, 7]
156- ; CHECK-NEXT: b dummy_use_8_nxv8f16
156+ ; CHECK-NEXT: b use
157157 %z0 = call <vscale x 8 x half > @llvm.aarch64.sme.read.horiz.nxv8f16 (<vscale x 8 x half > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice )
158158 %tileslice.1 = add i32 %tileslice , 1
159159 %z1 = call <vscale x 8 x half > @llvm.aarch64.sme.read.horiz.nxv8f16 (<vscale x 8 x half > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.1 )
@@ -170,8 +170,8 @@ define void @extract_f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 %ti
170170 %tileslice.7 = add i32 %tileslice , 7
171171 %z7 = call <vscale x 8 x half > @llvm.aarch64.sme.read.vert.nxv8f16 (<vscale x 8 x half > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.7 )
172172
173- tail call void @dummy_use_8_nxv8f16 (<vscale x 8 x half > %z0 , <vscale x 8 x half > %z1 , <vscale x 8 x half > %z2 , <vscale x 8 x half > %z3 ,
174- <vscale x 8 x half > %z4 , <vscale x 8 x half > %z5 , <vscale x 8 x half > %z6 , <vscale x 8 x half > %z7 )
173+ tail call void @use (<vscale x 8 x half > %z0 , <vscale x 8 x half > %z1 , <vscale x 8 x half > %z2 , <vscale x 8 x half > %z3 ,
174+ <vscale x 8 x half > %z4 , <vscale x 8 x half > %z5 , <vscale x 8 x half > %z6 , <vscale x 8 x half > %z7 )
175175 ret void
176176}
177177
@@ -194,7 +194,7 @@ define void @extract_bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32
194194; CHECK-NEXT: mov z5.h, p0/m, za0h.h[w12, 5]
195195; CHECK-NEXT: mov z6.h, p0/m, za0v.h[w12, 6]
196196; CHECK-NEXT: mov z7.h, p0/m, za0v.h[w12, 7]
197- ; CHECK-NEXT: b dummy_use_8_nxv8bf16
197+ ; CHECK-NEXT: b use
198198 %z0 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16 (<vscale x 8 x bfloat> %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice )
199199 %tileslice.1 = add i32 %tileslice , 1
200200 %z1 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16 (<vscale x 8 x bfloat> %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.1 )
@@ -211,8 +211,8 @@ define void @extract_bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32
211211 %tileslice.7 = add i32 %tileslice , 7
212212 %z7 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16 (<vscale x 8 x bfloat> %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.7 )
213213
214- tail call void @dummy_use_8_nxv8bf16 (<vscale x 8 x bfloat> %z0 , <vscale x 8 x bfloat> %z1 , <vscale x 8 x bfloat> %z2 , <vscale x 8 x bfloat> %z3 ,
215- <vscale x 8 x bfloat> %z4 , <vscale x 8 x bfloat> %z5 , <vscale x 8 x bfloat> %z6 , <vscale x 8 x bfloat> %z7 )
214+ tail call void @use (<vscale x 8 x bfloat> %z0 , <vscale x 8 x bfloat> %z1 , <vscale x 8 x bfloat> %z2 , <vscale x 8 x bfloat> %z3 ,
215+ <vscale x 8 x bfloat> %z4 , <vscale x 8 x bfloat> %z5 , <vscale x 8 x bfloat> %z6 , <vscale x 8 x bfloat> %z7 )
216216 ret void
217217}
218218
@@ -223,12 +223,12 @@ define void @extract_row_s(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 %t
223223; CHECK-NEXT: mov w12, w0
224224; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 0]
225225; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 2]
226- ; CHECK-NEXT: b dummy_use_2_nxv4i32
226+ ; CHECK-NEXT: b use
227227 %z0 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.horiz.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice )
228228 %tileslice.2 = add i32 %tileslice , 2
229229 %z1 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.horiz.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice.2 )
230230
231- tail call void @dummy_use_2_nxv4i32 (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
231+ tail call void @use (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
232232 ret void
233233}
234234
@@ -239,13 +239,13 @@ define void @extract_col_s(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 %t
239239; CHECK-NEXT: mov w12, w0
240240; CHECK-NEXT: mov z0.s, p0/m, za3v.s[w12, 1]
241241; CHECK-NEXT: mov z1.s, p0/m, za3v.s[w12, 3]
242- ; CHECK-NEXT: b dummy_use_2_nxv4i32
242+ ; CHECK-NEXT: b use
243243 %tileslice.1 = add i32 %tileslice , 1
244244 %z0 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.vert.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 3 , i32 %tileslice.1 )
245245 %tileslice.3 = add i32 %tileslice , 3
246246 %z1 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.vert.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 3 , i32 %tileslice.3 )
247247
248- tail call void @dummy_use_2_nxv4i32 (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
248+ tail call void @use (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
249249 ret void
250250}
251251
@@ -260,7 +260,7 @@ define void @extract_f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 %t
260260; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 1]
261261; CHECK-NEXT: mov z2.s, p0/m, za0v.s[w12, 2]
262262; CHECK-NEXT: mov z3.s, p0/m, za0v.s[w12, 3]
263- ; CHECK-NEXT: b dummy_use_4_nxv4f32
263+ ; CHECK-NEXT: b use
264264 %z0 = call <vscale x 4 x float > @llvm.aarch64.sme.read.horiz.nxv4f32 (<vscale x 4 x float > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice )
265265 %tileslice.1 = add i32 %tileslice , 1
266266 %z1 = call <vscale x 4 x float > @llvm.aarch64.sme.read.horiz.nxv4f32 (<vscale x 4 x float > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice.1 )
@@ -269,7 +269,7 @@ define void @extract_f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 %t
269269 %tileslice.3 = add i32 %tileslice , 3
270270 %z3 = call <vscale x 4 x float > @llvm.aarch64.sme.read.vert.nxv4f32 (<vscale x 4 x float > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice.3 )
271271
272- tail call void @dummy_use_4_nxv4f32 (<vscale x 4 x float > %z0 , <vscale x 4 x float > %z1 , <vscale x 4 x float > %z2 , <vscale x 4 x float > %z3 )
272+ tail call void @use (<vscale x 4 x float > %z0 , <vscale x 4 x float > %z1 , <vscale x 4 x float > %z2 , <vscale x 4 x float > %z3 )
273273 ret void
274274}
275275
@@ -301,12 +301,12 @@ define void @extract_f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 %
301301; CHECK-NEXT: mov w12, w0
302302; CHECK-NEXT: mov z0.d, p0/m, za0h.d[w12, 0]
303303; CHECK-NEXT: mov z1.d, p0/m, za0v.d[w12, 1]
304- ; CHECK-NEXT: b dummy_use_2_nxv2f64
304+ ; CHECK-NEXT: b use
305305 %z0 = call <vscale x 2 x double > @llvm.aarch64.sme.read.horiz.nxv2f64 (<vscale x 2 x double > %zd , <vscale x 2 x i1 > %pg , i32 0 , i32 %tileslice )
306306 %tileslice.1 = add i32 %tileslice , 1
307307 %z1 = call <vscale x 2 x double > @llvm.aarch64.sme.read.vert.nxv2f64 (<vscale x 2 x double > %zd , <vscale x 2 x i1 > %pg , i32 0 , i32 %tileslice.1 )
308308
309- tail call void @dummy_use_2_nxv2f64 (<vscale x 2 x double > %z0 , <vscale x 2 x double > %z1 )
309+ tail call void @use (<vscale x 2 x double > %z0 , <vscale x 2 x double > %z1 )
310310 ret void
311311}
312312
@@ -485,6 +485,7 @@ for.body:
485485exit:
486486 %tmp1 = add <vscale x 4 x i32 > %z0 , %z1
487487 %res = add <vscale x 4 x i32 > %tmp1 , %z2
488+ tail call void @use (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 , <vscale x 4 x i32 > %z2 )
488489 ret <vscale x 4 x i32 > %res
489490}
490491
@@ -523,33 +524,7 @@ declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i6
523524declare <vscale x 2 x double > @llvm.aarch64.sme.readq.vert.nxv2f64 (<vscale x 2 x double >, <vscale x 2 x i1 >, i32 , i32 )
524525
525526; ------------------------------------------------------------------------------
526- ; Dummy external functions to force code retention.
527- ; The compiler does not see their implementations, so it must keep the calls.
527+ ; Dummy external function to force code retention: the compiler does not see its implementation, so it must keep the calls.
528528; ------------------------------------------------------------------------------
529529
530- declare void @dummy_use_8_nxv16i8 (
531- <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >,
532- <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >
533- )
534-
535- declare void @dummy_use_4_nxv8i16 (
536- <vscale x 8 x i16 >, <vscale x 8 x i16 >, <vscale x 8 x i16 >, <vscale x 8 x i16 >
537- )
538-
539- declare void @dummy_use_8_nxv8f16 (
540- <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >,
541- <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >
542- )
543-
544- declare void @dummy_use_8_nxv8bf16 (
545- <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
546- <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>
547- )
548-
549- declare void @dummy_use_2_nxv4i32 (<vscale x 4 x i32 >, <vscale x 4 x i32 >)
550-
551- declare void @dummy_use_4_nxv4f32 (
552- <vscale x 4 x float >, <vscale x 4 x float >, <vscale x 4 x float >, <vscale x 4 x float >
553- )
554-
555- declare void @dummy_use_2_nxv2f64 (<vscale x 2 x double >, <vscale x 2 x double >)
; Declared variadic so this single declaration can be called by every test in
; this file, whatever the element type and number of scalable-vector operands.
530+ declare void @use (...)
0 commit comments