|
6 | 6 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8 |
7 | 7 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8 |
8 | 8 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v2i8 |
9 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v3i8 |
10 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v4i8 |
11 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v8i8 |
12 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v16i8 |
13 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v32i8 |
14 | 9 |
|
15 | 10 | define <2 x i8> @dup_v2i8(i8 %a) { |
16 | 11 | ; CHECK-LABEL: dup_v2i8: |
@@ -127,14 +122,25 @@ entry: |
127 | 122 | } |
128 | 123 |
|
129 | 124 | define <3 x i8> @loaddup_str_v3i8(ptr %p) { |
130 | | -; CHECK-LABEL: loaddup_str_v3i8: |
131 | | -; CHECK: // %bb.0: // %entry |
132 | | -; CHECK-NEXT: mov x8, x0 |
133 | | -; CHECK-NEXT: ldrb w0, [x0] |
134 | | -; CHECK-NEXT: strb wzr, [x8] |
135 | | -; CHECK-NEXT: mov w1, w0 |
136 | | -; CHECK-NEXT: mov w2, w0 |
137 | | -; CHECK-NEXT: ret |
| 125 | +; CHECK-SD-LABEL: loaddup_str_v3i8: |
| 126 | +; CHECK-SD: // %bb.0: // %entry |
| 127 | +; CHECK-SD-NEXT: mov x8, x0 |
| 128 | +; CHECK-SD-NEXT: ldrb w0, [x0] |
| 129 | +; CHECK-SD-NEXT: strb wzr, [x8] |
| 130 | +; CHECK-SD-NEXT: mov w1, w0 |
| 131 | +; CHECK-SD-NEXT: mov w2, w0 |
| 132 | +; CHECK-SD-NEXT: ret |
| 133 | +; |
| 134 | +; CHECK-GI-LABEL: loaddup_str_v3i8: |
| 135 | +; CHECK-GI: // %bb.0: // %entry |
| 136 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 137 | +; CHECK-GI-NEXT: mov x8, x0 |
| 138 | +; CHECK-GI-NEXT: strb wzr, [x8] |
| 139 | +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] |
| 140 | +; CHECK-GI-NEXT: umov w0, v0.b[0] |
| 141 | +; CHECK-GI-NEXT: umov w1, v0.b[1] |
| 142 | +; CHECK-GI-NEXT: umov w2, v0.b[2] |
| 143 | +; CHECK-GI-NEXT: ret |
138 | 144 | entry: |
139 | 145 | %a = load i8, ptr %p |
140 | 146 | %b = insertelement <3 x i8> poison, i8 %a, i64 0 |
@@ -201,12 +207,21 @@ entry: |
201 | 207 | } |
202 | 208 |
|
203 | 209 | define <4 x i8> @loaddup_str_v4i8(ptr %p) { |
204 | | -; CHECK-LABEL: loaddup_str_v4i8: |
205 | | -; CHECK: // %bb.0: // %entry |
206 | | -; CHECK-NEXT: ldrb w8, [x0] |
207 | | -; CHECK-NEXT: strb wzr, [x0] |
208 | | -; CHECK-NEXT: dup v0.4h, w8 |
209 | | -; CHECK-NEXT: ret |
| 210 | +; CHECK-SD-LABEL: loaddup_str_v4i8: |
| 211 | +; CHECK-SD: // %bb.0: // %entry |
| 212 | +; CHECK-SD-NEXT: ldrb w8, [x0] |
| 213 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 214 | +; CHECK-SD-NEXT: dup v0.4h, w8 |
| 215 | +; CHECK-SD-NEXT: ret |
| 216 | +; |
| 217 | +; CHECK-GI-LABEL: loaddup_str_v4i8: |
| 218 | +; CHECK-GI: // %bb.0: // %entry |
| 219 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 220 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 221 | +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] |
| 222 | +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 |
| 223 | +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 224 | +; CHECK-GI-NEXT: ret |
210 | 225 | entry: |
211 | 226 | %a = load i8, ptr %p |
212 | 227 | %b = insertelement <4 x i8> poison, i8 %a, i64 0 |
@@ -250,11 +265,18 @@ entry: |
250 | 265 | } |
251 | 266 |
|
252 | 267 | define <8 x i8> @loaddup_str_v8i8(ptr %p) { |
253 | | -; CHECK-LABEL: loaddup_str_v8i8: |
254 | | -; CHECK: // %bb.0: // %entry |
255 | | -; CHECK-NEXT: ld1r { v0.8b }, [x0] |
256 | | -; CHECK-NEXT: strb wzr, [x0] |
257 | | -; CHECK-NEXT: ret |
| 268 | +; CHECK-SD-LABEL: loaddup_str_v8i8: |
| 269 | +; CHECK-SD: // %bb.0: // %entry |
| 270 | +; CHECK-SD-NEXT: ld1r { v0.8b }, [x0] |
| 271 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 272 | +; CHECK-SD-NEXT: ret |
| 273 | +; |
| 274 | +; CHECK-GI-LABEL: loaddup_str_v8i8: |
| 275 | +; CHECK-GI: // %bb.0: // %entry |
| 276 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 277 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 278 | +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] |
| 279 | +; CHECK-GI-NEXT: ret |
258 | 280 | entry: |
259 | 281 | %a = load i8, ptr %p |
260 | 282 | %b = insertelement <8 x i8> poison, i8 %a, i64 0 |
@@ -297,11 +319,18 @@ entry: |
297 | 319 | } |
298 | 320 |
|
299 | 321 | define <16 x i8> @loaddup_str_v16i8(ptr %p) { |
300 | | -; CHECK-LABEL: loaddup_str_v16i8: |
301 | | -; CHECK: // %bb.0: // %entry |
302 | | -; CHECK-NEXT: ld1r { v0.16b }, [x0] |
303 | | -; CHECK-NEXT: strb wzr, [x0] |
304 | | -; CHECK-NEXT: ret |
| 322 | +; CHECK-SD-LABEL: loaddup_str_v16i8: |
| 323 | +; CHECK-SD: // %bb.0: // %entry |
| 324 | +; CHECK-SD-NEXT: ld1r { v0.16b }, [x0] |
| 325 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 326 | +; CHECK-SD-NEXT: ret |
| 327 | +; |
| 328 | +; CHECK-GI-LABEL: loaddup_str_v16i8: |
| 329 | +; CHECK-GI: // %bb.0: // %entry |
| 330 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 331 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 332 | +; CHECK-GI-NEXT: dup v0.16b, v0.b[0] |
| 333 | +; CHECK-GI-NEXT: ret |
305 | 334 | entry: |
306 | 335 | %a = load i8, ptr %p |
307 | 336 | %b = insertelement <16 x i8> poison, i8 %a, i64 0 |
@@ -353,12 +382,20 @@ entry: |
353 | 382 | } |
354 | 383 |
|
355 | 384 | define <32 x i8> @loaddup_str_v32i8(ptr %p) { |
356 | | -; CHECK-LABEL: loaddup_str_v32i8: |
357 | | -; CHECK: // %bb.0: // %entry |
358 | | -; CHECK-NEXT: ld1r { v0.16b }, [x0] |
359 | | -; CHECK-NEXT: strb wzr, [x0] |
360 | | -; CHECK-NEXT: mov v1.16b, v0.16b |
361 | | -; CHECK-NEXT: ret |
| 385 | +; CHECK-SD-LABEL: loaddup_str_v32i8: |
| 386 | +; CHECK-SD: // %bb.0: // %entry |
| 387 | +; CHECK-SD-NEXT: ld1r { v0.16b }, [x0] |
| 388 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 389 | +; CHECK-SD-NEXT: mov v1.16b, v0.16b |
| 390 | +; CHECK-SD-NEXT: ret |
| 391 | +; |
| 392 | +; CHECK-GI-LABEL: loaddup_str_v32i8: |
| 393 | +; CHECK-GI: // %bb.0: // %entry |
| 394 | +; CHECK-GI-NEXT: ldr b1, [x0] |
| 395 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 396 | +; CHECK-GI-NEXT: dup v0.16b, v1.b[0] |
| 397 | +; CHECK-GI-NEXT: dup v1.16b, v1.b[0] |
| 398 | +; CHECK-GI-NEXT: ret |
362 | 399 | entry: |
363 | 400 | %a = load i8, ptr %p |
364 | 401 | %b = insertelement <32 x i8> poison, i8 %a, i64 0 |
|
0 commit comments