Skip to content

Commit ebe7587

Browse files
committed
[AArch64] Add some tests for bitcast vector loads and scalarizing loaded vectors. NFC
1 parent 332eb5f commit ebe7587

File tree

3 files changed

+1055 -4 lines changed

3 files changed

+1055 -4 lines changed

llvm/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,14 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
23

34
define i32 @foo(ptr %__a) nounwind {
45
; CHECK-LABEL: foo:
5-
; CHECK: umov.h w{{[0-9]+}}, v{{[0-9]+}}[0]
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: ldr d0, [x0]
8+
; CHECK-NEXT: umov.h w8, v0[0]
9+
; CHECK-NEXT: umov.h w9, v0[0]
10+
; CHECK-NEXT: add w0, w9, w8, uxth #1
11+
; CHECK-NEXT: ret
612
%tmp18 = load <4 x i16>, ptr %__a, align 8
713
%vget_lane = extractelement <4 x i16> %tmp18, i32 0
814
%conv = zext i16 %vget_lane to i32

llvm/test/CodeGen/AArch64/bitcast-extend.ll

Lines changed: 325 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
3+
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; CHECK-GI: warning: Instruction selection used fallback path for load_zext_i8_v4bf16
6+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for load_zext_i16_v4bf16
7+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for load_zext_i32_v4bf16
48

59
define <4 x i16> @z_i32_v4i16(i32 %x) {
610
; CHECK-SD-LABEL: z_i32_v4i16:
@@ -284,6 +288,324 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) {
284288
ret void
285289
}
286290

291+
define <8 x i8> @load_zext_i8_v8i8(ptr %p) {
292+
; CHECK-LABEL: load_zext_i8_v8i8:
293+
; CHECK: // %bb.0:
294+
; CHECK-NEXT: ldrb w8, [x0]
295+
; CHECK-NEXT: fmov d0, x8
296+
; CHECK-NEXT: ret
297+
%l = load i8, ptr %p
298+
%z = zext i8 %l to i64
299+
%b = bitcast i64 %z to <8 x i8>
300+
ret <8 x i8> %b
301+
}
302+
303+
define <8 x i8> @load_zext_i16_v8i8(ptr %p) {
304+
; CHECK-LABEL: load_zext_i16_v8i8:
305+
; CHECK: // %bb.0:
306+
; CHECK-NEXT: ldrh w8, [x0]
307+
; CHECK-NEXT: fmov d0, x8
308+
; CHECK-NEXT: ret
309+
%l = load i16, ptr %p
310+
%z = zext i16 %l to i64
311+
%b = bitcast i64 %z to <8 x i8>
312+
ret <8 x i8> %b
313+
}
314+
315+
define <8 x i8> @load_zext_i32_v8i8(ptr %p) {
316+
; CHECK-LABEL: load_zext_i32_v8i8:
317+
; CHECK: // %bb.0:
318+
; CHECK-NEXT: ldr w8, [x0]
319+
; CHECK-NEXT: fmov d0, x8
320+
; CHECK-NEXT: ret
321+
%l = load i32, ptr %p
322+
%z = zext i32 %l to i64
323+
%b = bitcast i64 %z to <8 x i8>
324+
ret <8 x i8> %b
325+
}
326+
327+
define <8 x i8> @load_sext_i32_v8i8(ptr %p) {
328+
; CHECK-LABEL: load_sext_i32_v8i8:
329+
; CHECK: // %bb.0:
330+
; CHECK-NEXT: ldrsw x8, [x0]
331+
; CHECK-NEXT: fmov d0, x8
332+
; CHECK-NEXT: ret
333+
%l = load i32, ptr %p
334+
%z = sext i32 %l to i64
335+
%b = bitcast i64 %z to <8 x i8>
336+
ret <8 x i8> %b
337+
}
338+
339+
define <16 x i8> @load_zext_v16i8(ptr %p) {
340+
; CHECK-SD-LABEL: load_zext_v16i8:
341+
; CHECK-SD: // %bb.0:
342+
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
343+
; CHECK-SD-NEXT: ldr w8, [x0]
344+
; CHECK-SD-NEXT: mov v0.d[0], x8
345+
; CHECK-SD-NEXT: ret
346+
;
347+
; CHECK-GI-LABEL: load_zext_v16i8:
348+
; CHECK-GI: // %bb.0:
349+
; CHECK-GI-NEXT: ldr w8, [x0]
350+
; CHECK-GI-NEXT: mov v0.d[0], x8
351+
; CHECK-GI-NEXT: mov v0.d[1], xzr
352+
; CHECK-GI-NEXT: ret
353+
%l = load i32, ptr %p
354+
%z = zext i32 %l to i128
355+
%b = bitcast i128 %z to <16 x i8>
356+
ret <16 x i8> %b
357+
}
358+
359+
360+
define <4 x i16> @load_zext_i8_v4i16(ptr %p) {
361+
; CHECK-LABEL: load_zext_i8_v4i16:
362+
; CHECK: // %bb.0:
363+
; CHECK-NEXT: ldrb w8, [x0]
364+
; CHECK-NEXT: fmov d0, x8
365+
; CHECK-NEXT: ret
366+
%l = load i8, ptr %p
367+
%z = zext i8 %l to i64
368+
%b = bitcast i64 %z to <4 x i16>
369+
ret <4 x i16> %b
370+
}
371+
372+
define <4 x i16> @load_zext_i16_v4i16(ptr %p) {
373+
; CHECK-LABEL: load_zext_i16_v4i16:
374+
; CHECK: // %bb.0:
375+
; CHECK-NEXT: ldrh w8, [x0]
376+
; CHECK-NEXT: fmov d0, x8
377+
; CHECK-NEXT: ret
378+
%l = load i16, ptr %p
379+
%z = zext i16 %l to i64
380+
%b = bitcast i64 %z to <4 x i16>
381+
ret <4 x i16> %b
382+
}
383+
384+
define <4 x i16> @load_zext_i32_v4i16(ptr %p) {
385+
; CHECK-LABEL: load_zext_i32_v4i16:
386+
; CHECK: // %bb.0:
387+
; CHECK-NEXT: ldr w8, [x0]
388+
; CHECK-NEXT: fmov d0, x8
389+
; CHECK-NEXT: ret
390+
%l = load i32, ptr %p
391+
%z = zext i32 %l to i64
392+
%b = bitcast i64 %z to <4 x i16>
393+
ret <4 x i16> %b
394+
}
395+
396+
define <2 x i32> @load_zext_i8_v2i32(ptr %p) {
397+
; CHECK-LABEL: load_zext_i8_v2i32:
398+
; CHECK: // %bb.0:
399+
; CHECK-NEXT: ldrb w8, [x0]
400+
; CHECK-NEXT: fmov d0, x8
401+
; CHECK-NEXT: ret
402+
%l = load i8, ptr %p
403+
%z = zext i8 %l to i64
404+
%b = bitcast i64 %z to <2 x i32>
405+
ret <2 x i32> %b
406+
}
407+
408+
define <2 x i32> @load_zext_i16_v2i32(ptr %p) {
409+
; CHECK-LABEL: load_zext_i16_v2i32:
410+
; CHECK: // %bb.0:
411+
; CHECK-NEXT: ldrh w8, [x0]
412+
; CHECK-NEXT: fmov d0, x8
413+
; CHECK-NEXT: ret
414+
%l = load i16, ptr %p
415+
%z = zext i16 %l to i64
416+
%b = bitcast i64 %z to <2 x i32>
417+
ret <2 x i32> %b
418+
}
419+
420+
define <2 x i32> @load_zext_i32_v2i32(ptr %p) {
421+
; CHECK-LABEL: load_zext_i32_v2i32:
422+
; CHECK: // %bb.0:
423+
; CHECK-NEXT: ldr w8, [x0]
424+
; CHECK-NEXT: fmov d0, x8
425+
; CHECK-NEXT: ret
426+
%l = load i32, ptr %p
427+
%z = zext i32 %l to i64
428+
%b = bitcast i64 %z to <2 x i32>
429+
ret <2 x i32> %b
430+
}
431+
432+
define <1 x i64> @load_zext_i8_v1i64(ptr %p) {
433+
; CHECK-LABEL: load_zext_i8_v1i64:
434+
; CHECK: // %bb.0:
435+
; CHECK-NEXT: ldrb w8, [x0]
436+
; CHECK-NEXT: fmov d0, x8
437+
; CHECK-NEXT: ret
438+
%l = load i8, ptr %p
439+
%z = zext i8 %l to i64
440+
%b = bitcast i64 %z to <1 x i64>
441+
ret <1 x i64> %b
442+
}
443+
444+
define <1 x i64> @load_zext_i16_v1i64(ptr %p) {
445+
; CHECK-LABEL: load_zext_i16_v1i64:
446+
; CHECK: // %bb.0:
447+
; CHECK-NEXT: ldrh w8, [x0]
448+
; CHECK-NEXT: fmov d0, x8
449+
; CHECK-NEXT: ret
450+
%l = load i16, ptr %p
451+
%z = zext i16 %l to i64
452+
%b = bitcast i64 %z to <1 x i64>
453+
ret <1 x i64> %b
454+
}
455+
456+
define <1 x i64> @load_zext_i32_v1i64(ptr %p) {
457+
; CHECK-LABEL: load_zext_i32_v1i64:
458+
; CHECK: // %bb.0:
459+
; CHECK-NEXT: ldr w8, [x0]
460+
; CHECK-NEXT: fmov d0, x8
461+
; CHECK-NEXT: ret
462+
%l = load i32, ptr %p
463+
%z = zext i32 %l to i64
464+
%b = bitcast i64 %z to <1 x i64>
465+
ret <1 x i64> %b
466+
}
467+
468+
469+
define <4 x half> @load_zext_i8_v4f16(ptr %p) {
470+
; CHECK-LABEL: load_zext_i8_v4f16:
471+
; CHECK: // %bb.0:
472+
; CHECK-NEXT: ldrb w8, [x0]
473+
; CHECK-NEXT: fmov d0, x8
474+
; CHECK-NEXT: ret
475+
%l = load i8, ptr %p
476+
%z = zext i8 %l to i64
477+
%b = bitcast i64 %z to <4 x half>
478+
ret <4 x half> %b
479+
}
287480

288-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
289-
; CHECK: {{.*}}
481+
define <4 x half> @load_zext_i16_v4f16(ptr %p) {
482+
; CHECK-LABEL: load_zext_i16_v4f16:
483+
; CHECK: // %bb.0:
484+
; CHECK-NEXT: ldrh w8, [x0]
485+
; CHECK-NEXT: fmov d0, x8
486+
; CHECK-NEXT: ret
487+
%l = load i16, ptr %p
488+
%z = zext i16 %l to i64
489+
%b = bitcast i64 %z to <4 x half>
490+
ret <4 x half> %b
491+
}
492+
493+
define <4 x half> @load_zext_i32_v4f16(ptr %p) {
494+
; CHECK-LABEL: load_zext_i32_v4f16:
495+
; CHECK: // %bb.0:
496+
; CHECK-NEXT: ldr w8, [x0]
497+
; CHECK-NEXT: fmov d0, x8
498+
; CHECK-NEXT: ret
499+
%l = load i32, ptr %p
500+
%z = zext i32 %l to i64
501+
%b = bitcast i64 %z to <4 x half>
502+
ret <4 x half> %b
503+
}
504+
505+
define <4 x bfloat> @load_zext_i8_v4bf16(ptr %p) {
506+
; CHECK-LABEL: load_zext_i8_v4bf16:
507+
; CHECK: // %bb.0:
508+
; CHECK-NEXT: ldrb w8, [x0]
509+
; CHECK-NEXT: fmov d0, x8
510+
; CHECK-NEXT: ret
511+
%l = load i8, ptr %p
512+
%z = zext i8 %l to i64
513+
%b = bitcast i64 %z to <4 x bfloat>
514+
ret <4 x bfloat> %b
515+
}
516+
517+
define <4 x bfloat> @load_zext_i16_v4bf16(ptr %p) {
518+
; CHECK-LABEL: load_zext_i16_v4bf16:
519+
; CHECK: // %bb.0:
520+
; CHECK-NEXT: ldrh w8, [x0]
521+
; CHECK-NEXT: fmov d0, x8
522+
; CHECK-NEXT: ret
523+
%l = load i16, ptr %p
524+
%z = zext i16 %l to i64
525+
%b = bitcast i64 %z to <4 x bfloat>
526+
ret <4 x bfloat> %b
527+
}
528+
529+
define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) {
530+
; CHECK-LABEL: load_zext_i32_v4bf16:
531+
; CHECK: // %bb.0:
532+
; CHECK-NEXT: ldr w8, [x0]
533+
; CHECK-NEXT: fmov d0, x8
534+
; CHECK-NEXT: ret
535+
%l = load i32, ptr %p
536+
%z = zext i32 %l to i64
537+
%b = bitcast i64 %z to <4 x bfloat>
538+
ret <4 x bfloat> %b
539+
}
540+
541+
define <2 x float> @load_zext_i8_v2f32(ptr %p) {
542+
; CHECK-LABEL: load_zext_i8_v2f32:
543+
; CHECK: // %bb.0:
544+
; CHECK-NEXT: ldrb w8, [x0]
545+
; CHECK-NEXT: fmov d0, x8
546+
; CHECK-NEXT: ret
547+
%l = load i8, ptr %p
548+
%z = zext i8 %l to i64
549+
%b = bitcast i64 %z to <2 x float>
550+
ret <2 x float> %b
551+
}
552+
553+
define <2 x float> @load_zext_i16_v2f32(ptr %p) {
554+
; CHECK-LABEL: load_zext_i16_v2f32:
555+
; CHECK: // %bb.0:
556+
; CHECK-NEXT: ldrh w8, [x0]
557+
; CHECK-NEXT: fmov d0, x8
558+
; CHECK-NEXT: ret
559+
%l = load i16, ptr %p
560+
%z = zext i16 %l to i64
561+
%b = bitcast i64 %z to <2 x float>
562+
ret <2 x float> %b
563+
}
564+
565+
define <2 x float> @load_zext_i32_v2f32(ptr %p) {
566+
; CHECK-LABEL: load_zext_i32_v2f32:
567+
; CHECK: // %bb.0:
568+
; CHECK-NEXT: ldr w8, [x0]
569+
; CHECK-NEXT: fmov d0, x8
570+
; CHECK-NEXT: ret
571+
%l = load i32, ptr %p
572+
%z = zext i32 %l to i64
573+
%b = bitcast i64 %z to <2 x float>
574+
ret <2 x float> %b
575+
}
576+
577+
define <1 x double> @load_zext_i8_v1f64(ptr %p) {
578+
; CHECK-LABEL: load_zext_i8_v1f64:
579+
; CHECK: // %bb.0:
580+
; CHECK-NEXT: ldrb w8, [x0]
581+
; CHECK-NEXT: fmov d0, x8
582+
; CHECK-NEXT: ret
583+
%l = load i8, ptr %p
584+
%z = zext i8 %l to i64
585+
%b = bitcast i64 %z to <1 x double>
586+
ret <1 x double> %b
587+
}
588+
589+
define <1 x double> @load_zext_i16_v1f64(ptr %p) {
590+
; CHECK-LABEL: load_zext_i16_v1f64:
591+
; CHECK: // %bb.0:
592+
; CHECK-NEXT: ldrh w8, [x0]
593+
; CHECK-NEXT: fmov d0, x8
594+
; CHECK-NEXT: ret
595+
%l = load i16, ptr %p
596+
%z = zext i16 %l to i64
597+
%b = bitcast i64 %z to <1 x double>
598+
ret <1 x double> %b
599+
}
600+
601+
define <1 x double> @load_zext_i32_v1f64(ptr %p) {
602+
; CHECK-LABEL: load_zext_i32_v1f64:
603+
; CHECK: // %bb.0:
604+
; CHECK-NEXT: ldr w8, [x0]
605+
; CHECK-NEXT: fmov d0, x8
606+
; CHECK-NEXT: ret
607+
%l = load i32, ptr %p
608+
%z = zext i32 %l to i64
609+
%b = bitcast i64 %z to <1 x double>
610+
ret <1 x double> %b
611+
}

0 commit comments

Comments
 (0)