11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm | FileCheck %s
3- ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a | FileCheck %s
2+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a | FileCheck %s --check-prefixes=CHECK,CHECK-SD
4+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5+
6+ ; CHECK-GI: warning: Instruction selection used fallback path for test_sqrdmlah_extracted_lane_s32
7+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlahq_extracted_lane_s32
8+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlsh_extracted_lane_s32
9+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlshq_extracted_lane_s32
10+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlah_i32
11+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlsh_i32
12+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlah_extract_i32
13+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrdmlsh_extract_i32
14+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlahs_s32
15+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlahs_lane_s32
16+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlahs_laneq_s32
17+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlshs_s32
18+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlshs_lane_s32
19+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqrdmlshs_laneq_s32
420
521declare <4 x i16 > @llvm.aarch64.neon.sqrdmulh.v4i16 (<4 x i16 >, <4 x i16 >)
622declare <8 x i16 > @llvm.aarch64.neon.sqrdmulh.v8i16 (<8 x i16 >, <8 x i16 >)
@@ -404,15 +420,25 @@ define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
404420}
405421
406422define i32 @test_sqrdmlah_v1i32 (i32 %acc , i32 %x , i32 %y ) {
407- ; CHECK-LABEL: test_sqrdmlah_v1i32:
408- ; CHECK: // %bb.0:
409- ; CHECK-NEXT: fmov s0, w1
410- ; CHECK-NEXT: fmov s1, w2
411- ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
412- ; CHECK-NEXT: fmov s1, w0
413- ; CHECK-NEXT: sqadd v0.4s, v1.4s, v0.4s
414- ; CHECK-NEXT: fmov w0, s0
415- ; CHECK-NEXT: ret
423+ ; CHECK-SD-LABEL: test_sqrdmlah_v1i32:
424+ ; CHECK-SD: // %bb.0:
425+ ; CHECK-SD-NEXT: fmov s0, w1
426+ ; CHECK-SD-NEXT: fmov s1, w2
427+ ; CHECK-SD-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
428+ ; CHECK-SD-NEXT: fmov s1, w0
429+ ; CHECK-SD-NEXT: sqadd v0.4s, v1.4s, v0.4s
430+ ; CHECK-SD-NEXT: fmov w0, s0
431+ ; CHECK-SD-NEXT: ret
432+ ;
433+ ; CHECK-GI-LABEL: test_sqrdmlah_v1i32:
434+ ; CHECK-GI: // %bb.0:
435+ ; CHECK-GI-NEXT: mov v0.s[0], w1
436+ ; CHECK-GI-NEXT: mov v1.s[0], w2
437+ ; CHECK-GI-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
438+ ; CHECK-GI-NEXT: mov v1.s[0], w0
439+ ; CHECK-GI-NEXT: sqadd v0.4s, v1.4s, v0.4s
440+ ; CHECK-GI-NEXT: fmov w0, s0
441+ ; CHECK-GI-NEXT: ret
416442 %x_vec = insertelement <4 x i32 > undef , i32 %x , i64 0
417443 %y_vec = insertelement <4 x i32 > undef , i32 %y , i64 0
418444 %prod_vec = call <4 x i32 > @llvm.aarch64.neon.sqrdmulh.v4i32 (<4 x i32 > %x_vec , <4 x i32 > %y_vec )
@@ -443,15 +469,25 @@ define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
443469}
444470
445471define i32 @test_sqrdmlsh_v1i32 (i32 %acc , i32 %x , i32 %y ) {
446- ; CHECK-LABEL: test_sqrdmlsh_v1i32:
447- ; CHECK: // %bb.0:
448- ; CHECK-NEXT: fmov s0, w1
449- ; CHECK-NEXT: fmov s1, w2
450- ; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
451- ; CHECK-NEXT: fmov s1, w0
452- ; CHECK-NEXT: sqsub v0.4s, v1.4s, v0.4s
453- ; CHECK-NEXT: fmov w0, s0
454- ; CHECK-NEXT: ret
472+ ; CHECK-SD-LABEL: test_sqrdmlsh_v1i32:
473+ ; CHECK-SD: // %bb.0:
474+ ; CHECK-SD-NEXT: fmov s0, w1
475+ ; CHECK-SD-NEXT: fmov s1, w2
476+ ; CHECK-SD-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
477+ ; CHECK-SD-NEXT: fmov s1, w0
478+ ; CHECK-SD-NEXT: sqsub v0.4s, v1.4s, v0.4s
479+ ; CHECK-SD-NEXT: fmov w0, s0
480+ ; CHECK-SD-NEXT: ret
481+ ;
482+ ; CHECK-GI-LABEL: test_sqrdmlsh_v1i32:
483+ ; CHECK-GI: // %bb.0:
484+ ; CHECK-GI-NEXT: mov v0.s[0], w1
485+ ; CHECK-GI-NEXT: mov v1.s[0], w2
486+ ; CHECK-GI-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
487+ ; CHECK-GI-NEXT: mov v1.s[0], w0
488+ ; CHECK-GI-NEXT: sqsub v0.4s, v1.4s, v0.4s
489+ ; CHECK-GI-NEXT: fmov w0, s0
490+ ; CHECK-GI-NEXT: ret
455491 %x_vec = insertelement <4 x i32 > undef , i32 %x , i64 0
456492 %y_vec = insertelement <4 x i32 > undef , i32 %y , i64 0
457493 %prod_vec = call <4 x i32 > @llvm.aarch64.neon.sqrdmulh.v4i32 (<4 x i32 > %x_vec , <4 x i32 > %y_vec )
@@ -568,21 +604,33 @@ define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
568604; Using sqrdmlah intrinsics
569605
; test_vqrdmlah_laneq_s16: calls the llvm.aarch64.neon.sqrdmlah.v4i16 intrinsic
; with %a, %b and a <4 x i16> whose every element is element 7 of the
; <8 x i16> argument %v (the shufflevector mask below is all 7s).
; The SelectionDAG expectations require one by-element sqrdmlah using
; v2.h[7]; the GlobalISel expectations require a dup of lane 7 followed by
; the plain vector form of sqrdmlah.
570606define <4 x i16 > @test_vqrdmlah_laneq_s16 (<4 x i16 > %a , <4 x i16 > %b , <8 x i16 > %v ) {
571- ; CHECK-LABEL: test_vqrdmlah_laneq_s16:
572- ; CHECK: // %bb.0: // %entry
573- ; CHECK-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
574- ; CHECK-NEXT: ret
607+ ; CHECK-SD-LABEL: test_vqrdmlah_laneq_s16:
608+ ; CHECK-SD: // %bb.0: // %entry
609+ ; CHECK-SD-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
610+ ; CHECK-SD-NEXT: ret
611+ ;
612+ ; CHECK-GI-LABEL: test_vqrdmlah_laneq_s16:
613+ ; CHECK-GI: // %bb.0: // %entry
614+ ; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
615+ ; CHECK-GI-NEXT: sqrdmlah v0.4h, v1.4h, v2.4h
616+ ; CHECK-GI-NEXT: ret
575617entry:
576618 %lane = shufflevector <8 x i16 > %v , <8 x i16 > poison, <4 x i32 > <i32 7 , i32 7 , i32 7 , i32 7 >
577619 %vqrdmlah_v3.i = tail call <4 x i16 > @llvm.aarch64.neon.sqrdmlah.v4i16 (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %lane ) #4
578620 ret <4 x i16 > %vqrdmlah_v3.i
579621}
580622
581623define <2 x i32 > @test_vqrdmlah_laneq_s32 (<2 x i32 > %a , <2 x i32 > %b , <4 x i32 > %v ) {
582- ; CHECK-LABEL: test_vqrdmlah_laneq_s32:
583- ; CHECK: // %bb.0: // %entry
584- ; CHECK-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
585- ; CHECK-NEXT: ret
624+ ; CHECK-SD-LABEL: test_vqrdmlah_laneq_s32:
625+ ; CHECK-SD: // %bb.0: // %entry
626+ ; CHECK-SD-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
627+ ; CHECK-SD-NEXT: ret
628+ ;
629+ ; CHECK-GI-LABEL: test_vqrdmlah_laneq_s32:
630+ ; CHECK-GI: // %bb.0: // %entry
631+ ; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
632+ ; CHECK-GI-NEXT: sqrdmlah v0.2s, v1.2s, v2.2s
633+ ; CHECK-GI-NEXT: ret
586634entry:
587635 %lane = shufflevector <4 x i32 > %v , <4 x i32 > poison, <2 x i32 > <i32 3 , i32 3 >
588636 %vqrdmlah_v3.i = tail call <2 x i32 > @llvm.aarch64.neon.sqrdmlah.v2i32 (<2 x i32 > %a , <2 x i32 > %b , <2 x i32 > %lane ) #4
@@ -644,14 +692,23 @@ entry:
644692}
645693
646694define i16 @test_vqrdmlahh_lane_s16 (i16 %a , i16 %b , <4 x i16 > %c ) {
647- ; CHECK-LABEL: test_vqrdmlahh_lane_s16:
648- ; CHECK: // %bb.0: // %entry
649- ; CHECK-NEXT: fmov s1, w0
650- ; CHECK-NEXT: fmov s2, w1
651- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
652- ; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
653- ; CHECK-NEXT: umov w0, v1.h[0]
654- ; CHECK-NEXT: ret
695+ ; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
696+ ; CHECK-SD: // %bb.0: // %entry
697+ ; CHECK-SD-NEXT: fmov s1, w0
698+ ; CHECK-SD-NEXT: fmov s2, w1
699+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
700+ ; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
701+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
702+ ; CHECK-SD-NEXT: ret
703+ ;
704+ ; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
705+ ; CHECK-GI: // %bb.0: // %entry
706+ ; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
707+ ; CHECK-GI-NEXT: fmov s1, w0
708+ ; CHECK-GI-NEXT: fmov s2, w1
709+ ; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
710+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
711+ ; CHECK-GI-NEXT: ret
655712entry:
656713 %0 = insertelement <4 x i16 > undef , i16 %a , i64 0
657714 %1 = insertelement <4 x i16 > undef , i16 %b , i64 0
@@ -677,13 +734,22 @@ entry:
677734}
678735
679736define i16 @test_vqrdmlahh_laneq_s16 (i16 %a , i16 %b , <8 x i16 > %c ) {
680- ; CHECK-LABEL: test_vqrdmlahh_laneq_s16:
681- ; CHECK: // %bb.0: // %entry
682- ; CHECK-NEXT: fmov s1, w0
683- ; CHECK-NEXT: fmov s2, w1
684- ; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[7]
685- ; CHECK-NEXT: umov w0, v1.h[0]
686- ; CHECK-NEXT: ret
737+ ; CHECK-SD-LABEL: test_vqrdmlahh_laneq_s16:
738+ ; CHECK-SD: // %bb.0: // %entry
739+ ; CHECK-SD-NEXT: fmov s1, w0
740+ ; CHECK-SD-NEXT: fmov s2, w1
741+ ; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[7]
742+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
743+ ; CHECK-SD-NEXT: ret
744+ ;
745+ ; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
746+ ; CHECK-GI: // %bb.0: // %entry
747+ ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
748+ ; CHECK-GI-NEXT: fmov s1, w0
749+ ; CHECK-GI-NEXT: fmov s2, w1
750+ ; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
751+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
752+ ; CHECK-GI-NEXT: ret
687753entry:
688754 %0 = insertelement <4 x i16 > undef , i16 %a , i64 0
689755 %1 = insertelement <4 x i16 > undef , i16 %b , i64 0
@@ -708,21 +774,33 @@ entry:
708774}
709775
; test_vqrdmlsh_laneq_s16: calls the llvm.aarch64.neon.sqrdmlsh.v4i16 intrinsic
; with %a, %b and a <4 x i16> whose every element is element 7 of the
; <8 x i16> argument %v (the shufflevector mask below is all 7s).
; The SelectionDAG expectations require one by-element sqrdmlsh using
; v2.h[7]; the GlobalISel expectations require a dup of lane 7 followed by
; the plain vector form of sqrdmlsh.
710776define <4 x i16 > @test_vqrdmlsh_laneq_s16 (<4 x i16 > %a , <4 x i16 > %b , <8 x i16 > %v ) {
711- ; CHECK-LABEL: test_vqrdmlsh_laneq_s16:
712- ; CHECK: // %bb.0: // %entry
713- ; CHECK-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
714- ; CHECK-NEXT: ret
777+ ; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s16:
778+ ; CHECK-SD: // %bb.0: // %entry
779+ ; CHECK-SD-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
780+ ; CHECK-SD-NEXT: ret
781+ ;
782+ ; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s16:
783+ ; CHECK-GI: // %bb.0: // %entry
784+ ; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
785+ ; CHECK-GI-NEXT: sqrdmlsh v0.4h, v1.4h, v2.4h
786+ ; CHECK-GI-NEXT: ret
715787entry:
716788 %lane = shufflevector <8 x i16 > %v , <8 x i16 > poison, <4 x i32 > <i32 7 , i32 7 , i32 7 , i32 7 >
717789 %vqrdmlsh_v3.i = tail call <4 x i16 > @llvm.aarch64.neon.sqrdmlsh.v4i16 (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %lane ) #4
718790 ret <4 x i16 > %vqrdmlsh_v3.i
719791}
720792
721793define <2 x i32 > @test_vqrdmlsh_laneq_s32 (<2 x i32 > %a , <2 x i32 > %b , <4 x i32 > %v ) {
722- ; CHECK-LABEL: test_vqrdmlsh_laneq_s32:
723- ; CHECK: // %bb.0: // %entry
724- ; CHECK-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
725- ; CHECK-NEXT: ret
794+ ; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s32:
795+ ; CHECK-SD: // %bb.0: // %entry
796+ ; CHECK-SD-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
797+ ; CHECK-SD-NEXT: ret
798+ ;
799+ ; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s32:
800+ ; CHECK-GI: // %bb.0: // %entry
801+ ; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
802+ ; CHECK-GI-NEXT: sqrdmlsh v0.2s, v1.2s, v2.2s
803+ ; CHECK-GI-NEXT: ret
726804entry:
727805 %lane = shufflevector <4 x i32 > %v , <4 x i32 > poison, <2 x i32 > <i32 3 , i32 3 >
728806 %vqrdmlsh_v3.i = tail call <2 x i32 > @llvm.aarch64.neon.sqrdmlsh.v2i32 (<2 x i32 > %a , <2 x i32 > %b , <2 x i32 > %lane ) #4
@@ -784,14 +862,23 @@ entry:
784862}
785863
786864define i16 @test_vqrdmlshh_lane_s16 (i16 %a , i16 %b , <4 x i16 > %c ) {
787- ; CHECK-LABEL: test_vqrdmlshh_lane_s16:
788- ; CHECK: // %bb.0: // %entry
789- ; CHECK-NEXT: fmov s1, w0
790- ; CHECK-NEXT: fmov s2, w1
791- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
792- ; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
793- ; CHECK-NEXT: umov w0, v1.h[0]
794- ; CHECK-NEXT: ret
865+ ; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
866+ ; CHECK-SD: // %bb.0: // %entry
867+ ; CHECK-SD-NEXT: fmov s1, w0
868+ ; CHECK-SD-NEXT: fmov s2, w1
869+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
870+ ; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
871+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
872+ ; CHECK-SD-NEXT: ret
873+ ;
874+ ; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
875+ ; CHECK-GI: // %bb.0: // %entry
876+ ; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
877+ ; CHECK-GI-NEXT: fmov s1, w0
878+ ; CHECK-GI-NEXT: fmov s2, w1
879+ ; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
880+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
881+ ; CHECK-GI-NEXT: ret
795882entry:
796883 %0 = insertelement <4 x i16 > undef , i16 %a , i64 0
797884 %1 = insertelement <4 x i16 > undef , i16 %b , i64 0
@@ -817,13 +904,22 @@ entry:
817904}
818905
819906define i16 @test_vqrdmlshh_laneq_s16 (i16 %a , i16 %b , <8 x i16 > %c ) {
820- ; CHECK-LABEL: test_vqrdmlshh_laneq_s16:
821- ; CHECK: // %bb.0: // %entry
822- ; CHECK-NEXT: fmov s1, w0
823- ; CHECK-NEXT: fmov s2, w1
824- ; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[7]
825- ; CHECK-NEXT: umov w0, v1.h[0]
826- ; CHECK-NEXT: ret
907+ ; CHECK-SD-LABEL: test_vqrdmlshh_laneq_s16:
908+ ; CHECK-SD: // %bb.0: // %entry
909+ ; CHECK-SD-NEXT: fmov s1, w0
910+ ; CHECK-SD-NEXT: fmov s2, w1
911+ ; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[7]
912+ ; CHECK-SD-NEXT: umov w0, v1.h[0]
913+ ; CHECK-SD-NEXT: ret
914+ ;
915+ ; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
916+ ; CHECK-GI: // %bb.0: // %entry
917+ ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
918+ ; CHECK-GI-NEXT: fmov s1, w0
919+ ; CHECK-GI-NEXT: fmov s2, w1
920+ ; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
921+ ; CHECK-GI-NEXT: umov w0, v1.h[0]
922+ ; CHECK-GI-NEXT: ret
827923entry:
828924 %0 = insertelement <4 x i16 > undef , i16 %a , i64 0
829925 %1 = insertelement <4 x i16 > undef , i16 %b , i64 0
0 commit comments