1- ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+ ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
24
; test_vextd: the mask selects lanes 3..10 of the two <8 x i8> inputs taken in order.
; The new version receives the vectors as arguments (the old one loaded them from
; %A and %B) and pins the exact instruction: one 8-byte ext with immediate #3.
3- define <8 x i8 > @test_vextd (ptr %A , ptr %B ) nounwind {
4- ;CHECK-LABEL: test_vextd:
5- ;CHECK: {{ext.8b.*#3}}
6- %tmp1 = load < 8 x i8 >, ptr %A
7- %tmp2 = load < 8 x i8 >, ptr %B
8- %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 >
9- ret <8 x i8 > %tmp3
5+ define <8 x i8 > @test_vextd (< 8 x i8 > %tmp1 , < 8 x i8 > %tmp2 ) {
6+ ; CHECK-LABEL: test_vextd:
7+ ; CHECK: // %bb.0:
8+ ; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #3
9+ ; CHECK-NEXT: ret
10+ %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 >
11+ ret <8 x i8 > %tmp3
1012}
1113
; test_vextRd: "reversed" case. The mask starts in the second input (lanes 13..15)
; and wraps into the first (lanes 0..4), so the expected ext takes its sources in
; swapped order (v1, then v0) with immediate #5.
12- define <8 x i8 > @test_vextRd (ptr %A , ptr %B ) nounwind {
13- ;CHECK-LABEL: test_vextRd:
14- ;CHECK: {{ext.8b.*#5}}
15- %tmp1 = load < 8 x i8 >, ptr %A
16- %tmp2 = load < 8 x i8 >, ptr %B
17- %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 13 , i32 14 , i32 15 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 >
18- ret <8 x i8 > %tmp3
14+ define <8 x i8 > @test_vextRd (< 8 x i8 > %tmp1 , < 8 x i8 > %tmp2 ) {
15+ ; CHECK-LABEL: test_vextRd:
16+ ; CHECK: // %bb.0:
17+ ; CHECK-NEXT: ext v0.8b, v1.8b, v0.8b, #5
18+ ; CHECK-NEXT: ret
19+ %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 13 , i32 14 , i32 15 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 >
20+ ret <8 x i8 > %tmp3
1921}
2022
; test_vextq: 128-bit variant of test_vextd. The mask selects lanes 3..18 of the
; two <16 x i8> inputs; the expected code is one 16-byte ext with immediate #3.
; The old pattern matched a bare "3" without the leading '#'.
21- define <16 x i8 > @test_vextq (ptr %A , ptr %B ) nounwind {
22- ;CHECK-LABEL: test_vextq:
23- ;CHECK: {{ext.16b.*3}}
24- %tmp1 = load < 16 x i8 >, ptr %A
25- %tmp2 = load < 16 x i8 >, ptr %B
26- %tmp3 = shufflevector <16 x i8 > %tmp1 , <16 x i8 > %tmp2 , <16 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 >
27- ret <16 x i8 > %tmp3
23+ define <16 x i8 > @test_vextq (< 16 x i8 > %tmp1 , < 16 x i8 > %tmp2 ) {
24+ ; CHECK-LABEL: test_vextq:
25+ ; CHECK: // %bb.0:
26+ ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #3
27+ ; CHECK-NEXT: ret
28+ %tmp3 = shufflevector <16 x i8 > %tmp1 , <16 x i8 > %tmp2 , <16 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 >
29+ ret <16 x i8 > %tmp3
2830}
2931
; test_vextRq: 128-bit "reversed" case. The mask runs through lanes 23..31 of the
; concatenated inputs and wraps to lanes 0..6, so the expected ext uses swapped
; sources (v1, then v0) with immediate #7.
30- define <16 x i8 > @test_vextRq (ptr %A , ptr %B ) nounwind {
31- ;CHECK-LABEL: test_vextRq:
32- ;CHECK: {{ext.16b.*7}}
33- %tmp1 = load < 16 x i8 >, ptr %A
34- %tmp2 = load < 16 x i8 >, ptr %B
35- %tmp3 = shufflevector <16 x i8 > %tmp1 , <16 x i8 > %tmp2 , <16 x i32 > <i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 >
36- ret <16 x i8 > %tmp3
32+ define <16 x i8 > @test_vextRq (< 16 x i8 > %tmp1 , < 16 x i8 > %tmp2 ) {
33+ ; CHECK-LABEL: test_vextRq:
34+ ; CHECK: // %bb.0:
35+ ; CHECK-NEXT: ext v0.16b, v1.16b, v0.16b, #7
36+ ; CHECK-NEXT: ret
37+ %tmp3 = shufflevector <16 x i8 > %tmp1 , <16 x i8 > %tmp2 , <16 x i32 > <i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 >
38+ ret <16 x i8 > %tmp3
3739}
3840
; test_vextd16: same extraction pattern on 16-bit elements. The mask starts at
; element 3 of the <4 x i16> inputs; the expected ext is still written on .8b
; registers and its immediate is #6.
39- define <4 x i16 > @test_vextd16 (ptr %A , ptr %B ) nounwind {
40- ;CHECK-LABEL: test_vextd16:
41- ;CHECK: {{ext.8b.*#6}}
42- %tmp1 = load < 4 x i16 >, ptr %A
43- %tmp2 = load < 4 x i16 >, ptr %B
44- %tmp3 = shufflevector <4 x i16 > %tmp1 , <4 x i16 > %tmp2 , <4 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 >
45- ret <4 x i16 > %tmp3
41+ define <4 x i16 > @test_vextd16 (< 4 x i16 > %tmp1 , < 4 x i16 > %tmp2 ) {
42+ ; CHECK-LABEL: test_vextd16:
43+ ; CHECK: // %bb.0:
44+ ; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #6
45+ ; CHECK-NEXT: ret
46+ %tmp3 = shufflevector <4 x i16 > %tmp1 , <4 x i16 > %tmp2 , <4 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 >
47+ ret <4 x i16 > %tmp3
4648}
4749
; test_vextq32: same extraction pattern on 32-bit elements. The mask starts at
; element 3 of the <4 x i32> inputs; the expected ext is written on .16b
; registers and its immediate is #12.
48- define <4 x i32 > @test_vextq32 (ptr %A , ptr %B ) nounwind {
49- ;CHECK-LABEL: test_vextq32:
50- ;CHECK: {{ext.16b.*12}}
51- %tmp1 = load < 4 x i32 >, ptr %A
52- %tmp2 = load < 4 x i32 >, ptr %B
53- %tmp3 = shufflevector <4 x i32 > %tmp1 , <4 x i32 > %tmp2 , <4 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 >
54- ret <4 x i32 > %tmp3
50+ define <4 x i32 > @test_vextq32 (< 4 x i32 > %tmp1 , < 4 x i32 > %tmp2 ) {
51+ ; CHECK-LABEL: test_vextq32:
52+ ; CHECK: // %bb.0:
53+ ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12
54+ ; CHECK-NEXT: ret
55+ %tmp3 = shufflevector <4 x i32 > %tmp1 , <4 x i32 > %tmp2 , <4 x i32 > <i32 3 , i32 4 , i32 5 , i32 6 >
56+ ret <4 x i32 > %tmp3
5557}
5658
5759; Undef shuffle indices should not prevent matching to VEXT:
5860
; test_vextd_undef: same mask as test_vextd except that result lanes 1 and 2 are
; undef. The old pattern accepted any 8-byte ext; the regenerated checks pin the
; immediate to #3, matching the fully defined mask.
59- define <8 x i8 > @test_vextd_undef (ptr %A , ptr %B ) nounwind {
60- ;CHECK-LABEL: test_vextd_undef:
61- ;CHECK: {{ext.8b.*}}
62- %tmp1 = load < 8 x i8 >, ptr %A
63- %tmp2 = load < 8 x i8 >, ptr %B
64- %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 3 , i32 undef , i32 undef , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 >
65- ret <8 x i8 > %tmp3
61+ define <8 x i8 > @test_vextd_undef (< 8 x i8 > %tmp1 , < 8 x i8 > %tmp2 ) {
62+ ; CHECK-LABEL: test_vextd_undef:
63+ ; CHECK: // %bb.0:
64+ ; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #3
65+ ; CHECK-NEXT: ret
66+ %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 3 , i32 undef , i32 undef , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 >
67+ ret <8 x i8 > %tmp3
6668}
6769
; test_vextd_undef2: the first four result lanes are undef and the last four are
; lanes 2..5 of the first input. Both selectors emit an ext with immediate #6 but
; disagree on the first source register (v0 under SelectionDAG, v1 under
; GlobalISel), so the expected output is split across the SD and GI check prefixes.
68- define <8 x i8 > @test_vextd_undef2 (ptr %A , ptr %B ) nounwind {
69- ;CHECK-LABEL: test_vextd_undef2:
70- ;CHECK: {{ext.8b.*#6}}
71- %tmp1 = load <8 x i8 >, ptr %A
72- %tmp2 = load <8 x i8 >, ptr %B
70+ define <8 x i8 > @test_vextd_undef2 (<8 x i8 > %tmp1 , <8 x i8 > %tmp2 ) {
71+ ; CHECK-SD-LABEL: test_vextd_undef2:
72+ ; CHECK-SD: // %bb.0:
73+ ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v0.8b, #6
74+ ; CHECK-SD-NEXT: ret
75+ ;
76+ ; CHECK-GI-LABEL: test_vextd_undef2:
77+ ; CHECK-GI: // %bb.0:
78+ ; CHECK-GI-NEXT: ext v0.8b, v1.8b, v0.8b, #6
79+ ; CHECK-GI-NEXT: ret
7380 %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 undef , i32 undef , i32 undef , i32 undef , i32 2 , i32 3 , i32 4 , i32 5 >
7481 ret <8 x i8 > %tmp3
7582}
7683
; test_vextRq_undef: same mask as test_vextRq with result lanes 4, 5 and 14 made
; undef. The expected code is unchanged from the fully defined case: an ext with
; swapped sources (v1, then v0) and immediate #7.
77- define <16 x i8 > @test_vextRq_undef (ptr %A , ptr %B ) nounwind {
78- ;CHECK-LABEL: test_vextRq_undef:
79- ;CHECK: {{ext.16b.*#7}}
80- %tmp1 = load < 16 x i8 >, ptr %A
81- %tmp2 = load < 16 x i8 >, ptr %B
82- %tmp3 = shufflevector <16 x i8 > %tmp1 , <16 x i8 > %tmp2 , <16 x i32 > <i32 23 , i32 24 , i32 25 , i32 26 , i32 undef , i32 undef , i32 29 , i32 30 , i32 31 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 undef , i32 6 >
83- ret <16 x i8 > %tmp3
84+ define <16 x i8 > @test_vextRq_undef (< 16 x i8 > %tmp1 , < 16 x i8 > %tmp2 ) {
85+ ; CHECK-LABEL: test_vextRq_undef:
86+ ; CHECK: // %bb.0:
87+ ; CHECK-NEXT: ext v0.16b, v1.16b, v0.16b, #7
88+ ; CHECK-NEXT: ret
89+ %tmp3 = shufflevector <16 x i8 > %tmp1 , <16 x i8 > %tmp2 , <16 x i32 > <i32 23 , i32 24 , i32 25 , i32 26 , i32 undef , i32 undef , i32 29 , i32 30 , i32 31 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 undef , i32 6 >
90+ ret <16 x i8 > %tmp3
8491}
8592
; test_vextRq_undef2: single-input case (the second shuffle operand is undef). The
; upper four result lanes are elements 1..4 of %tmp1 and the lower four are undef;
; the expected ext uses v0 for both sources with immediate #10.
; NOTE(review): this rewritten signature still carries nounwind, unlike the other
; rewritten functions in this diff -- confirm whether that is intentional.
86- define <8 x i16 > @test_vextRq_undef2 (ptr %A ) nounwind {
87- ;CHECK-LABEL: test_vextRq_undef2:
88- ;CHECK: {{ext.16b.*#10}}
89- %tmp1 = load <8 x i16 >, ptr %A
93+ define <8 x i16 > @test_vextRq_undef2 (<8 x i16 > %tmp1 ) nounwind {
94+ ; CHECK-LABEL: test_vextRq_undef2:
95+ ; CHECK: // %bb.0:
96+ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #10
97+ ; CHECK-NEXT: ret
9098 %vext = shufflevector <8 x i16 > %tmp1 , <8 x i16 > undef , <8 x i32 > <i32 undef , i32 undef , i32 undef , i32 undef , i32 1 , i32 2 , i32 3 , i32 4 >
9199 ret <8 x i16 > %vext ;
92100}
@@ -95,11 +103,22 @@ define <8 x i16> @test_vextRq_undef2(ptr %A) nounwind {
95103; chosen to reach lowering phase as a BUILD_VECTOR.
96104
97105; An undef in the shuffle list should still be optimizable
; test_undef: narrowing shuffle from two <8 x i16> inputs to a <4 x i16> result
; with mask <undef, 8, 5, 9>. The old test only required a zip1.4h. The regenerated
; checks record two lowerings: SelectionDAG emits an ext followed by zip1, while
; GlobalISel loads a mask from the constant pool and uses a two-register tbl.
98- define <4 x i16 > @test_undef (ptr %A , ptr %B ) nounwind {
99- ;CHECK-LABEL: test_undef:
100- ;CHECK: zip1.4h
101- %tmp1 = load <8 x i16 >, ptr %A
102- %tmp2 = load <8 x i16 >, ptr %B
103- %tmp3 = shufflevector <8 x i16 > %tmp1 , <8 x i16 > %tmp2 , <4 x i32 > <i32 undef , i32 8 , i32 5 , i32 9 >
104- ret <4 x i16 > %tmp3
106+ define <4 x i16 > @test_undef (<8 x i16 > %tmp1 , <8 x i16 > %tmp2 ) {
107+ ; CHECK-SD-LABEL: test_undef:
108+ ; CHECK-SD: // %bb.0:
109+ ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
110+ ; CHECK-SD-NEXT: zip1 v0.4h, v0.4h, v1.4h
111+ ; CHECK-SD-NEXT: ret
112+ ;
113+ ; CHECK-GI-LABEL: test_undef:
114+ ; CHECK-GI: // %bb.0:
115+ ; CHECK-GI-NEXT: adrp x8, .LCPI10_0
116+ ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
117+ ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI10_0]
118+ ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
119+ ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
120+ ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
121+ ; CHECK-GI-NEXT: ret
122+ %tmp3 = shufflevector <8 x i16 > %tmp1 , <8 x i16 > %tmp2 , <4 x i32 > <i32 undef , i32 8 , i32 5 , i32 9 >
123+ ret <4 x i16 > %tmp3
105124}
0 commit comments