@@ -17,14 +17,25 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
17
17
ret <32 x i8 > %shuffle
18
18
}
19
19
20
+ ; Single-input <32 x i8> shuffle of %a: result elements 0-15 read only source
+ ; elements 0-15 (order 14, 15, 0..13) and result elements 16-31 are the
+ ; identity (16..31). The expected output loads a value from .LCPI1_0 into
+ ; $xr1 and uses one xvshuf.h on $xr0, with no xvpermi.d.
+ define <32 x i8 > @shuffle_v32i8_same_lane (<32 x i8 > %a ) {
21
+ ; CHECK-LABEL: shuffle_v32i8_same_lane:
22
+ ; CHECK: # %bb.0:
23
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
24
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI1_0)
25
+ ; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
26
+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
27
+ ; CHECK-NEXT: ret
28
+ %shuffle = shufflevector <32 x i8 > %a , <32 x i8 > poison, <32 x i32 > <i32 14 , i32 15 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
29
+ ret <32 x i8 > %shuffle
30
+ }
20
31
21
32
define <16 x i16 > @shuffle_v16i16 (<16 x i16 > %a ) {
22
33
; CHECK-LABEL: shuffle_v16i16:
23
34
; CHECK: # %bb.0:
24
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0 )
25
- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI1_0 )
26
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_1 )
27
- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI1_1 )
35
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0 )
36
+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0 )
37
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1 )
38
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1 )
28
39
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
29
40
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
30
41
; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0
@@ -34,13 +45,25 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
34
45
ret <16 x i16 > %shuffle
35
46
}
36
47
48
+ ; Single-input <16 x i16> shuffle of %a: result elements 0-7 read only source
+ ; elements 0-7, and result elements 8-15 read only source elements 8-15.
+ ; The expected output loads a value from .LCPI3_0 into $xr1 and uses one
+ ; xvshuf.h on $xr0, with no xvpermi.d.
+ define <16 x i16 > @shuffle_v16i16_same_lane (<16 x i16 > %a ) {
49
+ ; CHECK-LABEL: shuffle_v16i16_same_lane:
50
+ ; CHECK: # %bb.0:
51
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
52
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
53
+ ; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
54
+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
55
+ ; CHECK-NEXT: ret
56
+ %shuffle = shufflevector <16 x i16 > %a , <16 x i16 > poison, <16 x i32 > <i32 6 , i32 7 , i32 0 , i32 5 , i32 2 , i32 3 , i32 6 , i32 5 , i32 8 , i32 9 , i32 10 , i32 13 , i32 12 , i32 15 , i32 13 , i32 15 >
57
+ ret <16 x i16 > %shuffle
58
+ }
59
+
37
60
define <8 x i32 > @shuffle_v8i32 (<8 x i32 > %a ) {
38
61
; CHECK-LABEL: shuffle_v8i32:
39
62
; CHECK: # %bb.0:
40
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0 )
41
- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0 )
42
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1 )
43
- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1 )
63
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0 )
64
+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0 )
65
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_1 )
66
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_1 )
44
67
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
45
68
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
46
69
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -50,13 +73,25 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
50
73
ret <8 x i32 > %shuffle
51
74
}
52
75
76
+ ; Single-input <8 x i32> shuffle of %a: result elements 0-3 read source
+ ; elements 2, 3, 0, 1 and result elements 4-7 are the identity (4..7).
+ ; The expected output loads a value from .LCPI5_0 into $xr1 and uses one
+ ; xvshuf.d on $xr0 (the .d form, although the element type is i32), with
+ ; no xvpermi.d.
+ define <8 x i32 > @shuffle_v8i32_same_lane (<8 x i32 > %a ) {
77
+ ; CHECK-LABEL: shuffle_v8i32_same_lane:
78
+ ; CHECK: # %bb.0:
79
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
80
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
81
+ ; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
82
+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
83
+ ; CHECK-NEXT: ret
84
+ %shuffle = shufflevector <8 x i32 > %a , <8 x i32 > poison, <8 x i32 > <i32 2 , i32 3 , i32 0 , i32 1 , i32 4 , i32 5 , i32 6 , i32 7 >
85
+ ret <8 x i32 > %shuffle
86
+ }
87
+
53
88
define <4 x i64 > @shuffle_v4i64 (<4 x i64 > %a ) {
54
89
; CHECK-LABEL: shuffle_v4i64:
55
90
; CHECK: # %bb.0:
56
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0 )
57
- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI3_0 )
58
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1 )
59
- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_1 )
91
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0 )
92
+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0 )
93
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1 )
94
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI6_1 )
60
95
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
61
96
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
62
97
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -66,13 +101,25 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
66
101
ret <4 x i64 > %shuffle
67
102
}
68
103
104
+ ; Single-input <4 x i64> shuffle of %a: result elements 0-1 read source
+ ; elements 1, 0 (swapped) and result elements 2-3 are the identity (2, 3).
+ ; The expected output loads a value from .LCPI7_0 into $xr1 and uses one
+ ; xvshuf.d on $xr0, with no xvpermi.d.
+ define <4 x i64 > @shuffle_v4i64_same_lane (<4 x i64 > %a ) {
105
+ ; CHECK-LABEL: shuffle_v4i64_same_lane:
106
+ ; CHECK: # %bb.0:
107
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
108
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
109
+ ; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
110
+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
111
+ ; CHECK-NEXT: ret
112
+ %shuffle = shufflevector <4 x i64 > %a , <4 x i64 > poison, <4 x i32 > <i32 1 , i32 0 , i32 2 , i32 3 >
113
+ ret <4 x i64 > %shuffle
114
+ }
115
+
69
116
define <8 x float > @shuffle_v8f32 (<8 x float > %a ) {
70
117
; CHECK-LABEL: shuffle_v8f32:
71
118
; CHECK: # %bb.0:
72
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0 )
73
- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0 )
74
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_1 )
75
- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_1 )
119
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0 )
120
+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI8_0 )
121
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1 )
122
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI8_1 )
76
123
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
77
124
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
78
125
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -82,13 +129,26 @@ define <8 x float> @shuffle_v8f32(<8 x float> %a) {
82
129
ret <8 x float > %shuffle
83
130
}
84
131
132
+ ; Single-input <8 x float> shuffle of %a with mask 3, 2, 0, 2, 3, 1, 2, 3.
+ ; The expected output loads a value from .LCPI9_0 into $xr1, applies
+ ; xvpermi.d $xr0, $xr0, 68 and then one xvshuf.w.
+ ; NOTE(review): every mask index is in 0-3, so result elements 4-7 also read
+ ; source elements 0-3 rather than 4-7, and the expected output carries an
+ ; extra xvpermi.d. Confirm this mask is what the "_same_lane" name intends;
+ ; if the mask changes, the expected output must be regenerated.
+ define <8 x float > @shuffle_v8f32_same_lane (<8 x float > %a ) {
133
+ ; CHECK-LABEL: shuffle_v8f32_same_lane:
134
+ ; CHECK: # %bb.0:
135
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
136
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI9_0)
137
+ ; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
138
+ ; CHECK-NEXT: xvshuf.w $xr1, $xr0, $xr0
139
+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
140
+ ; CHECK-NEXT: ret
141
+ %shuffle = shufflevector <8 x float > %a , <8 x float > poison, <8 x i32 > <i32 3 , i32 2 , i32 0 , i32 2 , i32 3 , i32 1 , i32 2 , i32 3 >
142
+ ret <8 x float > %shuffle
143
+ }
144
+
85
145
define <4 x double > @shuffle_v4f64 (<4 x double > %a ) {
86
146
; CHECK-LABEL: shuffle_v4f64:
87
147
; CHECK: # %bb.0:
88
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0 )
89
- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI5_0 )
90
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1 )
91
- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_1 )
148
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0 )
149
+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0 )
150
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1 )
151
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI10_1 )
92
152
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
93
153
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
94
154
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -97,3 +157,16 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
97
157
%shuffle = shufflevector <4 x double > %a , <4 x double > poison, <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 >
98
158
ret <4 x double > %shuffle
99
159
}
160
+
161
+ ; Single-input <4 x double> shuffle of %a with mask 3, 2, 0, 1.
+ ; The expected output loads a value from .LCPI11_0 into $xr1, applies
+ ; xvpermi.d $xr0, $xr0, 78 and then one xvshuf.d.
+ ; NOTE(review): result elements 0-1 read source elements 3, 2 and result
+ ; elements 2-3 read source elements 0, 1, so the two source halves are
+ ; exchanged, and the expected output carries an extra xvpermi.d. Confirm this
+ ; mask is what the "_same_lane" name intends; if the mask changes, the
+ ; expected output must be regenerated.
+ define <4 x double > @shuffle_v4f64_same_lane (<4 x double > %a ) {
162
+ ; CHECK-LABEL: shuffle_v4f64_same_lane:
163
+ ; CHECK: # %bb.0:
164
+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
165
+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
166
+ ; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
167
+ ; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
168
+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
169
+ ; CHECK-NEXT: ret
170
+ %shuffle = shufflevector <4 x double > %a , <4 x double > poison, <4 x i32 > <i32 3 , i32 2 , i32 0 , i32 1 >
171
+ ret <4 x double > %shuffle
172
+ }
0 commit comments