; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
3- ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
4- ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
5- ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
2+ ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
3+ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
4+ ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
5+ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
6+
; Two-source shuffle of <4 x bfloat> with mask <0, 1, 6, 3>: lane 2 is taken
; from %y, every other lane from %x. The expected lowering is a single
; vmerge.vvm driven by the constant mask 11 (0b1011), i.e. the lanes that keep
; the value from %x.
define <4 x bfloat> @shuffle_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
; CHECK-LABEL: shuffle_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 11
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x bfloat> %s
}
617
718define <4 x half > @shuffle_v4f16 (<4 x half > %x , <4 x half > %y ) {
819; CHECK-LABEL: shuffle_v4f16:
@@ -30,8 +41,8 @@ define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
3041define <4 x double > @shuffle_fv_v4f64 (<4 x double > %x ) {
3142; CHECK-LABEL: shuffle_fv_v4f64:
3243; CHECK: # %bb.0:
33- ; CHECK-NEXT: lui a0, %hi(.LCPI2_0 )
34- ; CHECK-NEXT: fld fa5, %lo(.LCPI2_0 )(a0)
44+ ; CHECK-NEXT: lui a0, %hi(.LCPI3_0 )
45+ ; CHECK-NEXT: fld fa5, %lo(.LCPI3_0 )(a0)
3546; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
3647; CHECK-NEXT: vmv.v.i v0, 9
3748; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -44,8 +55,8 @@ define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
4455define <4 x double > @shuffle_vf_v4f64 (<4 x double > %x ) {
4556; CHECK-LABEL: shuffle_vf_v4f64:
4657; CHECK: # %bb.0:
47- ; CHECK-NEXT: lui a0, %hi(.LCPI3_0 )
48- ; CHECK-NEXT: fld fa5, %lo(.LCPI3_0 )(a0)
58+ ; CHECK-NEXT: lui a0, %hi(.LCPI4_0 )
59+ ; CHECK-NEXT: fld fa5, %lo(.LCPI4_0 )(a0)
4960; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
5061; CHECK-NEXT: vmv.v.i v0, 6
5162; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -92,8 +103,8 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
92103define <4 x double > @vrgather_shuffle_vv_v4f64 (<4 x double > %x , <4 x double > %y ) {
93104; CHECK-LABEL: vrgather_shuffle_vv_v4f64:
94105; CHECK: # %bb.0:
95- ; CHECK-NEXT: lui a0, %hi(.LCPI6_0 )
96- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0 )
106+ ; CHECK-NEXT: lui a0, %hi(.LCPI7_0 )
107+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0 )
97108; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
98109; CHECK-NEXT: vle16.v v14, (a0)
99110; CHECK-NEXT: vmv.v.i v0, 8
@@ -109,8 +120,8 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
109120define <4 x double > @vrgather_shuffle_xv_v4f64 (<4 x double > %x ) {
110121; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
111122; CHECK: # %bb.0:
112- ; CHECK-NEXT: lui a0, %hi(.LCPI7_0 )
113- ; CHECK-NEXT: fld fa5, %lo(.LCPI7_0 )(a0)
123+ ; CHECK-NEXT: lui a0, %hi(.LCPI8_0 )
124+ ; CHECK-NEXT: fld fa5, %lo(.LCPI8_0 )(a0)
114125; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
115126; CHECK-NEXT: vid.v v10
116127; CHECK-NEXT: vrsub.vi v12, v10, 4
@@ -129,8 +140,8 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
129140; CHECK: # %bb.0:
130141; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
131142; CHECK-NEXT: vid.v v10
132- ; CHECK-NEXT: lui a0, %hi(.LCPI8_0 )
133- ; CHECK-NEXT: fld fa5, %lo(.LCPI8_0 )(a0)
143+ ; CHECK-NEXT: lui a0, %hi(.LCPI9_0 )
144+ ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0 )(a0)
134145; CHECK-NEXT: li a0, 3
135146; CHECK-NEXT: vmul.vx v12, v10, a0
136147; CHECK-NEXT: vmv.v.i v0, 3
@@ -143,6 +154,28 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
143154 ret <4 x double > %s
144155}
145156
; Extracts the contiguous lanes 1..4 of an <8 x bfloat> into a <4 x bfloat>.
; The expected lowering is one vslidedown.vi by 1, executed at the source
; vector's VL of 8.
define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_1(<8 x bfloat> %x) {
; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x bfloat> %s
}
167+
; Extracts the contiguous lanes 3..6 of an <8 x bfloat> into a <4 x bfloat>.
; The expected lowering is one vslidedown.vi by 3, executed at the source
; vector's VL of 8.
define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_3(<8 x bfloat> %x) {
; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  ret <4 x bfloat> %s
}
178+
146179define <4 x half > @shuffle_v8f16_to_vslidedown_1 (<8 x half > %x ) {
147180; CHECK-LABEL: shuffle_v8f16_to_vslidedown_1:
148181; CHECK: # %bb.0: # %entry
@@ -176,6 +209,16 @@ entry:
176209 ret <2 x float > %s
177210}
178211
; Single-source shuffle of <4 x bfloat> with mask <1, 2, 3, undef>: every
; defined lane reads the next-higher source lane and the last lane is
; unconstrained. The expected lowering is one vslidedown.vi by 1.
define <4 x bfloat> @slidedown_v4bf16(<4 x bfloat> %x) {
; CHECK-LABEL: slidedown_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
  ret <4 x bfloat> %s
}
221+
179222define <4 x half > @slidedown_v4f16 (<4 x half > %x ) {
180223; CHECK-LABEL: slidedown_v4f16:
181224; CHECK: # %bb.0:
@@ -265,6 +308,50 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
265308 ret <8 x double > %s
266309}
267310
; Single-source permute of <4 x bfloat> with mask <1, 2, 0, 1>. The expected
; lowering builds the index vector from one scalar (lui 4096 + addi 513 =
; 0x01000201, whose little-endian bytes are 1, 2, 0, 1), widens the byte
; indices to e16 with vsext.vf2, and performs a vrgather.vv.
define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v9
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x bfloat> %s
}
326+
; Two-source shuffle of <4 x bfloat> with mask <1, 2, 0, 5>: lanes 0..2 come
; from %x and lane 3 is element 1 of %y. The expected lowering loads the index
; vector from the constant pool, gathers from %x with vrgather.vv, then
; overwrites lane 3 (mask 8) with a masked vrgather.vi from %y at index 1.
define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI25_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI25_0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v11, (a0)
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x bfloat> %s
}
342+
; Splat of lane 1 of a <4 x bfloat> that was just loaded from memory. The
; expected lowering skips the vector load entirely: it does a scalar halfword
; load at byte offset 2 (element 1) and broadcasts it with vmv.v.x.
define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) {
; CHECK-LABEL: vrgather_shuffle_vx_v4bf16_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lh a0, 2(a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    ret
  %v = load <4 x bfloat>, ptr %p
  %s = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x bfloat> %s
}
354+
268355define <4 x half > @vrgather_permute_shuffle_vu_v4f16 (<4 x half > %x ) {
269356; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
270357; CHECK: # %bb.0:
@@ -284,8 +371,8 @@ define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
284371define <4 x half > @vrgather_shuffle_vv_v4f16 (<4 x half > %x , <4 x half > %y ) {
285372; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
286373; CHECK: # %bb.0:
287- ; CHECK-NEXT: lui a0, %hi(.LCPI21_0 )
288- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI21_0 )
374+ ; CHECK-NEXT: lui a0, %hi(.LCPI28_0 )
375+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0 )
289376; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
290377; CHECK-NEXT: vle16.v v11, (a0)
291378; CHECK-NEXT: vmv.v.i v0, 8
0 commit comments