@@ -1,16 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
 ; RUN: -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
 ; RUN: -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN: -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN: -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
 
 declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)
 
@@ -278,6 +290,62 @@ define <8 x i64> @strided_vpload_v8i64(ptr %ptr, i32 signext %stride, <8 x i1> %
   ret <8 x i64> %load
 }
 
+declare <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr, i32, <2 x i1>, i32)
+
+define <2 x bfloat> @strided_vpload_v2bf16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
+  ret <2 x bfloat> %load
+}
+
+define <2 x bfloat> @strided_vpload_v2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
+  ret <2 x bfloat> %load
+}
+
+declare <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr, i32, <4 x i1>, i32)
+
+define <4 x bfloat> @strided_vpload_v4bf16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
+  ret <4 x bfloat> %load
+}
+
+declare <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr, i32, <8 x i1>, i32)
+
+define <8 x bfloat> @strided_vpload_v8bf16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %load
+}
+
+define <8 x bfloat> @strided_vpload_v8bf16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8bf16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %load
+}
+
 declare <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr, i32, <2 x i1>, i32)
 
 define <2 x half> @strided_vpload_v2f16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -477,10 +545,10 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB40_2
+; CHECK-NEXT:    bltu a2, a4, .LBB45_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB40_2:
+; CHECK-NEXT:  .LBB45_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -505,10 +573,10 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB41_2
+; CHECK-NEXT:    bltu a2, a4, .LBB46_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB41_2:
+; CHECK-NEXT:  .LBB46_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -533,21 +601,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    li a5, 32
 ; CHECK-RV32-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV32-NEXT:    mv a3, a4
-; CHECK-RV32-NEXT:    bltu a4, a5, .LBB42_2
+; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_2
 ; CHECK-RV32-NEXT:  # %bb.1:
 ; CHECK-RV32-NEXT:    li a3, 32
-; CHECK-RV32-NEXT:  .LBB42_2:
+; CHECK-RV32-NEXT:  .LBB47_2:
 ; CHECK-RV32-NEXT:    mul a6, a3, a2
 ; CHECK-RV32-NEXT:    addi a5, a4, -32
 ; CHECK-RV32-NEXT:    sltu a7, a4, a5
 ; CHECK-RV32-NEXT:    addi a7, a7, -1
 ; CHECK-RV32-NEXT:    and a7, a7, a5
 ; CHECK-RV32-NEXT:    li a5, 16
 ; CHECK-RV32-NEXT:    add a6, a1, a6
-; CHECK-RV32-NEXT:    bltu a7, a5, .LBB42_4
+; CHECK-RV32-NEXT:    bltu a7, a5, .LBB47_4
 ; CHECK-RV32-NEXT:  # %bb.3:
 ; CHECK-RV32-NEXT:    li a7, 16
-; CHECK-RV32-NEXT:  .LBB42_4:
+; CHECK-RV32-NEXT:  .LBB47_4:
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV32-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
@@ -556,10 +624,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    sltu a3, a3, a6
 ; CHECK-RV32-NEXT:    addi a3, a3, -1
 ; CHECK-RV32-NEXT:    and a3, a3, a6
-; CHECK-RV32-NEXT:    bltu a4, a5, .LBB42_6
+; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_6
 ; CHECK-RV32-NEXT:  # %bb.5:
 ; CHECK-RV32-NEXT:    li a4, 16
-; CHECK-RV32-NEXT:  .LBB42_6:
+; CHECK-RV32-NEXT:  .LBB47_6:
 ; CHECK-RV32-NEXT:    mul a5, a4, a2
 ; CHECK-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 2
@@ -583,21 +651,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    li a5, 32
 ; CHECK-RV64-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV64-NEXT:    mv a4, a3
-; CHECK-RV64-NEXT:    bltu a3, a5, .LBB42_2
+; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_2
 ; CHECK-RV64-NEXT:  # %bb.1:
 ; CHECK-RV64-NEXT:    li a4, 32
-; CHECK-RV64-NEXT:  .LBB42_2:
+; CHECK-RV64-NEXT:  .LBB47_2:
 ; CHECK-RV64-NEXT:    mul a6, a4, a2
 ; CHECK-RV64-NEXT:    addi a5, a3, -32
 ; CHECK-RV64-NEXT:    sltu a7, a3, a5
 ; CHECK-RV64-NEXT:    addi a7, a7, -1
 ; CHECK-RV64-NEXT:    and a7, a7, a5
 ; CHECK-RV64-NEXT:    li a5, 16
 ; CHECK-RV64-NEXT:    add a6, a1, a6
-; CHECK-RV64-NEXT:    bltu a7, a5, .LBB42_4
+; CHECK-RV64-NEXT:    bltu a7, a5, .LBB47_4
 ; CHECK-RV64-NEXT:  # %bb.3:
 ; CHECK-RV64-NEXT:    li a7, 16
-; CHECK-RV64-NEXT:  .LBB42_4:
+; CHECK-RV64-NEXT:  .LBB47_4:
 ; CHECK-RV64-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV64-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
@@ -606,10 +674,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    sltu a4, a4, a6
 ; CHECK-RV64-NEXT:    addi a4, a4, -1
 ; CHECK-RV64-NEXT:    and a4, a4, a6
-; CHECK-RV64-NEXT:    bltu a3, a5, .LBB42_6
+; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_6
 ; CHECK-RV64-NEXT:  # %bb.5:
 ; CHECK-RV64-NEXT:    li a3, 16
-; CHECK-RV64-NEXT:  .LBB42_6:
+; CHECK-RV64-NEXT:  .LBB47_6:
 ; CHECK-RV64-NEXT:    mul a5, a3, a2
 ; CHECK-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 2
@@ -659,12 +727,19 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
-; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
-; CHECK-NO-OPT:       # %bb.0:
-; CHECK-NO-OPT-NEXT:    flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
-; CHECK-NO-OPT-NEXT:    vfmv.v.f v8, fa5
-; CHECK-NO-OPT-NEXT:    ret
+; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NO-OPT-ZVFH:       # %bb.0:
+; CHECK-NO-OPT-ZVFH-NEXT:    flh fa5, 0(a0)
+; CHECK-NO-OPT-ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NO-OPT-ZVFH-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NO-OPT-ZVFH-NEXT:    ret
+;
+; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NO-OPT-ZVFHMIN:       # %bb.0:
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    lh a0, 0(a0)
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
   ret <4 x half> %load
 }