diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index bf333b7b79016..076ed173f64e2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1076,7 +1076,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE, - ISD::VECTOR_REVERSE}, + ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom); MVT EltVT = VT.getVectorElementType(); if (isTypeLegal(EltVT)) diff --git a/llvm/test/Analysis/CostModel/RISCV/splice.ll b/llvm/test/Analysis/CostModel/RISCV/splice.ll index 8d7d1576a532d..ddfaa8c13d425 100644 --- a/llvm/test/Analysis/CostModel/RISCV/splice.ll +++ b/llvm/test/Analysis/CostModel/RISCV/splice.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s ; RUN: opt < %s -passes="print" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefix=SIZE ; RUN: opt < %s -passes="print" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefix=SIZE @@ -34,6 +34,13 @@ define void @vector_splice() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16i64 = call @llvm.vector.splice.nxv16i64( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32i64 = call @llvm.vector.splice.nxv32i64( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64i64 = call @llvm.vector.splice.nxv64i64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1bf16 = call @llvm.vector.splice.nxv1bf16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2bf16 = call @llvm.vector.splice.nxv2bf16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4bf16 = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8bf16 = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16bf16 = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32bf16 = call @llvm.vector.splice.nxv32bf16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64bf16 = call @llvm.vector.splice.nxv64bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call @llvm.vector.splice.nxv1f16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call @llvm.vector.splice.nxv2f16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call @llvm.vector.splice.nxv4f16( zeroinitializer, zeroinitializer, i32 -1) @@ -86,6 +93,13 @@ define void @vector_splice() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i64 = call @llvm.vector.splice.nxv16i64( zeroinitializer, zeroinitializer, i32 -1) ; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i64 = call @llvm.vector.splice.nxv32i64( zeroinitializer, zeroinitializer, i32 -1) ; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i64 = call @llvm.vector.splice.nxv64i64( zeroinitializer, zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv1bf16 = call @llvm.vector.splice.nxv1bf16( zeroinitializer, zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv2bf16 = call @llvm.vector.splice.nxv2bf16( zeroinitializer, zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv4bf16 = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv8bf16 = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv16bf16 = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv32bf16 = call @llvm.vector.splice.nxv32bf16( zeroinitializer, zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv64bf16 = call @llvm.vector.splice.nxv64bf16( zeroinitializer, zeroinitializer, i32 -1) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call @llvm.vector.splice.nxv1f16( zeroinitializer, zeroinitializer, i32 -1) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call @llvm.vector.splice.nxv2f16( zeroinitializer, zeroinitializer, i32 -1) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call @llvm.vector.splice.nxv4f16( zeroinitializer, zeroinitializer, i32 -1) @@ -141,6 +155,14 @@ define void @vector_splice() { %splice.nxv32i64 = call @llvm.vector.splice.nxv32i64( zeroinitializer, zeroinitializer, i32 -1) %splice.nxv64i64 = call @llvm.vector.splice.nxv64i64( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv1bf16 = call @llvm.vector.splice.nxv1bf16( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv2bf16 = call @llvm.vector.splice.nxv2bf16( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv4bf16 = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv8bf16 = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv16bf16 = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv32bf16 = call @llvm.vector.splice.nxv32bf16( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv64bf16 = call @llvm.vector.splice.nxv64bf16( zeroinitializer, zeroinitializer, i32 -1) + %splice.nxv1f16 = call @llvm.vector.splice.nxv1f16( zeroinitializer, zeroinitializer, i32 -1) %splice.nxv2f16 = call @llvm.vector.splice.nxv2f16( zeroinitializer, zeroinitializer, i32 -1) %splice.nxv4f16 = call @llvm.vector.splice.nxv4f16( zeroinitializer, zeroinitializer, i32 -1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll index 8cb6fed2f588a..5460caea196cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zfh,+zvfh < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zfh,+zvfh < %s | FileCheck %s +; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s +; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s ; Tests assume VLEN=128 or vscale_range_min=2. @@ -1533,6 +1535,333 @@ define @splice_nxv8i64_offset_max( %a, %res } +declare @llvm.vector.splice.nxv1bf16(, , i32) + +define @splice_nxv1bf16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1bf16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv1bf16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv1bf16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1bf16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv1bf16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv1bf16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1bf16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv1bf16( %a, %b, i32 -2) + ret %res +} + +define @splice_nxv1bf16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1bf16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv1bf16( %a, %b, i32 1) + ret %res +} + +declare @llvm.vector.splice.nxv2bf16(, , i32) + +define @splice_nxv2bf16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2bf16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv2bf16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv2bf16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2bf16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv2bf16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2bf16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2bf16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv2bf16( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2bf16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2bf16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv2bf16( %a, %b, i32 3) + ret %res +} + +declare @llvm.vector.splice.nxv4bf16(, , i32) + +define @splice_nxv4bf16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4bf16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv4bf16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv4bf16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4bf16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv4bf16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4bf16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4bf16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 8 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv4bf16( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4bf16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4bf16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv4bf16( %a, %b, i32 7) + ret %res +} + +declare @llvm.vector.splice.nxv8bf16(, , i32) + +define @splice_nxv8bf16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8bf16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv8bf16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8bf16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8bf16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv8bf16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8bf16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8bf16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 16 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv8bf16( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8bf16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8bf16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv8bf16( %a, %b, i32 15) + ret %res +} + +declare @llvm.vector.splice.nxv16bf16(, , i32) + +define @splice_nxv16bf16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16bf16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv16bf16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv16bf16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16bf16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv16bf16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv16bf16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16bf16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv16bf16( %a, %b, i32 -32) + ret %res +} + +define @splice_nxv16bf16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16bf16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -31 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 31 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv16bf16( %a, %b, i32 31) + ret %res +} + +declare @llvm.vector.splice.nxv32bf16(, , i32) + +define @splice_nxv32bf16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32bf16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv32bf16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv32bf16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32bf16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv32bf16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv32bf16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32bf16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -64 +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv32bf16( %a, %b, i32 -64) + ret %res +} + +define @splice_nxv32bf16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32bf16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -63 +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.vector.splice.nxv32bf16( %a, %b, i32 63) + ret %res +} + declare @llvm.vector.splice.nxv1f16(, , i32) define @splice_nxv1f16_offset_zero( %a, %b) #0 {