From c9e159fe98bcfd4348a43a16966595f6fc6aacb7 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 5 Nov 2024 12:41:54 +0800 Subject: [PATCH] [RISCV] Handle zvfhmin/zvfbfmin in lowerVECTOR_SHUFFLEAsVSlide1 Most of lowerVECTOR_SHUFFLE lowers to nodes that work on f16 and bf16 vectors, with the exception of the vslide1 lowering which tries to emit vfslide1s. Handle this case as an integer vslide1 via fmv.x.h. Fixes #114893 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 18 +++++ .../rvv/fixed-vectors-shuffle-vslide1down.ll | 60 ++++++++++++++--- .../rvv/fixed-vectors-shuffle-vslide1up.ll | 66 +++++++++++++++---- 3 files changed, 123 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d2d03d4572dac..96490cdec6c69 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4817,6 +4817,24 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h + + // vslide1{down,up}.vx instead. + if (VT.getVectorElementType() == MVT::bf16 || + (VT.getVectorElementType() == MVT::f16 && + !Subtarget.hasVInstructionsF16())) { + MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); + Splat = + DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat); + V2 = DAG.getBitcast( + IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget)); + SDValue Vec = DAG.getNode( + IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL, + IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL); + Vec = DAG.getBitcast(ContainerVT, Vec); + return convertFromScalableVector(VT, Vec, DAG, Subtarget); + } + auto OpCode = IsVSlidedown ? (VT.isFloatingPoint() ? 
RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll index f531ff3a835e4..563b90dfa47ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -131,23 +133,61 @@ define <4 x i64> @vslide1down_4xi64(<4 x i64> %v, i64 %b) { ret <4 x i64> %v1 } -define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) { -; CHECK-LABEL: vslide1down_2xf16: +define <2 x bfloat> @vslide1down_2xbf16(<2 x bfloat> %v, bfloat %b) { +; CHECK-LABEL: vslide1down_2xbf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: vslide1down.vx 
v8, v8, a0 +; CHECK-NEXT: ret + %vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0 + %v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 1, i32 2> + ret <2 x bfloat> %v1 +} + +define <4 x bfloat> @vslide1down_4xbf16(<4 x bfloat> %v, bfloat %b) { +; CHECK-LABEL: vslide1down_4xbf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: ret + %vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0 + %v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4> + ret <4 x bfloat> %v1 +} + +define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) { +; ZVFH-LABEL: vslide1down_2xf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vslide1down_2xf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret %vb = insertelement <2 x half> poison, half %b, i64 0 %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 1, i32 2> ret <2 x half> %v1 } define <4 x half> @vslide1down_4xf16(<4 x half> %v, half %b) { -; CHECK-LABEL: vslide1down_4xf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vslide1down_4xf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vslide1down_4xf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret %vb = insertelement <4 x half> poison, half %b, i64 0 %v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4> ret <4 x half> %v1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll index b3390b6eeeccd..0f6d68dc1a6c7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -140,25 +142,67 @@ define <4 x i64> @vslide1up_4xi64(<4 x i64> %v, i64 %b) { ret <4 x i64> %v1 } -define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) { -; CHECK-LABEL: vslide1up_2xf16: +define <2 x bfloat> @vslide1up_2xbf16(<2 x bfloat> %v, bfloat %b) { +; CHECK-LABEL: vslide1up_2xbf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vslide1up.vx v9, v8, a0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %vb = insertelement <2 x half> poison, half %b, i64 0 - %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0> - ret <2 x half> %v1 + %vb = insertelement <2 x 
bfloat> poison, bfloat %b, i64 0 + %v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 2, i32 0> + ret <2 x bfloat> %v1 } -define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) { -; CHECK-LABEL: vslide1up_4xf16: +define <4 x bfloat> @vslide1up_4xbf16(<4 x bfloat> %v, bfloat %b) { +; CHECK-LABEL: vslide1up_4xbf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vslide1up.vx v9, v8, a0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret + %vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0 + %v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2> + ret <4 x bfloat> %v1 +} + +define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) { +; ZVFH-LABEL: vslide1up_2xf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0 +; ZVFH-NEXT: vmv1r.v v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vslide1up_2xf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0 +; ZVFHMIN-NEXT: vmv1r.v v8, v9 +; ZVFHMIN-NEXT: ret + %vb = insertelement <2 x half> poison, half %b, i64 0 + %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0> + ret <2 x half> %v1 +} + +define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) { +; ZVFH-LABEL: vslide1up_4xf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0 +; ZVFH-NEXT: vmv1r.v v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vslide1up_4xf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0 +; ZVFHMIN-NEXT: vmv1r.v v8, v9 +; ZVFHMIN-NEXT: ret %vb = insertelement <4 x half> poison, half %b, i64 0 %v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2> ret <4 x half> %v1