diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index f1f64f77ee71a..2fb2be1e77793 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -111,6 +111,7 @@ Changes to the RISC-V Backend extension. * Adds experimental assembler support for the Qualcomm 'Xqccmp' extension, which is a frame-pointer convention compatible version of Zcmp. +* Added non-quadratic ``log-vrgather`` cost model for ``vrgather.vv`` instruction Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 24828cde28079..6ebabe9dc4b2d 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1445,6 +1445,10 @@ def FeatureUnalignedVectorMem "true", "Has reasonably performant unaligned vector " "loads and stores">; +def TuneNLogNVRGather + : SubtargetFeature<"log-vrgather", "RISCVVRGatherCostModel", "NLog2N", + "Has vrgather.vv with LMUL*log2(LMUL) latency">; + def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6a259e4b0334c..6d3241e79adfe 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2869,10 +2869,19 @@ InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv -/// is generally quadratic in the number of vreg implied by LMUL. Note that +/// may be quadratic in the number of vreg implied by LMUL, and is assumed to +/// be by default. VRGatherCostModel reflects available options. Note that /// operand (index and possibly mask) are handled separately. InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { - return getLMULCost(VT) * getLMULCost(VT); + auto LMULCost = getLMULCost(VT); + bool Log2CostModel = + Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N; + if (Log2CostModel && LMULCost.isValid()) { + unsigned Log = Log2_64(*LMULCost.getValue()); + if (Log > 0) + return LMULCost * Log; + } + return LMULCost * LMULCost; } /// Return the cost of a vrgather.vi (or vx) instruction for the type VT. diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index c2d98c2180299..9d48adeec5e86 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -494,6 +494,7 @@ def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8", FeatureUnalignedScalarMem, FeatureUnalignedVectorMem]), [TuneNoDefaultUnroll, + TuneNLogNVRGather, TuneOptimizedZeroStrideLoad, TunePostRAScheduler]>; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index a059ed9202e33..cc9aef2d52556 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -84,11 +84,16 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { VentanaVeyron, MIPSP8700, }; + enum RISCVVRGatherCostModelEnum : uint8_t { + Quadratic, + NLog2N, + }; // clang-format on private: virtual void anchor(); RISCVProcFamilyEnum RISCVProcFamily = Others; + RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic; #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool ATTRIBUTE = DEFAULT; @@ -155,6 +160,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { /// initializeProperties(). RISCVProcFamilyEnum getProcFamily() const { return RISCVProcFamily; } + RISCVVRGatherCostModelEnum getVRGatherCostModel() const { return RISCVVRGatherCostModel; } + #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool GETTER() const { return ATTRIBUTE; } #include "RISCVGenSubtargetInfo.inc" diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll index 105cc8a1c2ba3..c4f6f3f1d874b 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll @@ -3,6 +3,8 @@ ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s ; RUN: opt < %s -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefix=SIZE ; RUN: opt < %s -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefix=SIZE +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh,+log-vrgather | FileCheck %s --check-prefix=LOG-VRG +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin,+log-vrgather | FileCheck %s --check-prefix=LOG-VRG ; Check that we don't crash querying costs when vectors are not enabled. ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv32 @@ -44,6 +46,24 @@ define void @general_permute_single_source() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; LOG-VRG-LABEL: 'general_permute_single_source' +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> @@ -133,6 +153,37 @@ define void @general_permute_two_source() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; LOG-VRG-LABEL: 'general_permute_two_source' +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2half = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4half = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8half = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2float = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4float = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2double = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> +; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 70fbda47a14a1..ff29777a3ec37 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -35,6 +35,7 @@ ; CHECK: h - 'H' (Hypervisor). ; CHECK: i - 'I' (Base Integer Instruction Set). ; CHECK: ld-add-fusion - Enable LD+ADD macrofusion. +; CHECK: log-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency ; CHECK: lui-addi-fusion - Enable LUI+ADDI macro fusion. ; CHECK: m - 'M' (Integer Multiplication and Division). ; CHECK: mips-p8700 - MIPS p8700 processor.