Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ Changes to the RISC-V Backend
extension.
* Adds experimental assembler support for the Qualcomm 'Xqccmp' extension, which
is a frame-pointer convention compatible version of Zcmp.
* Added non-quadratic ``log-vrgather`` cost model for ``vrgather.vv`` instruction

Changes to the WebAssembly Backend
----------------------------------
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -1445,6 +1445,10 @@ def FeatureUnalignedVectorMem
"true", "Has reasonably performant unaligned vector "
"loads and stores">;

def TuneNLogNVRGather
: SubtargetFeature<"log-vrgather", "RISCVVRGatherCostModel", "NLog2N",
"Has vrgather.vv with LMUL*log2(LMUL) latency">;

def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;

Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2869,10 +2869,19 @@ InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {


/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vreg implied by LMUL. Note that
/// may be quadratic in the number of vreg implied by LMUL, and is assumed to
/// be by default. VRGatherCostModel reflects available options. Note that
/// operand (index and possibly mask) are handled separately.
InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
return getLMULCost(VT) * getLMULCost(VT);
auto LMULCost = getLMULCost(VT);
bool Log2CostModel =
Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
if (Log2CostModel && LMULCost.isValid()) {
unsigned Log = Log2_64(*LMULCost.getValue());
if (Log > 0)
return LMULCost * Log;
}
return LMULCost * LMULCost;
}

/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8",
FeatureUnalignedScalarMem,
FeatureUnalignedVectorMem]),
[TuneNoDefaultUnroll,
TuneNLogNVRGather,
TuneOptimizedZeroStrideLoad,
TunePostRAScheduler]>;

Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
VentanaVeyron,
MIPSP8700,
};
enum RISCVVRGatherCostModelEnum : uint8_t {
Quadratic,
NLog2N,
};
// clang-format on
private:
virtual void anchor();

RISCVProcFamilyEnum RISCVProcFamily = Others;
RISCVVRGatherCostModelEnum RISCVVRGatherCostModel = Quadratic;

#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool ATTRIBUTE = DEFAULT;
Expand Down Expand Up @@ -155,6 +160,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
/// initializeProperties().
RISCVProcFamilyEnum getProcFamily() const { return RISCVProcFamily; }

RISCVVRGatherCostModelEnum getVRGatherCostModel() const { return RISCVVRGatherCostModel; }

#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool GETTER() const { return ATTRIBUTE; }
#include "RISCVGenSubtargetInfo.inc"
Expand Down
51 changes: 51 additions & 0 deletions llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefix=SIZE
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefix=SIZE
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh,+log-vrgather | FileCheck %s --check-prefix=LOG-VRG
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin,+log-vrgather | FileCheck %s --check-prefix=LOG-VRG
; Check that we don't crash querying costs when vectors are not enabled.
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32

Expand Down Expand Up @@ -44,6 +46,24 @@ define void @general_permute_single_source() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 0>
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; LOG-VRG-LABEL: 'general_permute_single_source'
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 9, i32 6, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 5, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 11, i32 11, i32 11, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 4, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 12, i32 12, i32 12, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 1>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
%v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
Expand Down Expand Up @@ -133,6 +153,37 @@ define void @general_permute_two_source() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; LOG-VRG-LABEL: 'general_permute_two_source'
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2half = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4half = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8half = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2float = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4float = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2double = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 3, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 14, i32 6, i32 5, i32 4, i32 13, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 17, i32 11, i32 20, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; LOG-VRG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 0>
%v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 5, i32 7, i32 1, i32 0>
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/features-info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
; CHECK: h - 'H' (Hypervisor).
; CHECK: i - 'I' (Base Integer Instruction Set).
; CHECK: ld-add-fusion - Enable LD+ADD macrofusion.
; CHECK: log-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency
; CHECK: lui-addi-fusion - Enable LUI+ADDI macro fusion.
; CHECK: m - 'M' (Integer Multiplication and Division).
; CHECK: mips-p8700 - MIPS p8700 processor.
Expand Down
Loading