Skip to content

Commit bd1b6f8

Browse files
committed
Address review comments
1 parent 0c4d943 commit bd1b6f8

File tree

3 files changed

+69
-29
lines changed

3 files changed

+69
-29
lines changed
Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,59 @@
11
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
2-
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
2+
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | \
3+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
34
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
4-
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT
5+
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 \
6+
# RUN: --min-instructions=100 | \
7+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT1
8+
9+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
10+
# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 | \
11+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
12+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
13+
# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 \
14+
# RUN: --min-instructions=100 | \
15+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT2
16+
17+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
18+
# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 | \
19+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
20+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
21+
# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 \
22+
# RUN: --min-instructions=100 | \
23+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT3
24+
25+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
26+
# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 | \
27+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
28+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
29+
# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 \
30+
# RUN: --min-instructions=100 | \
31+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT4
32+
33+
# These instructions are only eligible under the inverse throughput mode.
534

635
# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
736
# LATENCY-NOT: PseudoVCPOP_M_B32
37+
# LATENCY-NOT: PseudoVRGATHEREI16_VV_M2_E32_M1
38+
# LATENCY-NOT: PseudoVRGATHER_VI_M2
39+
# LATENCY-NOT: PseudoVRGATHER_VV_M8_E32
40+
# LATENCY-NOT: PseudoVRGATHER_VX_M4
41+
# LATENCY-NOT: PseudoVSLIDE1UP_VX_M1
42+
# LATENCY-NOT: PseudoVSLIDEUP_VI_M2
43+
# LATENCY-NOT: PseudoVSLIDEUP_VX_M2
44+
# LATENCY-NOT: PseudoVNCLIPU_WI_M2
45+
# LATENCY-NOT: PseudoVNSRA_WI_M2
46+
# LATENCY-NOT: PseudoVNSRL_WI_M2
847

9-
# RTHROUGHPUT: PseudoVCOMPRESS_VM_M2_E8
10-
# RTHROUGHPUT: PseudoVCPOP_M_B32
48+
# RTHROUGHPUT1: PseudoVCOMPRESS_VM_M2_E8
49+
# RTHROUGHPUT1: PseudoVCPOP_M_B32
50+
# RTHROUGHPUT2: PseudoVRGATHEREI16_VV_M2_E32_M1
51+
# RTHROUGHPUT2: PseudoVRGATHER_VI_M2
52+
# RTHROUGHPUT2: PseudoVRGATHER_VV_M8_E32
53+
# RTHROUGHPUT2: PseudoVRGATHER_VX_M4
54+
# RTHROUGHPUT3: PseudoVSLIDE1UP_VX_M1
55+
# RTHROUGHPUT3: PseudoVSLIDEUP_VI_M2
56+
# RTHROUGHPUT3: PseudoVSLIDEUP_VX_M2
57+
# RTHROUGHPUT4: PseudoVNCLIPU_WI_M2
58+
# RTHROUGHPUT4: PseudoVNSRA_WI_M2
59+
# RTHROUGHPUT4: PseudoVNSRL_WI_M2

llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ Register RISCVExegesisPostprocessing::allocateGPRRegister(
8181
const MachineFunction &MF, const MachineRegisterInfo &MRI) {
8282
const auto &TRI = *MRI.getTargetRegisterInfo();
8383

84-
const TargetRegisterClass *GPRClass =
85-
TRI.getRegClass(RISCV::GPRJALRRegClassID);
84+
// Avoid allocating callee-saved registers: GPRTC happens to cover
85+
// nearly all of the caller-saved registers.
86+
const TargetRegisterClass *GPRClass = TRI.getRegClass(RISCV::GPRTCRegClassID);
8687
BitVector Candidates = TRI.getAllocatableSet(MF, GPRClass);
8788

8889
for (unsigned SetIdx : Candidates.set_bits()) {

llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -149,17 +149,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
149149
RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};
150150

151151
for (unsigned RegClassID : StandaloneRegClasses)
152-
for (unsigned Reg : RegInfo.getRegClass(RegClassID)) {
152+
for (unsigned Reg : RegInfo.getRegClass(RegClassID))
153153
AggregateRegisters.reset(Reg);
154-
}
155154

156155
// Initialize ELEN and VLEN.
157-
// FIXME: We could have obtained these two from RISCVSubtarget
156+
// FIXME: We could have obtained these two constants from RISCVSubtarget
158157
// but in order to get that from TargetMachine, we need a Function.
159-
const Triple &TT = State.getTargetMachine().getTargetTriple();
160-
ELEN = TT.isRISCV32() ? 32 : 64;
161-
162158
const MCSubtargetInfo &STI = State.getSubtargetInfo();
159+
ELEN = STI.checkFeatures("+zve64x") ? 64 : 32;
160+
163161
std::string ZvlQuery;
164162
for (unsigned I = 5U, Size = (1 << I); I < 17U; ++I, Size <<= 1) {
165163
ZvlQuery = "+zvl";
@@ -175,15 +173,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
175173
const BitVector &ForbiddenRegisters) const override;
176174
};
177175

178-
static bool isMaskedSibiling(unsigned MaskedOp, unsigned UnmaskedOp) {
176+
static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) {
179177
const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp);
180178
return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp;
181179
}
182180

183181
// There are primarily two kinds of opcodes that are not eligible
184182
// in a serial snippet:
185-
// (1) Only has a single use operand that can not be overlap with
186-
// the def operand.
183+
// (1) Has a use operand that cannot overlap with the def operand
184+
// (i.e. early clobber).
187185
// (2) The register file of the only use operand is different from
188186
// that of the def operand. For instance, use operand is vector and
189187
// the result is a scalar.
@@ -197,23 +195,15 @@ static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
197195
case RISCV::VCOMPRESS_VM:
198196
case RISCV::VCPOP_M:
199197
case RISCV::VCPOP_V:
198+
// The permutation instructions listed below cannot have destination
199+
// overlapping with the source.
200200
case RISCV::VRGATHEREI16_VV:
201201
case RISCV::VRGATHER_VI:
202202
case RISCV::VRGATHER_VV:
203203
case RISCV::VRGATHER_VX:
204204
case RISCV::VSLIDE1UP_VX:
205205
case RISCV::VSLIDEUP_VI:
206206
case RISCV::VSLIDEUP_VX:
207-
// The truncate instructions that arraive here are those who cannot
208-
// have any overlap between source and dest at all (i.e.
209-
// those whoe don't satisfy condition 2 and 3 in RVV spec
210-
// 5.2).
211-
case RISCV::VNCLIPU_WI:
212-
case RISCV::VNCLIPU_WV:
213-
case RISCV::VNCLIPU_WX:
214-
case RISCV::VNCLIP_WI:
215-
case RISCV::VNCLIP_WV:
216-
case RISCV::VNCLIP_WX:
217207
return true;
218208
default:
219209
return false;
@@ -372,8 +362,8 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
372362
const auto *RVVBase =
373363
RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
374364
if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
375-
isMaskedSibiling(VPseudoOpcode, RVVBase->Pseudo) ||
376-
isMaskedSibiling(RVVBase->Pseudo, VPseudoOpcode))) {
365+
isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) ||
366+
isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) {
377367
// There is an integrated SEW, remove all but the SEW pushed last.
378368
SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
379369
break;
@@ -395,7 +385,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
395385
}
396386
}
397387

398-
// The EEW for source operand in VSEXT and VZEXT is a fractional
388+
// The EEW for source operand in VSEXT and VZEXT is a fraction
399389
// of the SEW, hence only SEWs that will lead to valid EEW are allowed.
400390
if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
401391
if (*SEW / *Frac < MinSEW) {
@@ -411,7 +401,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
411401
Feature_HasStdExtZvksedBit,
412402
Feature_HasStdExtZvkshBit})) {
413403
if (*SEW != 32)
414-
// Zvknhb support SEW=64 as well.
404+
// Zvknhb supports SEW=64 as well.
415405
if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
416406
!isOpcodeAvailableIn(BaseOpcode,
417407
{Feature_HasStdExtZvknhaOrZvknhbBit})) {

0 commit comments

Comments
 (0)