Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/MC/MCSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
for (; I != E; ++I) {
if (!I->ReleaseAtCycle)
continue;
assert(I->ReleaseAtCycle > I->AcquireAtCycle);
unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
double Temp = NumUnits * 1.0 / I->ReleaseAtCycle;
double Temp = NumUnits * 1.0 / (I->ReleaseAtCycle - I->AcquireAtCycle);
Throughput = Throughput ? std::min(*Throughput, Temp) : Temp;
}
if (Throughput)
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,19 @@ char RISCVInsertWriteVXRM::ID = 0;
INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
false, false)

/// Translate an RVV pseudo opcode to its MC opcode, memoizing the result.
///
/// \param VPseudoOpcode the opcode forwarded to RISCV::getRVVMCOpcode.
/// \returns the value RISCV::getRVVMCOpcode produced for \p VPseudoOpcode;
/// repeated calls with the same argument are answered from the cache.
static unsigned getAndCacheRVVMCOpcode(unsigned VPseudoOpcode) {
  // VPseudo opcode -> MC opcode
  // NOTE(review): the cache is a function-local static with no locking, so
  // it is shared by every caller in the process. Confirm this pass is never
  // run from multiple threads at once, or move the cache into the pass.
  static DenseMap<unsigned, unsigned> OpcodeCache;

  // One try_emplace replaces the former find() + insert() pair, so a cache
  // miss costs a single hash lookup instead of two. The slot is filled in
  // only when the key was not already present.
  auto [It, Inserted] = OpcodeCache.try_emplace(VPseudoOpcode);
  if (Inserted)
    It->second = RISCV::getRVVMCOpcode(VPseudoOpcode);
  return It->second;
}

static bool ignoresVXRM(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
switch (getAndCacheRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VNCLIP_WI:
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/deserialize-obj-file.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -start-before-phase=measure --mode=latency --dry-run-measurement --use-dummy-perf-counters \
# RUN: --dump-object-to-disk=%t.o %s > %t.result.yml
# RUN: llvm-objdump -d %t.o | FileCheck %s

# CHECK: vsetvli {{.*}}, zero, e32, m1, tu, ma
# CHECK: fsrmi {{.*}}, 0x0
# CHECK: vfwredusum.vs

---
mode: latency
key:
instructions:
- 'PseudoVFWREDUSUM_VS_M1_E32 V13 V13 V13 V7 i_0x0 i_0xffffffffffffffff i_0x5 i_0x0'
config: 'vtype = {FRM: rne, AVL: VLMAX, SEW: e32, Policy: tu/mu}'
register_initial_values:
- 'V13=0x0'
- 'V7=0x0'
cpu_name: sifive-x280
llvm_triple: riscv64
num_repetitions: 100
measurements: []
error: actual measurements skipped.
info: ''
assembled_snippet: 57730009F3532000D796D3C6D796D3C6D796D3C6D796D3C6739023008280
object_file:
compression: zlib
original_size: 5632
compressed_bytes: 'eJztWDFvEzEUfk6btEgMoWVAogMSHSokrJybRrCgIFQQEjAUKiYU3V3s9kQul5zN6egC4hd0YmTuL2FGYuB3oK5IYPt8SXBcIbYO/qTn973Pfs8v5zflw/6zxw2EoAaCc5hHC7heuaa0vmZ9WHef9PDw8PDw8PDw8PDw8PDwuGR4zeHK+ctb8OPz96/eLo/x09vw6ePDFgLIEx4XgH7J11ptN/Oi103IJBikZNIZhIoxMiGDoVpipRWBXE6SmOdEE0bHMU00Z8dB5dJkrFkUVi7SrqC7hM1YaVivO5wxNmNm11Qs5iWLUUDumXojster6S6p2V4wo72uZiVnskLEZI2O/EEqnKZhHE+zqdxWc9o284pODgCVCN282tDaDaN/+cdfUWvq68HP3+7dxpJydIEe6XV1SX+j1+aSfkfaxkKdus8tE9+3b8GClgL2S3pEecKfjln2inIBWE8BDoXIk+idoBxYlgEeZ4LiJy8O73IRxm/lKToKMT0esDxMKWAuchFG0r9Pld8eYqKWALZL3HF/iv/Ec2krDv10s/IjS7efCRlr2QXMgy+9a/vvEDtq6rxrDtFxVs2P7H9yUf6alWDnPzKaPSlnG5XfsfR1K34A1TT1Lb3cnPen+4Bquur8Wj903K3wzdx/ttB3y5H/B0zRwDY='
...
10 changes: 10 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT

# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
# LATENCY-NOT: PseudoVCPOP_M_B32

# RTHROUGHPUT: PseudoVCOMPRESS_VM_M2_E8
# RTHROUGHPUT: PseudoVCPOP_M_B32
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# Make sure none of the configs has an SEW other than e32
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
# CHECK: SEW: e32
# CHECK-NOT: SEW: e{{(8|16|64)}}
6 changes: 6 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK \
# RUN: --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}'
# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}'
# CHECK-NOT: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e(32|64), Policy: ta/mu}'
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \
# RUN: FileCheck %s

# Make sure reduction ops don't have aliasing between vd and vs1
# CHECK: instructions:
# CHECK-NEXT: PseudoVWREDSUMU_VS_M8_E32
# CHECK-NOT: V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]]
6 changes: 6 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \
# RUN: FileCheck %s

# Make sure all def / use operands are the same in latency mode.
# CHECK: instructions:
# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}}
12 changes: 12 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VXRM
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRM

# VXRM: PseudoVAADDU_VV_M1
# VXRM: VXRM: rnu
# VXRM-NOT: VXRM: {{(rne|rdn|rod)}}

# FRM: PseudoVFADD_VFPR16_M1_E16
# FRM: FRM: rne
# FRM-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
30 changes: 30 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSHA2MS_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --allow-empty --check-prefix=EMPTY

# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64
# ZVK-NOT: SEW: e{{(8|16)}}
# ZVK: SEW: e32
# ZVK-NOT: SEW: e64

# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256)

# ZVKNH-NOT: SEW: e{{(8|16)}}
# ZVKNH: SEW: e{{(32|64)}}

# EMPTY-NOT: SEW: e{{(8|16|32|64)}}
41 changes: 41 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVMUL_VV_MF4_MASK \
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRAC-LMUL
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVFADD_VFPR16_M1_E16,PseudoVFADD_VV_M2_E16,PseudoVFCLASS_V_MF2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=FP
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSEXT_VF8_M2,PseudoVZEXT_VF8_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=VEXT
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVFREDUSUM_VS_M1_E16 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=VFRED --allow-empty

# Make sure only the supported SEWs are generated for fractional LMUL.
# FRAC-LMUL: PseudoVMUL_VV_MF4_MASK
# FRAC-LMUL: SEW: e8
# FRAC-LMUL: SEW: e16
# FRAC-LMUL-NOT: SEW: e{{(32|64)}}

# Make sure only SEWs that are equal to the supported FLEN are generated
# FP: PseudoVFADD_VFPR16_M1_E16
# FP-NOT: SEW: e8
# FP: PseudoVFADD_VV_M2_E16
# FP-NOT: SEW: e8
# FP: PseudoVFCLASS_V_MF2
# FP-NOT: SEW: e8

# VS/ZEXT can only operate on SEW that will not lead to invalid EEW on the
# source operand.
# VEXT: PseudoVSEXT_VF8_M2
# VEXT-NOT: SEW: e8
# VEXT-NOT: SEW: e16
# VEXT-NOT: SEW: e32
# VEXT: SEW: e64
# VEXT: PseudoVZEXT_VF8_M2
# VEXT-NOT: SEW: e8
# VEXT-NOT: SEW: e16
# VEXT-NOT: SEW: e32
# VEXT: SEW: e64

# P470 doesn't have Zvfh so 16-bit vfredusum shouldn't exist
# VFRED-NOT: PseudoVFREDUSUM_VS_M1_E16
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --riscv-vlmax-for-vl --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# Only allow VLMAX for AVL when -riscv-vlmax-for-vl is present
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
# CHECK: AVL: VLMAX
# CHECK-NOT: AVL: {{(simm5|<MCOperand: .*>)}}
13 changes: 13 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VFWREDUSUM
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVSSRL_VX_MF4 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VSSRL

# Make sure the correct VSETVL / VXRM write / FRM write instructions are generated
# VFWREDUSUM: vsetvli {{.*}}, zero, e32, m1, tu, ma
# VFWREDUSUM: fsrmi {{.*}}, 0x0

# VSSRL: vsetvli {{.*}}, zero, e8, mf4, tu, ma
# VSSRL: csrwi vxrm, 0x0
8 changes: 8 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 | FileCheck %s

# A simple check on object file serialization
# CHECK: object_file:
# CHECK-NEXT: compression: {{(zlib|zstd)}}
# CHECK-NEXT: original_size: {{[0-9]+}}
# CHECK-NEXT: compressed_bytes: '{{.*}}'
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-exegesis/X86/analysis-noise.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clusters-output-file="" -analysis-numpoints=3 | FileCheck %s
# XFAIL: *

# CHECK: DOCTYPE
# CHECK: [noise] Cluster (1 points)
Expand Down
Loading