Skip to content

Commit a70c435

Browse files
committed
not full merge
1 parent e40610d commit a70c435

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+2925
-618
lines changed

llvm/lib/MC/MCSchedule.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,9 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
103103
for (; I != E; ++I) {
104104
if (!I->ReleaseAtCycle)
105105
continue;
106+
assert(I->ReleaseAtCycle > I->AcquireAtCycle);
106107
unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
107-
double Temp = NumUnits * 1.0 / I->ReleaseAtCycle;
108+
double Temp = NumUnits * 1.0 / (I->ReleaseAtCycle - I->AcquireAtCycle);
108109
Throughput = Throughput ? std::min(*Throughput, Temp) : Temp;
109110
}
110111
if (Throughput)

llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,19 @@ char RISCVInsertWriteVXRM::ID = 0;
227227
INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
228228
false, false)
229229

230+
static unsigned getAndCacheRVVMCOpcode(unsigned VPseudoOpcode) {
231+
// Memoizes RISCV::getRVVMCOpcode: maps VPseudo opcode -> MC opcode, filled on first lookup of each key. NOTE(review): function-local static with no locking visible here; confirm this is only reached single-threaded.
232+
static DenseMap<unsigned, unsigned> OpcodeCache;
233+
auto It = OpcodeCache.find(VPseudoOpcode);
234+
if (It != OpcodeCache.end())
235+
return It->second;
236+
unsigned MCOpcode = RISCV::getRVVMCOpcode(VPseudoOpcode);
237+
OpcodeCache.insert({VPseudoOpcode, MCOpcode});
238+
return MCOpcode;
239+
}
240+
230241
static bool ignoresVXRM(const MachineInstr &MI) {
231-
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
242+
switch (getAndCacheRVVMCOpcode(MI.getOpcode())) {
232243
default:
233244
return false;
234245
case RISCV::VNCLIP_WI:
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -start-before-phase=measure --mode=latency --dry-run-measurement --use-dummy-perf-counters \
2+
# RUN: --dump-object-to-disk=%t.o %s > %t.result.yml
3+
# RUN: llvm-objdump -d %t.o | FileCheck %s
4+
5+
# CHECK: vsetvli {{.*}}, zero, e32, m1, tu, ma
6+
# CHECK: fsrmi {{.*}}, 0x0
7+
# CHECK: vfwredusum.vs
8+
9+
---
10+
mode: latency
11+
key:
12+
instructions:
13+
- 'PseudoVFWREDUSUM_VS_M1_E32 V13 V13 V13 V7 i_0x0 i_0xffffffffffffffff i_0x5 i_0x0'
14+
config: 'vtype = {FRM: rne, AVL: VLMAX, SEW: e32, Policy: tu/mu}'
15+
register_initial_values:
16+
- 'V13=0x0'
17+
- 'V7=0x0'
18+
cpu_name: sifive-x280
19+
llvm_triple: riscv64
20+
num_repetitions: 100
21+
measurements: []
22+
error: actual measurements skipped.
23+
info: ''
24+
assembled_snippet: 57730009F3532000D796D3C6D796D3C6D796D3C6D796D3C6739023008280
25+
object_file:
26+
compression: zlib
27+
original_size: 5632
28+
compressed_bytes: 'eJztWDFvEzEUfk6btEgMoWVAogMSHSokrJybRrCgIFQQEjAUKiYU3V3s9kQul5zN6egC4hd0YmTuL2FGYuB3oK5IYPt8SXBcIbYO/qTn973Pfs8v5zflw/6zxw2EoAaCc5hHC7heuaa0vmZ9WHef9PDw8PDw8PDw8PDw8PDwuGR4zeHK+ctb8OPz96/eLo/x09vw6ePDFgLIEx4XgH7J11ptN/Oi103IJBikZNIZhIoxMiGDoVpipRWBXE6SmOdEE0bHMU00Z8dB5dJkrFkUVi7SrqC7hM1YaVivO5wxNmNm11Qs5iWLUUDumXojster6S6p2V4wo72uZiVnskLEZI2O/EEqnKZhHE+zqdxWc9o284pODgCVCN282tDaDaN/+cdfUWvq68HP3+7dxpJydIEe6XV1SX+j1+aSfkfaxkKdus8tE9+3b8GClgL2S3pEecKfjln2inIBWE8BDoXIk+idoBxYlgEeZ4LiJy8O73IRxm/lKToKMT0esDxMKWAuchFG0r9Pld8eYqKWALZL3HF/iv/Ec2krDv10s/IjS7efCRlr2QXMgy+9a/vvEDtq6rxrDtFxVs2P7H9yUf6alWDnPzKaPSlnG5XfsfR1K34A1TT1Lb3cnPen+4Bquur8Wj903K3wzdx/ttB3y5H/B0zRwDY='
29+
...
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
2+
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
3+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
4+
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT
5+
6+
# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
7+
# LATENCY-NOT: PseudoVCPOP_M_B32
8+
9+
# RTHROUGHPUT: PseudoVCOMPRESS_VM_M2_E8
10+
# RTHROUGHPUT: PseudoVCPOP_M_B32
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
2+
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
3+
4+
# Make sure none of the configs has a SEW other than e32
5+
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
6+
# CHECK: SEW: e32
7+
# CHECK-NOT: SEW: e{{(8|16|64)}}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK \
2+
# RUN: --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
3+
4+
# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}'
5+
# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}'
6+
# CHECK-NOT: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e{{(32|64)}}, Policy: ta/mu}'
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \
2+
# RUN: FileCheck %s
3+
4+
# Make sure reduction ops don't have alias between vd and vs1
5+
# CHECK: instructions:
6+
# CHECK-NEXT: PseudoVWREDSUMU_VS_M8_E32
7+
# CHECK-NOT: V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \
2+
# RUN: FileCheck %s
3+
4+
# Make sure all def / use operands are the same in latency mode.
5+
# CHECK: instructions:
6+
# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
2+
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VXRM
3+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
4+
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRM
5+
6+
# VXRM: PseudoVAADDU_VV_M1
7+
# VXRM: VXRM: rnu
8+
# VXRM-NOT: VXRM: {{(rne|rdn|rod)}}
9+
10+
# FRM: PseudoVFADD_VFPR16_M1_E16
11+
# FRM: FRM: rne
12+
# FRM-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
2+
# RUN: --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
3+
# RUN: FileCheck %s --check-prefix=ZVK
4+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
5+
# RUN: --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
6+
# RUN: FileCheck %s --check-prefix=ZVK
7+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
8+
# RUN: --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
9+
# RUN: FileCheck %s --check-prefix=ZVK
10+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
11+
# RUN: --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
12+
# RUN: FileCheck %s --check-prefix=ZVK
13+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
14+
# RUN: --opcode-name=PseudoVSHA2MS_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
15+
# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
16+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
17+
# RUN: --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
18+
# RUN: FileCheck %s --allow-empty --check-prefix=EMPTY
19+
20+
# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64
21+
# ZVK-NOT: SEW: e{{(8|16)}}
22+
# ZVK: SEW: e32
23+
# ZVK-NOT: SEW: e64
24+
25+
# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256)
26+
27+
# ZVKNH-NOT: SEW: e{{(8|16)}}
28+
# ZVKNH: SEW: e{{(32|64)}}
29+
30+
# EMPTY-NOT: SEW: e{{(8|16|32|64)}}

0 commit comments

Comments
 (0)