Skip to content

Commit 4880c78

Browse files
committed
Implement MOP fusion in emulator.
1 parent e2a231e commit 4880c78

File tree

3 files changed

+137
-1
lines changed

3 files changed

+137
-1
lines changed

freestanding/riscv64-arnavion-none-elf.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"data-layout": "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128",
88
"eh-frame-header": false,
99
"emit-debug-gdb-scripts": false,
10-
"features": "+forced-atomics,+zba,+zbs,+zca,+zcb,+zicond,+zicsr,+zicntr",
10+
"features": "+forced-atomics,+zba,+zbs,+zca,+zcb,+zicond,+zicsr,+zicntr,+auipc-addi-fusion,+ld-add-fusion,+lui-addi-fusion",
1111
"linker": "rust-lld",
1212
"linker-flavor": "gnu-lld",
1313
"llvm-target": "riscv64",

tc/txt/alu.txt

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,3 +256,59 @@ M
256256
| remu rd, rs1, rs2 | 12 | 7 | 1 | | -> | rs1 | rs2 | %u |
257257
| remuw rd, rs1, rs2 | 14 | 7 | 1 | | -> | rs1uw | rs2uw | %uw |
258258
+---------------------------------+--------+--------+--------+--------+----+-------------+-------------+----------------+
259+
260+
---
261+
262+
Zarnavion (produced by MOP fusion)
263+
264+
+---------------------------------+--------+--------+--------+--------+----+---------------------------+----------------+
265+
| inst | opcode | funct3 | funct7 | funct5 | -> | Misc | rd |
266+
| | | | | | -> | in3 | in4 | |
267+
+=================================+========+========+========+========+====+=============+=============+================+
268+
| abs rd, rs1 | 2 | 7 | 0 | | -> | rs1 | | abs |
269+
+---------------------------------+--------+--------+--------+--------+----+---------------------------+----------------+
270+
271+
+---------------------------------+--------+--------+--------+--------+----+--------------------------+----------------+------+-------+---------+
272+
| inst | opcode | funct3 | funct7 | funct5 | -> | Adder | rd | RAM | RAM | RAM |
273+
| | | | | | -> | in1 | in2 | | Load | Store | Address |
274+
+=================================+========+========+========+========+====+===================+======+================+======+=======+=========+
275+
| lb.pc rd, imm(pc) | 2 | 0 | 0 | | -> | pc | imm | RAM Load Value | 1 | | + |
276+
| lh.pc rd, imm(pc) | 2 | 1 | 0 | | -> | pc | imm | RAM Load Value | 1 | | + |
277+
| lw.pc rd, imm(pc) | 2 | 2 | 0 | | -> | pc | imm | RAM Load Value | 1 | | + |
278+
| ld.pc rd, imm(pc) | 2 | 3 | 0 | | -> | pc | imm | RAM Load Value | 1 | | + |
279+
| lbu.pc rd, imm(pc) | 2 | 4 | 0 | | -> | pc | imm | RAM Load Value | 1 | | + |
280+
| lhu.pc rd, imm(pc) | 2 | 5 | 0 | | -> | pc | imm | RAM Load Value | 1 | | + |
281+
| lwu.pc rd, imm(pc) | 2 | 6 | 0 | | -> | pc | imm | RAM Load Value | 1 | | + |
282+
+---------------------------------+--------+--------+--------+--------+----+-------------------+------+----------------+------+-------+---------+
283+
| lb.add rd, rs2(rs1) | 2 | 0 | 1 | | -> | rs1 | rs2 | RAM Load Value | 1 | | + |
284+
| lh.add rd, rs2(rs1) | 2 | 1 | 1 | | -> | rs1 | rs2 | RAM Load Value | 1 | | + |
285+
| lw.add rd, rs2(rs1) | 2 | 2 | 1 | | -> | rs1 | rs2 | RAM Load Value | 1 | | + |
286+
| ld.add rd, rs2(rs1) | 2 | 3 | 1 | | -> | rs1 | rs2 | RAM Load Value | 1 | | + |
287+
| lbu.add rd, rs2(rs1) | 2 | 4 | 1 | | -> | rs1 | rs2 | RAM Load Value | 1 | | + |
288+
| lhu.add rd, rs2(rs1) | 2 | 5 | 1 | | -> | rs1 | rs2 | RAM Load Value | 1 | | + |
289+
| lwu.add rd, rs2(rs1) | 2 | 6 | 1 | | -> | rs1 | rs2 | RAM Load Value | 1 | | + |
290+
+---------------------------------+--------+--------+--------+--------+----+-------------------+------+----------------+------+-------+---------+
291+
| lb.sh1add rd, (rs1 << 1 + rs2) | 2 | 0 | 2 | | -> | rs1 << 1 | rs2 | RAM Load Value | 1 | | + |
292+
| lh.sh1add rd, (rs1 << 1 + rs2) | 2 | 1 | 2 | | -> | rs1 << 1 | rs2 | RAM Load Value | 1 | | + |
293+
| lw.sh1add rd, (rs1 << 1 + rs2) | 2 | 2 | 2 | | -> | rs1 << 1 | rs2 | RAM Load Value | 1 | | + |
294+
| ld.sh1add rd, (rs1 << 1 + rs2) | 2 | 3 | 2 | | -> | rs1 << 1 | rs2 | RAM Load Value | 1 | | + |
295+
| lbu.sh1add rd, (rs1 << 1 + rs2) | 2 | 4 | 2 | | -> | rs1 << 1 | rs2 | RAM Load Value | 1 | | + |
296+
| lhu.sh1add rd, (rs1 << 1 + rs2) | 2 | 5 | 2 | | -> | rs1 << 1 | rs2 | RAM Load Value | 1 | | + |
297+
| lwu.sh1add rd, (rs1 << 1 + rs2) | 2 | 6 | 2 | | -> | rs1 << 1 | rs2 | RAM Load Value | 1 | | + |
298+
+---------------------------------+--------+--------+--------+--------+----+-------------------+------+----------------+------+-------+---------+
299+
| lb.sh2add rd, (rs1 << 2 + rs2) | 2 | 0 | 3 | | -> | rs1 << 2 | rs2 | RAM Load Value | 1 | | + |
300+
| lh.sh2add rd, (rs1 << 2 + rs2) | 2 | 1 | 3 | | -> | rs1 << 2 | rs2 | RAM Load Value | 1 | | + |
301+
| lw.sh2add rd, (rs1 << 2 + rs2) | 2 | 2 | 3 | | -> | rs1 << 2 | rs2 | RAM Load Value | 1 | | + |
302+
| ld.sh2add rd, (rs1 << 2 + rs2) | 2 | 3 | 3 | | -> | rs1 << 2 | rs2 | RAM Load Value | 1 | | + |
303+
| lbu.sh2add rd, (rs1 << 2 + rs2) | 2 | 4 | 3 | | -> | rs1 << 2 | rs2 | RAM Load Value | 1 | | + |
304+
| lhu.sh2add rd, (rs1 << 2 + rs2) | 2 | 5 | 3 | | -> | rs1 << 2 | rs2 | RAM Load Value | 1 | | + |
305+
| lwu.sh2add rd, (rs1 << 2 + rs2) | 2 | 6 | 3 | | -> | rs1 << 2 | rs2 | RAM Load Value | 1 | | + |
306+
+---------------------------------+--------+--------+--------+--------+----+-------------------+------+----------------+------+-------+---------+
307+
| lb.sh3add rd, (rs1 << 3 + rs2) | 2 | 0 | 4 | | -> | rs1 << 3 | rs2 | RAM Load Value | 1 | | + |
308+
| lh.sh3add rd, (rs1 << 3 + rs2) | 2 | 1 | 4 | | -> | rs1 << 3 | rs2 | RAM Load Value | 1 | | + |
309+
| lw.sh3add rd, (rs1 << 3 + rs2) | 2 | 2 | 4 | | -> | rs1 << 3 | rs2 | RAM Load Value | 1 | | + |
310+
| ld.sh3add rd, (rs1 << 3 + rs2) | 2 | 3 | 4 | | -> | rs1 << 3 | rs2 | RAM Load Value | 1 | | + |
311+
| lbu.sh3add rd, (rs1 << 3 + rs2) | 2 | 4 | 4 | | -> | rs1 << 3 | rs2 | RAM Load Value | 1 | | + |
312+
| lhu.sh3add rd, (rs1 << 3 + rs2) | 2 | 5 | 4 | | -> | rs1 << 3 | rs2 | RAM Load Value | 1 | | + |
313+
| lwu.sh3add rd, (rs1 << 3 + rs2) | 2 | 6 | 4 | | -> | rs1 << 3 | rs2 | RAM Load Value | 1 | | + |
314+
+---------------------------------+--------+--------+--------+--------+----+-------------------+------+----------------+------+-------+---------+

tc/txt/mop-fusion.txt

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
Refs:
2+
3+
- https://arxiv.org/pdf/1607.02318
4+
5+
- https://en.wikichip.org/wiki/macro-operation_fusion#RISC-V
6+
7+
- https://github.com/llvm/llvm-project/blob/173907b5d77115623f160978a95159e36e05ee6c/llvm/lib/Target/RISCV/RISCVMacroFusion.td
8+
9+
---
10+
11+
Load immediate
12+
13+
+---------------------------+-------------------------------------------------+--------------------------------------+
14+
| Instructions | Fusion condition | Fused instruction |
15+
+===========================+=================================================+======================================+
16+
| auipc rd_a, imm_a         | rd_a == rd_b && rd_a == rs1_b                   | auipc rd_b, (imm_a + imm_b)          |
17+
| addi rd_b, rs1_b, imm_b | | |
18+
+---------------------------+-------------------------------------------------+--------------------------------------+
19+
| lui rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b && rs1_b != rs2_b | addi rd_a, rs2_b, imm_a |
20+
| add rd_b, rs1_b, rs2_b | | |
21+
+---------------------------+-------------------------------------------------+--------------------------------------+
22+
| lui rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | addi rd_b, x0, (imm_a + imm_b) |
23+
| addi rd_b, rs1_b, imm_b | | |
24+
+---------------------------+-------------------------------------------------+--------------------------------------+
25+
| lui rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | addiw rd_b, x0, (imm_a + imm_b) |
26+
| addiw rd_b, rs1_b, imm_b | | |
27+
+---------------------------+-------------------------------------------------+--------------------------------------+
28+
29+
Op
30+
31+
+---------------------------+-------------------------------------------------+--------------------------------------+
32+
| Instructions | Fusion condition | Fused instruction |
33+
+===========================+=================================================+======================================+
34+
| sub rd_a, x0, rs2_a       | rd_a == rd_b && rd_a == rs1_b && rs2_a == rs2_b | abs rd_b, rs2_a                      |
35+
| max rd_b, rs1_b, rs2_b    | && rd_a != rs2_a                                |                                      |
36+
+---------------------------+-------------------------------------------------+--------------------------------------+
37+
38+
Jump
39+
40+
+---------------------------+-------------------------------------------------+--------------------------------------+
41+
| Instructions | Fusion condition | Fused instruction |
42+
+===========================+=================================================+======================================+
43+
| auipc rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | jal rd_b, (imm_a + imm_b) |
44+
| jalr rd_b, imm_b(rs1_b) | | |
45+
+---------------------------+-------------------------------------------------+--------------------------------------+
46+
47+
Load
48+
49+
+---------------------------+-------------------------------------------------+--------------------------------------+
50+
| Instructions | Fusion condition | Fused instruction |
51+
+===========================+=================================================+======================================+
52+
| auipc rd_a, imm_a         | rd_a == rd_b && rd_a == rs1_b                   | ld.pc rd_b, (imm_a + imm_b)(pc)      |
53+
| ld rd_b, imm_b(rs1_b) | | |
54+
+---------------------------+-------------------------------------------------+--------------------------------------+
55+
| lui rd_a, imm_a           | rd_a == rd_b && rd_a == rs1_b                   | ld rd_b, (imm_a + imm_b)(x0)         |
56+
| ld rd_b, imm_b(rs1_b) | | |
57+
+---------------------------+-------------------------------------------------+--------------------------------------+
58+
| add rd_a, rs1_a, rs2_a    | rd_a == rd_b && rd_a == rs1_b && imm_b == 0     | ld.add rd_b, rs2_a(rs1_a)            |
59+
| ld rd_b, imm_b(rs1_b) | | |
60+
+---------------------------+-------------------------------------------------+--------------------------------------+
61+
| sh1add rd_a, rs1_a, rs2_a | rd_a == rd_b && rd_a == rs1_b && imm_b == 0     | ld.sh1add rd_b, (rs1_a << 1 + rs2_a) |
62+
| ld rd_b, imm_b(rs1_b) | | |
63+
+---------------------------+-------------------------------------------------+--------------------------------------+
64+
65+
---
66+
67+
Fused instruction length
68+
69+
+------+-------+-------+------+------+------+
70+
| fuse | RVC_2 | RVC_1 | len2 | len4 | len8 |
71+
+======+=======+=======+======+======+======+
72+
| 0 | 0 | 0 | 0 | 1 | 0 |
73+
| 0 | 0 | 1 | 1 | 0 | 0 |
74+
| 0 | 1 | 0 | 0 | 1 | 0 |
75+
| 0 | 1 | 1 | 1 | 0 | 0 |
76+
| 1 | 0 | 0 | 0 | 0 | 1 |
77+
| 1 | 0 | 1 | 1 | 1 | 0 |
78+
| 1 | 1 | 0 | 1 | 1 | 0 |
79+
| 1 | 1 | 1 | 0 | 1 | 0 |
80+
+------+-------+-------+------+------+------+

0 commit comments

Comments
 (0)