Skip to content

Commit b9b6257

Browse files
committed
jit: Implement build-time generation of jit infrastructure
generate_jit_assets.py expands the automated code generation to include: - Opcode enumerations in arm32_opcodes.h. - Decoder lookup tables in arm32_table.c - Computed-goto jump tables for the interpreter in handler_table.inc. Relocates arm32.inc to src/jit/common/a32_instructions.inc. Implements the primary execution loop in src/jit/interpreter/arm32/instruction.c. The code is messy and will be rewritten in the future. Signed-off-by: Ronald Caesar <github43132@proton.me>
1 parent e714dc4 commit b9b6257

File tree

16 files changed

+3613
-427
lines changed

16 files changed

+3613
-427
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,10 @@ set(GEN_TEST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/tests/jit/decoder/test_arm32_genera
105105
add_custom_command(
106106
OUTPUT ${GEN_TEST_SRC}
107107
COMMAND Python3::Interpreter ${CMAKE_SOURCE_DIR}/scripts/generate_jit_decoder_tests.py
108-
${CMAKE_SOURCE_DIR}/src/jit/frontend/decoder/arm32.inc
108+
${CMAKE_SOURCE_DIR}/src/jit/common/a32_instructions.inc
109109
${GEN_TEST_SRC}
110110
DEPENDS ${CMAKE_SOURCE_DIR}/scripts/generate_jit_decoder_tests.py
111-
${CMAKE_SOURCE_DIR}/src/jit/frontend/decoder/arm32.inc
111+
${CMAKE_SOURCE_DIR}/src/jit/common/a32_instructions.inc
112112
COMMENT "Generating ARM32 Decoder Tests"
113113
)
114114

scripts/generate_jit_assets.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#!/usr/bin/env python3
2+
import re
3+
import sys
4+
import argparse
5+
6+
# Upper bound on how many instruction patterns a single lookup-table bucket
# may hold; sized generously because overlapping wildcard bits place one
# pattern in many buckets.
MAX_BUCKET_SIZE = 64

# Number of buckets in the decoder lookup table.
TABLE_SIZE = 1 << 12

# Instruction bits that feed the hash: [27:20] and [7:4].
HASH_BITS_MASK = (0xFF << 20) | (0xF << 4)
12+
13+
14+
class Instruction:
    """A single instruction pattern described by a 32-character bitstring.

    Besides the constructor arguments, two values are derived from the
    bitstring:

    * ``mask``     - has a 1 at every bit position the pattern fixes to
      ``0`` or ``1``.
    * ``expected`` - the value the fixed positions must have.
    """

    def __init__(self, name, mnemonic, bitstring, array_index):
        self.name = name
        self.mnemonic = mnemonic
        self.bitstring = bitstring
        self.array_index = array_index
        self.mask = 0
        self.expected = 0
        self.parse_bits()

    def parse_bits(self):
        """Fill in ``mask`` and ``expected`` from ``bitstring``.

        The first character of the bitstring describes bit 31 and the last
        one bit 0. Characters other than ``0`` and ``1`` are variable bits
        and contribute to neither value.

        Exits the process with status 1 (after reporting on stderr) when the
        bitstring is not exactly 32 characters long.
        """
        if len(self.bitstring) != 32:
            # Diagnostics go to stderr so they are not mixed into stdout of
            # the build step that runs this script.
            print(
                f"Error: Bitstring length {len(self.bitstring)} invalid for {self.name}",
                file=sys.stderr,
            )
            sys.exit(1)

        for i, char in enumerate(self.bitstring):
            if char not in "01":
                # Variable bits (c, n, d, m, etc) leave mask as 0
                continue
            bit = 1 << (31 - i)
            self.mask |= bit
            if char == "1":
                self.expected |= bit
39+
40+
41+
def parse_inc_file(input_path):
    """Parse ``INST(name, "mnemonic", "bitstring")`` entries from a file.

    Blank lines and lines starting with ``//`` are skipped, as are lines on
    which no ``INST(...)`` entry is found.

    Returns a list of ``Instruction`` objects in file order; each one's
    ``array_index`` is its position in that list.

    Exits the process with status 1 (after reporting on stderr) when the
    input file does not exist.
    """
    regex = re.compile(r'INST\(\s*([A-Za-z0-9_]+),\s*"(.*?)",\s*"(.*?)"\s*\)')

    try:
        # Explicit encoding so parsing does not depend on the build host's
        # locale settings.
        with open(input_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        print(f"Error: Could not find input file: {input_path}", file=sys.stderr)
        sys.exit(1)

    instructions = []
    for line in lines:
        line = line.strip()
        if not line or line.startswith("//"):
            continue

        match = regex.search(line)
        if match:
            name, mnemonic, bitstring = match.groups()
            # The next free slot in the list doubles as the array index.
            instructions.append(
                Instruction(name, mnemonic, bitstring, len(instructions))
            )
    return instructions
66+
67+
68+
def generate_lookup_table(instructions):
    """Distribute instructions over the decoder's hash buckets.

    The hash index is ``(major << 4) | minor`` where ``major`` is instruction
    bits [27:20] and ``minor`` is bits [7:4]. An instruction is placed in
    every bucket whose index is compatible with the bits the instruction
    fixes inside those two fields, so patterns with variable hash bits land
    in several buckets.

    Returns a dict mapping each index in ``range(TABLE_SIZE)`` to the list of
    matching instructions, in the order they appear in ``instructions``.

    Exits the process with status 1 (after reporting on stderr) as soon as a
    bucket grows beyond ``MAX_BUCKET_SIZE``.
    """
    # The hash-relevant slice of each pattern does not depend on the bucket
    # being probed, so compute it once instead of once per bucket.
    hashed_patterns = [
        (inst, inst.mask & HASH_BITS_MASK, inst.expected & HASH_BITS_MASK)
        for inst in instructions
    ]

    buckets = {i: [] for i in range(TABLE_SIZE)}

    for i in range(TABLE_SIZE):
        # Rebuild the instruction bits that would hash to this index.
        major_val = (i >> 4) & 0xFF
        minor_val = i & 0x0F
        probe_val = (major_val << 20) | (minor_val << 4)

        bucket = buckets[i]
        for inst, relevant_mask, relevant_expected in hashed_patterns:
            # Only the bits the pattern fixes inside the hash fields have to
            # agree with the probe.
            if (probe_val & relevant_mask) != relevant_expected:
                continue

            bucket.append(inst)
            if len(bucket) > MAX_BUCKET_SIZE:
                print(
                    f"FATAL ERROR: Bucket {i:#05x} overflowed! Size: {len(bucket)}",
                    file=sys.stderr,
                )
                print(
                    "This means too many instructions map to the same hash index.",
                    file=sys.stderr,
                )
                sys.exit(1)

    return buckets
104+
105+
106+
def write_decoder_table_h_file(path):
    """Write the C header that declares the decoder lookup table to *path*.

    The header defines ``LOOKUP_TABLE_MAX_BUCKET_SIZE``, the
    ``decode_bucket_t`` struct and an ``extern`` declaration of
    ``g_decoder_lookup_table``.
    """
    print(f"Generating decoder table header file: {path}")

    # Assemble the whole header first, then emit it in a single call.
    header_lines = (
        "/* GENERATED FILE - DO NOT EDIT */\n",
        "/* This file is generated by scripts/generate_jit_assets.py */\n",
        "#ifndef POUND_JIT_DECODER_ARM32_GENERATED_H\n",
        "#define POUND_JIT_DECODER_ARM32_GENERATED_H\n\n",
        '#include "arm32.h"\n',
        "#include <stddef.h>\n\n",
        f"#define LOOKUP_TABLE_MAX_BUCKET_SIZE {MAX_BUCKET_SIZE}U\n\n",
        "typedef struct {\n",
        " const pvm_jit_decoder_arm32_instruction_info_t *instructions[LOOKUP_TABLE_MAX_BUCKET_SIZE];\n",
        " size_t count;\n",
        "} decode_bucket_t;\n\n",
        f"extern const decode_bucket_t g_decoder_lookup_table[{TABLE_SIZE}];\n\n",
        "#endif\n",
    )

    with open(path, "w") as out:
        out.writelines(header_lines)
126+
127+
128+
def write_opcodes_header(path, instructions):
    """Write the opcode enum header to *path*.

    One enumerator is emitted per distinct upper-cased instruction name, in
    order of first appearance, followed by ``PVM_A32_OP_STOP``.
    """
    print(f"Generating opcode header file: {path}")
    with open(path, "w") as out:
        out.write(
            "/* GENERATED FILE - DO NOT EDIT */\n"
            "/* This file is generated by scripts/generate_jit_assets.py */\n"
            "#ifndef POUND_JIT_DECODER_ARM32_OPCODES_H\n"
            "#define POUND_JIT_DECODER_ARM32_OPCODES_H\n\n"
            "typedef enum {\n"
        )

        # dict keys keep insertion order, which drops repeated names while
        # preserving the position of each first occurrence.
        enumerators = dict.fromkeys(
            f" PVM_A32_OP_{inst.name.upper()},\n" for inst in instructions
        )
        out.writelines(enumerators)

        out.write(
            " PVM_A32_OP_STOP,\n"
            "} pvm_jit_decoder_arm32_opcode_t;\n\n"
            "#endif // POUND_JIT_DECODER_ARM32_OPCODES_H\n"
        )
147+
148+
149+
def write_decoder_table_c_file(path, instructions, buckets):
    """Write the C source defining the instruction array and lookup table.

    ``g_instructions`` receives one initializer per entry of *instructions*.
    ``g_decoder_lookup_table`` receives a designated initializer for every
    non-empty bucket in *buckets*; each bucket refers to its instructions by
    their ``array_index``.
    """
    print(f"Generating decoder table source file: {path}")
    with open(path, "w") as out:
        emit = out.write

        emit("/* GENERATED FILE - DO NOT EDIT */\n")
        emit("/* This file is generated by scripts/generate_jit_assets.py */\n")
        emit('#include "arm32.h"\n')
        emit('#include "arm32_table.h"\n\n')

        # Flat array holding one record per parsed instruction.
        emit(
            f"static const pvm_jit_decoder_arm32_instruction_info_t g_instructions[{len(instructions)}] = {{\n"
        )
        for inst in instructions:
            emit(
                f' {{ "{inst.mnemonic}", "{inst.bitstring}", PVM_A32_OP_{inst.name.upper()}, {inst.mask:#010x}U, {inst.expected:#010x}U }},\n'
            )
        emit("};\n")

        # Lookup table; buckets without members are left out entirely.
        emit(f"const decode_bucket_t g_decoder_lookup_table[{TABLE_SIZE}] = {{\n")
        for index in range(TABLE_SIZE):
            members = buckets[index]
            if not members:
                continue
            refs = "".join(
                f"&g_instructions[{inst.array_index}], " for inst in members
            )
            emit(
                f" [{index:#05x}] = {{ .instructions = {{ {refs}}}, .count = {len(members)}U }},\n"
            )
        emit("};\n")
175+
176+
177+
def write_interpreter_handler_table(path, instructions):
    """Write the interpreter's label-address dispatch table to *path*.

    One ``[PVM_A32_OP_X] = &&HANDLER_PVM_A32_OP_X,`` line is emitted per
    distinct upper-cased instruction name, in order of first appearance.
    """
    # NOTE(review): unlike the opcode enum and the handler skeletons, no
    # entry is emitted for PVM_A32_OP_STOP here - confirm the including C
    # file provides one.
    print(f"Generating interpreter handler table: {path}")
    with open(path, "w") as out:
        out.write("/* GENERATED FILE - DO NOT EDIT */\n")
        out.write("/* This file is generated by scripts/generate_jit_assets.py */\n")

        # dict keys keep insertion order, so repeated names collapse onto
        # their first occurrence.
        opcodes = dict.fromkeys(
            f"PVM_A32_OP_{inst.name.upper()}" for inst in instructions
        )
        out.writelines(f" [{opcode}] = &&HANDLER_{opcode},\n" for opcode in opcodes)
189+
190+
191+
def write_interpreter_handler_skeletons(path, instructions):
    """Write empty interpreter handler blocks to *path*.

    One ``HANDLER(PVM_A32_OP_X)`` block is emitted per distinct upper-cased
    instruction name, in order of first appearance; its TODO comment carries
    the mnemonic of the first instruction with that name. A block for
    ``PVM_A32_OP_STOP`` is appended at the end.
    """
    print(f"Generating new skeleton file: {path}")
    with open(path, "w") as out:
        out.write(
            "/*\n"
            " * GENERATED FILE - DO NOT EDIT\n"
            " * This file is generated by scripts/generate_jit_assets.py \n"
            " * This file contains pre-generated, empty handler blocks for the every instruction.\n"
            " */\n\n"
        )

        # Remember only the first mnemonic seen for each opcode name.
        first_mnemonic = {}
        for inst in instructions:
            first_mnemonic.setdefault(inst.name.upper(), inst.mnemonic)

        for opcode, mnemonic in first_mnemonic.items():
            out.write(
                f"HANDLER(PVM_A32_OP_{opcode}): {{\n"
                f" // TODO: Implement handler for {mnemonic}\n"
                " DISPATCH();\n"
                "}\n\n"
            )

        out.write(
            "HANDLER(PVM_A32_OP_STOP): {\n"
            " // TODO: Implement handler for PVM_A32_OP_STOP\n"
            " DISPATCH();\n"
            "}\n\n"
        )
217+
218+
219+
def main():
    """Parse the command line and generate every JIT asset file.

    All ``--out-*`` options are mandatory: each one names a file that is
    always written, so a missing option is rejected by the argument parser
    up front instead of failing later when ``None`` is passed to ``open()``
    after some outputs may already have been produced.
    """
    parser = argparse.ArgumentParser(description="Generate ARM32 Decoder Tables")
    parser.add_argument("input")
    parser.add_argument("--out-opcodes-h", required=True)
    parser.add_argument("--out-decoder-c", required=True)
    parser.add_argument("--out-decoder-h", required=True)
    parser.add_argument("--out-handler-table-inc", required=True)
    parser.add_argument("--out-handler-skeletons-inc", required=True)

    args = parser.parse_args()

    instructions = parse_inc_file(args.input)
    buckets = generate_lookup_table(instructions)

    # Generate all necessary files
    write_opcodes_header(args.out_opcodes_h, instructions)
    write_decoder_table_c_file(args.out_decoder_c, instructions, buckets)
    write_decoder_table_h_file(args.out_decoder_h)
    write_interpreter_handler_table(args.out_handler_table_inc, instructions)
    write_interpreter_handler_skeletons(args.out_handler_skeletons_inc, instructions)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)