Skip to content

Commit a56b0b1

Browse files
committed
Add LLVM Check mock-up
Signed-off-by: Afonso Oliveira <[email protected]>
1 parent db49639 commit a56b0b1

File tree

1 file changed

+330
-0
lines changed

1 file changed

+330
-0
lines changed

ext/auto-inst/parsing.py

Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
import os
2+
import json
3+
import re
4+
import sys
5+
from collections import defaultdict
6+
import yaml # Make sure you have PyYAML installed
7+
8+
# Lookup table: lowercase YAML instruction name -> category.
# Starts empty; main() replaces it with the result of get_repo_instructions().
REPO_INSTRUCTIONS = {}

# Directory that holds the YAML instruction files; main() sets it from argv.
REPO_DIRECTORY = None
10+
11+
def safe_get(data, key, default=""):
    """Return ``data[key]`` if ``data`` is a dict holding ``key``, else ``default``.

    Args:
        data: Any object. Only ``dict`` instances are looked up; anything
            else (``None``, lists, strings, ...) yields ``default``.
        key: Key to look up.
        default: Value returned when the lookup is not possible or the key
            is absent. Defaults to the empty string.

    Returns:
        The stored value, or ``default``.
    """
    if not isinstance(data, dict):
        return default
    try:
        return data.get(key, default)
    except TypeError:
        # An unhashable key (e.g. a list) cannot be looked up in a dict;
        # treat it the same as a missing key instead of swallowing every
        # exception (including KeyboardInterrupt) with a bare ``except``.
        return default
19+
20+
def load_yaml_encoding(instr_name):
    """Locate the YAML file for a JSON instruction name and load its encoding.

    The name is lower-cased and tried both as-is and with ``_`` replaced by
    ``.``; each candidate is looked up in ``REPO_INSTRUCTIONS`` to obtain
    its category, and the file ``<REPO_DIRECTORY>/<category>/<name>.yaml``
    is opened if it exists.

    Args:
        instr_name: Instruction name as it appears in the JSON data.

    Returns:
        A ``(match, variables)`` tuple taken from the YAML ``encoding``
        mapping. ``(None, None)`` is returned when no YAML file is found.
        ``match`` is ``None`` when the file has no ``encoding.match`` entry;
        ``variables`` is always a list-like value (``[]`` when absent/null).
    """
    lower_name = instr_name.lower()
    # Ordered and de-duplicated (instead of a set) so the exact lower-case
    # name is always tried first and the result does not depend on hashing.
    candidates = list(dict.fromkeys((lower_name, lower_name.replace('_', '.'))))

    yaml_file_path = None
    for cand in candidates:
        category = REPO_INSTRUCTIONS.get(cand)
        if category is None:
            continue
        file_name = cand + ".yaml"
        possible_paths = [os.path.join(REPO_DIRECTORY, category, file_name)]
        if category == "Other":
            # get_repo_instructions() labels files that sit directly in the
            # repository root as "Other"; those have no category directory.
            possible_paths.append(os.path.join(REPO_DIRECTORY, file_name))
        yaml_file_path = next(
            (path for path in possible_paths if os.path.isfile(path)), None
        )
        if yaml_file_path is not None:
            break

    if yaml_file_path is None:
        # YAML not found
        return None, None

    with open(yaml_file_path, 'r', encoding='utf-8') as yf:
        ydata = yaml.safe_load(yf)

    encoding = safe_get(ydata, 'encoding', {})
    yaml_match = safe_get(encoding, 'match', None)
    # A present-but-null ``variables:`` entry loads as None; normalise it so
    # callers can always iterate the result.
    yaml_vars = safe_get(encoding, 'variables', []) or []

    return yaml_match, yaml_vars
54+
55+
def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
    """Compare a YAML encoding (match + variables) with a JSON bit string.

    Args:
        yaml_match: 32-character pattern, most significant bit first, where
            ``0``/``1`` are fixed bits and any other character (normally
            ``-``) marks a variable bit.
        yaml_vars: Iterable of mappings with a ``name`` and a ``location``.
            A location may be a range ``"24-20"``, a single bit (``"12"`` or
            the integer ``12``), or several such parts joined with ``|``
            (``"31|7|30-25|11-8"``). ``None`` is treated as no variables.
        json_encoding_str: Concatenated JSON bits, most significant first;
            each bit is ``0``, ``1`` or a field reference such as ``rs1[4]``.

    Returns:
        A list of human-readable difference messages; empty when the two
        encodings agree.

    Raises:
        ValueError: If a variable location contains a non-numeric part.
        KeyError: If a variable entry lacks ``name`` or ``location``.
    """
    if not yaml_match:
        return ["No YAML match field available for comparison."]
    if not json_encoding_str:
        return ["No JSON encoding available for comparison."]

    if len(yaml_match) != 32:
        return [f"YAML match pattern length is {len(yaml_match)}, expected 32. Cannot compare properly."]

    def parse_location(location):
        """Turn a location spec into a list of (high, low) bit ranges."""
        ranges = []
        # str() so that a bare integer loaded from YAML is accepted too.
        for part in str(location).split('|'):
            high, dash, low = part.strip().partition('-')
            ranges.append((int(high), int(low) if dash else int(high)))
        return ranges

    yaml_var_positions = {
        var["name"]: parse_location(var["location"]) for var in (yaml_vars or [])
    }

    # Tokenize JSON encoding: a lone 0/1, or a field name with optional index.
    tokens = re.findall(r'(?:[01]|[A-Za-z0-9_]+(?:\[\d+\])?)', json_encoding_str)
    if len(tokens) != 32:
        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {32 - len(tokens)}."]

    # Bit position -> token; the first token is bit 31.
    json_bits = {31 - offset: token for offset, token in enumerate(tokens)}

    differences = []

    # Check fixed bits (pattern index 0 is bit 31, so bit b lives at 31 - b).
    for b in range(32):
        yaml_bit = yaml_match[31 - b]
        json_bit_str = json_bits[b]

        if yaml_bit in ('0', '1'):
            if json_bit_str not in ('0', '1'):
                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
            elif json_bit_str != yaml_bit:
                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
        elif json_bit_str in ('0', '1'):
            # Variable bit in YAML
            differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")

    # Check variable fields: every YAML variable should map onto exactly one
    # JSON field name across all of its bit ranges.
    field_ref = re.compile(r'([A-Za-z0-9_]+)\[\d+\]')
    for var_name, ranges in yaml_var_positions.items():
        json_var_fields = [
            json_bits.get(bit, '?')
            for high, low in ranges
            for bit in range(low, high + 1)
        ]
        field_names = set(field_ref.findall(' '.join(json_var_fields)))
        bits_label = '|'.join(f"{high}-{low}" for high, low in ranges)

        if not field_names:
            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {bits_label}")
        elif len(field_names) > 1:
            # Sorted so the message is stable between runs.
            shown = "{" + ", ".join(repr(field) for field in sorted(field_names)) + "}"
            differences.append(f"Variable {var_name}: Multiple fields {shown} found in JSON for bits {bits_label}")

    return differences
127+
128+
def safe_print_instruction_details(name: str, data: dict, output_stream):
    """Write a formatted report for one instruction and compare its encodings.

    The report covers basic information, operands, instruction properties,
    scheduling information and the encoding pattern read from ``data``.
    The encoding is then compared with the YAML encoding found by
    ``load_yaml_encoding(name)``; every difference is written to
    ``output_stream`` and echoed to standard output.

    Args:
        name: JSON key of the instruction.
        data: JSON record of the instruction.
        output_stream: Object with a ``write(str)`` method receiving the report.

    Any exception raised while producing the report is written to
    ``output_stream`` rather than propagated, so one malformed record does
    not stop the caller from processing the remaining instructions.
    """

    def printable_of(field):
        # Operand lists are expected to be dicts carrying a 'printable'
        # entry; anything else is reported as N/A.
        value = safe_get(data, field, {})
        return value.get('printable', 'N/A') if isinstance(value, dict) else 'N/A'

    def yes_no(flag_name):
        return 'Yes' if safe_get(data, flag_name, 0) else 'No'

    try:
        output_stream.write(f"\n{name} Instruction Details\n")
        output_stream.write("=" * 50 + "\n")

        # Basic Information
        output_stream.write("\nBasic Information:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Name: {name}\n")
        output_stream.write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n")
        output_stream.write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n")

        # Location
        locs = safe_get(data, '!locs', [])
        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
        output_stream.write(f"Location: {loc}\n")

        # Operands
        output_stream.write("\nOperands:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Inputs: {printable_of('InOperandList')}\n")
        output_stream.write(f"Outputs: {printable_of('OutOperandList')}\n")

        # Instruction Properties
        output_stream.write("\nInstruction Properties:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Commutable: {yes_no('isCommutable')}\n")
        output_stream.write(f"Memory Load: {yes_no('mayLoad')}\n")
        output_stream.write(f"Memory Store: {yes_no('mayStore')}\n")
        output_stream.write(f"Side Effects: {yes_no('hasSideEffects')}\n")

        # Scheduling Info
        sched = safe_get(data, 'SchedRW', [])
        if sched:
            output_stream.write("\nScheduling Information:\n")
            output_stream.write("-" * 20 + "\n")
            output_stream.write("Operations:\n")
            try:
                for op in sched:
                    if isinstance(op, dict):
                        output_stream.write(f" - {op.get('printable', 'N/A')}\n")
            except TypeError:
                # 'SchedRW' held a truthy value that cannot be iterated.
                output_stream.write(" - Unable to parse scheduling information\n")

        # Encoding
        output_stream.write("\nEncoding Pattern:\n")
        output_stream.write("-" * 20 + "\n")
        inst = safe_get(data, 'Inst', [])
        try:
            encoding_bits = [
                f"{bit.get('var', '?')}[{bit.get('index', '?')}]"
                if isinstance(bit, dict)
                else str(bit)
                for bit in inst
            ]
        except TypeError:
            # 'Inst' is not iterable.
            output_stream.write("Binary Format: Unable to parse encoding\n")
            encoding = ""
        else:
            # Reverse the bit order before joining
            encoding = "".join(reversed(encoding_bits))
            output_stream.write(f"Binary Format: {encoding}\n")

        # Now compare YAML vs JSON encodings
        yaml_match, yaml_vars = load_yaml_encoding(name)
        if yaml_match is not None and encoding:
            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
            if differences:
                output_stream.write("\nDifferences in encoding:\n")
                for d in differences:
                    output_stream.write(f" - {d}\n")
                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
            else:
                output_stream.write("\nNo encoding differences found.\n")
        else:
            # If we have no YAML match or no encoding, we note that we can't compare
            if yaml_match is None:
                output_stream.write("\nNo YAML encoding match found for comparison.\n")
            if not encoding:
                output_stream.write("\nNo JSON encoding found for comparison.\n")

        output_stream.write("\n")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure in the output
        # and let the caller move on to the next instruction.
        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
        output_stream.write("Continuing with next instruction...\n\n")
223+
224+
def get_repo_instructions(repo_directory):
    """
    Walk ``repo_directory`` and map every ``*.yaml`` file to its category.

    The key is the file name without extension, lower-cased. The category is
    the first path component below ``repo_directory``, or "Other" for files
    located directly in ``repo_directory``.
    """
    found = {}
    for current_dir, _subdirs, filenames in os.walk(repo_directory):
        relative = os.path.relpath(current_dir, repo_directory)
        category = "Other" if relative == '.' else relative.split(os.sep)[0]

        for filename in filenames:
            if not filename.endswith(".yaml"):
                continue
            stem, _extension = os.path.splitext(filename)
            # Lower-case keys keep later lookups case-insensitive.
            found[stem.lower()] = category
    return found
243+
244+
def find_json_key(instr_name, json_data):
    """
    Find the key in ``json_data`` that corresponds to ``instr_name``.

    Several spellings are tried, in this fixed order: lower case, lower case
    with '.' replaced by '_', upper case, upper case with '_', capitalized,
    and capitalized with '_'. The first spelling present in ``json_data``
    is returned.

    Args:
        instr_name: Instruction name to look for.
        json_data: Container supporting ``in`` (normally the JSON dict).

    Returns:
        The matching key, or ``None`` when no spelling is present.
    """
    lower_name = instr_name.lower()
    underscored = instr_name.replace('.', '_')
    # A tuple rather than a set: the probing order is fixed, so the result
    # is reproducible when more than one spelling exists in json_data.
    variants = (
        lower_name,
        lower_name.replace('.', '_'),
        instr_name.upper(),
        underscored.upper(),
        instr_name.capitalize(),
        underscored.capitalize(),
    )
    return next((variant for variant in variants if variant in json_data), None)
264+
265+
def main():
    """
    Command-line entry point.

    Expects two arguments: the JSON file and the directory of YAML
    instruction files. Instructions found in both are grouped by category
    and a summary plus a detailed per-instruction report is written to
    ``output.txt`` in the current working directory.
    """
    global REPO_INSTRUCTIONS, REPO_DIRECTORY

    if len(sys.argv) != 3:
        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
        sys.exit(1)

    _, json_file, REPO_DIRECTORY = sys.argv

    # Build the instruction -> category table from the directory layout.
    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
    if not REPO_INSTRUCTIONS:
        print("No instructions found in the provided repository directory.")
        sys.exit(1)

    try:
        with open(json_file, 'r') as handle:
            data = json.load(handle)
    except Exception as exc:
        print(f"Error reading file: {str(exc)}")
        sys.exit(1)

    # Group every YAML instruction that also exists in the JSON by category.
    by_category = defaultdict(list)
    for yaml_instr_name, category in REPO_INSTRUCTIONS.items():
        json_key = find_json_key(yaml_instr_name, data)
        if json_key is None:
            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
            continue

        instr_data = data.get(json_key)
        if isinstance(instr_data, dict):
            by_category[category].append((json_key, instr_data))
        else:
            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)

    def name_key(entry):
        return entry[0].lower()

    with open("output.txt", "w") as outfile:
        # Sort once; both report sections use the same ordering.
        ordered = [
            (category, sorted(instructions, key=name_key))
            for category, instructions in sorted(by_category.items())
        ]

        outfile.write("RISC-V Instruction Summary\n")
        outfile.write("=" * 50 + "\n")
        total = 0
        for category, instructions in ordered:
            count = len(instructions)
            total += count
            outfile.write(f"\n{category}: {count} instructions\n")
            for instr_name, _record in instructions:
                outfile.write(f" - {instr_name}\n")
        outfile.write(f"\nTotal Instructions Found: {total}\n")

        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
        outfile.write("=" * 80 + "\n")

        for category, instructions in ordered:
            outfile.write(f"\n{category} INSTRUCTIONS\n")
            outfile.write("=" * 50 + "\n")

            for instr_name, record in instructions:
                safe_print_instruction_details(instr_name, record, outfile)

    print("Output has been written to output.txt")
328+
329+
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)