|
| 1 | +import os |
| 2 | +import json |
| 3 | +import re |
| 4 | +import sys |
| 5 | +from collections import defaultdict |
| 6 | +import yaml # Make sure you have PyYAML installed |
| 7 | + |
# Mapping from lowercased YAML instruction name -> category (its top-level
# subdirectory in the repo); populated by main() via get_repo_instructions().
REPO_INSTRUCTIONS = {}  # Will store the mapping from YAML instr_name -> category
# Root directory of the YAML instruction repository; set from argv in main().
REPO_DIRECTORY = None
| 10 | + |
def safe_get(data, key, default=""):
    """Return ``data[key]`` if *data* is a dict and *key* is present, else *default*.

    Best-effort accessor: never raises. Non-dict inputs (including None)
    simply yield *default*.
    """
    try:
        if isinstance(data, dict):
            return data.get(key, default)
        return default
    # Narrowed from a bare `except:` (which also swallowed SystemExit and
    # KeyboardInterrupt); Exception still covers e.g. an unhashable key.
    except Exception:
        return default
| 19 | + |
def load_yaml_encoding(instr_name):
    """
    Given an instruction name (from JSON), find the corresponding YAML file and
    load its encoding data.

    Uses REPO_INSTRUCTIONS / REPO_DIRECTORY (set up by main) to resolve the
    file path. Returns a tuple ``(match, variables)`` where ``match`` is the
    YAML bit-pattern string (or None if unavailable) and ``variables`` is a
    list of variable-field descriptors (never None).
    """
    lower_name = instr_name.lower()
    # JSON keys use '_' where YAML file names use '.'; try both spellings.
    candidates = {lower_name, lower_name.replace('_', '.')}

    yaml_file_path = None
    for cand in candidates:
        if cand in REPO_INSTRUCTIONS:
            category = REPO_INSTRUCTIONS[cand]
            path = os.path.join(REPO_DIRECTORY, category, cand + ".yaml")
            if os.path.isfile(path):
                yaml_file_path = path
                break

    if yaml_file_path is None:
        # No matching YAML file found for any candidate spelling.
        return None, None

    with open(yaml_file_path, 'r') as yf:
        ydata = yaml.safe_load(yf)

    # A present-but-null 'encoding:'/'variables:' key in the YAML would make
    # safe_get return None (its default only applies when the key is ABSENT),
    # which previously crashed the downstream `for var in yaml_vars` loop.
    encoding = safe_get(ydata, 'encoding', {}) or {}
    yaml_match = safe_get(encoding, 'match', None)
    yaml_vars = safe_get(encoding, 'variables', []) or []

    return yaml_match, yaml_vars
| 54 | + |
def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
    """
    Compare the YAML encoding (match pattern + variable fields) with the JSON
    encoding string (MSB-first, 32 tokens) and return a list of human-readable
    differences. An empty list means the encodings agree.

    Parameters:
        yaml_match: 32-char pattern of '0'/'1'/'-' ('-' = variable bit).
        yaml_vars: list of {"name": ..., "location": "high-low" or "bit"} dicts.
        json_encoding_str: concatenation of '0'/'1' fixed bits and variable
            references like "rs1[4]", most-significant bit first.
    """
    if not yaml_match:
        return ["No YAML match field available for comparison."]
    if not json_encoding_str:
        return ["No JSON encoding available for comparison."]

    # Normalize: '-' marks a don't-care/variable bit, same as '.'.
    yaml_pattern_str = yaml_match.replace('-', '.')
    if len(yaml_pattern_str) != 32:
        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."]

    def parse_location(loc_str):
        # Locations are "high-low" for ranges; a bare "N" (no dash) denotes a
        # single bit — the previous split('-') raised ValueError on those.
        if '-' in loc_str:
            high, low = loc_str.split('-')
            return int(high), int(low)
        bit = int(loc_str)
        return bit, bit

    yaml_var_positions = {}
    for var in yaml_vars:
        high, low = parse_location(var["location"])
        yaml_var_positions[var["name"]] = (high, low)

    # Tokenize the JSON encoding: single fixed bits or field refs like "rd[3]".
    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str)

    # Map bit position -> token (O(1) lookups instead of a list scan per bit).
    # The string is MSB-first, so the first token is bit 31.
    json_bit_map = {}
    bit_index = 31
    for t in tokens:
        json_bit_map[bit_index] = t
        bit_index -= 1

    if bit_index != -1:
        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]

    differences = []

    # Check fixed bits (bit 0 = LSB = rightmost pattern character).
    for b in range(32):
        yaml_bit = yaml_pattern_str[31 - b]
        json_bit_str = json_bit_map.get(b)
        if json_bit_str is None:
            differences.append(f"Bit {b}: No corresponding JSON bit found.")
            continue

        if yaml_bit in ['0', '1']:
            if json_bit_str not in ['0', '1']:
                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
            elif json_bit_str != yaml_bit:
                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
        else:
            # Variable bit in YAML must not be a fixed bit in JSON.
            if json_bit_str in ['0', '1']:
                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")

    # Each YAML variable field should map onto exactly one JSON field name.
    for var_name, (high, low) in yaml_var_positions.items():
        json_var_fields = []
        for bb in range(low, high + 1):
            json_var_fields.append(json_bit_map.get(bb, '?'))

        field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields)))
        if len(field_names) == 0:
            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
        elif len(field_names) > 1:
            differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")

    return differences
| 127 | + |
def safe_print_instruction_details(name: str, data: dict, output_stream):
    """Write a formatted detail report for one instruction and compare its
    YAML encoding against the JSON (TableGen) encoding.

    Parameters:
        name: instruction key as found in the JSON data.
        data: the instruction's JSON record.
        output_stream: writable text stream the report is appended to.

    Best-effort: any per-instruction failure is reported to the stream and the
    function returns normally so the caller can continue with the next one.
    (All bare `except:` clauses were narrowed to `except Exception` so
    SystemExit/KeyboardInterrupt are no longer swallowed.)
    """
    try:
        output_stream.write(f"\n{name} Instruction Details\n")
        output_stream.write("=" * 50 + "\n")

        # Basic Information
        output_stream.write("\nBasic Information:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Name: {name}\n")
        output_stream.write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n")
        output_stream.write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n")

        # Source location from TableGen's '!locs' metadata (first entry only).
        locs = safe_get(data, '!locs', [])
        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
        output_stream.write(f"Location: {loc}\n")

        # Operands
        output_stream.write("\nOperands:\n")
        output_stream.write("-" * 20 + "\n")
        try:
            in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Inputs: {in_ops}\n")
        except Exception:
            # 'InOperandList' may exist but not be a dict; fall back gracefully.
            output_stream.write("Inputs: N/A\n")

        try:
            out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Outputs: {out_ops}\n")
        except Exception:
            output_stream.write("Outputs: N/A\n")

        # Instruction Properties
        output_stream.write("\nInstruction Properties:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Commutable: {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
        output_stream.write(f"Memory Load: {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
        output_stream.write(f"Memory Store: {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
        output_stream.write(f"Side Effects: {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")

        # Scheduling Info
        sched = safe_get(data, 'SchedRW', [])
        if sched:
            output_stream.write("\nScheduling Information:\n")
            output_stream.write("-" * 20 + "\n")
            output_stream.write("Operations:\n")
            try:
                for op in sched:
                    if isinstance(op, dict):
                        output_stream.write(f" - {op.get('printable', 'N/A')}\n")
            except Exception:
                output_stream.write(" - Unable to parse scheduling information\n")

        # Encoding: flatten TableGen's 'Inst' bit list into an MSB-first string.
        output_stream.write("\nEncoding Pattern:\n")
        output_stream.write("-" * 20 + "\n")
        encoding_bits = []
        try:
            inst = safe_get(data, 'Inst', [])
            for bit in inst:
                if isinstance(bit, dict):
                    # Variable-bit reference, e.g. {'var': 'rs1', 'index': 4} -> "rs1[4]"
                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
                else:
                    encoding_bits.append(str(bit))
            # 'Inst' lists bit 0 first; reverse so the string reads MSB-first.
            encoding_bits.reverse()
            encoding = "".join(encoding_bits)
            output_stream.write(f"Binary Format: {encoding}\n")
        except Exception:
            output_stream.write("Binary Format: Unable to parse encoding\n")
            encoding = ""

        # Now compare YAML vs JSON encodings
        yaml_match, yaml_vars = load_yaml_encoding(name)
        if yaml_match is not None and encoding:
            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
            if differences:
                output_stream.write("\nDifferences in encoding:\n")
                for d in differences:
                    output_stream.write(f" - {d}\n")
                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Echo to console
            else:
                output_stream.write("\nNo encoding differences found.\n")
        else:
            # Cannot compare without both sides; note exactly which is missing.
            if yaml_match is None:
                output_stream.write("\nNo YAML encoding match found for comparison.\n")
            if not encoding:
                output_stream.write("\nNo JSON encoding found for comparison.\n")

        output_stream.write("\n")
    except Exception as e:
        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
        output_stream.write("Continuing with next instruction...\n\n")
| 223 | + |
def get_repo_instructions(repo_directory):
    """
    Walk *repo_directory* recursively and map each YAML-defined instruction
    name (lowercased file stem) to its category.

    The category is the top-level subdirectory the file lives under, or
    "Other" for YAML files sitting directly in the repository root.
    """
    instructions = {}
    for root, _, files in os.walk(repo_directory):
        rel = os.path.relpath(root, repo_directory)
        # Files at the root have rel == '.'; everything else takes the first
        # path component as its category.
        category = "Other" if rel == '.' else rel.split(os.sep)[0]
        for fname in files:
            if fname.endswith(".yaml"):
                stem, _ext = os.path.splitext(fname)
                # Lowercase keys make later lookups case-insensitive.
                instructions[stem.lower()] = category
    return instructions
| 243 | + |
def find_json_key(instr_name, json_data):
    """
    Find a key in *json_data* matching *instr_name*, trying several naming
    conventions: '.' vs '_' and different case transformations.

    Returns the first matching key in a fixed preference order (lowercase,
    then UPPER, then Capitalized), or None if nothing matches. The original
    iterated a set, so the chosen key was nondeterministic whenever more than
    one variant existed in *json_data*.
    """
    lower_name = instr_name.lower()
    underscored = instr_name.replace('.', '_')
    variants = [
        lower_name,
        lower_name.replace('.', '_'),
        instr_name.upper(),
        underscored.upper(),
        instr_name.capitalize(),
        underscored.capitalize(),
    ]

    seen = set()
    for v in variants:
        if v in seen:  # skip duplicate spellings (e.g. names without '.')
            continue
        seen.add(v)
        if v in json_data:
            return v
    return None
| 264 | + |
def main():
    """Entry point: parse CLI arguments, cross-reference the YAML instruction
    repository against the TableGen JSON dump, and write a categorized report
    (summary + per-instruction details) to output.txt.
    """
    global REPO_INSTRUCTIONS, REPO_DIRECTORY

    if len(sys.argv) != 3:
        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
        sys.exit(1)

    json_file = sys.argv[1]
    REPO_DIRECTORY = sys.argv[2]

    # Build the instruction-name -> category map from the repository layout.
    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
    if not REPO_INSTRUCTIONS:
        print("No instructions found in the provided repository directory.")
        sys.exit(1)

    try:
        # Read and parse JSON (json.load streams the file; no read()+loads).
        with open(json_file, 'r') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error reading file: {str(e)}")
        sys.exit(1)

    categories = defaultdict(list)

    # For each YAML instruction, try to find its counterpart in the JSON data.
    for yaml_instr_name, category in REPO_INSTRUCTIONS.items():
        json_key = find_json_key(yaml_instr_name, data)
        if json_key is None:
            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
            continue

        instr_data = data.get(json_key)
        if not isinstance(instr_data, dict):
            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
            continue

        categories[category].append((json_key, instr_data))

    with open("output.txt", "w") as outfile:
        # Summary section: per-category counts plus sorted instruction names.
        outfile.write("RISC-V Instruction Summary\n")
        outfile.write("=" * 50 + "\n")
        total = 0
        for category, instructions in sorted(categories.items()):
            count = len(instructions)
            total += count
            outfile.write(f"\n{category}: {count} instructions\n")
            for name, _ in sorted(instructions, key=lambda x: x[0].lower()):
                outfile.write(f" - {name}\n")
        outfile.write(f"\nTotal Instructions Found: {total}\n")

        # Detailed section: one report per instruction, grouped by category.
        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
        outfile.write("=" * 80 + "\n")

        for category, instructions in sorted(categories.items()):
            outfile.write(f"\n{category} INSTRUCTIONS\n")
            outfile.write("=" * 50 + "\n")

            for name, instr_data in sorted(instructions, key=lambda x: x[0].lower()):
                safe_print_instruction_details(name, instr_data, outfile)

    print("Output has been written to output.txt")

if __name__ == '__main__':
    main()
0 commit comments