From 7f82b46878ddad26c953fc92d08ffa900d3a223b Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 13 Dec 2024 14:12:57 +0000
Subject: [PATCH 01/33] Add simple Docker environment variable

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py | 328 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 328 insertions(+)
 create mode 100644 ext/auto-inst/parsing.py

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
new file mode 100644
index 0000000000..be17878746
--- /dev/null
+++ b/ext/auto-inst/parsing.py
@@ -0,0 +1,328 @@
+import os
+import json
+import re
+import sys
+from collections import defaultdict
+import yaml  
+
+REPO_INSTRUCTIONS = {} 
+REPO_DIRECTORY = None
+
+def safe_get(data, key, default=""):
+    """Return data.get(key, default) when data is a dict; otherwise return default."""
+    try:
+        if isinstance(data, dict):
+            return data.get(key, default)
+        return default
+    except Exception:  # e.g. TypeError for an unhashable key; lets KeyboardInterrupt/SystemExit through
+        return default
+
+def load_yaml_encoding(instr_name):
+    """
+    Find the YAML file for a JSON instruction name and return its encoding data.
+
+    The lower-cased name is tried first, then the same name with '_' replaced by '.';
+    each is looked up in REPO_INSTRUCTIONS and resolved below REPO_DIRECTORY.
+    Returns (match, variables) from the file's 'encoding' mapping, or (None, None)
+    when no candidate resolves to an existing file.
+    """
+    lower_name = instr_name.lower()
+    # A list keeps the probe order fixed; iterating a set of strings can change per run.
+    candidates = [lower_name]
+    dotted_name = lower_name.replace('_', '.')
+    if dotted_name != lower_name:
+        candidates.append(dotted_name)
+
+    yaml_file_path = None
+    for cand in candidates:
+        if cand not in REPO_INSTRUCTIONS:
+            continue
+        cand_path = os.path.join(REPO_DIRECTORY, REPO_INSTRUCTIONS[cand], cand + ".yaml")
+        if os.path.isfile(cand_path):
+            yaml_file_path = cand_path
+            break
+
+    if yaml_file_path is None:
+        return None, None
+
+    # Decode as UTF-8 explicitly instead of relying on the platform default encoding.
+    with open(yaml_file_path, 'r', encoding='utf-8') as yf:
+        ydata = yaml.safe_load(yf)
+
+    encoding = safe_get(ydata, 'encoding', {})
+    return safe_get(encoding, 'match', None), safe_get(encoding, 'variables', [])
+
+def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
+    """
+    Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
+    Return a list of differences.
+    """
+    if not yaml_match:
+        return ["No YAML match field available for comparison."]
+    if not json_encoding_str:
+        return ["No JSON encoding available for comparison."]
+
+    yaml_pattern_str = yaml_match.replace('-', '.')  # '-' marks a variable (non-fixed) bit
+    if len(yaml_pattern_str) != 32:
+        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."]
+    # A location is "high-low"; a lone bit such as 25 (str or int) means high == low.
+    def parse_location(loc_str):
+        high, _, low = str(loc_str).partition('-')
+        return int(high), int(low or high)
+
+    yaml_var_positions = {}
+    for var in yaml_vars:
+        high, low = parse_location(var["location"])
+        yaml_var_positions[var["name"]] = (high, low)
+
+    # Tokenize JSON encoding
+    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str)
+    json_bits = []
+    bit_index = 31
+    for t in tokens:
+        json_bits.append((bit_index, t))
+        bit_index -= 1
+
+    if bit_index != -1:
+        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]
+
+    differences = []
+
+    # Check fixed bits
+    for b in range(32):
+        yaml_bit = yaml_pattern_str[31 - b]  # pattern is MSB-first: index 0 is bit 31
+        token = [tt for (pos, tt) in json_bits if pos == b]
+        if not token:
+            differences.append(f"Bit {b}: No corresponding JSON bit found.")
+            continue
+        json_bit_str = token[0]
+
+        if yaml_bit in ['0', '1']:
+            if json_bit_str not in ['0', '1']:
+                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
+            elif json_bit_str != yaml_bit:
+                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
+        else:
+            # Variable bit in YAML
+            if json_bit_str in ['0', '1']:
+                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
+
+    # Check variable fields
+    for var_name, (high, low) in yaml_var_positions.items():
+        json_var_fields = []
+        for bb in range(low, high+1):
+            token = [tt for (pos, tt) in json_bits if pos == bb]
+            if token:
+                json_var_fields.append(token[0])
+            else:
+                json_var_fields.append('?')
+
+        field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields)))
+        if len(field_names) == 0:
+            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
+        elif len(field_names) > 1:
+            differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")
+
+    return differences
+
+def safe_print_instruction_details(name: str, data: dict, output_stream):
+    """Print formatted instruction details and compare YAML/JSON encodings."""
+    try:
+        # Print the instruction details without separating by category
+        output_stream.write(f"\n{name} Instruction Details\n")
+        output_stream.write("=" * 50 + "\n")
+
+        # Basic Information
+        output_stream.write("\nBasic Information:\n")
+        output_stream.write("-" * 20 + "\n")
+        output_stream.write(f"Name:              {name}\n")
+        output_stream.write(f"Assembly Format:   {safe_get(data, 'AsmString', 'N/A')}\n")
+        output_stream.write(f"Size:              {safe_get(data, 'Size', 'N/A')} bytes\n")
+
+        # Location
+        locs = safe_get(data, '!locs', [])
+        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
+        output_stream.write(f"Location:          {loc}\n")
+
+        # Operands
+        output_stream.write("\nOperands:\n")
+        output_stream.write("-" * 20 + "\n")
+        try:
+            in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A')  # .get() raises if the stored value is not a dict
+            output_stream.write(f"Inputs:            {in_ops}\n")
+        except:
+            output_stream.write("Inputs:            N/A\n")
+
+        try:
+            out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A')  # same fallback as for the inputs
+            output_stream.write(f"Outputs:           {out_ops}\n")
+        except:
+            output_stream.write("Outputs:           N/A\n")
+
+        # Instruction Properties
+        output_stream.write("\nInstruction Properties:\n")
+        output_stream.write("-" * 20 + "\n")
+        output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
+        output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
+        output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
+        output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
+
+        # Scheduling Info
+        sched = safe_get(data, 'SchedRW', [])
+        if sched:
+            output_stream.write("\nScheduling Information:\n")
+            output_stream.write("-" * 20 + "\n")
+            output_stream.write("Operations:\n")
+            try:
+                for op in sched:
+                    if isinstance(op, dict):
+                        output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
+            except:
+                output_stream.write("  - Unable to parse scheduling information\n")
+
+        # Encoding
+        output_stream.write("\nEncoding Pattern:\n")
+        output_stream.write("-" * 20 + "\n")
+        encoding_bits = []
+        try:
+            inst = safe_get(data, 'Inst', [])  # iterated in stored order, then reversed below before joining
+            for bit in inst:
+                if isinstance(bit, dict):
+                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")  # dict entry -> "var[index]"
+                else:
+                    encoding_bits.append(str(bit))  # any other entry is emitted via str(); presumably a literal 0/1 -- confirm
+            # Reverse the bit order before joining
+            encoding_bits.reverse()
+            encoding = "".join(encoding_bits)
+            output_stream.write(f"Binary Format:     {encoding}\n")
+        except:
+            output_stream.write("Binary Format:     Unable to parse encoding\n")
+            encoding = ""
+
+        # Now compare YAML vs JSON encodings
+        yaml_match, yaml_vars = load_yaml_encoding(name)
+        if yaml_match is not None and encoding:
+            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
+            if differences:
+                output_stream.write("\nDifferences in encoding:\n")
+                for d in differences:
+                    output_stream.write(f"  - {d}\n")
+                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
+            else:
+                output_stream.write("\nNo encoding differences found.\n")
+        else:
+            # If we have no YAML match or no encoding, we note that we can't compare
+            if yaml_match is None:
+                output_stream.write("\nNo YAML encoding match found for comparison.\n")
+            if not encoding:
+                output_stream.write("\nNo JSON encoding found for comparison.\n")
+
+        output_stream.write("\n")
+    except Exception as e:
+        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")  # the failure is reported in the output itself
+        output_stream.write("Continuing with next instruction...\n\n")
+
+def get_repo_instructions(repo_directory):
+    """
+    Walk repo_directory and map each YAML file's lower-cased stem to its category.
+
+    The category is the first path component below repo_directory, or "Other"
+    for files that sit directly in repo_directory.
+    """
+    found = {}
+    for dirpath, _, filenames in os.walk(repo_directory):
+        relative = os.path.relpath(dirpath, repo_directory)
+        segments = [] if relative == '.' else relative.split(os.sep)
+        category = segments[0] if segments else "Other"
+        for filename in filenames:
+            if not filename.endswith(".yaml"):
+                continue
+            stem = os.path.splitext(filename)[0]
+            # Keys are lower-cased so that lookups do not depend on file-name case.
+            found[stem.lower()] = category
+    return found
+
+def find_json_key(instr_name, json_data):
+    """
+    Return the key of json_data that corresponds to instr_name, or None.
+
+    Spellings are probed in a fixed order -- lower, UPPER, then Capitalized, each
+    as given and then with '.' replaced by '_' -- so that the result does not
+    depend on set iteration order when more than one spelling is present.
+    """
+    underscored = instr_name.replace('.', '_')
+    variants = (
+        instr_name.lower(),
+        underscored.lower(),
+        instr_name.upper(),
+        underscored.upper(),
+        instr_name.capitalize(),
+        underscored.capitalize(),
+    )
+
+    # next() yields the first spelling present in json_data, or None if none is.
+    return next((v for v in variants if v in json_data), None)
+
+def main():
+    """
+    Command-line entry point: cross-check the YAML instruction files against TableGen JSON.
+
+    Takes two arguments: the TableGen JSON file and the instruction YAML directory.
+    Writes the report to output.txt in the current directory. Usage errors, load
+    failures and skip notices go to stderr; a fatal error exits with status 1.
+    """
+    global REPO_INSTRUCTIONS, REPO_DIRECTORY
+
+    if len(sys.argv) != 3:
+        script = os.path.basename(sys.argv[0])  # name the script as invoked, not a hard-coded file name
+        print(f"Usage: python {script} <tablegen_json_file> <arch_inst_directory>", file=sys.stderr)
+        sys.exit(1)
+
+    json_file = sys.argv[1]
+    REPO_DIRECTORY = sys.argv[2]
+
+    # Get instructions and categories from the repository structure
+    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
+    if not REPO_INSTRUCTIONS:
+        print("No instructions found in the provided repository directory.", file=sys.stderr)
+        sys.exit(1)
+
+    try:
+        with open(json_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except (OSError, ValueError) as e:  # unreadable file, undecodable bytes or malformed JSON
+        print(f"Error reading file: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+    if not isinstance(data, dict):
+        print("Error reading file: top-level JSON value is not an object", file=sys.stderr)
+        sys.exit(1)
+
+    all_instructions = []
+    # For each YAML instruction, try to find it in the JSON data
+    for yaml_instr_name in REPO_INSTRUCTIONS:
+        json_key = find_json_key(yaml_instr_name, data)
+        if json_key is None:
+            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
+            continue
+        instr_data = data[json_key]
+        if not isinstance(instr_data, dict):
+            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
+            continue
+        all_instructions.append((json_key, instr_data))
+
+    all_instructions.sort(key=lambda item: item[0].lower())  # case-insensitive order by JSON key
+
+    with open("output.txt", "w", encoding="utf-8") as outfile:
+        outfile.write("RISC-V Instruction Summary\n")
+        outfile.write("=" * 50 + "\n")
+        outfile.write(f"\nTotal Instructions Found: {len(all_instructions)}\n")
+        outfile.writelines(f"  - {name}\n" for name, _ in all_instructions)
+        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
+        outfile.write("=" * 80 + "\n")
+        for name, instr_data in all_instructions:
+            safe_print_instruction_details(name, instr_data, outfile)
+
+    print("Output has been written to output.txt")
+
+if __name__ == '__main__':  # call main() only when run as a script, not on import
+    main()

From 7141a9cb22a8477adaaf453208d5462ded583565 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Wed, 18 Dec 2024 09:01:28 +0000
Subject: [PATCH 02/33] Fix errors due to incorrect parsing of VM

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py | 86 +++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 40 deletions(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index be17878746..449a7c5835 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -55,6 +55,7 @@ def load_yaml_encoding(instr_name):
 def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
     """
     Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
+    If the JSON has a variable like vm[?], it should be treated as just vm.
     Return a list of differences.
     """
     if not yaml_match:
@@ -75,8 +76,7 @@ def parse_location(loc_str):
         high, low = parse_location(var["location"])
         yaml_var_positions[var["name"]] = (high, low)
 
-    # Tokenize JSON encoding
-    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str)
+    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
     json_bits = []
     bit_index = 31
     for t in tokens:
@@ -86,6 +86,13 @@ def parse_location(loc_str):
     if bit_index != -1:
         return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]
 
+    normalized_json_bits = []
+    for pos, tt in json_bits:
+        if re.match(r'vm\[[^\]]*\]', tt):
+            tt = 'vm'
+        normalized_json_bits.append((pos, tt))
+    json_bits = normalized_json_bits
+
     differences = []
 
     # Check fixed bits
@@ -103,7 +110,6 @@ def parse_location(loc_str):
             elif json_bit_str != yaml_bit:
                 differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
         else:
-            # Variable bit in YAML
             if json_bit_str in ['0', '1']:
                 differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
 
@@ -117,7 +123,8 @@ def parse_location(loc_str):
             else:
                 json_var_fields.append('?')
 
-        field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields)))
+        # Extract field names from something like varName[index]. After normalizing, vm and others won't have indices.
+        field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
         if len(field_names) == 0:
             differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
         elif len(field_names) > 1:
@@ -128,23 +135,19 @@ def parse_location(loc_str):
 def safe_print_instruction_details(name: str, data: dict, output_stream):
     """Print formatted instruction details and compare YAML/JSON encodings."""
     try:
-        # Print the instruction details without separating by category
         output_stream.write(f"\n{name} Instruction Details\n")
         output_stream.write("=" * 50 + "\n")
 
-        # Basic Information
         output_stream.write("\nBasic Information:\n")
         output_stream.write("-" * 20 + "\n")
         output_stream.write(f"Name:              {name}\n")
         output_stream.write(f"Assembly Format:   {safe_get(data, 'AsmString', 'N/A')}\n")
         output_stream.write(f"Size:              {safe_get(data, 'Size', 'N/A')} bytes\n")
 
-        # Location
         locs = safe_get(data, '!locs', [])
         loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
         output_stream.write(f"Location:          {loc}\n")
 
-        # Operands
         output_stream.write("\nOperands:\n")
         output_stream.write("-" * 20 + "\n")
         try:
@@ -159,26 +162,26 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
         except:
             output_stream.write("Outputs:           N/A\n")
 
-        # Instruction Properties
-        output_stream.write("\nInstruction Properties:\n")
-        output_stream.write("-" * 20 + "\n")
-        output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
-        output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
-        output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
-        output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
-
-        # Scheduling Info
-        sched = safe_get(data, 'SchedRW', [])
-        if sched:
-            output_stream.write("\nScheduling Information:\n")
-            output_stream.write("-" * 20 + "\n")
-            output_stream.write("Operations:\n")
-            try:
-                for op in sched:
-                    if isinstance(op, dict):
-                        output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
-            except:
-                output_stream.write("  - Unable to parse scheduling information\n")
+        # # Instruction Properties
+        # output_stream.write("\nInstruction Properties:\n")
+        # output_stream.write("-" * 20 + "\n")
+        # output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
+        # output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
+        # output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
+        # output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
+
+        # # Scheduling Info
+        # sched = safe_get(data, 'SchedRW', [])
+        # if sched:
+        #     output_stream.write("\nScheduling Information:\n")
+        #     output_stream.write("-" * 20 + "\n")
+        #     output_stream.write("Operations:\n")
+        #     try:
+        #         for op in sched:
+        #             if isinstance(op, dict):
+        #                 output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
+        #     except:
+        #         output_stream.write("  - Unable to parse scheduling information\n")
 
         # Encoding
         output_stream.write("\nEncoding Pattern:\n")
@@ -194,28 +197,31 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
             # Reverse the bit order before joining
             encoding_bits.reverse()
             encoding = "".join(encoding_bits)
-            output_stream.write(f"Binary Format:     {encoding}\n")
+            output_stream.write(f"JSON Encoding:     {encoding}\n")
         except:
-            output_stream.write("Binary Format:     Unable to parse encoding\n")
+            output_stream.write("JSON Encoding:     Unable to parse encoding\n")
             encoding = ""
 
-        # Now compare YAML vs JSON encodings
+        # compare YAML vs JSON encodings
         yaml_match, yaml_vars = load_yaml_encoding(name)
-        if yaml_match is not None and encoding:
+        if yaml_match is not None:
+            output_stream.write(f"YAML Encoding:     {yaml_match}\n")
+        else:
+            output_stream.write("YAML Encoding:     Not found\n")
+
+        if yaml_match and encoding:
+            # Perform comparison
             differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
-            if differences:
-                output_stream.write("\nDifferences in encoding:\n")
+            if differences and len(differences) > 0:
+                output_stream.write("\nEncodings do not match. Differences:\n")
                 for d in differences:
                     output_stream.write(f"  - {d}\n")
                     print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
             else:
-                output_stream.write("\nNo encoding differences found.\n")
+                output_stream.write("\nEncodings Match: No differences found.\n")
         else:
-            # If we have no YAML match or no encoding, we note that we can't compare
-            if yaml_match is None:
-                output_stream.write("\nNo YAML encoding match found for comparison.\n")
-            if not encoding:
-                output_stream.write("\nNo JSON encoding found for comparison.\n")
+            # If we have no YAML match or no JSON encoding, we note that we can't compare
+            output_stream.write("\nComparison: Cannot compare encodings (missing YAML or JSON encoding).\n")
 
         output_stream.write("\n")
     except Exception as e:

From 6e45c3b59cf380f3b7a3b8f17306bf0b2efaaf09 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 08:06:33 +0000
Subject: [PATCH 03/33] First Refactor to pytest

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../conftest.cpython-310-pytest-8.3.4.pyc     | Bin 0 -> 549 bytes
 .../__pycache__/parsing.cpython-310.pyc       | Bin 0 -> 9093 bytes
 .../test.cpython-310-pytest-8.3.4.pyc         | Bin 0 -> 1994 bytes
 ext/auto-inst/parsing.py                      |  77 +++++++-----------
 ext/auto-inst/test.py                         |  56 +++++++++++++
 ext/riscv-opcodes                             |   2 +-
 6 files changed, 88 insertions(+), 47 deletions(-)
 create mode 100644 ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc
 create mode 100644 ext/auto-inst/__pycache__/parsing.cpython-310.pyc
 create mode 100644 ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
 create mode 100644 ext/auto-inst/test.py

diff --git a/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0b8bc1d08667e8e7a0f516d6da2fb738b7cfd03e
GIT binary patch
literal 549
zcmYjOJx{|h5Vg~!X$zG)z`&LPRmzZNV?tFC8-jw=i7rtZJFQC^N45i`Vq|4w;vevr
zydnnv0u$$yRz2xH-Z}5xIp*DN2Z6nR%;w*azYN$n3&03m_8|~NaE7M1&ep17A&ge`
z)WL{Yr16TTO@b%Lw}0SUG)}Nd|7L0j{1RNALU=(-P%}JYzcug%JAj)TOb{r!<Mlxh
zEG5sQlx0*qQgK1o$9J(>c#3;!LA|TV?btJ?zNMRSq8QJ$y`WjK?gW9L1&;_5>$81|
z=}HDtl?P`usjGzNDrPy$=idFr&2=p)eXQFtA-piM0GdTCBo&`XxNwKxVYuKc8pbKl
zCFdcbf<A<T$>cF8bCxnn0x}EfvkK!<@qp!0g)k>oRH{Ky>0VKpGTMzWze(B<diV&7
zHWa?ATeFg73ds6!R8-LpokSb@AeOL`*>E7}lVFO%(%Y7Hx4jOKH`7F|Twh@?v|yTU
Qov%14Gdip{F|Xf8fBQq3DgXcg

literal 0
HcmV?d00001

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..757e1865563e2916a5b76637ec4bd05e7c6b60ae
GIT binary patch
literal 9093
zcmb7K&2Jn>cJJ!$nd#~I;P6Y|mfEr{YHV_3TSzwM%F@b`Y%8`YuPCpf5ALKn)g)UQ
zP7kYlD6vuFIG_OA2P0k}xx`RF0=YOqZaD<|4<wi5ut_eBAUW(Mx+Z%ff>nO6dWJJY
z%Sd*JuCA`C_p0jEt5?7Gib1JlYxte{=f-OGbxr#<eGLCheEbAYxPydioHeyO>6_hQ
z>RZ2~tG98-z*}#c>zO+lmbS^>$@-d`<IJ`A5pMF#V|FLcvpn}$yJK^U=kYFZ+bi;d
zS6VT7k(VBucgnoX$51oID|{U93O~vx`4ni!`7}R-k_m5;AAX>V1#e<oLtkuBJ=J4R
zF-mQr%8~;2R@_cA;(jAaw1z?Dy&vVXc&^|HYe?d;NRwK`wsldG+Kw)n)OYnywM9*;
zl%WfTd{(=SvMP%|Saess=9aVSMULxiy3LN~tcYM8AJm^8v|O<@>j*FEh?dhEiQ}(0
ztsrt%f=-J&fp9z_0#U0PiS37eD~#M$!%IvxU;~^{VRAQe6XSummE?L{zS9IP*xuXc
z)`E5KoVyaV!XP-uJ>fk#C;YInIoE0VE566)`0_dLQFPAjM8O>KKi75zI$Eu@w-PJF
z4C?5mKtd=Y(RG`ZSr&g56I9S(Ncul&X5mpX`%h#dZCmRyNQ>=iT}TX4({~x9q;fk3
zXB;x55^i)2Jf>t<wWX8YjLbylrY7EsOqn^Vb<HSqKf9BQEQxP!9@cizUX|sgvCG7d
znAReCW8=6q#5;qU@G8$NYLP7sp54~}wA9T;g`J|z?y_#~Q}k+TrnYT>+Im9^Z$HFH
z=<hb3s3dbd=cBf3A#Xtu4dnAGpI@bNTjr&`qGPq~_*b@r-+TV1*K*vJL!uNNwaQM*
zUH6=`YU%%Q@y0@R)>-kfxY3&DGy);KupP8GMDE<a{LzP~YkJ_ooo3*2$B#nCYc&E@
zPYYdB{e0@pG@DK&wwx$%*4?PFrr4=o>#9GLyF(0@x10_wt8??($2aQN7Zz{b{N&25
z>o*n_RbP>CTj5F|zzTX9rky0Lp0L`&UZ%F?t~aat!a~ACV)ciUWK_pVe$y4cyWI4`
zYB@1NFG@1a;GriH!{t2539sFBp|j)P>p9VbFfrS1w3e9n1HYA&Rg<gNZ(h4{>&DI7
zN!AZZ8i^USy;fq<D3ZL=Ns^7yziI&+gJi8{o!AXX&WD`5FtODr((*FV)`?kN^;c5g
z4YXSg#8zSwS)y;PB$+J|vKWW*CbnwSo21HPrlHzdX+rv*Owd^Bs=6<kDU>Ulk<D1l
zW)sZRO>7wYn_@*~;#>c`qMLfrD6$jJC@nHe7q5YC#0VWm0d-;!!#~qC7@#o2*(xlV
z)I*!=DyQv0k-536V@oh;Y%`vDV2D#Zn|^((@!S)GTBf$NYx){C7;UtPZWbFV%k87u
zHoK>Hb6rd3q_v{U3`z?}wF}zDOr&F@7RlIkq4r&ym%0U+2X(1il=hA(3$nP9ZtDUs
z|AE#mY`iNAd<<i7c?G%hVGUz2(=mWUN!p;EM1SS(7`SKnxE$Nn#Ri|~j>!@@mgQK=
zal@0Pt__+>l;xRi8uQXu8a8w3OYMjT!!Z7Zw!)O%w}|UD>rTjt@GoU$$K=yrQoo0u
zz(6o<<4@!SKMaaVInIy3wEUBt#G8p<4z)ch$3bySP9i=2B-f(@1?KXPL+wsHzn$Xy
z??dfg=yA&QIAwNnyBg-eG54ud-k95Tl$DS>$!S|h?eVYJ7YwsHiPLOoR^KpT$!SPO
z%|mhgFZ$-E(tKp8m2&utJ9#<1%XW20;}CS<5HtekAn4Djv7MGvnBPk>ht!bPx7rh!
zLrCb?y|(2CCfdF?)b?e02yI^(qOej5S%L986aRxUN>_q)93GysM9P%)_k(jbHBqnO
z6cl0AdFV%}y&`isFmGp<eL50x_K_W`rbg)cisGkQuDKzMTHo9_P5*)CY_8wEv@{Dl
z>4a-Rr^%gV3_y6;+1x?%`<*aCo0Qkh^r&?~iPB&N=kUr3&O{ukJ(CyDEd;66Oh=3(
z5-i<K*Kd-AqoKiO(=`1sXw~9Z(R_3gKiGDU&51Jh@lyv${DKlt+6gRD<N*RSz1AuO
z<A+W>GwXPd+FpZZasJJkbH!~DDoDHboVI{P6wR&L$ni^7f}TY@;YlPoc!79!X=k&p
zj@4z`3@P+N9a{Q=m_@6qo@Byy(~lCvZxNu#HG@VSCN(kF{MEI@fE_((^vZ-1ygC}H
z1LIxlkw9RXRapFHZ<wi3ybnRu`<hU@KqSrZVr`T`V)a=iCLLe#*|V4C@4o)VlDvC)
z?v6VbzdrZY(rYNYclRED?cS2a`_jFo>ZR(_Q|R_7p^&G~@QdFXT@D_2p#oHHyY0Dx
zj!P)W`8S<qoUAqHT8l%aBKmUT;^z9jyLW$nZ)xe(drOJFx&A%r(iA80{PzhYtB~)K
zoyu0SFEt=dz?)y_8vq0Yd3|RjdhPYZXydHYqX?jrEf1X@_65>K64*tL7{kVEC$zZs
z{}JJUG&bL%37npHppT=NLWzT>rx0aLkfgBU?Dd4b5g(gRl|9w95ZL1p6pmWY#FZIm
zxf7+@O&f1!kD>4Y!Av~UE7{+bM7>W4T~2w<{GcqwJVrE-m-sOakR|%^keI|zQ22=2
zT}HCUF2ygN9afC$b2MX*IdT4HJ!s;@v(J%Dy1P@T?YGjYV))p96Z6hT@C*Dl?Rl~R
zC_B;v<^7gq3smc>E$E=eMgdCEg_q<A-?`0Zk|nKcaWO&F8DOkcFUk0kw;l?@2;w-B
z>S0l({HxSvd}Oz%O;}OTqGuzhM4YFV6MaBZhi3T-wIv1?LA*iC-lQhkD0twt!X&Sl
z(omB;(mDp`JxW-VOtr|;HvQP+_5C*DZDMed7!-!=V`8ZeL7!w4-=x?lQNy&*cZ#*f
zR5^X7l1mEettwOWRMo{L>f{<F3Pc_IVSWD*8ifLhnau(^Wm!=#0#aGJX<D#06Zi(8
z0<<avYFRpgErL@f>Ii5NxI!5oodB3_F$eFHO#j@{k06~gsBQv$E8uj<D5I{-PNT-u
z#m{Mgzeh<9$p|#7pn!a=D|o^R%<eZVu1lQ^t|Zu@r?5knC;*~zCLNn9ryzmKWmS7N
z)ILY1n!wt&0Zzj;wyG)gYKke1Dk~|E#E5ihL<W4+oA8_n3gR#qHvyQ+N3|B?d3abf
z#xZFg;9MCUSv;+{j+6fbfHuMNfPWc9J1H{*+E2AszVG>h?-X&lt@Ws<N2s%k`jE{2
zAobytTD3beIv&8JTzpB5>8P5iB`rXu)9}Z1nUCPxiGP*3ot(^Lw8!}IT^+rgP+u?f
zzD_<diRZ>IsU1AY{5Fop-gkZ%9v?roV@rTO_;n;BN7^GJEey@`MWW=Vy7p}?D##Q(
z7XXI~{B(x_JdG<3FUi7yLo1cu%S0&)Xmd;2lt!SE0MhU$OR|KL1*}{tT{-$voL*5>
zCCvC}x2#Ie$nt=9RN^yAi_TK%6EiB`ALG@pbq1~6INvHLidTt8ZxpX7eo9*@2T%C)
zB0S-!@(tVif@$3`rp3n;)ojWa@*3M_s>gxm&ZV@sLERnuY>arMxrgkbR;|TXKNdg~
zJv>KHX4os{UIbKy(-*F8Z}+Ri=-kKw9@Y|O$J5UBA%FU6N@?$%fBSd-+3}S9jvM+7
zaNPHI=h4I5bAEO_xd1RS?~MH7$;)BrtuG_A^R7awRG;K8hwF<G=DQjj3+FD!lZ!B`
z2PhMh3geu!yafl<jx#R;^@Qq3jwe6tVW}aSxM1G^&Vz))c{?dxZ}q-D1Y}6^Z4xe_
z(s=TEtKEshAxg)I4?m|61GAIzjZQSgD4xXkLGt+Ua|($%p0uy^!x0~+xD`V))$;rl
zrce+ROxRr<b9Bn=u_Zx}G@ewe+9yp%8qYqb@RODT+Q4ZFA?|wz_Ch?VF!KQ(DUcp5
zT<8bT?D)+7a6^X&3JEIEU30Duu>ZWhXFATD6PE{t&{Zea=bi6ZVp7~7K0CzeBZ{~v
zKrwVe_7IjGzh__BcM;|fLU*qxy%0{3AF7aynijysh~mTjiC?%z!9PO#a8Oj!;SChO
zawTX*eyc-a9pzHB5Xl?fW3^h%wyWbw=Ai)SQ1KE$5H>Lhbc*+By(q>hGL(EkH3s2+
z#B4S_;CqEnR8&Hpgh@uJ9NKJ!Nj8jl(199Wd(`mSgg>g4!BLr%PzR+VXyCc)=n#rd
zo<svdomM?)he-}CRHoDyNnJq{ljfKh$g3#K033-=X@HC;%o~D>3O<}vK_N9dpd0j=
zM&9Z@8}kJS!w889cwjRdLCgtu41Y!aG;;K<BQkS{zy;Iwe$F;*00*67GoU)lD!PT1
z)XJuosGR~W;0P!HJ-`XD;<rW9HY$3BonS|p^|}6A@yF<R1pQhl*bgn=!w2FqEJBb}
zYP%HSc=?{uMJxwLwTZ726b#+b$&VASM%sM?5dfHAt|OL_Ro^&RK#AxLVmC}?ce1+_
zw<&Mn3*NaAN6I_D=`}it>~2y>TE!m$PzM3Rko27*k_guko<vkkS!QUIa2q39rQ(^e
z+s&Y_ccAm(7x$uRsjn@@FtOCz$cdxKC+0)9`GCTQJS1%u$EeU!o+o-CrE5}bPE4b`
zY9(bFNuB!wUNFVz#?|-G>LPY1f*=wl;*+608j2?J;zcSbfgOC)%{1V5P+_JK)UJPz
z;jcg#5->N-DYfPmKtX84=lcJc8Sxb=8swk#f3${qJPHCXBSZeS3Uuo`EDddU4Xlkp
z*9E&&+bivT86#!*TFds<+@>{`3{k3@%;<n7i`vG^+`wv@@!zV|yo`{+dOIS2HeJj#
z3QVQ|7Xh@G3WZZJlg{7NDkum(i_^9h`nU_A#(n#h>IhtKDntW>v6W9TPYQwfEfvqy
zW>PydQ%8J)&TRyssRCHv4>}<<Af({Sb3(}~@}X90p=zoXPF<!<2M&^;ux;CqlqXWn
zrt6nFEh-rM5=5I!52CqsT#djy!O!&)ayxvt1=ogc9$e%SNhWn{Ad~+9myrB+D?^)H
z#}=lWj50I#tUQHg^=Cl*-=M*sof@xN^e7s^{G`(RnEzkkNlWJ9MhLeY%+(`3EJwJ}
z*an{B2m!<=f#VQh!>z*@-Bs*lr9t68;7b#DOCPy;I5T|n08D)XSj^gbA4m+BWN`(2
z%_uJs00p{3_=XT0P!|ySXBp}%%@OJXPR=3>Xsa;e@24R~Ak>^Rw-LUTd9HUfu?Bt_
z70k;%F{0xA63-D<+_)jlowCdm0*=O{9aXmV@U#84)`7O!o$>VkgHRx7v-ja8aQm^w
z3tyuIv)6eMvws)i;=;}(FDcAPxiaOVDVarp66hBtW3mWrS?T#eS(v>N#$E!}rP^^>
z8q{w5jVhbi=QgQwL_@(Cgo~qTi4e0a(7n#a-+xcm|28aZ%x+4;S4gEQA<OD?{CBCr
z?4%Y^;ShoAm{q?quWH_?Iq%_8Z`c;4H+y{pifz?1Td6Tb1n*-Cm<>XiWUkZ}N<)Cd
ztWuKm)JF^8FY=s$I;Q(?s5R~l6^u@a;vXaaGgOP{i@Hbb*>(VlO>BHVY~%+cT@d8c
z##1Tiz_G3FoM@o)PCRw>+B=`TH$Q^IXX4qJ-n|w<nCj5QB|0QDGwXy8{B|2Q0#HOe
zAoj2!2nzM;RiKCho9cel=jvi~glOd8q}UZ1U=`q`+ehp#c2pT%tQ*3a)S$}J69j>_
zM+Yr}b)o65FLU?eNwf@!RZ)vih)xT)LWFk);mLkOaHHcoVGrZuH?J>Vnfuv@2o^i*
z>j+uf;*ZcMK5pL%V8w=8gzynyu3$#%Xd&)Xv(xs~Yqu_6|M1#XXXqjh`FC&J{OB@W
z%GvQxe`n9;{~)mA<2Mu}b10a%?BSY{P+^2O&XDRML8eiiapDN&HYlMWGVbOO!KFJr
zm(!68y%2<@L_o<$l#tOB6rxe65I5E-swAj61TD=l5Rq7?LWMjD7gfi4(J4|h@t9Z?
zQf*h4bqQNhkr(k9)l`63hT&b9jH#HEQus9XWc5KvVyW|wF2#HIgLITA&`DiU3=1oj
z0#*VL2Q|eWD8+A39o|A>T9!e9CSXEfJ|Mhj=I3P~%Cf?Frda_Zw15wFg(&G=F)Bt8
zm@!3%MTHk_AjdKen-jk&kC{yT6&N@0?Eehp2#;UE6B0U%aKzk)(dP`eP;?Zi10Z6!
z`B8x-s^tdIplZiW<XDuEIGlKfXTJuPgdx^o_Gbq)9D9hKYHr~vbr`?@N$9R(qqn!B
zwV>sw{}JeKiuRUsK{=YMUMoFLNrV@j3$AFasRbFpw2Ok`bx&z)iC-YW{`r<(*>ug7
zl9;Yo-Bb#enBF7ce3C)c5Pv~LN(ZvI6i4{=;G&r8uHSl-w(bO)LhKMMf%h_^dN{|Q
ziN8j1V%6)^u3ksqG&j7|NTw7F6@N~>T&F}0rqBqQO+ZY#_7-Z`r>IWt0ktK_&J28@
zRO^+$MEaX<7^VL-iTwf49Y(!AG=72&X{gh>upaOZ<VVITJcGp2(z{{uGw7z+)&WRf
Un)ulIz$)3()<vrbKjhi}0&Wp|NdN!<

literal 0
HcmV?d00001

diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..deb31b17abb5f4e0bc803f28547204fba3f421a6
GIT binary patch
literal 1994
zcmah~&5ztP6t|O1lF4Lup%M}mRpl0hIzls3gv4Q3t%~S}ieR_e7FCQuqipQi^^%EG
z+u4sObJ|`J;*5}HkNh(nIQq&7i8F^@;5jq<QK)DlJNEN?&wlUs^NUzni4Z)0{g&?j
z86xz%om_rgnB0M;9zjPD#Tgpm=Icyd2VpezMlHNp@kf5M5{v@)I$1bvkJ=b9e?Ouw
z^`4^>JnGOE^`E0rOz~sX4gLVHXtRq=B+GnUBvLV%Vk=xe9(-;444RVA)o35hF~z5N
zhL>Xp#*TLO-MLpc^5ytK-l`qyo;ovE`xJD3LO(kv7$ca$%B7iKyJxc+*G}zDTzksR
zs?+Q3E0K>UJY!YV>q%CKG3By~M5&8X_w|vk-uz1Oe3yLwcx#i~2gM<&t}R5{psu(U
z@|fJ^lBIS8t5>!f<3^c0O7z~4bOYn3BA@VG)81uT6q<|NI6@h(00k2yJE|FY`-11D
zRY;!eu4e+to|Q~#(>_0^3C~VxI*U^_$NvS%RmkIu;b2cp*&vySTnRCtOtS9=lB@Kf
zSLS@e8ST-X0Xx!zq|~CvbEO9wcIX$!rlXiH3&2b7sg_0N3Usj(<JgVyDqeM<;mDD1
zfcPai+aLg%E#UkW1~vM|odJ9R4O*m};_uy=S9=tdNM6;LE}(MeE<k8O^Y&YF0O}O0
zTd?XiK-$$-vl`Sc^-r81;#sS1&BIeP^Pd7eJ$O%CjPme9q}C6?kA1&}MuXZvK=L8@
z3au~NZstkt&)Z-f(1>=PqdK??^qqyE6`$|8((H)oYGa3hDr>-HxdL%jTTegT-h8<E
zU`RFvvB{HVYOz#gBFdZ+AxV-GCM66+nwC;B+9$WZ<Wrs!kx`;j$qP*m6PW|6y44Nv
zvw)ElQqVGGl<XXni+j_rKDoEOy|q1Ds##8gxgxpHB$X`Duil|KL$|uNB%`JHf6NS)
z)jLmwPBQZFaxC|4EJJd;dhnWXd4Bm+{}bkW_wrOwh*%LSnxjt>o$mGF;!GyM(L7~p
z_!6faNVg^7V`%C+bi2T+Z#z&P@Eo1mVu4PvbZY#g^UMK^@C)4QI;I7X@WMn&CsM0J
zuJ=Ib^luw4<2h^Y$f8>VmnK@zxYDVZ78@Vgee9;!QT4IC5#tLC##4*m81NhKu)XAf
z3)x@KM4Du30|sX<&XGC`me7mn76iHR3N9>>`ryp!<rEQsx?sR2AkX>c)fYgUC5MzH
zUE>1`nT9fC3GoCXAVs1SQ)H*qq=^7jEd|RIvJK+1P`Z({flGUhjb9ww=#4i?cqT0Z
z(r@JK2;zi-B?^`s4~R!0%SFbuO|0s+o3bD~)*!Z>y)VX>OqH27cwz!5fSTpnv>F2w
z+2o9y@xNiY`TG371>V*B&_#IF4V@6`QtU#HV<*Db{tmqmhkghZtC6|#T~O_=m~f}$
znTCsN;ycCh_<VMCya>B*GbZhg?n`zkxn`z)F0$o#A&bDcIb$N_+u*s$lL_pf4mjRq
s(*zTKq)W*bd9nLn3e(nV*-$WCpTf1z*oOT-Dx0~RD8w=LAdYzLA9p@FdH?_b

literal 0
HcmV?d00001

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 449a7c5835..d227061d3e 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -3,9 +3,9 @@
 import re
 import sys
 from collections import defaultdict
-import yaml  
+import yaml
 
-REPO_INSTRUCTIONS = {} 
+REPO_INSTRUCTIONS = {}
 REPO_DIRECTORY = None
 
 def safe_get(data, key, default=""):
@@ -123,7 +123,7 @@ def parse_location(loc_str):
             else:
                 json_var_fields.append('?')
 
-        # Extract field names from something like varName[index]. After normalizing, vm and others won't have indices.
+        # Extract field names
         field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
         if len(field_names) == 0:
             differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
@@ -162,27 +162,6 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
         except:
             output_stream.write("Outputs:           N/A\n")
 
-        # # Instruction Properties
-        # output_stream.write("\nInstruction Properties:\n")
-        # output_stream.write("-" * 20 + "\n")
-        # output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
-        # output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
-        # output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
-        # output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
-
-        # # Scheduling Info
-        # sched = safe_get(data, 'SchedRW', [])
-        # if sched:
-        #     output_stream.write("\nScheduling Information:\n")
-        #     output_stream.write("-" * 20 + "\n")
-        #     output_stream.write("Operations:\n")
-        #     try:
-        #         for op in sched:
-        #             if isinstance(op, dict):
-        #                 output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
-        #     except:
-        #         output_stream.write("  - Unable to parse scheduling information\n")
-
         # Encoding
         output_stream.write("\nEncoding Pattern:\n")
         output_stream.write("-" * 20 + "\n")
@@ -234,18 +213,11 @@ def get_repo_instructions(repo_directory):
     """
     repo_instructions = {}
     for root, _, files in os.walk(repo_directory):
-        rel_path = os.path.relpath(root, repo_directory)
-        if rel_path == '.':
-            category = "Other"
-        else:
-            parts = rel_path.split(os.sep)
-            category = parts[0] if parts else "Other"
-
         for file in files:
             if file.endswith(".yaml"):
                 instr_name = os.path.splitext(file)[0]
-                # Store lowercase key for easy lookup
-                repo_instructions[instr_name.lower()] = category
+                relative_path = os.path.relpath(root, repo_directory)
+                repo_instructions[instr_name.lower()] = relative_path
     return repo_instructions
 
 def find_json_key(instr_name, json_data):
@@ -269,21 +241,21 @@ def find_json_key(instr_name, json_data):
             return v
     return None
 
-def main():
+def run_parser(json_file, repo_directory, output_file="output.txt"):
+    """
+    Run the parser logic:
+    1. Get instructions from the repo directory.
+    2. Parse the JSON file and match instructions.
+    3. Generate output.txt with instruction details.
+    """
     global REPO_INSTRUCTIONS, REPO_DIRECTORY
-
-    if len(sys.argv) != 3:
-        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
-        sys.exit(1)
-
-    json_file = sys.argv[1]
-    REPO_DIRECTORY = sys.argv[2]
+    REPO_DIRECTORY = repo_directory
 
     # Get instructions and categories from the repository structure
     REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
     if not REPO_INSTRUCTIONS:
         print("No instructions found in the provided repository directory.")
-        sys.exit(1)
+        return None
 
     try:
         # Read and parse JSON
@@ -291,7 +263,7 @@ def main():
             data = json.loads(f.read())
     except Exception as e:
         print(f"Error reading file: {str(e)}")
-        sys.exit(1)
+        return None
 
     all_instructions = []
 
@@ -313,7 +285,7 @@ def main():
     # Sort all instructions by name
     all_instructions.sort(key=lambda x: x[0].lower())
 
-    with open("output.txt", "w") as outfile:
+    with open(output_file, "w") as outfile:
         outfile.write("RISC-V Instruction Summary\n")
         outfile.write("=" * 50 + "\n")
         total = len(all_instructions)
@@ -324,11 +296,24 @@ def main():
         outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
         outfile.write("=" * 80 + "\n")
 
-        # Print details for each instruction directly, no category splitting
+        # Print details for each instruction directly
         for name, instr_data in all_instructions:
             safe_print_instruction_details(name, instr_data, outfile)
 
-    print("Output has been written to output.txt")
+    print(f"Output has been written to {output_file}")
+    return output_file
+
+def main():
+    if len(sys.argv) != 3:
+        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
+        sys.exit(1)
+
+    json_file = sys.argv[1]
+    repo_directory = sys.argv[2]
+
+    result = run_parser(json_file, repo_directory, output_file="output.txt")
+    if result is None:
+        sys.exit(1)
 
 if __name__ == '__main__':
     main()
diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py
new file mode 100644
index 0000000000..6f0b7b21ab
--- /dev/null
+++ b/ext/auto-inst/test.py
@@ -0,0 +1,56 @@
+import pytest
+import os
+from parsing import run_parser
+
+@pytest.fixture
+def setup_paths(request):
+    json_file = request.config.getoption("--json_file")
+    repo_dir = request.config.getoption("--repo_dir")
+
+    # Resolve absolute paths
+    json_file = os.path.abspath(json_file)
+    repo_dir = os.path.abspath(repo_dir)
+    output_file = os.path.join(repo_dir, "output.txt")
+
+    print(f"Using JSON File: {json_file}")
+    print(f"Using Repository Directory: {repo_dir}")
+    print(f"Output File Path: {output_file}")
+
+    return json_file, repo_dir, output_file
+
+def test_run_parser_mimic_old_behavior(setup_paths):
+    json_file, repo_dir, output_file = setup_paths
+
+    # Run the parser (similar to old behavior)
+    result = run_parser(json_file, repo_dir, output_file=output_file)
+
+    if result is None:
+        print("WARNING: No instructions found or an error occurred. (Mimic old script warning)")
+        # You could fail here if this was previously considered a hard error
+        pytest.fail("No output produced by run_parser.")
+
+    # Check output file content
+    if not os.path.exists(output_file):
+        print("ERROR: output.txt was not created. (Mimic old script error)")
+        pytest.fail("Output file was not created.")
+
+    with open(output_file, 'r') as f:
+        content = f.read()
+
+    # Mimic old behavior: print warnings if no instructions found
+    if "Total Instructions Found: 0" in content:
+        print("WARNING: No instructions found in output.txt (Mimic old script warning)")
+
+    # Check for encoding differences
+    # In the original script, encoding mismatches were printed like:
+    # "Encodings do not match. Differences:"
+    # If we find that line, we mimic the old error messages
+    if "Encodings do not match. Differences:" in content:
+        # Extract differences lines
+        lines = content.splitlines()
+        diff_lines = [line for line in lines if line.strip().startswith("-")]
+        print("ERROR: Encoding differences found! (Mimic old script error)")
+        pytest.fail("Encodings do not match as per old behavior.")
+
+    # If we reach here, we mimic the old success output
+    print("No warnings or errors detected. Test passes but mimics old success behavior.")
diff --git a/ext/riscv-opcodes b/ext/riscv-opcodes
index 5ce8977a59..9226b0d091 160000
--- a/ext/riscv-opcodes
+++ b/ext/riscv-opcodes
@@ -1 +1 @@
-Subproject commit 5ce8977a5961a6bbfc1638e6676e60489665d882
+Subproject commit 9226b0d091b0d2ea9ccad6f7f8ca1283a3b15e88

From ac04c2887e78ccacf898d053a83ac8eba707c075 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 08:19:59 +0000
Subject: [PATCH 04/33] Allow 16 bit instructions for C extension

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9093 -> 9422 bytes
 .../test.cpython-310-pytest-8.3.4.pyc         | Bin 1994 -> 1944 bytes
 ext/auto-inst/parsing.py                      |  30 +++++++++++++-----
 ext/auto-inst/test.py                         |   2 +-
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index 757e1865563e2916a5b76637ec4bd05e7c6b60ae..09227159071d37a12bdaf3dbfea20c3722291076 100644
GIT binary patch
delta 1997
zcmZuyZ)hAv6yLYEx3{;qdw-M5U22ofB{g1?G-#@gwzaL5)*4ELwu-gwxykNb&m?!r
z+$L?$S+4e~Nu@~WC<qeG`K6%rL&<lGBH|ZCD2QAU>c@a0f{6H~zS(Q_1n1b_zL|ON
zH}CyscJ9r?-yR8<bzK#(eewNtVR-CjIOnfPynnx?@vY(G!>976C<qiPehUh4OrXAs
zYKfjDtP7T}0VHNgC9wRiL}48)*$Om7*YAkbUk579_{fl@w0a*1R**`F>YAT3SRqTh
zEVyz}a04|eJ}aD#y9$-5V(Dq24yQyn=&CeG)x1apI2uk1dxV8C$G0w75$eNbLIuIq
zsANS`f~(UIGU#rYYU_Tg({NsF00>mKqT@Kba2%}|rbDc_mqdckbP1SF0Mn^qK1dd!
z8j>V%>8KmSg=DLP#u_5IWOceR8bRVHjkSmumT1J)FedH<s8k1x%Uuzuzm?94kP^~@
zn^+d|U?r`t%L-?%gWEwnDmQ6--EXB<_~Pj`)DVQ)!Zq4qbz?{;O;}r&1o9K@#5a&{
z+MFMv2@KgvI}v-<0z44oFaqnk+veB%|9&3tZ*6|YeI|dS)}jimD-8iFsiOkPe>Omx
z@vlNfrmEFFBdh{RdlY>%iD_hL5E17P`#oU?)`DJQf1oy9s10r*7rM?jc<U8d22^C*
ze-&AU8hMg-p}d|3JbV=vGJM2(_(9JJdOJU8J+iLSt_Czjlrx1#D^&+fOvJ>@w#2gk
zCblNG61wjjosrP@LYwb)nn_{)Lv0~SD}=_63E5%uAP54>C?D5U1KW|jQ7ly)k}H{W
zwo!3R;#7=tMdz$BcrZ6OY&d7_lHm~Bbhz2_U7U7`<&t4mj7Og^rivIaTP_uLczl!S
zMa~r~HimjpMTa|%J=X5%d9tq+H_;Ri<~F1FZEhL}f58X|v&mq8I5ce7=jZKd$F_`W
zH@g@-38U;z@DLnj8TIuLmLP22!&V(&H`D>Knqaq81|?Pq{aC98Uo=V4oSL<bYUnA<
zZQAtmt#%s4icwy4jB?%}W~pEs`7$x8u@S7Hy|NwCoc}@NHwV8_68Mna9Qb|Hf<0(u
zp(##HwfZ6KhsXiMx46$U?5Z}xzSK_Xp{Cl>mP^a}^u2PL^q~vO={*CH2c_m_?NY%x
z+l*c?=Vo*BrsLS8l*8R2kF!;Mt31X@cd`wAxBN6W_p#sgoV=f#huF*E?7k|Rk}87)
zGQ@qFs6h%8QIZshLL4-o1PcDbpyFr>B%cNv`bP1QM9mjvx5B-H9pV6p*pFibvOri&
z`ohqIQ5g^~N+L1&db`-Ya7KxvIIB>7bt4-Y5wj<#iih!pqfvo|s7BSggjbo4W0Q_V
zbv)W(@3>AfH+BGavRkoHSlReBHZID?Ip^Vxk&aJb<aut(9Q+)PA~by{LNh?@C7V=i
zvW1TiaWI*b+y=YY_T*_e$ySo%@E-d-c?QDF=<0z>8~eIu0Ltvs)LyvDHd9;S>P8^_
z8Ne^>tL|;UHh%7Y#fQh{xt_OSnBDGq0jAkNZ=XEHx#!qK??<((+)QzBIP@SirPnIu
zQqw<MHmyoi!eefcb9|C;nB>4XEFv`JieuS?v<l+qsI4lgQYH?W=Yw9+m(a>ann6yV
z%NJ*DvfRx%LU?HB7aeb?33(FW5pS@%@e-DP$LJSDxYl<HuCvp9-|ZjA$xZyD6Ue&=
zSs)+sm)u&xOGz?GVaY3}DcR?X4!O=fd}NIWEwj0d0sGlInJuF?aG<H=a&u;}l*=I%
z{}%A&5qA*&-%8%+Fu`Gw{g4?Q@J@#(gtvRVB;sl4Ny1kmH`q}BMC6Pzq3CL-^0cC<
HAl>^L(dqhD

delta 1616
zcmZuxO>7fK6rMM`Ua!~Q^~R24n}o!63@8#Pln6D2CWWdEr4fh<RHPQ-8per25)zM>
zD5bMC$f-gqRSF%Ds-m=Ug*b4lTvcihJ)$03`ge;|IP}C7ajGiOH|rcysVnW9Z{B|2
zd-LAR?8D<99dqVw+ambv{^R15vHxqQ6sl`#zV)PWjN+?&wOf=BL24leQ*fFHTC*4p
z5ZNG1V|tVNng>E_01M4%ypI{}*&V{d>zZORlSP(^uTK*{B;Yl2KH(ce7Y4Jkqygus
zANI|<2n%zPiV(V^S@JTe?(yh`#$tkEg2)u{EoS>s5ka}_$Aq<^i>QcA(k1{AQRWO2
zKUy6_6vsuBnX7zhf{W1Gi@*rBuuz@EgpMD_=poi2;!Vn{EaBJV!p4w}h_^#lE5i0I
z6uF+kLJhU*^Q%N?%)UV~Buo5`6*37d$vT%!6}AC>LL_Qm2zNtgsT(SB*Se+_SG_J0
zERBk!=wO*e!XJtxj=;a)#?9762TFE`B--xv@RkrISk2Gdh&}($3j#md#@o3SDYO+S
zv=MF+EP`S6o$Z!maVgIbrU+xF>ZC#ObN4E&0M^y}-@4X-n<9n%2`ULfK2>wp*T$Y1
zY*votHzFd{geJv)bm2MZ!c&o+p{O4W7FQ6RSYW>hqt%3YhpZbxguf1Lk)0VG$;Y;l
zi=qq3yS7QpwnVt6$-wTbKgw0}D|s(+-TE8$aR-m=lbk$c^*ej^<|<1Im5W}5<%SN(
z8S5R`D<4>6FedHj2X3EYf-=wXx%!fn(IQ3OcTouyc_Q|wd=$+N@<Yhr>aw)WVMGJ4
zNiSSz&($^IN6;)Oa8!O{?~6p<F7xT~h1rUHY)^nC-*iSGEkAL3lkWTFx!KY}+4Cy=
za;fHVJ|ur}b{a1z@d5eNIc$t5I4XzYCF2zZUzeZ72SzwT-7r8017b7=2{5Uyo8Tgf
zQ62pzIAEbS0Xlw<p~uA+u@?K(?aS*(OrY3BdY9%vFVYJ@uj!Oes5A%Vm|HMiyzy&r
zwbWX0_t8CPgoS%?#*3+&Cai{rH^@fMNlzCxZftw6PRa*~qc9}R<RGlHhLgk8cuPeb
zYb~bkgZqZUhB6^##^wHW4@}4t>0NM8*3#$Uto$uK3@g&gOh8OdXL_ODx{;Xyn3d16
zBYnT4s@91{*E_LD3oCH2JbI1(qlfrv>(q`P03OSD&n{SN9qf5mgBf|V?_=ndJ-Jix
zo}A0&jgrcIN#4qR8C_K{qfAwf=6h0}g7eCpP-aaTF6Z;zuZ|<!)NAwH<8#Uz6jDbt
z5Z{bks?A?6O-|2N__Cr!=dXARSG>Tp36ngm1Ru(u@@KKgVxgZtBl39R9Nd-n3cntC
z5x+jczbfH3(F_2;qXwg$LGARWUglGayc)pD(zM6#Du%{C$AK@)Z~JrO2k_|D5;%8)
zu2|FphpI!MKj-(A&@7ea%F~xiB}|}x<TwQBI(b$_xvk7`Ww^{22aCZe2pSRWlPZ)4
c%?etg^7H%BD~`ucnd7EyrOZ(?W?`~_0Uy?Xod5s;

diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
index deb31b17abb5f4e0bc803f28547204fba3f421a6..bf345f660203a51a0b63b051ab24a0a32a775131 100644
GIT binary patch
delta 53
zcmX@bKZBn)pO=@50SG=^OHRMMk=K}&OSUAnxFkL&r!04KGHWa&7bl!Q`7zsb0J#wo
AL;wH)

delta 103
zcmbQie~O<spO=@50SLZbN=|>Xk=K}2*{meBxFo))G%vm&v8Xt;C_XncH#0dtKPM$V
aDK#UpEHl4ovlnYDqp}iVg_Ae3JqG}2swVmX

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index d227061d3e..bd5fac67c1 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -52,10 +52,14 @@ def load_yaml_encoding(instr_name):
 
     return yaml_match, yaml_vars
 
-def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
+def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str):
     """
     Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
     If the JSON has a variable like vm[?], it should be treated as just vm.
+
+    If instr_name starts with 'C_', then treat the instruction as 16 bits long.
+    Otherwise, treat it as 32 bits long.
+
     Return a list of differences.
     """
     if not yaml_match:
@@ -63,9 +67,12 @@ def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
     if not json_encoding_str:
         return ["No JSON encoding available for comparison."]
 
+    # Determine expected length based on whether it's a compressed instruction (C_)
+    expected_length = 16 if instr_name.startswith('C_') else 32
+
     yaml_pattern_str = yaml_match.replace('-', '.')
-    if len(yaml_pattern_str) != 32:
-        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."]
+    if len(yaml_pattern_str) != expected_length:
+        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
 
     def parse_location(loc_str):
         high, low = loc_str.split('-')
@@ -76,16 +83,18 @@ def parse_location(loc_str):
         high, low = parse_location(var["location"])
         yaml_var_positions[var["name"]] = (high, low)
 
+    # Tokenize the JSON encoding string. We assume it should match the expected_length in bits.
     tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
     json_bits = []
-    bit_index = 31
+    bit_index = expected_length - 1
     for t in tokens:
         json_bits.append((bit_index, t))
         bit_index -= 1
 
     if bit_index != -1:
-        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]
+        return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."]
 
+    # Normalize JSON bits (handle vm[?] etc.)
     normalized_json_bits = []
     for pos, tt in json_bits:
         if re.match(r'vm\[[^\]]*\]', tt):
@@ -96,8 +105,8 @@ def parse_location(loc_str):
     differences = []
 
     # Check fixed bits
-    for b in range(32):
-        yaml_bit = yaml_pattern_str[31 - b]
+    for b in range(expected_length):
+        yaml_bit = yaml_pattern_str[expected_length - 1 - b]
         token = [tt for (pos, tt) in json_bits if pos == b]
         if not token:
             differences.append(f"Bit {b}: No corresponding JSON bit found.")
@@ -115,6 +124,11 @@ def parse_location(loc_str):
 
     # Check variable fields
     for var_name, (high, low) in yaml_var_positions.items():
+        # Ensure the variable range fits within the expected_length
+        if high >= expected_length or low < 0:
+            differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
+            continue
+
         json_var_fields = []
         for bb in range(low, high+1):
             token = [tt for (pos, tt) in json_bits if pos == bb]
@@ -190,7 +204,7 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
 
         if yaml_match and encoding:
             # Perform comparison
-            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
+            differences = compare_yaml_json_encoding(name, yaml_match, yaml_vars, encoding)
             if differences and len(differences) > 0:
                 output_stream.write("\nEncodings do not match. Differences:\n")
                 for d in differences:
diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py
index 6f0b7b21ab..eb0c69191a 100644
--- a/ext/auto-inst/test.py
+++ b/ext/auto-inst/test.py
@@ -18,7 +18,7 @@ def setup_paths(request):
 
     return json_file, repo_dir, output_file
 
-def test_run_parser_mimic_old_behavior(setup_paths):
+def test_llvm(setup_paths):
     json_file, repo_dir, output_file = setup_paths
 
     # Run the parser (similar to old behavior)

From f1b8613d22edb265c573d509c85907c45acbe202 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 09:16:27 +0000
Subject: [PATCH 05/33] Revert bad parsing

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9422 -> 9422 bytes
 .../test.cpython-310-pytest-8.3.4.pyc         | Bin 1944 -> 1777 bytes
 ext/auto-inst/test.py                         |  16 ++++++----------
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index 09227159071d37a12bdaf3dbfea20c3722291076..6e1d3e4c34d9c9a2038f2ebd613ceb4a4676afeb 100644
GIT binary patch
delta 19
ZcmX@-dCrq7pO=@50SGQW-pF-C1pqwz1|t9f

delta 19
ZcmX@-dCrq7pO=@50SFFV+sJi91pqu?1_uBD

diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
index bf345f660203a51a0b63b051ab24a0a32a775131..82b12de2706eb3018c923f41d81b8a5588cb79b4 100644
GIT binary patch
delta 145
zcmbQi|B;t3pO=@50SGvsB&R#GZ{+h~W;C4~&1^b(6|)MX(&Y2ZN|U)*>=|`|V)BzS
zS@<STWLeB;Fxiw<3n-hwsw7<{?wXgJpOTrEUaXLkuaK8tqL7<dlANJeB|rHTtNdg!
ow&lu9984UHTueO7K$4M%iHlL@KL?P_!OFqJ#|ROd{D5r-0CXuKxc~qF

delta 315
zcmey!JA<DupO=@50SG=^OHRMcv60V*nbCi8G_$FchHqwWX0k$lPKrWta#3bMi9&f|
zQC?<Vy5{5w%qoo5leaP}Nh)BfN-ZkNFVdWRo!Oqz9cYmJWHS~%e1^HOEM_#G{DMUb
zXrwT!k_S#BtF&G7lJir5E+|$=$ydnBFHy)%EJ@B#NGw(;NG$@pG$}PBu`DycNUzFg
zasaEmjzUOkafw1fVsUY5u|iTQPzT6U#SmYWCMTyB1BIb_Cg-uPP-f;};$Y-r;$a4o
dj66(Sj57Z@SU4Ct*f^N@7{Ox9lP%eH0068dU<m*K

diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py
index eb0c69191a..63ef51fae2 100644
--- a/ext/auto-inst/test.py
+++ b/ext/auto-inst/test.py
@@ -21,36 +21,32 @@ def setup_paths(request):
 def test_llvm(setup_paths):
     json_file, repo_dir, output_file = setup_paths
 
-    # Run the parser (similar to old behavior)
     result = run_parser(json_file, repo_dir, output_file=output_file)
 
     if result is None:
-        print("WARNING: No instructions found or an error occurred. (Mimic old script warning)")
+        print("WARNING: No instructions found or an error occurred. ")
         # You could fail here if this was previously considered a hard error
         pytest.fail("No output produced by run_parser.")
 
     # Check output file content
     if not os.path.exists(output_file):
-        print("ERROR: output.txt was not created. (Mimic old script error)")
+        print("ERROR: output.txt was not created.")
         pytest.fail("Output file was not created.")
 
     with open(output_file, 'r') as f:
         content = f.read()
 
-    # Mimic old behavior: print warnings if no instructions found
     if "Total Instructions Found: 0" in content:
-        print("WARNING: No instructions found in output.txt (Mimic old script warning)")
+        print("WARNING: No instructions found in output.txt ")
 
     # Check for encoding differences
     # In the original script, encoding mismatches were printed like:
     # "Encodings do not match. Differences:"
-    # If we find that line, we mimic the old error messages
     if "Encodings do not match. Differences:" in content:
         # Extract differences lines
         lines = content.splitlines()
         diff_lines = [line for line in lines if line.strip().startswith("-")]
-        print("ERROR: Encoding differences found! (Mimic old script error)")
-        pytest.fail("Encodings do not match as per old behavior.")
+        print("ERROR: Encoding differences found!")
+        pytest.fail("Encodings do not match.")
 
-    # If we reach here, we mimic the old success output
-    print("No warnings or errors detected. Test passes but mimics old success behavior.")
+    print("No warnings or errors detected.")

From e3e74560a5c902d735828b7dbb0e9e00577ecc8c Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 09:22:35 +0000
Subject: [PATCH 06/33] Allow only one value

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9422 -> 9483 bytes
 ext/auto-inst/parsing.py                      |  12 ++++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index 6e1d3e4c34d9c9a2038f2ebd613ceb4a4676afeb..e9334dd9950c43dcaadf78534695e91af9c81202 100644
GIT binary patch
delta 819
zcmX9+-Afcv6rVFQJ5IB^GZu=gZn&#!utgc9VM=E9fgdQ64?dI(>V7!2I@_IbwqTcn
z70Lo3Zbk&5z6ACnGSOpyLG<z1ya)BrOVnFn=ZrJl-}#-dd+s^+ej0l^uG{grCgAtz
z`(p7#@4mjKJ&;kAaLwRyMi5+~D%4>cc7&RcgR0;OthFj^i#gbVTVf4rqAOLQ4v1nM
z3gV1_3^Qs{6@SvR;uPz_ow9NuIW7w#2v!+Ixn#M4WUaX9X(-u?rsE!EfxKiDmjbC|
zR|3hKFY#Ym#)x5hOjJRKMo=ZyPegkGJbyBF0nQAek5L<kaEU5GATBH%>QVAsMc5#6
z&3_kf1L)_U_2Wt(S<ds{`k-=|=m<Y&n93ERWBjEt3^D%AC@MKJ4EWa@Peznk;^V&8
z{1#465?2T!1k(g(5rPJm_wvlivvx9_BFOQ{mTnl}PRkt3@vklY@S1Ch8m#$`6L$a@
z|I>Q0;|E4MO$f5w1S)9QtPU{{ABz9v2;1<d+dcsN<wmj_HvPWjO$qMuXB|(VmnT!x
zP~gj{w34T4W&S+16%89sknZuJbh7zAQ92;IL9j_s;dZ)xgpMA_j?G-=kTvXV4O!L*
zj@)+a6|-QK@@$>LqV}p=UUfrD00q`S33dK4J&WySI=evQqn&fG%inZ<86HIEPmv;C
zAY_4UlTZ!|RFDVqJS%$43u*qI<+5Fhkk}Rqw$9&or6xuJ$8rpHJsgTAYM_U2K|Wx6
zl&G5K^1QWTnpl9|E{d>R6KkbiUJ*<YRCqemlL_xLoI`kT)F%sP6;6cevpw!+CX9kQ
Pp~kglbx4hAARqh(Nuj?k

delta 735
zcmYk4&rcIk5XU>a+of#V-Nsm3N>N%%Vij!!4Hy)w2Z_{Q2_{AnnzoiwE#2nr7L2wr
zQkA6QX*BWT4^AXR63E`Yc_ni2vKLJ_c`(MmK;LXHZt_0wJM-qv&U^D-PVQV*b-!P6
z@o#spP#PTGR-Y-;A~$JF@nIa~Xt+MQ?!g+|6`If#OlUw0IE5C>2~)15aE+ySR8p|r
zMy*mdZLwT6lcL>K(F<9_WVSS4F3sCwMPI_WH=#&8ScL#Q8p7{d1h;&O)$sZOs#e=~
z22PCeWkxl1aF$YoE#&hZ+e<4KI9`yrX#Md=08XP%>ywAca|%=1m^@DMJl3?VoF;h*
zziDZZ@Ias>=g2W)wE{=o@=ekcmKEH9<CCOif}3!a!xpLjE>>JDGNZ_P$$yNH!**x@
zMsO=M19wmjkHQ<A2sdHb`VxKspySENIrxEkqz_iDSCP*Ezwk<Q0MKI5X%VWpbEpl2
zc)R}w6mX+ICg&;Z0`B&I@UD^!6HI&@iw2iSGQxEN66*LV)_cS$Q8IL9vKo1v6D)I;
z)a>s2hF;Cil`BQIg1=)u@D#h_w|L39_+b$6d3*-eaWDS$$taKiDbV)Uv=i8Sn&eKh
zI@h+8W2HrAI<ok%Y_fHV5ZOCEu@!R5tZ39L=6`LYHrdibu3Sw6e@6Sbse%U-Py_{3
z!2>Etf7zzXg+qy8$ocbr)<X&35;BB3&L&a`r(MqRowm^Nm~%zWMNlbh6Sor?ZPt_V
N_?4h%+~ZSt@_+sYt@Ho@

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index bd5fac67c1..a29e76cce2 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -75,8 +75,16 @@ def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_
         return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
 
     def parse_location(loc_str):
-        high, low = loc_str.split('-')
-        return int(high), int(low)
+        # Ensure loc_str is a string
+        loc_str = str(loc_str).strip()
+        if '-' in loc_str:
+            high, low = loc_str.split('-')
+            return int(high), int(low)
+        else:
+            # If no dash, treat it as a single bit field
+            val = int(loc_str)
+            return val, val
+
 
     yaml_var_positions = {}
     for var in yaml_vars:

From 4355eb0a57d68973cca392de6169912e209f6dba Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 17:32:17 +0000
Subject: [PATCH 07/33] Use AsmString instead of name

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9483 -> 9704 bytes
 ext/auto-inst/parsing.py                      |  46 ++++++++++++------
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index e9334dd9950c43dcaadf78534695e91af9c81202..75bd77c41bc885711b1bbf6a2aef74f4c400a15d 100644
GIT binary patch
delta 832
zcmZuv&ubGw6rP#gt=nl5L{e;}%A{$b0rA(tLj@r!9;%I)6s6D-rrAxpw!3R)HcDYb
z4n;h891mWU<dl<#(!arfBH&37PyP$_&7>qE4(zw@y?yWP_q{hCHhw+H#n~)Ha1LL3
zt)06kId!Gyfcp?0=>epuj|QlZ2dIlu+$RI{2<@)!kS^(3iIrN3eN1G&O%o@zJnWHU
zYm#rLwk))1YWHUb$g>tv*GZie9T0IdB?MtqLv8BWHB@r=dyEiea2^M}$YY#^JZbC)
zQHuqUOhl)V1aSm5dnMzj?(u}Py(89$!`EEsEZ*Wek(FgwOHAYqKkWGOhG|;$la7ca
zOZNR~;{*SQ1x?nBJCS#tRpQ8JF(ioI+d%r`I#XO0ExB$i&Ck%OM?Cb|8W1b3vn^=)
zH%arYDdLcAmp7kI3a7-^yr7Ys7jD#^RhW^}h@%8F1X1r_&COWAG(i8Pmod&oO{oDL
z77L?f%xW@2E!8lZlK7u;Eu69*s1z+&y|6UEVo;@YLCkCCwuY((TZ6tSBBS9BKxsx4
z;5C|^{6)Chcg!kAl*_OV9ARCf>>ihXT?h2Egua1em_tb2TbjSN0Fg}gqD3*mdCLaO
zI4*H9IkPRHPQ}ts3qLsBW}6r*H7tC`3+mnSE*`2M%Om$A6qm60qP`Ui?od0H_^f^x
zSFgSRHS!Wfa@fTbQ$leLy5_KbCcdg$E0?pTHZK+-M|@ISE7durP2B}DPF9}J7Tx=9
HhC;<3kxt@C

delta 584
zcmY*V&ubGw6yBNL4V$&ui^k%Q!n8>Yfl??G4?PH?2T`hM&m~NEC&_5C8{bYgZQ0O^
z;6d;*#Df=;V^2luAK=~pV8Me2#j95@^-U~-58jXOd*8hGn4gc{JPeXb#kbI2e-FF2
zwmt^>`f(ZIJ@j-RLvGEixiz6Vosoz{bWSIBPIDW1Iyc=5_5#`eafo`QiIY3`tv!F@
z=59_h3pR3>TEyD5Ue+Uf*J?ROpL~XPS1B<Vs$-@S){$|<I2&*k_GH{;FT^2}F?*gS
zaXaFQvrYm?qzc+GABc4pCUGhw0Y)6jPDg-<)nY964T*vbDT%sm24dLfp`n`_O*WRQ
z$C~ZtI^($YvVud?$vDVll(LYgLVvTb3JCB&pn~CX%Ymu^%NPoGG#m;j{E!c&;(d7}
zS|wODo$|kXp=NNkpNz%di`%0ZuT8@QMB$lLJXUEzG99ec)^4KMbC(FEl$1!YaObyI
z_J~L5xkF)k_GIOUO}5T%ukH{s)vwmR5?}Av_Q;9;Tq`!;;eZx_nMozs!31Lrg~Oqn
zLAEHxfmCo}6gGU&U+N9=K>w=WAg8+0SgtH4T!Cxm>3hA|cy?)ON~4EU{kHM6T=njF
JOFnVV{{T2GrN00G

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index a29e76cce2..b272e3580c 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -244,23 +244,37 @@ def get_repo_instructions(repo_directory):
 
 def find_json_key(instr_name, json_data):
     """
-    Attempt to find a matching key in json_data for instr_name, considering different
-    naming conventions: replacing '.' with '_', and trying various case transformations.
+    Find a matching instruction in json_data by comparing against AsmString values.
+    Returns the matching key if found, None otherwise.
+    
+    Args:
+        instr_name (str): The instruction name from YAML
+        json_data (dict): The JSON data containing instruction information
+        
+    Returns:
+        str or None: The matching key from json_data if found, None otherwise
     """
-    lower_name = instr_name.lower()
-    lower_name_underscore = lower_name.replace('.', '_')
-    variants = {
-        lower_name,
-        lower_name_underscore,
-        instr_name.upper(),
-        instr_name.replace('.', '_').upper(),
-        instr_name.capitalize(),
-        instr_name.replace('.', '_').capitalize()
-    }
-
-    for v in variants:
-        if v in json_data:
-            return v
+    # First, normalize the instruction name for comparison
+    instr_name = instr_name.lower().strip()
+    
+    # Search through all entries in json_data
+    for key, value in json_data.items():
+        if not isinstance(value, dict):
+            continue
+            
+        # Get the AsmString value and normalize it
+        asm_string = safe_get(value, 'AsmString', '').lower().strip()
+        if not asm_string:
+            continue
+            
+        # Extract the base instruction name from AsmString
+        # AsmString might be in format like "add $rd, $rs1, $rs2" 
+        # We want just "add"
+        base_asm_name = asm_string.split()[0]
+        
+        if base_asm_name == instr_name:
+            return key
+            
     return None
 
 def run_parser(json_file, repo_directory, output_file="output.txt"):

From 84925811b752a4456fb800e90525a53e780dd608 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <afonso.oliveira707@gmail.com>
Date: Sun, 22 Dec 2024 12:56:54 +0000
Subject: [PATCH 08/33] Small Refactor on parsing.py

---
 ext/auto-inst/parsing.py | 113 +++++++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 47 deletions(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index b272e3580c..30eccbb308 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -5,7 +5,7 @@
 from collections import defaultdict
 import yaml
 
-REPO_INSTRUCTIONS = {}
+yaml_instructions = {}
 REPO_DIRECTORY = None
 
 def safe_get(data, key, default=""):
@@ -19,19 +19,20 @@ def safe_get(data, key, default=""):
 
 def load_yaml_encoding(instr_name):
     """
-    Given an instruction name (from JSON), find the corresponding YAML file and load its encoding data.
-    We'll try to match the instr_name to a YAML file by using REPO_INSTRUCTIONS and transformations.
+    Given an instruction name, find the corresponding YAML file and load its encoding data.
+    We'll try to match instr_name to a YAML file by using yaml_instructions + transformations.
     """
     candidates = set()
     lower_name = instr_name.lower()
     candidates.add(lower_name)
+    # Also consider underscores replaced by dots, etc. e.g. 'my_instr' -> 'my.instr'
     candidates.add(lower_name.replace('_', '.'))
 
     yaml_file_path = None
     yaml_category = None
     for cand in candidates:
-        if cand in REPO_INSTRUCTIONS:
-            yaml_category = REPO_INSTRUCTIONS[cand]
+        if cand in yaml_instructions:
+            yaml_category = yaml_instructions[cand]
             yaml_file_path = os.path.join(REPO_DIRECTORY, yaml_category, cand + ".yaml")
             if os.path.isfile(yaml_file_path):
                 break
@@ -85,9 +86,8 @@ def parse_location(loc_str):
             val = int(loc_str)
             return val, val
 
-
     yaml_var_positions = {}
-    for var in yaml_vars:
+    for var in (yaml_vars or []):
         high, low = parse_location(var["location"])
         yaml_var_positions[var["name"]] = (high, low)
 
@@ -203,15 +203,17 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
             output_stream.write("JSON Encoding:     Unable to parse encoding\n")
             encoding = ""
 
-        # compare YAML vs JSON encodings
-        yaml_match, yaml_vars = load_yaml_encoding(name)
+        # YAML
+        yaml_match = safe_get(data, 'yaml_match', None)
+        yaml_vars  = safe_get(data, 'yaml_vars', None)
+
         if yaml_match is not None:
             output_stream.write(f"YAML Encoding:     {yaml_match}\n")
         else:
             output_stream.write("YAML Encoding:     Not found\n")
 
+        # Compare
         if yaml_match and encoding:
-            # Perform comparison
             differences = compare_yaml_json_encoding(name, yaml_match, yaml_vars, encoding)
             if differences and len(differences) > 0:
                 output_stream.write("\nEncodings do not match. Differences:\n")
@@ -221,7 +223,6 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
             else:
                 output_stream.write("\nEncodings Match: No differences found.\n")
         else:
-            # If we have no YAML match or no JSON encoding, we note that we can't compare
             output_stream.write("\nComparison: Cannot compare encodings (missing YAML or JSON encoding).\n")
 
         output_stream.write("\n")
@@ -229,72 +230,84 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
         output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
         output_stream.write("Continuing with next instruction...\n\n")
 
-def get_repo_instructions(repo_directory):
+def get_yaml_instructions(repo_directory):
     """
-    Recursively find all YAML files in the repository and extract instruction names along with their category.
+    Recursively find all YAML files in the repository *and* immediately load their encodings.
+    This function will return a dict of the form:
+       {
+         <instr_name_lower>: {
+             "category":   <relative_path>,
+             "yaml_match": <string or None>,
+             "yaml_vars":  <list or None>
+         },
+         ...
+       }
     """
-    repo_instructions = {}
+    global yaml_instructions, REPO_DIRECTORY
+    REPO_DIRECTORY = repo_directory
+    yaml_instructions = {}
+
+    # Step 1: Collect <instr_name_lower> -> <relative_path>
     for root, _, files in os.walk(repo_directory):
         for file in files:
             if file.endswith(".yaml"):
                 instr_name = os.path.splitext(file)[0]
                 relative_path = os.path.relpath(root, repo_directory)
-                repo_instructions[instr_name.lower()] = relative_path
-    return repo_instructions
+                yaml_instructions[instr_name.lower()] = relative_path
+
+    # Step 2: For each instruction, load YAML encodings right away
+    instructions_with_encodings = {}
+    for instr_name_lower, path in yaml_instructions.items():
+        yaml_match, yaml_vars = load_yaml_encoding(instr_name_lower)
+        instructions_with_encodings[instr_name_lower] = {
+            "category":   path,
+            "yaml_match": yaml_match,
+            "yaml_vars":  yaml_vars
+        }
+
+    # Debug print
+    print("Instructions + Encodings:\n", instructions_with_encodings)
+    return instructions_with_encodings
 
 def find_json_key(instr_name, json_data):
     """
     Find a matching instruction in json_data by comparing against AsmString values.
     Returns the matching key if found, None otherwise.
-    
+
     Args:
         instr_name (str): The instruction name from YAML
         json_data (dict): The JSON data containing instruction information
-        
+
     Returns:
         str or None: The matching key from json_data if found, None otherwise
     """
-    # First, normalize the instruction name for comparison
     instr_name = instr_name.lower().strip()
-    
-    # Search through all entries in json_data
     for key, value in json_data.items():
         if not isinstance(value, dict):
             continue
-            
-        # Get the AsmString value and normalize it
+
         asm_string = safe_get(value, 'AsmString', '').lower().strip()
         if not asm_string:
             continue
-            
-        # Extract the base instruction name from AsmString
-        # AsmString might be in format like "add $rd, $rs1, $rs2" 
-        # We want just "add"
+
         base_asm_name = asm_string.split()[0]
-        
         if base_asm_name == instr_name:
             return key
-            
     return None
 
 def run_parser(json_file, repo_directory, output_file="output.txt"):
     """
     Run the parser logic:
-    1. Get instructions from the repo directory.
+    1. Collect YAML instructions + encodings from the repo.
     2. Parse the JSON file and match instructions.
     3. Generate output.txt with instruction details.
+    4. Save updated JSON with YAML encodings inserted.
     """
-    global REPO_INSTRUCTIONS, REPO_DIRECTORY
-    REPO_DIRECTORY = repo_directory
-
-    # Get instructions and categories from the repository structure
-    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
-    if not REPO_INSTRUCTIONS:
-        print("No instructions found in the provided repository directory.")
-        return None
+    # Step 1: get all instructions + YAML encoding data
+    instructions_with_encodings = get_yaml_instructions(repo_directory)
 
+    # Step 2: parse JSON
     try:
-        # Read and parse JSON
         with open(json_file, 'r') as f:
             data = json.loads(f.read())
     except Exception as e:
@@ -303,24 +316,29 @@ def run_parser(json_file, repo_directory, output_file="output.txt"):
 
     all_instructions = []
 
-    # For each YAML instruction, try to find it in the JSON data
-    for yaml_instr_name, category in REPO_INSTRUCTIONS.items():
-        json_key = find_json_key(yaml_instr_name, data)
+    # Step 3: For each YAML instruction, attempt to find it in JSON by AsmString
+    for yaml_instr_name_lower, yaml_data in instructions_with_encodings.items():
+        json_key = find_json_key(yaml_instr_name_lower, data)
         if json_key is None:
-            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
+            print(f"DEBUG: Instruction '{yaml_instr_name_lower}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
             continue
 
         instr_data = data.get(json_key)
         if not isinstance(instr_data, dict):
-            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
+            print(f"DEBUG: Instruction '{yaml_instr_name_lower}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
             continue
 
-        # Add this instruction to our list
+        # Insert the YAML fields (match & vars) into the JSON entry
+        instr_data["yaml_match"] = yaml_data["yaml_match"]
+        instr_data["yaml_vars"]  = yaml_data["yaml_vars"]
+
+        # We'll keep track of them so we can print details
         all_instructions.append((json_key, instr_data))
 
-    # Sort all instructions by name
+    # Sort instructions by JSON key
     all_instructions.sort(key=lambda x: x[0].lower())
 
+    # Step 4: Generate a text report
     with open(output_file, "w") as outfile:
         outfile.write("RISC-V Instruction Summary\n")
         outfile.write("=" * 50 + "\n")
@@ -336,7 +354,8 @@ def run_parser(json_file, repo_directory, output_file="output.txt"):
         for name, instr_data in all_instructions:
             safe_print_instruction_details(name, instr_data, outfile)
 
-    print(f"Output has been written to {output_file}")
+    print(f"Output (report) has been written to {output_file}")
+
     return output_file
 
 def main():

From 3bd1d2cdc34062699901b5fe9be8585e381406b9 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <afonso.oliveira707@gmail.com>
Date: Sun, 22 Dec 2024 13:42:34 +0000
Subject: [PATCH 09/33] refactor to do unit tests

---
 ext/auto-inst/parsing.py      |   9 +-
 ext/auto-inst/test_parsing.py | 288 ++++++++++++++++++++++++++++++++++
 2 files changed, 291 insertions(+), 6 deletions(-)
 create mode 100644 ext/auto-inst/test_parsing.py

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 30eccbb308..27dce382f4 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -265,8 +265,6 @@ def get_yaml_instructions(repo_directory):
             "yaml_vars":  yaml_vars
         }
 
-    # Debug print
-    print("Instructions + Encodings:\n", instructions_with_encodings)
     return instructions_with_encodings
 
 def find_json_key(instr_name, json_data):
@@ -309,7 +307,7 @@ def run_parser(json_file, repo_directory, output_file="output.txt"):
     # Step 2: parse JSON
     try:
         with open(json_file, 'r') as f:
-            data = json.loads(f.read())
+            json_data = json.loads(f.read())
     except Exception as e:
         print(f"Error reading file: {str(e)}")
         return None
@@ -318,12 +316,12 @@ def run_parser(json_file, repo_directory, output_file="output.txt"):
 
     # Step 3: For each YAML instruction, attempt to find it in JSON by AsmString
     for yaml_instr_name_lower, yaml_data in instructions_with_encodings.items():
-        json_key = find_json_key(yaml_instr_name_lower, data)
+        json_key = find_json_key(yaml_instr_name_lower, json_data)
         if json_key is None:
             print(f"DEBUG: Instruction '{yaml_instr_name_lower}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
             continue
 
-        instr_data = data.get(json_key)
+        instr_data = json_data.get(json_key)
         if not isinstance(instr_data, dict):
             print(f"DEBUG: Instruction '{yaml_instr_name_lower}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
             continue
@@ -334,7 +332,6 @@ def run_parser(json_file, repo_directory, output_file="output.txt"):
 
         # We'll keep track of them so we can print details
         all_instructions.append((json_key, instr_data))
-
     # Sort instructions by JSON key
     all_instructions.sort(key=lambda x: x[0].lower())
 
diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
new file mode 100644
index 0000000000..c48c722820
--- /dev/null
+++ b/ext/auto-inst/test_parsing.py
@@ -0,0 +1,288 @@
+import pytest
+import json
+import os
+import re
+import yaml
+from pathlib import Path
+
+def get_json_path():
+    """Get the path to the JSON file relative to the test file."""
+    current_dir = Path(__file__).parent
+    return str(current_dir / "../../../llvm-project/build/unorder.json")
+
+def get_yaml_directory():
+    """Get the path to the YAML directory relative to the test file."""
+    current_dir = Path(__file__).parent
+    return str(current_dir / "../../arch/inst/")
+
+def load_inherited_variable(var_path, repo_dir):
+    """Load variable definition from an inherited YAML file."""
+    try:
+        # Parse the path to get directory and anchor
+        path, anchor = var_path.split('#')
+        if anchor.startswith('/'):
+            anchor = anchor[1:]  # Remove leading slash
+        
+        # Construct full path
+        full_path = os.path.join(repo_dir, path)
+        
+        if not os.path.exists(full_path):
+            print(f"Warning: Inherited file not found: {full_path}")
+            return None
+            
+        with open(full_path, 'r') as f:
+            data = yaml.safe_load(f)
+            
+        # Navigate through the anchor path
+        for key in anchor.split('/'):
+            if key in data:
+                data = data[key]
+            else:
+                print(f"Warning: Anchor path {anchor} not found in {path}")
+                return None
+                
+        return data
+    except Exception as e:
+        print(f"Error loading inherited variable {var_path}: {str(e)}")
+        return None
+
+def resolve_variable_definition(var, repo_dir):
+    """Resolve variable definition, handling inheritance if needed."""
+    if 'location' in var:
+        return var
+    elif '$inherits' in var:
+            print(f"Warning: Failed to resolve inheritance for variable: {var}")
+    return None
+
+def parse_location(loc_str):
+    """Parse location string that may contain multiple ranges."""
+    if not loc_str:
+        return []
+        
+    loc_str = str(loc_str).strip()
+    ranges = []
+    
+    # Split on pipe if there are multiple ranges
+    for range_str in loc_str.split('|'):
+        range_str = range_str.strip()
+        if '-' in range_str:
+            high, low = map(int, range_str.split('-'))
+            ranges.append((high, low))
+        else:
+            # Single bit case
+            try:
+                val = int(range_str)
+                ranges.append((val, val))
+            except ValueError:
+                print(f"Warning: Invalid location format: {range_str}")
+                continue
+    
+    return ranges
+
+def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str, repo_dir):
+    """Compare the YAML encoding with the JSON encoding."""
+    if not yaml_match:
+        return ["No YAML match field available for comparison."]
+    if not json_encoding_str:
+        return ["No JSON encoding available for comparison."]
+
+    # Determine expected length based on whether it's a compressed instruction (C_ or c.)
+    expected_length = 16 if instr_name.lower().startswith(('c_', 'c.')) else 32
+
+    yaml_pattern_str = yaml_match.replace('-', '.')
+    if len(yaml_pattern_str) != expected_length:
+        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
+
+    # Process variables and their locations
+    yaml_var_positions = {}
+    for var in (yaml_vars or []):
+        resolved_var = resolve_variable_definition(var, repo_dir)
+        if not resolved_var or 'location' not in resolved_var:
+            print(f"Warning: Could not resolve variable definition for {var.get('name', 'unknown')}")
+            continue
+        
+        ranges = parse_location(resolved_var['location'])
+        if ranges:
+            yaml_var_positions[var['name']] = ranges
+
+    # Tokenize the JSON encoding string
+    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
+    json_bits = []
+    bit_index = expected_length - 1
+    for t in tokens:
+        json_bits.append((bit_index, t))
+        bit_index -= 1
+
+    if bit_index != -1:
+        return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."]
+
+    # Normalize JSON bits (handle vm[?] etc.)
+    normalized_json_bits = []
+    for pos, tt in json_bits:
+        if re.match(r'vm\[[^\]]*\]', tt):
+            tt = 'vm'
+        normalized_json_bits.append((pos, tt))
+    json_bits = normalized_json_bits
+
+    differences = []
+
+    # Check fixed bits
+    for b in range(expected_length):
+        yaml_bit = yaml_pattern_str[expected_length - 1 - b]
+        token = [tt for (pos, tt) in json_bits if pos == b]
+        if not token:
+            differences.append(f"Bit {b}: No corresponding JSON bit found.")
+            continue
+        json_bit_str = token[0]
+
+        if yaml_bit in ['0', '1']:
+            if json_bit_str not in ['0', '1']:
+                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
+            elif json_bit_str != yaml_bit:
+                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
+        else:
+            if json_bit_str in ['0', '1']:
+                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
+
+    # Check variable fields
+    for var_name, ranges in yaml_var_positions.items():
+        for high, low in ranges:
+            # Ensure the variable range fits within the expected_length
+            if high >= expected_length or low < 0:
+                differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
+                continue
+
+            json_var_fields = []
+            for bb in range(low, high+1):
+                token = [tt for (pos, tt) in json_bits if pos == bb]
+                if token:
+                    json_var_fields.append(token[0])
+                else:
+                    json_var_fields.append('?')
+
+            # Extract field names
+            field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
+            if len(field_names) == 0:
+                differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
+            elif len(field_names) > 1:
+                differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")
+
+    return differences
+
+@pytest.fixture
+def yaml_instructions():
+    """Load all YAML instructions from the repository."""
+    from parsing import get_yaml_instructions
+    repo_dir = get_yaml_directory()
+    if not os.path.exists(repo_dir):
+        pytest.skip(f"Repository directory not found at {repo_dir}")
+    return get_yaml_instructions(repo_dir)
+
+@pytest.fixture
+def json_data():
+    """Load the real JSON data from the TableGen file."""
+    json_file = get_json_path()
+    if not os.path.exists(json_file):
+        pytest.skip(f"JSON file not found at {json_file}")
+    with open(json_file, 'r') as f:
+        return json.load(f)
+
+def pytest_configure(config):
+    """Configure the test session."""
+    print(f"\nUsing JSON file: {get_json_path()}")
+    print(f"Using YAML directory: {get_yaml_directory()}\n")
+
+class TestEncodingComparison:
+    def test_encoding_matches(self, yaml_instructions, json_data):
+        """Test YAML-defined instructions against their JSON counterparts if they exist."""
+        mismatches = []
+        total_yaml_instructions = 0
+        checked_instructions = 0
+        skipped_instructions = []
+        repo_dir = get_yaml_directory()
+        
+        for yaml_instr_name, yaml_data in yaml_instructions.items():
+            total_yaml_instructions += 1
+            
+            # Skip if no YAML match pattern
+            if not yaml_data.get("yaml_match"):
+                skipped_instructions.append(yaml_instr_name)
+                continue
+
+            # Get JSON encoding from instruction data
+            json_key = self._find_matching_instruction(yaml_instr_name, json_data)
+            if not json_key:
+                skipped_instructions.append(yaml_instr_name)
+                continue
+
+            checked_instructions += 1
+            json_encoding = self._get_json_encoding(json_data[json_key])
+            
+            # Compare encodings using the existing function
+            differences = compare_yaml_json_encoding(
+                yaml_instr_name,
+                yaml_data["yaml_match"],
+                yaml_data["yaml_vars"],
+                json_encoding,
+                repo_dir
+            )
+
+            if differences and differences != ["No YAML match field available for comparison."]:
+                mismatches.append({
+                    'instruction': yaml_instr_name,
+                    'json_key': json_key,
+                    'differences': differences,
+                    'yaml_match': yaml_data["yaml_match"],
+                    'json_encoding': json_encoding
+                })
+
+        # Print statistics
+        print(f"\nYAML instructions found: {total_yaml_instructions}")
+        print(f"Instructions checked: {checked_instructions}")
+        print(f"Instructions skipped: {len(skipped_instructions)}")
+        print(f"Instructions with encoding mismatches: {len(mismatches)}")
+        
+        if skipped_instructions:
+            print("\nSkipped instructions:")
+            for instr in skipped_instructions:
+                print(f"  - {instr}")
+
+        if mismatches:
+            error_msg = "\nEncoding mismatches found:\n"
+            for m in mismatches:
+                error_msg += f"\nInstruction: {m['instruction']} (JSON key: {m['json_key']})\n"
+                error_msg += f"YAML match: {m['yaml_match']}\n"
+                error_msg += f"JSON encoding: {m['json_encoding']}\n"
+                error_msg += "Differences:\n"
+                for d in m['differences']:
+                    error_msg += f"  - {d}\n"
+            pytest.fail(error_msg)
+
+    def _find_matching_instruction(self, yaml_instr_name, json_data):
+        """Find matching instruction in JSON data by comparing instruction names."""
+        yaml_instr_name = yaml_instr_name.lower().strip()
+        for key, value in json_data.items():
+            if not isinstance(value, dict):
+                continue
+            asm_string = value.get('AsmString', '').lower().strip()
+            if not asm_string:
+                continue
+            base_asm_name = asm_string.split()[0]
+            if base_asm_name == yaml_instr_name:
+                return key
+        return None
+
+    def _get_json_encoding(self, json_instr):
+        """Extract encoding string from JSON instruction data."""
+        encoding_bits = []
+        try:
+            inst = json_instr.get('Inst', [])
+            for bit in inst:
+                if isinstance(bit, dict):
+                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
+                else:
+                    encoding_bits.append(str(bit))
+            encoding_bits.reverse()
+            return "".join(encoding_bits)
+        except:
+            return ""
\ No newline at end of file

From 6b9fdda4db32791d7bfd079b5373b2fc50682406 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <afonso.oliveira707@gmail.com>
Date: Sun, 22 Dec 2024 13:43:13 +0000
Subject: [PATCH 10/33] refactor to file name

---
 ext/auto-inst/test.py | 52 -------------------------------------------
 1 file changed, 52 deletions(-)
 delete mode 100644 ext/auto-inst/test.py

diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py
deleted file mode 100644
index 63ef51fae2..0000000000
--- a/ext/auto-inst/test.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import pytest
-import os
-from parsing import run_parser
-
-@pytest.fixture
-def setup_paths(request):
-    json_file = request.config.getoption("--json_file")
-    repo_dir = request.config.getoption("--repo_dir")
-
-    # Resolve absolute paths
-    json_file = os.path.abspath(json_file)
-    repo_dir = os.path.abspath(repo_dir)
-    output_file = os.path.join(repo_dir, "output.txt")
-
-    print(f"Using JSON File: {json_file}")
-    print(f"Using Repository Directory: {repo_dir}")
-    print(f"Output File Path: {output_file}")
-
-    return json_file, repo_dir, output_file
-
-def test_llvm(setup_paths):
-    json_file, repo_dir, output_file = setup_paths
-
-    result = run_parser(json_file, repo_dir, output_file=output_file)
-
-    if result is None:
-        print("WARNING: No instructions found or an error occurred. ")
-        # You could fail here if this was previously considered a hard error
-        pytest.fail("No output produced by run_parser.")
-
-    # Check output file content
-    if not os.path.exists(output_file):
-        print("ERROR: output.txt was not created.")
-        pytest.fail("Output file was not created.")
-
-    with open(output_file, 'r') as f:
-        content = f.read()
-
-    if "Total Instructions Found: 0" in content:
-        print("WARNING: No instructions found in output.txt ")
-
-    # Check for encoding differences
-    # In the original script, encoding mismatches were printed like:
-    # "Encodings do not match. Differences:"
-    if "Encodings do not match. Differences:" in content:
-        # Extract differences lines
-        lines = content.splitlines()
-        diff_lines = [line for line in lines if line.strip().startswith("-")]
-        print("ERROR: Encoding differences found!")
-        pytest.fail("Encodings do not match.")
-
-    print("No warnings or errors detected.")

From 831a10b7b8b2a57ce086a531be6c3d3205702380 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 23 Dec 2024 12:19:04 +0000
Subject: [PATCH 11/33] Modify to have several Unit tests instead of just one

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/test_parsing.py | 149 ++++++++++++++++++----------------
 1 file changed, 78 insertions(+), 71 deletions(-)

diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index c48c722820..9a2ef30f74 100644
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -8,7 +8,7 @@
 def get_json_path():
     """Get the path to the JSON file relative to the test file."""
     current_dir = Path(__file__).parent
-    return str(current_dir / "../../../llvm-project/build/unorder.json")
+    return str(current_dir / "/home/afonsoo/llvm-project/llvm-build/pretty.json")
 
 def get_yaml_directory():
     """Get the path to the YAML directory relative to the test file."""
@@ -187,81 +187,47 @@ def json_data():
     with open(json_file, 'r') as f:
         return json.load(f)
 
-def pytest_configure(config):
-    """Configure the test session."""
-    print(f"\nUsing JSON file: {get_json_path()}")
-    print(f"Using YAML directory: {get_yaml_directory()}\n")
+# Global variables to store loaded data
+_yaml_instructions = None
+_json_data = None
+_repo_dir = None
 
-class TestEncodingComparison:
-    def test_encoding_matches(self, yaml_instructions, json_data):
-        """Test YAML-defined instructions against their JSON counterparts if they exist."""
-        mismatches = []
-        total_yaml_instructions = 0
-        checked_instructions = 0
-        skipped_instructions = []
-        repo_dir = get_yaml_directory()
-        
-        for yaml_instr_name, yaml_data in yaml_instructions.items():
-            total_yaml_instructions += 1
-            
-            # Skip if no YAML match pattern
-            if not yaml_data.get("yaml_match"):
-                skipped_instructions.append(yaml_instr_name)
-                continue
+def load_test_data():
+    """Load test data once and cache it."""
+    global _yaml_instructions, _json_data, _repo_dir
+    if _yaml_instructions is None:
+        # Load YAML instructions
+        from parsing import get_yaml_instructions
+        _repo_dir = get_yaml_directory()
+        if not os.path.exists(_repo_dir):
+            pytest.skip(f"Repository directory not found at {_repo_dir}")
+        _yaml_instructions = get_yaml_instructions(_repo_dir)
 
-            # Get JSON encoding from instruction data
-            json_key = self._find_matching_instruction(yaml_instr_name, json_data)
-            if not json_key:
-                skipped_instructions.append(yaml_instr_name)
-                continue
+        # Load JSON data
+        json_file = get_json_path()
+        if not os.path.exists(json_file):
+            pytest.skip(f"JSON file not found at {json_file}")
+        with open(json_file, 'r') as f:
+            _json_data = json.load(f)
 
-            checked_instructions += 1
-            json_encoding = self._get_json_encoding(json_data[json_key])
-            
-            # Compare encodings using the existing function
-            differences = compare_yaml_json_encoding(
-                yaml_instr_name,
-                yaml_data["yaml_match"],
-                yaml_data["yaml_vars"],
-                json_encoding,
-                repo_dir
-            )
-
-            if differences and differences != ["No YAML match field available for comparison."]:
-                mismatches.append({
-                    'instruction': yaml_instr_name,
-                    'json_key': json_key,
-                    'differences': differences,
-                    'yaml_match': yaml_data["yaml_match"],
-                    'json_encoding': json_encoding
-                })
-
-        # Print statistics
-        print(f"\nYAML instructions found: {total_yaml_instructions}")
-        print(f"Instructions checked: {checked_instructions}")
-        print(f"Instructions skipped: {len(skipped_instructions)}")
-        print(f"Instructions with encoding mismatches: {len(mismatches)}")
-        
-        if skipped_instructions:
-            print("\nSkipped instructions:")
-            for instr in skipped_instructions:
-                print(f"  - {instr}")
-
-        if mismatches:
-            error_msg = "\nEncoding mismatches found:\n"
-            for m in mismatches:
-                error_msg += f"\nInstruction: {m['instruction']} (JSON key: {m['json_key']})\n"
-                error_msg += f"YAML match: {m['yaml_match']}\n"
-                error_msg += f"JSON encoding: {m['json_encoding']}\n"
-                error_msg += "Differences:\n"
-                for d in m['differences']:
-                    error_msg += f"  - {d}\n"
-            pytest.fail(error_msg)
+    return _yaml_instructions, _json_data, _repo_dir
+
+def pytest_generate_tests(metafunc):
+    """Generate test cases dynamically."""
+    if "instr_name" in metafunc.fixturenames:
+        yaml_instructions, _, _ = load_test_data()
+        metafunc.parametrize("instr_name", list(yaml_instructions.keys()))
+
+class TestInstructionEncoding:
+    @classmethod
+    def setup_class(cls):
+        """Setup class-level test data."""
+        cls.yaml_instructions, cls.json_data, cls.repo_dir = load_test_data()
 
-    def _find_matching_instruction(self, yaml_instr_name, json_data):
+    def _find_matching_instruction(self, yaml_instr_name):
         """Find matching instruction in JSON data by comparing instruction names."""
         yaml_instr_name = yaml_instr_name.lower().strip()
-        for key, value in json_data.items():
+        for key, value in self.json_data.items():
             if not isinstance(value, dict):
                 continue
             asm_string = value.get('AsmString', '').lower().strip()
@@ -285,4 +251,45 @@ def _get_json_encoding(self, json_instr):
             encoding_bits.reverse()
             return "".join(encoding_bits)
         except:
-            return ""
\ No newline at end of file
+            return ""
+
+    def test_instruction_encoding(self, instr_name):
+        """Test encoding for a single instruction."""
+        yaml_data = self.yaml_instructions[instr_name]
+        
+        # Skip if no YAML match pattern
+        if not yaml_data.get("yaml_match"):
+            pytest.skip(f"Instruction {instr_name} has no YAML match pattern")
+
+        # Find matching JSON instruction
+        json_key = self._find_matching_instruction(instr_name)
+        if not json_key:
+            pytest.skip(f"No matching JSON instruction found for {instr_name}")
+
+        # Get JSON encoding
+        json_encoding = self._get_json_encoding(self.json_data[json_key])
+        
+        # Compare encodings
+        differences = compare_yaml_json_encoding(
+            instr_name,
+            yaml_data["yaml_match"],
+            yaml_data.get("yaml_vars", []),
+            json_encoding,
+            self.repo_dir
+        )
+
+        # If there are differences, format them nicely and fail the test
+        if differences and differences != ["No YAML match field available for comparison."]:
+            error_msg = f"\nEncoding mismatch for instruction: {instr_name}\n"
+            error_msg += f"JSON key: {json_key}\n"
+            error_msg += f"YAML match: {yaml_data['yaml_match']}\n"
+            error_msg += f"JSON encoding: {json_encoding}\n"
+            error_msg += "Differences:\n"
+            for diff in differences:
+                error_msg += f"  - {diff}\n"
+            pytest.fail(error_msg)
+
+def pytest_configure(config):
+    """Configure the test session."""
+    print(f"\nUsing JSON file: {get_json_path()}")
+    print(f"Using YAML directory: {get_yaml_directory()}\n")

From 655e1d6c7a8eadb89fa69cb7843d9a50ca30ab4f Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 23 Dec 2024 12:30:01 +0000
Subject: [PATCH 12/33] Clean up and code reorganization

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py      | 359 ++++++++++------------------------
 ext/auto-inst/test_parsing.py | 194 +-----------------
 2 files changed, 111 insertions(+), 442 deletions(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 27dce382f4..76f0246171 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -1,9 +1,7 @@
 import os
-import json
 import re
-import sys
-from collections import defaultdict
 import yaml
+from pathlib import Path
 
 yaml_instructions = {}
 REPO_DIRECTORY = None
@@ -17,19 +15,83 @@ def safe_get(data, key, default=""):
     except:
         return default
 
+def get_json_path():
+    """Get the path to the JSON file relative to the test file."""
+    current_dir = Path(__file__).parent
+    return str(current_dir / "/home/afonsoo/llvm-project/llvm-build/pretty.json")
+
+def get_yaml_directory():
+    """Get the path to the YAML directory relative to the test file."""
+    current_dir = Path(__file__).parent
+    return str(current_dir / "../../arch/inst/")
+
+def load_inherited_variable(var_path, repo_dir):
+    """Load variable definition from an inherited YAML file."""
+    try:
+        path, anchor = var_path.split('#')
+        if anchor.startswith('/'):
+            anchor = anchor[1:]
+        
+        full_path = os.path.join(repo_dir, path)
+        
+        if not os.path.exists(full_path):
+            print(f"Warning: Inherited file not found: {full_path}")
+            return None
+            
+        with open(full_path, 'r') as f:
+            data = yaml.safe_load(f)
+            
+        for key in anchor.split('/'):
+            if key in data:
+                data = data[key]
+            else:
+                print(f"Warning: Anchor path {anchor} not found in {path}")
+                return None
+                
+        return data
+    except Exception as e:
+        print(f"Error loading inherited variable {var_path}: {str(e)}")
+        return None
+
+def resolve_variable_definition(var, repo_dir):
+    """Resolve variable definition, handling inheritance if needed."""
+    if 'location' in var:
+        return var
+    elif '$inherits' in var:
+        print(f"Warning: Failed to resolve inheritance for variable: {var}")
+    return None
+
+def parse_location(loc_str):
+    """Parse location string that may contain multiple ranges."""
+    if not loc_str:
+        return []
+        
+    loc_str = str(loc_str).strip()
+    ranges = []
+    
+    for range_str in loc_str.split('|'):
+        range_str = range_str.strip()
+        if '-' in range_str:
+            high, low = map(int, range_str.split('-'))
+            ranges.append((high, low))
+        else:
+            try:
+                val = int(range_str)
+                ranges.append((val, val))
+            except ValueError:
+                print(f"Warning: Invalid location format: {range_str}")
+                continue
+    
+    return ranges
+
 def load_yaml_encoding(instr_name):
-    """
-    Given an instruction name, find the corresponding YAML file and load its encoding data.
-    We'll try to match instr_name to a YAML file by using yaml_instructions + transformations.
-    """
+    """Load YAML encoding data for an instruction."""
     candidates = set()
     lower_name = instr_name.lower()
     candidates.add(lower_name)
-    # Also consider underscores replaced by dots, etc. e.g. 'my_instr' -> 'my.instr'
     candidates.add(lower_name.replace('_', '.'))
 
     yaml_file_path = None
-    yaml_category = None
     for cand in candidates:
         if cand in yaml_instructions:
             yaml_category = yaml_instructions[cand]
@@ -40,10 +102,8 @@ def load_yaml_encoding(instr_name):
                 yaml_file_path = None
 
     if not yaml_file_path or not os.path.isfile(yaml_file_path):
-        # YAML not found
         return None, None
 
-    # Load the YAML file
     with open(yaml_file_path, 'r') as yf:
         ydata = yaml.safe_load(yf)
 
@@ -53,45 +113,30 @@ def load_yaml_encoding(instr_name):
 
     return yaml_match, yaml_vars
 
-def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str):
-    """
-    Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
-    If the JSON has a variable like vm[?], it should be treated as just vm.
-
-    If instr_name starts with 'C_', then treat the instruction as 16 bits long.
-    Otherwise, treat it as 32 bits long.
-
-    Return a list of differences.
-    """
+def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str, repo_dir):
+    """Compare the YAML encoding with the JSON encoding."""
     if not yaml_match:
         return ["No YAML match field available for comparison."]
     if not json_encoding_str:
         return ["No JSON encoding available for comparison."]
 
-    # Determine expected length based on whether it's a compressed instruction (C_)
-    expected_length = 16 if instr_name.startswith('C_') else 32
+    expected_length = 16 if instr_name.lower().startswith(('c_', 'c.')) else 32
 
     yaml_pattern_str = yaml_match.replace('-', '.')
     if len(yaml_pattern_str) != expected_length:
         return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
 
-    def parse_location(loc_str):
-        # Ensure loc_str is a string
-        loc_str = str(loc_str).strip()
-        if '-' in loc_str:
-            high, low = loc_str.split('-')
-            return int(high), int(low)
-        else:
-            # If no dash, treat it as a single bit field
-            val = int(loc_str)
-            return val, val
-
     yaml_var_positions = {}
     for var in (yaml_vars or []):
-        high, low = parse_location(var["location"])
-        yaml_var_positions[var["name"]] = (high, low)
+        resolved_var = resolve_variable_definition(var, repo_dir)
+        if not resolved_var or 'location' not in resolved_var:
+            print(f"Warning: Could not resolve variable definition for {var.get('name', 'unknown')}")
+            continue
+        
+        ranges = parse_location(resolved_var['location'])
+        if ranges:
+            yaml_var_positions[var['name']] = ranges
 
-    # Tokenize the JSON encoding string. We assume it should match the expected_length in bits.
     tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
     json_bits = []
     bit_index = expected_length - 1
@@ -102,7 +147,6 @@ def parse_location(loc_str):
     if bit_index != -1:
         return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."]
 
-    # Normalize JSON bits (handle vm[?] etc.)
     normalized_json_bits = []
     for pos, tt in json_bits:
         if re.match(r'vm\[[^\]]*\]', tt):
@@ -112,7 +156,6 @@ def parse_location(loc_str):
 
     differences = []
 
-    # Check fixed bits
     for b in range(expected_length):
         yaml_bit = yaml_pattern_str[expected_length - 1 - b]
         token = [tt for (pos, tt) in json_bits if pos == b]
@@ -130,124 +173,34 @@ def parse_location(loc_str):
             if json_bit_str in ['0', '1']:
                 differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
 
-    # Check variable fields
-    for var_name, (high, low) in yaml_var_positions.items():
-        # Ensure the variable range fits within the expected_length
-        if high >= expected_length or low < 0:
-            differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
-            continue
-
-        json_var_fields = []
-        for bb in range(low, high+1):
-            token = [tt for (pos, tt) in json_bits if pos == bb]
-            if token:
-                json_var_fields.append(token[0])
-            else:
-                json_var_fields.append('?')
-
-        # Extract field names
-        field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
-        if len(field_names) == 0:
-            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
-        elif len(field_names) > 1:
-            differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")
-
-    return differences
-
-def safe_print_instruction_details(name: str, data: dict, output_stream):
-    """Print formatted instruction details and compare YAML/JSON encodings."""
-    try:
-        output_stream.write(f"\n{name} Instruction Details\n")
-        output_stream.write("=" * 50 + "\n")
-
-        output_stream.write("\nBasic Information:\n")
-        output_stream.write("-" * 20 + "\n")
-        output_stream.write(f"Name:              {name}\n")
-        output_stream.write(f"Assembly Format:   {safe_get(data, 'AsmString', 'N/A')}\n")
-        output_stream.write(f"Size:              {safe_get(data, 'Size', 'N/A')} bytes\n")
-
-        locs = safe_get(data, '!locs', [])
-        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
-        output_stream.write(f"Location:          {loc}\n")
-
-        output_stream.write("\nOperands:\n")
-        output_stream.write("-" * 20 + "\n")
-        try:
-            in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A')
-            output_stream.write(f"Inputs:            {in_ops}\n")
-        except:
-            output_stream.write("Inputs:            N/A\n")
-
-        try:
-            out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A')
-            output_stream.write(f"Outputs:           {out_ops}\n")
-        except:
-            output_stream.write("Outputs:           N/A\n")
-
-        # Encoding
-        output_stream.write("\nEncoding Pattern:\n")
-        output_stream.write("-" * 20 + "\n")
-        encoding_bits = []
-        try:
-            inst = safe_get(data, 'Inst', [])
-            for bit in inst:
-                if isinstance(bit, dict):
-                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
+    for var_name, ranges in yaml_var_positions.items():
+        for high, low in ranges:
+            if high >= expected_length or low < 0:
+                differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
+                continue
+
+            json_var_fields = []
+            for bb in range(low, high+1):
+                token = [tt for (pos, tt) in json_bits if pos == bb]
+                if token:
+                    json_var_fields.append(token[0])
                 else:
-                    encoding_bits.append(str(bit))
-            # Reverse the bit order before joining
-            encoding_bits.reverse()
-            encoding = "".join(encoding_bits)
-            output_stream.write(f"JSON Encoding:     {encoding}\n")
-        except:
-            output_stream.write("JSON Encoding:     Unable to parse encoding\n")
-            encoding = ""
+                    json_var_fields.append('?')
 
-        # YAML
-        yaml_match = safe_get(data, 'yaml_match', None)
-        yaml_vars  = safe_get(data, 'yaml_vars', None)
+            field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
+            if len(field_names) == 0:
+                differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
+            elif len(field_names) > 1:
+                differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")
 
-        if yaml_match is not None:
-            output_stream.write(f"YAML Encoding:     {yaml_match}\n")
-        else:
-            output_stream.write("YAML Encoding:     Not found\n")
-
-        # Compare
-        if yaml_match and encoding:
-            differences = compare_yaml_json_encoding(name, yaml_match, yaml_vars, encoding)
-            if differences and len(differences) > 0:
-                output_stream.write("\nEncodings do not match. Differences:\n")
-                for d in differences:
-                    output_stream.write(f"  - {d}\n")
-                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
-            else:
-                output_stream.write("\nEncodings Match: No differences found.\n")
-        else:
-            output_stream.write("\nComparison: Cannot compare encodings (missing YAML or JSON encoding).\n")
-
-        output_stream.write("\n")
-    except Exception as e:
-        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
-        output_stream.write("Continuing with next instruction...\n\n")
+    return differences
 
 def get_yaml_instructions(repo_directory):
-    """
-    Recursively find all YAML files in the repository *and* immediately load their encodings.
-    This function will return a dict of the form:
-       {
-         <instr_name_lower>: {
-             "category":   <relative_path>,
-             "yaml_match": <string or None>,
-             "yaml_vars":  <list or None>
-         },
-         ...
-       }
-    """
+    """Recursively find all YAML files in the repository and load their encodings."""
     global yaml_instructions, REPO_DIRECTORY
     REPO_DIRECTORY = repo_directory
     yaml_instructions = {}
 
-    # Step 1: Collect <instr_name_lower> -> <relative_path>
     for root, _, files in os.walk(repo_directory):
         for file in files:
             if file.endswith(".yaml"):
@@ -255,117 +208,13 @@ def get_yaml_instructions(repo_directory):
                 relative_path = os.path.relpath(root, repo_directory)
                 yaml_instructions[instr_name.lower()] = relative_path
 
-    # Step 2: For each instruction, load YAML encodings right away
     instructions_with_encodings = {}
     for instr_name_lower, path in yaml_instructions.items():
         yaml_match, yaml_vars = load_yaml_encoding(instr_name_lower)
         instructions_with_encodings[instr_name_lower] = {
-            "category":   path,
+            "category": path,
             "yaml_match": yaml_match,
-            "yaml_vars":  yaml_vars
+            "yaml_vars": yaml_vars
         }
 
-    return instructions_with_encodings
-
-def find_json_key(instr_name, json_data):
-    """
-    Find a matching instruction in json_data by comparing against AsmString values.
-    Returns the matching key if found, None otherwise.
-
-    Args:
-        instr_name (str): The instruction name from YAML
-        json_data (dict): The JSON data containing instruction information
-
-    Returns:
-        str or None: The matching key from json_data if found, None otherwise
-    """
-    instr_name = instr_name.lower().strip()
-    for key, value in json_data.items():
-        if not isinstance(value, dict):
-            continue
-
-        asm_string = safe_get(value, 'AsmString', '').lower().strip()
-        if not asm_string:
-            continue
-
-        base_asm_name = asm_string.split()[0]
-        if base_asm_name == instr_name:
-            return key
-    return None
-
-def run_parser(json_file, repo_directory, output_file="output.txt"):
-    """
-    Run the parser logic:
-    1. Collect YAML instructions + encodings from the repo.
-    2. Parse the JSON file and match instructions.
-    3. Generate output.txt with instruction details.
-    4. Save updated JSON with YAML encodings inserted.
-    """
-    # Step 1: get all instructions + YAML encoding data
-    instructions_with_encodings = get_yaml_instructions(repo_directory)
-
-    # Step 2: parse JSON
-    try:
-        with open(json_file, 'r') as f:
-            json_data = json.loads(f.read())
-    except Exception as e:
-        print(f"Error reading file: {str(e)}")
-        return None
-
-    all_instructions = []
-
-    # Step 3: For each YAML instruction, attempt to find it in JSON by AsmString
-    for yaml_instr_name_lower, yaml_data in instructions_with_encodings.items():
-        json_key = find_json_key(yaml_instr_name_lower, json_data)
-        if json_key is None:
-            print(f"DEBUG: Instruction '{yaml_instr_name_lower}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
-            continue
-
-        instr_data = json_data.get(json_key)
-        if not isinstance(instr_data, dict):
-            print(f"DEBUG: Instruction '{yaml_instr_name_lower}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
-            continue
-
-        # Insert the YAML fields (match & vars) into the JSON entry
-        instr_data["yaml_match"] = yaml_data["yaml_match"]
-        instr_data["yaml_vars"]  = yaml_data["yaml_vars"]
-
-        # We'll keep track of them so we can print details
-        all_instructions.append((json_key, instr_data))
-    # Sort instructions by JSON key
-    all_instructions.sort(key=lambda x: x[0].lower())
-
-    # Step 4: Generate a text report
-    with open(output_file, "w") as outfile:
-        outfile.write("RISC-V Instruction Summary\n")
-        outfile.write("=" * 50 + "\n")
-        total = len(all_instructions)
-        outfile.write(f"\nTotal Instructions Found: {total}\n")
-        for name, _ in all_instructions:
-            outfile.write(f"  - {name}\n")
-
-        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
-        outfile.write("=" * 80 + "\n")
-
-        # Print details for each instruction directly
-        for name, instr_data in all_instructions:
-            safe_print_instruction_details(name, instr_data, outfile)
-
-    print(f"Output (report) has been written to {output_file}")
-
-    return output_file
-
-def main():
-    if len(sys.argv) != 3:
-        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
-        sys.exit(1)
-
-    json_file = sys.argv[1]
-    repo_directory = sys.argv[2]
-
-    result = run_parser(json_file, repo_directory, output_file="output.txt")
-    if result is None:
-        sys.exit(1)
-
-if __name__ == '__main__':
-    main()
+    return instructions_with_encodings
\ No newline at end of file
diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index 9a2ef30f74..74e3ae8747 100644
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -1,191 +1,12 @@
 import pytest
 import json
 import os
-import re
-import yaml
-from pathlib import Path
-
-def get_json_path():
-    """Get the path to the JSON file relative to the test file."""
-    current_dir = Path(__file__).parent
-    return str(current_dir / "/home/afonsoo/llvm-project/llvm-build/pretty.json")
-
-def get_yaml_directory():
-    """Get the path to the YAML directory relative to the test file."""
-    current_dir = Path(__file__).parent
-    return str(current_dir / "../../arch/inst/")
-
-def load_inherited_variable(var_path, repo_dir):
-    """Load variable definition from an inherited YAML file."""
-    try:
-        # Parse the path to get directory and anchor
-        path, anchor = var_path.split('#')
-        if anchor.startswith('/'):
-            anchor = anchor[1:]  # Remove leading slash
-        
-        # Construct full path
-        full_path = os.path.join(repo_dir, path)
-        
-        if not os.path.exists(full_path):
-            print(f"Warning: Inherited file not found: {full_path}")
-            return None
-            
-        with open(full_path, 'r') as f:
-            data = yaml.safe_load(f)
-            
-        # Navigate through the anchor path
-        for key in anchor.split('/'):
-            if key in data:
-                data = data[key]
-            else:
-                print(f"Warning: Anchor path {anchor} not found in {path}")
-                return None
-                
-        return data
-    except Exception as e:
-        print(f"Error loading inherited variable {var_path}: {str(e)}")
-        return None
-
-def resolve_variable_definition(var, repo_dir):
-    """Resolve variable definition, handling inheritance if needed."""
-    if 'location' in var:
-        return var
-    elif '$inherits' in var:
-            print(f"Warning: Failed to resolve inheritance for variable: {var}")
-    return None
-
-def parse_location(loc_str):
-    """Parse location string that may contain multiple ranges."""
-    if not loc_str:
-        return []
-        
-    loc_str = str(loc_str).strip()
-    ranges = []
-    
-    # Split on pipe if there are multiple ranges
-    for range_str in loc_str.split('|'):
-        range_str = range_str.strip()
-        if '-' in range_str:
-            high, low = map(int, range_str.split('-'))
-            ranges.append((high, low))
-        else:
-            # Single bit case
-            try:
-                val = int(range_str)
-                ranges.append((val, val))
-            except ValueError:
-                print(f"Warning: Invalid location format: {range_str}")
-                continue
-    
-    return ranges
-
-def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str, repo_dir):
-    """Compare the YAML encoding with the JSON encoding."""
-    if not yaml_match:
-        return ["No YAML match field available for comparison."]
-    if not json_encoding_str:
-        return ["No JSON encoding available for comparison."]
-
-    # Determine expected length based on whether it's a compressed instruction (C_ or c.)
-    expected_length = 16 if instr_name.lower().startswith(('c_', 'c.')) else 32
-
-    yaml_pattern_str = yaml_match.replace('-', '.')
-    if len(yaml_pattern_str) != expected_length:
-        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
-
-    # Process variables and their locations
-    yaml_var_positions = {}
-    for var in (yaml_vars or []):
-        resolved_var = resolve_variable_definition(var, repo_dir)
-        if not resolved_var or 'location' not in resolved_var:
-            print(f"Warning: Could not resolve variable definition for {var.get('name', 'unknown')}")
-            continue
-        
-        ranges = parse_location(resolved_var['location'])
-        if ranges:
-            yaml_var_positions[var['name']] = ranges
-
-    # Tokenize the JSON encoding string
-    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
-    json_bits = []
-    bit_index = expected_length - 1
-    for t in tokens:
-        json_bits.append((bit_index, t))
-        bit_index -= 1
-
-    if bit_index != -1:
-        return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."]
-
-    # Normalize JSON bits (handle vm[?] etc.)
-    normalized_json_bits = []
-    for pos, tt in json_bits:
-        if re.match(r'vm\[[^\]]*\]', tt):
-            tt = 'vm'
-        normalized_json_bits.append((pos, tt))
-    json_bits = normalized_json_bits
-
-    differences = []
-
-    # Check fixed bits
-    for b in range(expected_length):
-        yaml_bit = yaml_pattern_str[expected_length - 1 - b]
-        token = [tt for (pos, tt) in json_bits if pos == b]
-        if not token:
-            differences.append(f"Bit {b}: No corresponding JSON bit found.")
-            continue
-        json_bit_str = token[0]
-
-        if yaml_bit in ['0', '1']:
-            if json_bit_str not in ['0', '1']:
-                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
-            elif json_bit_str != yaml_bit:
-                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
-        else:
-            if json_bit_str in ['0', '1']:
-                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
-
-    # Check variable fields
-    for var_name, ranges in yaml_var_positions.items():
-        for high, low in ranges:
-            # Ensure the variable range fits within the expected_length
-            if high >= expected_length or low < 0:
-                differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
-                continue
-
-            json_var_fields = []
-            for bb in range(low, high+1):
-                token = [tt for (pos, tt) in json_bits if pos == bb]
-                if token:
-                    json_var_fields.append(token[0])
-                else:
-                    json_var_fields.append('?')
-
-            # Extract field names
-            field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
-            if len(field_names) == 0:
-                differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
-            elif len(field_names) > 1:
-                differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")
-
-    return differences
-
-@pytest.fixture
-def yaml_instructions():
-    """Load all YAML instructions from the repository."""
-    from parsing import get_yaml_instructions
-    repo_dir = get_yaml_directory()
-    if not os.path.exists(repo_dir):
-        pytest.skip(f"Repository directory not found at {repo_dir}")
-    return get_yaml_instructions(repo_dir)
-
-@pytest.fixture
-def json_data():
-    """Load the real JSON data from the TableGen file."""
-    json_file = get_json_path()
-    if not os.path.exists(json_file):
-        pytest.skip(f"JSON file not found at {json_file}")
-    with open(json_file, 'r') as f:
-        return json.load(f)
+from parsing import (
+    get_json_path,
+    get_yaml_directory,
+    get_yaml_instructions,
+    compare_yaml_json_encoding
+)
 
 # Global variables to store loaded data
 _yaml_instructions = None
@@ -197,7 +18,6 @@ def load_test_data():
     global _yaml_instructions, _json_data, _repo_dir
     if _yaml_instructions is None:
         # Load YAML instructions
-        from parsing import get_yaml_instructions
         _repo_dir = get_yaml_directory()
         if not os.path.exists(_repo_dir):
             pytest.skip(f"Repository directory not found at {_repo_dir}")
@@ -292,4 +112,4 @@ def test_instruction_encoding(self, instr_name):
 def pytest_configure(config):
     """Configure the test session."""
     print(f"\nUsing JSON file: {get_json_path()}")
-    print(f"Using YAML directory: {get_yaml_directory()}\n")
+    print(f"Using YAML directory: {get_yaml_directory()}\n")
\ No newline at end of file

From d61fb3b27f38a0aec64e7d60e98226c8c8f80f40 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 23 Dec 2024 12:32:49 +0000
Subject: [PATCH 13/33] Skip pseudo-instructions when matching JSON entries

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/test_parsing.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index 74e3ae8747..844dfc2cdf 100644
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -53,6 +53,9 @@ def _find_matching_instruction(self, yaml_instr_name):
             asm_string = value.get('AsmString', '').lower().strip()
             if not asm_string:
                 continue
+            is_pseudo = value.get('isPseudo', "")
+            if is_pseudo == 1:
+                continue
             base_asm_name = asm_string.split()[0]
             if base_asm_name == yaml_instr_name:
                 return key

From 599212489f35a48c29af5ed1d04be2731ae281e7 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 23 Dec 2024 12:47:17 +0000
Subject: [PATCH 14/33] Skip aq/rl instructions

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/test_parsing.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index 844dfc2cdf..2f9ca33abc 100644
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -32,6 +32,12 @@ def load_test_data():
 
     return _yaml_instructions, _json_data, _repo_dir
 
+def has_aqrl_variables(yaml_vars):
+    """Check if instruction has aq/rl variables."""
+    if not yaml_vars:
+        return False
+    return any(var.get("name") in ["aq", "rl"] for var in yaml_vars)
+
 def pytest_generate_tests(metafunc):
     """Generate test cases dynamically."""
     if "instr_name" in metafunc.fixturenames:
@@ -50,12 +56,16 @@ def _find_matching_instruction(self, yaml_instr_name):
         for key, value in self.json_data.items():
             if not isinstance(value, dict):
                 continue
+            
+            # Skip if instruction is pseudo and keep looking
+            is_pseudo = value.get('isPseudo', '')
+            if is_pseudo == 1:
+                continue
+                
             asm_string = value.get('AsmString', '').lower().strip()
             if not asm_string:
                 continue
-            is_pseudo = value.get('isPseudo', "")
-            if is_pseudo == 1:
-                continue
+                
             base_asm_name = asm_string.split()[0]
             if base_asm_name == yaml_instr_name:
                 return key
@@ -80,6 +90,10 @@ def test_instruction_encoding(self, instr_name):
         """Test encoding for a single instruction."""
         yaml_data = self.yaml_instructions[instr_name]
         
+        # Skip if the instruction has aq/rl variables
+        if has_aqrl_variables(yaml_data.get("yaml_vars", [])):
+            pytest.skip(f"Skipping instruction {instr_name} due to aq/rl variables")
+
         # Skip if no YAML match pattern
         if not yaml_data.get("yaml_match"):
             pytest.skip(f"Instruction {instr_name} has no YAML match pattern")

From c2e6e928171236ce728ce101f285dacdc2f2abb1 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 23 Dec 2024 14:53:34 +0000
Subject: [PATCH 15/33] Add pytest to requirements

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 37293bfb9b..f0d4e11b04 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ jsonschema==4.23.0
 tqdm==4.67.1
 ruamel.yaml==0.18.6
 mergedeep==1.3.4
+pytest==8.3.4

From 7c9c65bbddbeea0509d287a502008a9306559e16 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 27 Dec 2024 10:56:44 +0000
Subject: [PATCH 16/33] Read LLVM JSON path from LLVM_JSON environment variable

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 76f0246171..f15c5a80cd 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -2,7 +2,7 @@
 import re
 import yaml
 from pathlib import Path
-
+import pytest
 yaml_instructions = {}
 REPO_DIRECTORY = None
 
@@ -16,14 +16,15 @@ def safe_get(data, key, default=""):
         return default
 
 def get_json_path():
-    """Get the path to the JSON file relative to the test file."""
-    current_dir = Path(__file__).parent
-    return str(current_dir / "/home/afonsoo/llvm-project/llvm-build/pretty.json")
+    env_path = os.environ.get('LLVM_JSON')
+    if not env_path:
+        print("\nNo LLVM path found in environment.")
+        print("Tests will be skipped.\n")
+        pytest.skip("LLVM path not configured")
+    return env_path
 
 def get_yaml_directory():
-    """Get the path to the YAML directory relative to the test file."""
-    current_dir = Path(__file__).parent
-    return str(current_dir / "../../arch/inst/")
+    return "arch/inst/"
 
 def load_inherited_variable(var_path, repo_dir):
     """Load variable definition from an inherited YAML file."""
@@ -31,23 +32,23 @@ def load_inherited_variable(var_path, repo_dir):
         path, anchor = var_path.split('#')
         if anchor.startswith('/'):
             anchor = anchor[1:]
-        
+
         full_path = os.path.join(repo_dir, path)
-        
+
         if not os.path.exists(full_path):
             print(f"Warning: Inherited file not found: {full_path}")
             return None
-            
+
         with open(full_path, 'r') as f:
             data = yaml.safe_load(f)
-            
+
         for key in anchor.split('/'):
             if key in data:
                 data = data[key]
             else:
                 print(f"Warning: Anchor path {anchor} not found in {path}")
                 return None
-                
+
         return data
     except Exception as e:
         print(f"Error loading inherited variable {var_path}: {str(e)}")
@@ -65,10 +66,10 @@ def parse_location(loc_str):
     """Parse location string that may contain multiple ranges."""
     if not loc_str:
         return []
-        
+
     loc_str = str(loc_str).strip()
     ranges = []
-    
+
     for range_str in loc_str.split('|'):
         range_str = range_str.strip()
         if '-' in range_str:
@@ -81,7 +82,7 @@ def parse_location(loc_str):
             except ValueError:
                 print(f"Warning: Invalid location format: {range_str}")
                 continue
-    
+
     return ranges
 
 def load_yaml_encoding(instr_name):
@@ -132,7 +133,7 @@ def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_
         if not resolved_var or 'location' not in resolved_var:
             print(f"Warning: Could not resolve variable definition for {var.get('name', 'unknown')}")
             continue
-        
+
         ranges = parse_location(resolved_var['location'])
         if ranges:
             yaml_var_positions[var['name']] = ranges
@@ -217,4 +218,4 @@ def get_yaml_instructions(repo_directory):
             "yaml_vars": yaml_vars
         }
 
-    return instructions_with_encodings
\ No newline at end of file
+    return instructions_with_encodings

From 3178538ed84b7b2e4c8dda98f0f081748c4d7447 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 27 Dec 2024 11:05:58 +0000
Subject: [PATCH 17/33] remove and ignore python cache

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .gitignore                                       |   3 +++
 .../conftest.cpython-310-pytest-8.3.4.pyc        | Bin 549 -> 0 bytes
 .../__pycache__/parsing.cpython-310.pyc          | Bin 9704 -> 0 bytes
 .../test.cpython-310-pytest-8.3.4.pyc            | Bin 1777 -> 0 bytes
 4 files changed, 3 insertions(+)
 delete mode 100644 ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc
 delete mode 100644 ext/auto-inst/__pycache__/parsing.cpython-310.pyc
 delete mode 100644 ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc

diff --git a/.gitignore b/.gitignore
index ddd1aa6a76..93d0d737ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,6 @@ gen
 node_modules
 _site
 images
+__pycache__/
+*.pyc
+.pytest_cache/
diff --git a/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc
deleted file mode 100644
index 0b8bc1d08667e8e7a0f516d6da2fb738b7cfd03e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 549
zcmYjOJx{|h5Vg~!X$zG)z`&LPRmzZNV?tFC8-jw=i7rtZJFQC^N45i`Vq|4w;vevr
zydnnv0u$$yRz2xH-Z}5xIp*DN2Z6nR%;w*azYN$n3&03m_8|~NaE7M1&ep17A&ge`
z)WL{Yr16TTO@b%Lw}0SUG)}Nd|7L0j{1RNALU=(-P%}JYzcug%JAj)TOb{r!<Mlxh
zEG5sQlx0*qQgK1o$9J(>c#3;!LA|TV?btJ?zNMRSq8QJ$y`WjK?gW9L1&;_5>$81|
z=}HDtl?P`usjGzNDrPy$=idFr&2=p)eXQFtA-piM0GdTCBo&`XxNwKxVYuKc8pbKl
zCFdcbf<A<T$>cF8bCxnn0x}EfvkK!<@qp!0g)k>oRH{Ky>0VKpGTMzWze(B<diV&7
zHWa?ATeFg73ds6!R8-LpokSb@AeOL`*>E7}lVFO%(%Y7Hx4jOKH`7F|Twh@?v|yTU
Qov%14Gdip{F|Xf8fBQq3DgXcg

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
deleted file mode 100644
index 75bd77c41bc885711b1bbf6a2aef74f4c400a15d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 9704
zcmb7K&2t+^cAuUZ0E58?!7quFWO*cikdR2ovJ>x`tCd!wWUqE@N)ctN^uZuQ^pJuG
z0%XsS5~l`Ru25CB4^EtqL#k3rk*Aaod&n)7e;}t^Qpsg1hoo|ej;SPDDOGBf-+KcP
z04-OtV4*S7AFp4(e*NC>y=JRWur+)dcjW4W_ciTb>81Z;;^im!`g<s(CPhQLm;8!*
zg5ULfI{zB?4E*X1b3Ju0C6Yerd+EC7W~6W}IU!A%dMfT^Wm;yQYWHku$t-?z()RK)
z=M`2=nU{s9=DngU$|1B2$&wt#Z%IzdQ8|XO!*X06LCuIaDvz$|YQY=X)i9S><Zt=d
zb8x9GltrAA-io``2xPq$#9Gau`p$=*)A-Kg>sL^OLxC1)f!NhmA=38rNJRR+en(r>
zA}$4{5O_{&cTras;m;S{6|b@5ta^dtI@@kz%X3y#YaK5%U%l0I)y|BgykJW;oen2X
zeZ^_E0%xVQ)s#+4Ii6B2RVf>>UH9uvKX98hFE*KB1C-HVN;hz0W6j%%GaV}5YKk`O
z#gETDY^{6e+?7_-Z?(=zPkC$SRNb#_&u%sAD|JuK%H?z3li-}Y6|`oF{<#fTVWQQ_
z#!hVcSV0xD<VXm46uNGUqDbS<5{fEn0!8;j-7I`rX8(qVK-<;Y0@4!uS{o9B)bxD;
zDe=>uA%%ntsYV)Y179-|SGA>+?NpQs%xz755SUSFQfr$*>QQ<x6Ic=6rFm4_M}Mx%
zM#jESKM-1z#v7Z*BST&3wfI+MYEcX9$dKt>{SOQ6bdcN2N9ldh&fLMQre<op21Z-w
zHUI8oa6*4~@eK-5MrP`0ZCiM@poj*Zv;3T0rFuKcM)r!1-L}KOw;g;ws&9Ku$89<!
zO0~ti>@?kV&za_}|JmY=h4PHEQpe^74?U;WQp)o;T1^R&J9jUC@-f;P9w<ns(Q>6z
z4}8aK)>_<78(raf?s!v;h7+hACulkAZcuy3<apL9&xd-qkKpo-vqjtL+`4x2M)mr_
z;_X|X&fmU%V_}i!3Y6RQS6T|BpqG9!Ni3?ds4TolRd(F<Mp<82h=q!+E<16GCyuk*
zuByAs4bLwZW5f4?IMrx9_Ec=RQpOqOZ8Tiy?C^JHj`fxwn;UNMFg72x>dm;wU9Mig
zb#4Ckjazr)bloRu#Aa*5YsMyV5ocK^Nj9v1<s1$M$(mOk+cik84mo*#Y%>>0eUZji
ziCC5AEAV>_{Z?CQCpKwVtnaMEsT~rs8iw-5Hh1bQlAp0spHEg&kiN$w#7mW%yOJ40
zJ==_Q$`ZC15vFe9z|h~A$O{wi`sXFx)bmDOoOnrPURb(1i*YrwrQOdY9_W-A`1=2?
zYdAQ5N{UrjvdHjlsq>Sz2Svsq7?x&J$P^u-Jwf(!UvH-ZJxcA0$XqkjS!pG2f3C@F
zWTLd6rHF#e$^28T4J+KvL^=-ONZX3iQ2D~7wksa!?QB=oqYP?`liCGs^S!{>GvyGD
zpWe=`YVCYv%F=|^E<`y{D71@FelHajqT)(&<O{O&L$q$*jtX)Z+$cH1JdEPKbW{U3
znS>kA8jA9uF^ZW>?P1K5mSfTIzOH^>j<<)SA<!*F!wKEZ?P#c-$C!~I6PdeYKB`}7
zQ7Yl)OKn1f`565|TM_bzJo?lk%DbXH7LECT6^-nr<iwXW_vAB3P-vTf9F56i7%?7=
z%HuFV{}PSkSE#@1>w6*^#faCUag-;YWjf<90;~GxzJ9O2x*ya1_r88khf=CTDYIwo
zYgorF)_>$c`sP~d8{wxTTOM1}z7cAYDV&OqfQ!>n7A4wHzQIy`^I4Xu{R)yf3dz7!
zk(767cVGKTd?6r@w+7__njb_*AzPLJjs8Y#o~9c4cK0pF@~%(G={-9-x-a&1$Z`T&
zHL)wemkt_kA7AAjU(aB2A>Uth`j*yA^xf&}dnTFy_h<V?SjmWJ1O|4-orkAk+c)1@
zhmZ2u;IYN*9y@0`v4m&gY?WVj9@m4!ERxmjS<&g`I(c5WSTc9z#MWJ3VS3#2q3gqT
zcCDJzsIPg>_WJ!xOEWOXj{mT=)sW6I7*HMzh;-2X(Uu>ePsL^qL9Az!9QbSnE6&_Z
z%~z*pXr^W|x@WaJz5=w)zwa!Q*>AMqg_+(B3{a2jzK5YSC$WK!@4egKk$c`sz=R8?
z!BA@jNMmIMULT;LW7Nahg;wGh5@7)ZU^VWxTW^pJC#k?Ik%a4ht62%pp!=ZwzPE3z
z&sU$9V5*)wC|-zJRm5-j8gEiE8;oz@0qr!r<|;O)?mJ;_#_^tPcr}t_c&y^gyG;T>
z2@jsLp<w(~W2Z8JG{{JjQC!5=zk-6W*)Gf;a0WnLq~YjJ5=xU|pA492>wyu8U4arR
zOFkP<WC)B1f9%8c1$7RT$|jsxpyCvY`UZYB8ucJH>dgQxnMSKtrB?G{ef42%z{SJ{
zpxt4oP$U%PRneOr>L<h|8S=bt0Y8<5WmqD7x3k8?c|3x+t6f#DT%e&1|6*m3P;7Nc
z#wNM>aC-XE-2HdXFGcq+&)#!q!*^ysSUQWk2lpSyvk#Uc{9bynRK8SxPC)JXOMJpN
z2lrF9JfAVAyRqT9iVy{qI~--@syNr062~E+Hz&+*uRpkd|92lOEuDF=6zkjT1G_>y
zLOU~x?|+;?v5H+;vJ?47;!6!+90>4_^i9aWC$eviSg*Vj8ykQFdJq7Yi{(BCYhNH`
zCRttFCc<!o+6gVJ{C}j^8ycEdXaT3^94Pc4>|$}y@)&ei6iKSA$Kst>oDYx9C8s6P
zQ6KQ^2^5ujPlcr^XL&0~jt3cysTTs3hXzcAQ=OUvi$bFh2I34PG^fJsXB{MkQ!$70
zpD2$Lj?+PDVI~}C0JYWRl!n8zM5qsG4xW+z-B1^?rk=#rC0e40_2oW6tE;H|1L}7T
z#S3vMJT={~RpnQx)eGT-Gp`V7VpWnLrrt~>1ZHvEN9LSQ5OLKv$kveULES_r-0Ait
z8^pZ{-6iIetfx8(zHsJ%1$E`c8A8!+qY<Y`ahp<&P;-g}6sPKex9%%KV(K`G@}xRN
z&t)pkpooVD4jdoqp{^mSCiW>VF|6wtE$X6vNJB@uV6Y06tutaKE1=YG6Lkd0TAU7A
zYhKfjvpfqeCeEU)f@SYXEP{BfN$#gn4?S5u=%X$Z0kUTZO?#F*ws=B}k5f!H&Uc3}
z-xlUhaPAP-(`zEzILE)-m>}1(uI6cy>r^m+J@)+v=?CcKhbT-t4cM9%BSv1&iwS^L
zGjG9;jflLFK-(gKuBA^1lfkTsHiEf%gZh~G>I8pb{T%$B6#D1XSO0v>7}MWGo2ieW
z59o`ce#9uEM^T(cyQwQljLuP!K`{`cldqvTmlouYK;OR+;qOH{;4nM@z=WQFI<8@O
z$4@4~3Vve1#ZPJOFZ%jt0uc#HKI|Hx)Zb!rORm$BPsVUvfulkL5XT4%<OyyedNlxy
zifg92g$NYD82~*CfKEIPF;MHLTpHvo9A~P-6oUeUjmRtkdLBC(rFvZ5(VE$A_5pOq
zh{|29GkSbrbecyWiPGOY8la9wbH9l}egGvi;U(s2l2>X)7U1M*#Kd}(4G>?ee~dDF
znJ5cxkICcvI%YY+Z?ARUPChe<=H}l}KScA{UHIqDdv+gDw|pJI5|A1J2g%4`NZXMS
z)|u;D=P4Q~UvJxYwID~n9DXK;FeE2WZwY|QVW#j#l<TdvnMm(V8X4u#=hMihGRQ}G
zkMfbdYa)Otpza3tu8`~<y)mt~cw7N%o<w*<{Jk9&d(?x1oMJtirrKv_P<%8b%U|mZ
zq<^cK;}K_wMu(5HOpo=Io{*0?y$A;yl)e!=UkZdlLJN=asF{Q=BsR1wc*dUY&L(5;
zVsv}x7ehoNd3r1!YvoFK^`-)N>A+n=S$!S@X{P|l4-Os)hmIX!e|K&GBlwkA*l<JF
z`?4BWlacm|_kR2vf9!C~zT*1z8YmuQ9p*5@>?=NYIJy9+Gv^F^!qLmV@2xK*!S*2o
zd1{Zdm;Loc;I`&!Xe^w&9F8u+z#bYIn^YNQoaG&)3G6U+3ZB+yuOE(n+;O^nW5S$$
z0}vDv^5^WhaJ|`i`xvMs&Tf!!2|0(O*P9z#f!{aMal)gos03qnT)eRr^brb2@qT!C
zc=Q#OG&&r$uXU3`Hxoz=)=b`>J1~aoXDDKEA8wHR%?q)l2n~dzysO=z38&%AD=I&2
zaxefW$oTUhc(ND5QHJw}Xe1bauyUcB#<at?57_k`*<!HC@j%77+VgUA_6yT-W}UFu
ztAwsPp+4t)D`JzfH{o<2rB5jD#o(jQS9U<09e!lbAA~V;hocIwBRwB6W8LS(7%vOJ
zV?gl{9fq%6qZBL!A_$_mYySZX-<oeVgL-p|l4~5}HIYy4KVy{&!ou=!oO-MPN;qq!
z2qniR;aWwpfuc;aN>Oo>ig!>@=4{(T6v6m~b3^P4;}okLf|MOUPWyobFpRU;p47Yz
zLL%i-&soL=v_Yu|BzW#RCWN9>&{9Kctyyht_;Ch3_@U4hNtMBhNo$M^JaaCrhnv(A
z@g`&}ygoe1K;k5)=9oL+9L$--wAGG{d4_?0fP$R8Eo`KcN5nDw<@M8eqIVtnu_J&1
zd7<mwC)=<AAau%*VbruJ=@xoYFPnOzbqr(kdJ!W4K|l%c;@5f8HcEO)oFK6Bx&CYQ
z6cY~wE0m}{Nbo<v3$kA#K&m>@_9=_><^!Xxw<#rU;_U>bq?2$`oe8u@$bCgH!BR)A
zD9!J1Ef}dIgO<V7Kzc8|Pg%0!Cf<<0Fv6e{px3sLU*D#S0nP#g!1mI#KIuE<g^@HR
zWQp7)TV`mKa%%%x#aU(8ZORfHWIF1I-8(tWBy*sKu&^Yc$tcPu#^z(Uu|`Q;=`*H2
zMwJ!^xtN8Nu0a`6HIDkS6&Hz<s;nyn=#(iP=J&qcMFv;3T7fDMofMr>Unx8@+>Q$%
zhX{B(NxL3am~mV!RKG)zE)IMG?xs1$dtL$<ghqU>|Bsnc6rj{71nhoj59#DE2qdtG
zuwUCxt{nKHfpckZ0UTgA5%e2@xvTqQn|6=_B*P8@Ty^_nul-Y-c4X4-NdtR}U5_Yi
zJs0UZ;K`!4X-gCPnR35Cn8;iDAtm$OBnwYQpieloos7sSsSJP9&8(o8yXsO$=fGhl
zH$<H*O_Im)Sr^z+5@Fo%(De{URWk@<o1W9^W`>gOW_~WKRXD1yvt(`<|2xw-lI1z)
zHuS5XN4_rL+X*s(2LpS2Pt$EvXCPZgUQG>V4@>y&iVqz60V#gyEUCvCxS<O!T1Udv
zz(Sd!9uMEQZYHf(vF+^PSk)KG8CG9)o~mzALH0JWuat#R@1iKD6BSJ&G|u{yNr+Q?
zQy1H=zfN`&>x&D^E|NJ^L9#lm#RWRYNeqV_`Y+J%Cs5=ICD<ldG=r^Iih>{grD>=?
zLE{TMvCNg=NiY!7(0M%wY5oB!76})Zj<`q@t{&)qF@VnQ3IJcEP{NY{snAngYz@(M
z*j_p^_A>jhUu_d$QXjb7db|H}3sCw5fTp$cQ@~8z!-gf$HG?eO6#%*-MMiKGkP-pM
zFH$sD@<fmlVL&?2_iR2_@8eu$a6y)X)x)`JXOU;tw>0$-^ioj<P&NI`2=b2#GDARk
z^9IgVF+y&-Z3jb<9h7hue{s;)I@CA4H=JCt5yZvV^dkg8(te5?i?30G)$1~k)qmK|
zMY+9ESztg-PepnP#-cP*d<Y3pgF6LqQ|g3xY1s4vc&B>`Y8{RWz1GcNa^1)QwNZYe
zF_g?IfcJ6SY}iqbt`0Z<_B*ovr+!&ubz>1ip+u??WLX{$|2jDnTZy@6kfo6ES*_QS
z19HCNe1v=BzH^E+5nUd4y#q449h;mS{dX(QP0BfP2R@}_i`jedxf&pq40(^{Xad;=
zFeD5VNF5Vr(l>E8Mo0#QKSKVjuN7?ZRdvU40VQqMC4!7T57y&!K~cyajwOf+n1ru2
ziP1SH9J_k$%BLUA4Fnle;mlO$>XeLAx$kZq6Vftg9Dl98v4LX>oTWC2d}WIAWu10T
zv{6Etui)#F@7@On4WI&?IE}muCzi=p;CzWm_9)mlq=>0QStRZeiOMI3t$}r+;jS-B
z_u>`w^odkn3s0!6CN3Tc%=I$6-HxEfhDSUOkKek!I6wQ@fCv`1*4J@eVXGC;3Xj{j
zTY=l?@8Kf=!aoblLFDM6HmKWa`|7pZm#=?(?W)sv*Nf*5Z`}IiGTr;y;qUy$VcY*P
zzz&b!U^Q|m8M^G@l7k>`K$jgYr+h?FN|+tEA_J)?o2yQu!1XVZ-*j#4O2QM+3$;Uy
zgo6_UPDWEbroIGu@lO#6WbWDZo786dEfpxA`ZBC0&{rPnB;iQS)K}CkmuNfR2ghQC
z6H98BT1tRp{n*uyhdA}d3ZJCdtS-WeEp|`ze;1uAXmTQqsA(!n)nTU+G*0N~u%_5O
zrI;XHxs5)iWx-uo1b6{y0bgI5pBDj_iwt5-lU#}g@T)Uirr(lLLQW2FnxSu=!LSWj
zT!gbZ@t?&ZQ>ecL;TpaN9|KPZkcpsg02lKJMqdhCVUjCg2M`Ec@N(XaS{V*;KWPFO
z3BV^fCz+DzuK|={h&7n~ncf(QGeqBVX5l$a82<EA-(AH)-`EKrwwezA8%p<3Z0tA}
zIEr5Nnu#kV5ngmIxT^M$H>8I_FRCAb+;ciw>Q7PN{QL#|q?0{oB{5yKy3Gm}o8A)y
zF(iYsp(qAPbRdoYyFecL@TQpSz%-xHc2G7MVuxTUGAZ)Kjrmgj64kL)tx~^g6?4<t
z@WY&pF~(MZMzbtX!OY}ptwsa*nf|jvG25?G8=nC_5{yac@;^y-vewi7aL|$*7}kGU
z!~p{YMXJ?4{s}fD8C~nbdP{CWeq^ltw^3MH^6v)}MA1JZ*t(Uq-WXZ2er^@)H>`_R
J9zoX2{{;bdD#ri-

diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
deleted file mode 100644
index 82b12de2706eb3018c923f41d81b8a5588cb79b4..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1777
zcmah~PjBNy6rZsj+i~0$Rzk3%EZzkna!67RT%xL4w0}@Jq^lORB3cx=o=NNAc)T;?
z{;`}>dqs#dLQ0Q(89u;V;lfwg3%nV(+3s?{(Tv}`_h#n3_xnAEt*r$Jp7vkK(IXF`
zKlS4FW5ePh4Dk#mf(Xvg7@M~>w=IOx+!?oUV{yl>v3O$--d5(%+v7Gy)SU&yCe9_g
zz~c^S5%&^}LxK-b*ZUhzMWZfO0Wb4O5pzL#g0=Gcap0}TBN&3iRHGSMVuG*m0$<A(
z$d<Hb_R^`1{#t&mZ`BsDudIbFT>?J8qTj3wj1jEBa%<(*_RXrswN=|wTSqynI=$XZ
zuzWI2Gg<|`9;XGHkd#*eD`ip2zC4rFyWfd4KZ?FN*c(Mp!EqQ>n~iE8+(jxGKaU=#
zoF-ZV>zzHb**HhfVtG7_x}I_qmQT|o)jpz<6*6VHvY1d#0S?NG4}}q&8B253DtMa9
zuA@9oUzSuz)n3J^{F^9Mr-{;p|2yLAP{&up!7-cDK|E!-U~E7rr#}vODw304nWxi~
zk{&r6&@(xROUZg^F62PM4gKO=bp(}V0eHz{(bCA=fhn{?9NHmX$Lkgh99aAvP`?3Z
z8x%ea1Lq$g)aZr10QdkJ)RdgypX`NGI|P-8-<6m&P+3bGAk?5av(^%Ty29dfusQ}v
zZEYE=SKGwBuzn5~t-7`Juh7DM4)k>3cVT0c`(GfjeG2F3-}f+xSGy;O@4{KWK8v)?
zI<DPi8+LmnAe~E8dyj#>3m?3~)s1WBMoiYtJ_J<R050=2$gBGD`B(d+-O<;>Xv892
zJYFUmOA$?3nUjd|D9$6wIS4FCO3o?iN7a4sZorBP&PbV15*?mLue+mPJ$SOezqdcU
zcGJv@PGb?}OhyT(v4nG~&1+_9zW&FmFt0xRmdQAac5esvR0lSUwyNI$#GdB2FRwm*
zk|&G+^+iOOiDMqi<hT!9pH6{Rc|yhT4H{TbA<dfu7~&pGM?i!hI?&O0iLP{iqbtm<
z8vkOww4ki`74CH{)q*Ofg$jg>xfG|VJO-uJ-%?JN=G4%-nKs}^6>MjzkO`X?J6rl5
zed#?^eWXb|$+DAqe>-DIoQWL}Zai1jy~!yeuajt?yI;M3V|**&>SI9fH0HUf1psT{
z1t%(@=K+^)z;zT{w}Enta~+Curg55aT@LOVJw1bHpy63TbL9Y}1X3=tRO&26w{6;r
zcl3r(PnvJa<y4fJRCua9_<%^7OVu(PRG?Fs82RlQwO7S>4-OHZ!4yFI`j!vv8`?1A
z&<gP8KR(EQ%ZJ8pbo>)=?XD^RuuL-v{8r(^;(W4N-JLYq^<x2_bWQv|r>8uXRJB(s
z{ncu80c<v*#GHQsC!2C8PyZo6EltssH%-rE$!U|mzW-J>`jf^D?Edx~`X-}0`fo(&
P0zE)J4zUAygd6_?S3>7_


From 11d4c607c5def4855aeefc9b9bfedc7ee48340cb Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 27 Dec 2024 11:35:46 +0000
Subject: [PATCH 18/33] Add LLVM test to the Rakefile

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 Rakefile | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Rakefile b/Rakefile
index a0358f82f2..f365e4cda4 100644
--- a/Rakefile
+++ b/Rakefile
@@ -84,6 +84,22 @@ namespace :serve do
 end
 
 namespace :test do
+
+  # "Run the cross-validation against LLVM"
+  task :llvm do
+    venv_python = "#{$root}/.home/.venv/bin/python3"
+    if File.exist?(venv_python)
+      begin
+        sh "#{venv_python} -m pytest ext/auto-inst/test_parsing.py -v"
+      rescue => e
+        raise unless e.message.include?("status (5)") #dont fail on skipped tests
+      end
+    else
+      puts "\nNo Python virtual environment found."
+      puts "Tests will be skipped.\n"
+    end
+  end
+
   # "Run the IDL compiler test suite"
   task :idl_compiler do
     t = Minitest::TestTask.new(:lib_test)
@@ -289,6 +305,7 @@ namespace :test do
     These are basic but fast-running tests to check the database and tools
   DESC
   task :smoke do
+    Rake::Task["test:llvm"].invoke
     Rake::Task["test:idl_compiler"].invoke
     Rake::Task["test:lib"].invoke
     Rake::Task["test:schema"].invoke

From 21852392fb0337e07f9a7f0b220a64bb49047450 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 27 Dec 2024 12:01:21 +0000
Subject: [PATCH 19/33] Optimizations to test logic and modify test order

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 Rakefile                      |  2 +-
 ext/auto-inst/test_parsing.py | 25 ++++++++++++++++---------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/Rakefile b/Rakefile
index f365e4cda4..ba0c9a129a 100644
--- a/Rakefile
+++ b/Rakefile
@@ -305,11 +305,11 @@ namespace :test do
     These are basic but fast-running tests to check the database and tools
   DESC
   task :smoke do
-    Rake::Task["test:llvm"].invoke
     Rake::Task["test:idl_compiler"].invoke
     Rake::Task["test:lib"].invoke
     Rake::Task["test:schema"].invoke
     Rake::Task["test:idl"].invoke
+    Rake::Task["test:llvm"].invoke
   end
 
   desc <<~DESC
diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index 2f9ca33abc..df3b3fac60 100644
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -49,26 +49,33 @@ class TestInstructionEncoding:
     def setup_class(cls):
         """Setup class-level test data."""
         cls.yaml_instructions, cls.json_data, cls.repo_dir = load_test_data()
+        cls.rv_instructions = cls.json_data.get("!instanceof", {}).get("RVInstCommon", [])
 
     def _find_matching_instruction(self, yaml_instr_name):
         """Find matching instruction in JSON data by comparing instruction names."""
         yaml_instr_name = yaml_instr_name.lower().strip()
-        for key, value in self.json_data.items():
+
+        for def_name in self.rv_instructions:
+            value = self.json_data.get(def_name)
             if not isinstance(value, dict):
                 continue
-            
-            # Skip if instruction is pseudo and keep looking
+
             is_pseudo = value.get('isPseudo', '')
             if is_pseudo == 1:
                 continue
-                
+
+            is_codegen_only = value.get('isCodeGenOnly', '')
+            if is_codegen_only == 1:
+                continue
+
             asm_string = value.get('AsmString', '').lower().strip()
             if not asm_string:
                 continue
-                
+
             base_asm_name = asm_string.split()[0]
             if base_asm_name == yaml_instr_name:
-                return key
+                return def_name
+
         return None
 
     def _get_json_encoding(self, json_instr):
@@ -89,7 +96,7 @@ def _get_json_encoding(self, json_instr):
     def test_instruction_encoding(self, instr_name):
         """Test encoding for a single instruction."""
         yaml_data = self.yaml_instructions[instr_name]
-        
+
         # Skip if the instruction has aq/rl variables
         if has_aqrl_variables(yaml_data.get("yaml_vars", [])):
             pytest.skip(f"Skipping instruction {instr_name} due to aq/rl variables")
@@ -105,7 +112,7 @@ def test_instruction_encoding(self, instr_name):
 
         # Get JSON encoding
         json_encoding = self._get_json_encoding(self.json_data[json_key])
-        
+
         # Compare encodings
         differences = compare_yaml_json_encoding(
             instr_name,
@@ -129,4 +136,4 @@ def test_instruction_encoding(self, instr_name):
 def pytest_configure(config):
     """Configure the test session."""
     print(f"\nUsing JSON file: {get_json_path()}")
-    print(f"Using YAML directory: {get_yaml_directory()}\n")
\ No newline at end of file
+    print(f"Using YAML directory: {get_yaml_directory()}\n")

From 9b9bce58566469a86e584c2b699acd6a52e8bb83 Mon Sep 17 00:00:00 2001
From: root <root@L-SNPS-Wgcf0wOB.internal.synopsys.com>
Date: Thu, 9 Jan 2025 16:02:17 +0000
Subject: [PATCH 20/33] Add prerequisites syntax

Signed-off-by: root <root@L-SNPS-Wgcf0wOB.internal.synopsys.com>
---
 Rakefile | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/Rakefile b/Rakefile
index ba0c9a129a..1349cf2339 100644
--- a/Rakefile
+++ b/Rakefile
@@ -304,13 +304,7 @@ namespace :test do
 
     These are basic but fast-running tests to check the database and tools
   DESC
-  task :smoke do
-    Rake::Task["test:idl_compiler"].invoke
-    Rake::Task["test:lib"].invoke
-    Rake::Task["test:schema"].invoke
-    Rake::Task["test:idl"].invoke
-    Rake::Task["test:llvm"].invoke
-  end
+  task :smoke => ["test:idl_compiler", "test:lib", "test:schema", "test:idl", "test:llvm"]
 
   desc <<~DESC
     Run the regression tests

From daae5d50d704ac6523e4893d8e4de4c96d8887fc Mon Sep 17 00:00:00 2001
From: root <root@L-SNPS-Wgcf0wOB.internal.synopsys.com>
Date: Fri, 10 Jan 2025 09:53:50 +0000
Subject: [PATCH 21/33] Fix pre-commit related issues

Signed-off-by: root <root@L-SNPS-Wgcf0wOB.internal.synopsys.com>
---
 ext/auto-inst/parsing.py      | 116 ++++++++++++++++++++++------------
 ext/auto-inst/test_parsing.py |  33 ++++++----
 2 files changed, 97 insertions(+), 52 deletions(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index f15c5a80cd..31b4ad4398 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -3,9 +3,11 @@
 import yaml
 from pathlib import Path
 import pytest
+
 yaml_instructions = {}
 REPO_DIRECTORY = None
 
+
 def safe_get(data, key, default=""):
     """Safely get a value from a dictionary, return default if not found or error."""
     try:
@@ -15,22 +17,25 @@ def safe_get(data, key, default=""):
     except:
         return default
 
+
 def get_json_path():
-    env_path = os.environ.get('LLVM_JSON')
+    env_path = os.environ.get("LLVM_JSON")
     if not env_path:
         print("\nNo LLVM path found in environment.")
         print("Tests will be skipped.\n")
         pytest.skip("LLVM path not configured")
     return env_path
 
+
 def get_yaml_directory():
     return "arch/inst/"
 
+
 def load_inherited_variable(var_path, repo_dir):
     """Load variable definition from an inherited YAML file."""
     try:
-        path, anchor = var_path.split('#')
-        if anchor.startswith('/'):
+        path, anchor = var_path.split("#")
+        if anchor.startswith("/"):
             anchor = anchor[1:]
 
         full_path = os.path.join(repo_dir, path)
@@ -39,10 +44,10 @@ def load_inherited_variable(var_path, repo_dir):
             print(f"Warning: Inherited file not found: {full_path}")
             return None
 
-        with open(full_path, 'r') as f:
+        with open(full_path) as f:
             data = yaml.safe_load(f)
 
-        for key in anchor.split('/'):
+        for key in anchor.split("/"):
             if key in data:
                 data = data[key]
             else:
@@ -54,14 +59,16 @@ def load_inherited_variable(var_path, repo_dir):
         print(f"Error loading inherited variable {var_path}: {str(e)}")
         return None
 
+
 def resolve_variable_definition(var, repo_dir):
     """Resolve variable definition, handling inheritance if needed."""
-    if 'location' in var:
+    if "location" in var:
         return var
-    elif '$inherits' in var:
+    elif "$inherits" in var:
         print(f"Warning: Failed to resolve inheritance for variable: {var}")
     return None
 
+
 def parse_location(loc_str):
     """Parse location string that may contain multiple ranges."""
     if not loc_str:
@@ -70,10 +77,10 @@ def parse_location(loc_str):
     loc_str = str(loc_str).strip()
     ranges = []
 
-    for range_str in loc_str.split('|'):
+    for range_str in loc_str.split("|"):
         range_str = range_str.strip()
-        if '-' in range_str:
-            high, low = map(int, range_str.split('-'))
+        if "-" in range_str:
+            high, low = map(int, range_str.split("-"))
             ranges.append((high, low))
         else:
             try:
@@ -85,12 +92,13 @@ def parse_location(loc_str):
 
     return ranges
 
+
 def load_yaml_encoding(instr_name):
     """Load YAML encoding data for an instruction."""
     candidates = set()
     lower_name = instr_name.lower()
     candidates.add(lower_name)
-    candidates.add(lower_name.replace('_', '.'))
+    candidates.add(lower_name.replace("_", "."))
 
     yaml_file_path = None
     for cand in candidates:
@@ -105,40 +113,47 @@ def load_yaml_encoding(instr_name):
     if not yaml_file_path or not os.path.isfile(yaml_file_path):
         return None, None
 
-    with open(yaml_file_path, 'r') as yf:
+    with open(yaml_file_path) as yf:
         ydata = yaml.safe_load(yf)
 
-    encoding = safe_get(ydata, 'encoding', {})
-    yaml_match = safe_get(encoding, 'match', None)
-    yaml_vars = safe_get(encoding, 'variables', [])
+    encoding = safe_get(ydata, "encoding", {})
+    yaml_match = safe_get(encoding, "match", None)
+    yaml_vars = safe_get(encoding, "variables", [])
 
     return yaml_match, yaml_vars
 
-def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str, repo_dir):
+
+def compare_yaml_json_encoding(
+    instr_name, yaml_match, yaml_vars, json_encoding_str, repo_dir
+):
     """Compare the YAML encoding with the JSON encoding."""
     if not yaml_match:
         return ["No YAML match field available for comparison."]
     if not json_encoding_str:
         return ["No JSON encoding available for comparison."]
 
-    expected_length = 16 if instr_name.lower().startswith(('c_', 'c.')) else 32
+    expected_length = 16 if instr_name.lower().startswith(("c_", "c.")) else 32
 
-    yaml_pattern_str = yaml_match.replace('-', '.')
+    yaml_pattern_str = yaml_match.replace("-", ".")
     if len(yaml_pattern_str) != expected_length:
-        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
+        return [
+            f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."
+        ]
 
     yaml_var_positions = {}
-    for var in (yaml_vars or []):
+    for var in yaml_vars or []:
         resolved_var = resolve_variable_definition(var, repo_dir)
-        if not resolved_var or 'location' not in resolved_var:
-            print(f"Warning: Could not resolve variable definition for {var.get('name', 'unknown')}")
+        if not resolved_var or "location" not in resolved_var:
+            print(
+                f"Warning: Could not resolve variable definition for {var.get('name', 'unknown')}"
+            )
             continue
 
-        ranges = parse_location(resolved_var['location'])
+        ranges = parse_location(resolved_var["location"])
         if ranges:
-            yaml_var_positions[var['name']] = ranges
+            yaml_var_positions[var["name"]] = ranges
 
-    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
+    tokens = re.findall(r"(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)", json_encoding_str)
     json_bits = []
     bit_index = expected_length - 1
     for t in tokens:
@@ -146,12 +161,14 @@ def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_
         bit_index -= 1
 
     if bit_index != -1:
-        return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."]
+        return [
+            f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."
+        ]
 
     normalized_json_bits = []
     for pos, tt in json_bits:
-        if re.match(r'vm\[[^\]]*\]', tt):
-            tt = 'vm'
+        if re.match(r"vm\[[^\]]*\]", tt):
+            tt = "vm"
         normalized_json_bits.append((pos, tt))
     json_bits = normalized_json_bits
 
@@ -165,37 +182,54 @@ def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_
             continue
         json_bit_str = token[0]
 
-        if yaml_bit in ['0', '1']:
-            if json_bit_str not in ['0', '1']:
-                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
+        if yaml_bit in ["0", "1"]:
+            if json_bit_str not in ["0", "1"]:
+                differences.append(
+                    f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'"
+                )
             elif json_bit_str != yaml_bit:
-                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
+                differences.append(
+                    f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'"
+                )
         else:
-            if json_bit_str in ['0', '1']:
-                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
+            if json_bit_str in ["0", "1"]:
+                differences.append(
+                    f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'"
+                )
 
     for var_name, ranges in yaml_var_positions.items():
         for high, low in ranges:
             if high >= expected_length or low < 0:
-                differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
+                differences.append(
+                    f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction."
+                )
                 continue
 
             json_var_fields = []
-            for bb in range(low, high+1):
+            for bb in range(low, high + 1):
                 token = [tt for (pos, tt) in json_bits if pos == bb]
                 if token:
                     json_var_fields.append(token[0])
                 else:
-                    json_var_fields.append('?')
+                    json_var_fields.append("?")
 
-            field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
+            field_names = set(
+                re.findall(
+                    r"([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?", " ".join(json_var_fields)
+                )
+            )
             if len(field_names) == 0:
-                differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
+                differences.append(
+                    f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}"
+                )
             elif len(field_names) > 1:
-                differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")
+                differences.append(
+                    f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}"
+                )
 
     return differences
 
+
 def get_yaml_instructions(repo_directory):
     """Recursively find all YAML files in the repository and load their encodings."""
     global yaml_instructions, REPO_DIRECTORY
@@ -215,7 +249,7 @@ def get_yaml_instructions(repo_directory):
         instructions_with_encodings[instr_name_lower] = {
             "category": path,
             "yaml_match": yaml_match,
-            "yaml_vars": yaml_vars
+            "yaml_vars": yaml_vars,
         }
 
     return instructions_with_encodings
diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index df3b3fac60..fdbeb75f70 100644
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -5,7 +5,7 @@
     get_json_path,
     get_yaml_directory,
     get_yaml_instructions,
-    compare_yaml_json_encoding
+    compare_yaml_json_encoding,
 )
 
 # Global variables to store loaded data
@@ -13,6 +13,7 @@
 _json_data = None
 _repo_dir = None
 
+
 def load_test_data():
     """Load test data once and cache it."""
     global _yaml_instructions, _json_data, _repo_dir
@@ -27,29 +28,34 @@ def load_test_data():
         json_file = get_json_path()
         if not os.path.exists(json_file):
             pytest.skip(f"JSON file not found at {json_file}")
-        with open(json_file, 'r') as f:
+        with open(json_file) as f:
             _json_data = json.load(f)
 
     return _yaml_instructions, _json_data, _repo_dir
 
+
 def has_aqrl_variables(yaml_vars):
     """Check if instruction has aq/rl variables."""
     if not yaml_vars:
         return False
     return any(var.get("name") in ["aq", "rl"] for var in yaml_vars)
 
+
 def pytest_generate_tests(metafunc):
     """Generate test cases dynamically."""
     if "instr_name" in metafunc.fixturenames:
         yaml_instructions, _, _ = load_test_data()
         metafunc.parametrize("instr_name", list(yaml_instructions.keys()))
 
+
 class TestInstructionEncoding:
     @classmethod
     def setup_class(cls):
         """Setup class-level test data."""
         cls.yaml_instructions, cls.json_data, cls.repo_dir = load_test_data()
-        cls.rv_instructions = cls.json_data.get("!instanceof", {}).get("RVInstCommon", [])
+        cls.rv_instructions = cls.json_data.get("!instanceof", {}).get(
+            "RVInstCommon", []
+        )
 
     def _find_matching_instruction(self, yaml_instr_name):
         """Find matching instruction in JSON data by comparing instruction names."""
@@ -60,15 +66,15 @@ def _find_matching_instruction(self, yaml_instr_name):
             if not isinstance(value, dict):
                 continue
 
-            is_pseudo = value.get('isPseudo', '')
+            is_pseudo = value.get("isPseudo", "")
             if is_pseudo == 1:
                 continue
 
-            is_codegen_only = value.get('isCodeGenOnly', '')
+            is_codegen_only = value.get("isCodeGenOnly", "")
             if is_codegen_only == 1:
                 continue
 
-            asm_string = value.get('AsmString', '').lower().strip()
+            asm_string = value.get("AsmString", "").lower().strip()
             if not asm_string:
                 continue
 
@@ -82,10 +88,12 @@ def _get_json_encoding(self, json_instr):
         """Extract encoding string from JSON instruction data."""
         encoding_bits = []
         try:
-            inst = json_instr.get('Inst', [])
+            inst = json_instr.get("Inst", [])
             for bit in inst:
                 if isinstance(bit, dict):
-                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
+                    encoding_bits.append(
+                        f"{bit.get('var', '?')}[{bit.get('index', '?')}]"
+                    )
                 else:
                     encoding_bits.append(str(bit))
             encoding_bits.reverse()
@@ -119,11 +127,13 @@ def test_instruction_encoding(self, instr_name):
             yaml_data["yaml_match"],
             yaml_data.get("yaml_vars", []),
             json_encoding,
-            self.repo_dir
+            self.repo_dir,
         )
 
         # If there are differences, format them nicely and fail the test
-        if differences and differences != ["No YAML match field available for comparison."]:
+        if differences and differences != [
+            "No YAML match field available for comparison."
+        ]:
             error_msg = f"\nEncoding mismatch for instruction: {instr_name}\n"
             error_msg += f"JSON key: {json_key}\n"
             error_msg += f"YAML match: {yaml_data['yaml_match']}\n"
@@ -133,7 +143,8 @@ def test_instruction_encoding(self, instr_name):
                 error_msg += f"  - {diff}\n"
             pytest.fail(error_msg)
 
+
 def pytest_configure(config):
     """Configure the test session."""
     print(f"\nUsing JSON file: {get_json_path()}")
-    print(f"Using YAML directory: {get_yaml_directory()}\n")
+    print(f"Using YAML directory: {get_yaml_directory()}\n")

From d9e45b356f3b6199d37d73312a26f0456239e206 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 17 Jan 2025 15:23:35 +0000
Subject: [PATCH 22/33] Add LLVM tblgen to regress.yml && change Rakefile for
 new changes

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .github/workflows/regress.yml | 30 ++++++++++++++++++++++++++++++
 Rakefile                      | 11 ++---------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
index 4ff261868b..f5a6d0191d 100644
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -161,3 +161,33 @@ jobs:
         run: ./bin/build_container
       - name: Generate extension PDF
         run: ./do gen:profile[MockProfileRelease]
+  build-llvm:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+      - name: Cache LLVM build
+        id: cache-llvm
+        uses: actions/cache@v4
+        with:
+          path: build
+          key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
+      - name: Configure and build llvm-tblgen
+        run: |
+          # Configure LLVM in 'build' directory (Release mode)
+          cmake -S llvm -B build -DCMAKE_BUILD_TYPE=Release
+          # Build the llvm-tblgen target
+          cmake --build build --target llvm-tblgen
+      - name: Generate RISC-V JSON
+        run: |
+          # llvm-tblgen is now built into ./build/bin/llvm-tblgen
+          # Because we run it from the repository root, the output riscv.json
+          # will be placed at the repository root by default.
+          ./build/bin/llvm-tblgen \
+            -I llvm/include \
+            -I llvm/lib/Target/RISCV \
+            llvm/lib/Target/RISCV/RISCV.td \
+            --dump-json \
+            -o riscv.json
+      - name: Show riscv.json output
+        run: ls -l riscv.json
diff --git a/Rakefile b/Rakefile
index 145896c067..e92af58b7f 100644
--- a/Rakefile
+++ b/Rakefile
@@ -87,19 +87,12 @@ namespace :test do
 
   # "Run the cross-validation against LLVM"
   task :llvm do
-    venv_python = "#{$root}/.home/.venv/bin/python3"
-    if File.exist?(venv_python)
       begin
-        sh "#{venv_python} -m pytest ext/auto-inst/test_parsing.py -v"
+        sh "#{$root}/.home/.venv/bin/python3 -m pytest ext/auto-inst/test_parsing.py -v"
       rescue => e
-        raise unless e.message.include?("status (5)") #dont fail on skipped tests
-      end
-    else
-      puts "\nNo Python virtual environment found."
-      puts "Tests will be skipped.\n"
+        raise unless e.message.include?("status (5)") # don't fail on skipped tests
     end
   end
-
   # "Run the IDL compiler test suite"
   task :idl_compiler do
     t = Minitest::TestTask.new(:lib_test)

From 0fa5bdd03c968892a56ed787e5431eb7d25be28a Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 17 Jan 2025 15:29:02 +0000
Subject: [PATCH 23/33] Fix caching

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .github/workflows/regress.yml | 67 ++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 29 deletions(-)
 mode change 100644 => 100755 .github/workflows/regress.yml

diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
old mode 100644
new mode 100755
index f5a6d0191d..dddf1dacde
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -162,32 +162,41 @@ jobs:
       - name: Generate extension PDF
         run: ./do gen:profile[MockProfileRelease]
   build-llvm:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out repository
-        uses: actions/checkout@v4
-      - name: Cache LLVM build
-        id: cache-llvm
-        uses: actions/cache@v4
-        with:
-          path: build
-          key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
-      - name: Configure and build llvm-tblgen
-        run: |
-          # Configure LLVM in 'build' directory (Release mode)
-          cmake -S llvm -B build -DCMAKE_BUILD_TYPE=Release
-          # Build the llvm-tblgen target
-          cmake --build build --target llvm-tblgen
-      - name: Generate RISC-V JSON
-        run: |
-          # llvm-tblgen is now built into ./build/bin/llvm-tblgen
-          # Because we run it from the repository root, the output riscv.json
-          # will be placed at the repository root by default.
-          ./build/bin/llvm-tblgen \
-            -I llvm/include \
-            -I llvm/lib/Target/RISCV \
-            llvm/lib/Target/RISCV/RISCV.td \
-            --dump-json \
-            -o riscv.json
-      - name: Show riscv.json output
-        run: ls -l riscv.json
+      runs-on: ubuntu-latest
+      steps:
+        - name: Check out repository
+          uses: actions/checkout@v4
+
+        - name: Cache LLVM build
+          id: cache-llvm
+          uses: actions/cache@v4
+          with:
+            path: |
+              build
+              llvm
+            key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
+
+        - name: Clone LLVM repository
+          if: steps.cache-llvm.outputs.cache-hit != 'true'
+          run: |
+            git clone --depth 1 https://github.com/llvm/llvm-project.git
+            mv llvm-project/llvm .
+            rm -rf llvm-project
+
+        - name: Configure and build llvm-tblgen
+          if: steps.cache-llvm.outputs.cache-hit != 'true'
+          run: |
+            cmake -S llvm -B build -DCMAKE_BUILD_TYPE=Release
+            cmake --build build --target llvm-tblgen
+
+        - name: Generate RISC-V JSON
+          run: |
+            ./build/bin/llvm-tblgen \
+              -I llvm/include \
+              -I llvm/lib/Target/RISCV \
+              llvm/lib/Target/RISCV/RISCV.td \
+              --dump-json \
+              -o riscv.json
+
+        - name: Show riscv.json output
+          run: ls -l riscv.json

From 56412ac05c38920b78d5c338aac3890d21f1e57d Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 17 Jan 2025 15:36:10 +0000
Subject: [PATCH 24/33] Fix caching

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .github/workflows/regress.yml | 66 +++++++++++++++++------------------
 1 file changed, 32 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
index dddf1dacde..1c113d13e7 100755
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -161,42 +161,40 @@ jobs:
         run: ./bin/build_container
       - name: Generate extension PDF
         run: ./do gen:profile[MockProfileRelease]
-  build-llvm:
-      runs-on: ubuntu-latest
-      steps:
-        - name: Check out repository
-          uses: actions/checkout@v4
+    build-llvm:
+        runs-on: ubuntu-latest
+        steps:
+          - name: Check out repository
+            uses: actions/checkout@v4
 
-        - name: Cache LLVM build
-          id: cache-llvm
-          uses: actions/cache@v4
-          with:
-            path: |
-              build
-              llvm
-            key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
+          - name: Cache LLVM build
+            id: cache-llvm
+            uses: actions/cache@v4
+            with:
+              path: |
+                build
+                llvm-project
+              key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
 
-        - name: Clone LLVM repository
-          if: steps.cache-llvm.outputs.cache-hit != 'true'
-          run: |
-            git clone --depth 1 https://github.com/llvm/llvm-project.git
-            mv llvm-project/llvm .
-            rm -rf llvm-project
+          - name: Clone LLVM repository
+            if: steps.cache-llvm.outputs.cache-hit != 'true'
+            run: |
+              git clone --depth 1 https://github.com/llvm/llvm-project.git
 
-        - name: Configure and build llvm-tblgen
-          if: steps.cache-llvm.outputs.cache-hit != 'true'
-          run: |
-            cmake -S llvm -B build -DCMAKE_BUILD_TYPE=Release
-            cmake --build build --target llvm-tblgen
+          - name: Configure and build llvm-tblgen
+            if: steps.cache-llvm.outputs.cache-hit != 'true'
+            run: |
+              cmake -S llvm-project/llvm -B build -DCMAKE_BUILD_TYPE=Release
+              cmake --build build --target llvm-tblgen
 
-        - name: Generate RISC-V JSON
-          run: |
-            ./build/bin/llvm-tblgen \
-              -I llvm/include \
-              -I llvm/lib/Target/RISCV \
-              llvm/lib/Target/RISCV/RISCV.td \
-              --dump-json \
-              -o riscv.json
+          - name: Generate RISC-V JSON
+            run: |
+              ./build/bin/llvm-tblgen \
+                -I llvm-project/llvm/include \
+                -I llvm-project/llvm/lib/Target/RISCV \
+                llvm-project/llvm/lib/Target/RISCV/RISCV.td \
+                --dump-json \
+                -o riscv.json
 
-        - name: Show riscv.json output
-          run: ls -l riscv.json
+          - name: Show riscv.json output
+            run: ls -l riscv.json

From b2748e0be86a8a939a68b0241ed88eb2cc8c625e Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 17 Jan 2025 15:36:20 +0000
Subject: [PATCH 25/33] Fix caching

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .github/workflows/regress.yml | 64 +++++++++++++++++------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
index 1c113d13e7..4efc603e1d 100755
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -161,40 +161,40 @@ jobs:
         run: ./bin/build_container
       - name: Generate extension PDF
         run: ./do gen:profile[MockProfileRelease]
-    build-llvm:
-        runs-on: ubuntu-latest
-        steps:
-          - name: Check out repository
-            uses: actions/checkout@v4
+  build-llvm:
+      runs-on: ubuntu-latest
+      steps:
+        - name: Check out repository
+          uses: actions/checkout@v4
 
-          - name: Cache LLVM build
-            id: cache-llvm
-            uses: actions/cache@v4
-            with:
-              path: |
-                build
-                llvm-project
-              key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
+        - name: Cache LLVM build
+          id: cache-llvm
+          uses: actions/cache@v4
+          with:
+            path: |
+              build
+              llvm-project
+            key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
 
-          - name: Clone LLVM repository
-            if: steps.cache-llvm.outputs.cache-hit != 'true'
-            run: |
-              git clone --depth 1 https://github.com/llvm/llvm-project.git
+        - name: Clone LLVM repository
+          if: steps.cache-llvm.outputs.cache-hit != 'true'
+          run: |
+            git clone --depth 1 https://github.com/llvm/llvm-project.git
 
-          - name: Configure and build llvm-tblgen
-            if: steps.cache-llvm.outputs.cache-hit != 'true'
-            run: |
-              cmake -S llvm-project/llvm -B build -DCMAKE_BUILD_TYPE=Release
-              cmake --build build --target llvm-tblgen
+        - name: Configure and build llvm-tblgen
+          if: steps.cache-llvm.outputs.cache-hit != 'true'
+          run: |
+            cmake -S llvm-project/llvm -B build -DCMAKE_BUILD_TYPE=Release
+            cmake --build build --target llvm-tblgen
 
-          - name: Generate RISC-V JSON
-            run: |
-              ./build/bin/llvm-tblgen \
-                -I llvm-project/llvm/include \
-                -I llvm-project/llvm/lib/Target/RISCV \
-                llvm-project/llvm/lib/Target/RISCV/RISCV.td \
-                --dump-json \
-                -o riscv.json
+        - name: Generate RISC-V JSON
+          run: |
+            ./build/bin/llvm-tblgen \
+              -I llvm-project/llvm/include \
+              -I llvm-project/llvm/lib/Target/RISCV \
+              llvm-project/llvm/lib/Target/RISCV/RISCV.td \
+              --dump-json \
+              -o riscv.json
 
-          - name: Show riscv.json output
-            run: ls -l riscv.json
+        - name: Show riscv.json output
+          run: ls -l riscv.json

From 6968919bbd0cbd8b9906ffbe810865003af8ba42 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 17 Jan 2025 15:46:29 +0000
Subject: [PATCH 26/33] Add dependency for smoke test

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .github/workflows/regress.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
index 4efc603e1d..9dc425dc2a 100755
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -12,6 +12,7 @@ jobs:
       - uses: actions/setup-python@v5
       - uses: pre-commit/action@v3.0.1
   regress-smoke:
+    needs: build-llvm
     runs-on: ubuntu-latest
     env:
       SINGULARITY: 1

From 25aa928efc5f92f772934b2c71da318d8d14b551 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 17 Jan 2025 15:52:51 +0000
Subject: [PATCH 27/33] Change logic for LLVM's path

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 31b4ad4398..9d9f3fffde 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -19,12 +19,16 @@ def safe_get(data, key, default=""):
 
 
 def get_json_path():
-    env_path = os.environ.get("LLVM_JSON")
-    if not env_path:
-        print("\nNo LLVM path found in environment.")
+    script_dir = Path(__file__).parent.resolve()
+    repo_root = script_dir.parent.parent  # adjust as needed
+    riscv_json_path = repo_root / "riscv.json"
+
+    if not riscv_json_path.is_file():
+        print(f"\nNo 'riscv.json' found at {riscv_json_path}.")
         print("Tests will be skipped.\n")
-        pytest.skip("LLVM path not configured")
-    return env_path
+        pytest.skip("riscv.json does not exist in the repository at the expected path.")
+
+    return riscv_json_path
 
 
 def get_yaml_directory():

From 5ee8496dcf62ed62791da3e7456b70b04f15a9ae Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 20 Jan 2025 11:48:39 +0000
Subject: [PATCH 28/33] Change CI logic for LLVM

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .github/workflows/regress.yml | 72 ++++++++++++++++++-----------------
 .gitmodules                   |  4 ++
 ext/auto-inst/parsing.py      | 11 +++---
 ext/llvm-project              |  1 +
 ext/riscv-opcodes             |  2 +-
 5 files changed, 49 insertions(+), 41 deletions(-)
 create mode 160000 ext/llvm-project

diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
index 9dc425dc2a..186f2a8e26 100755
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -162,40 +162,44 @@ jobs:
         run: ./bin/build_container
       - name: Generate extension PDF
         run: ./do gen:profile[MockProfileRelease]
-  build-llvm:
-      runs-on: ubuntu-latest
-      steps:
-        - name: Check out repository
-          uses: actions/checkout@v4
-
-        - name: Cache LLVM build
-          id: cache-llvm
-          uses: actions/cache@v4
-          with:
-            path: |
-              build
-              llvm-project
-            key: ${{ runner.os }}-llvm-${{ hashFiles('llvm/**') }}
-
-        - name: Clone LLVM repository
-          if: steps.cache-llvm.outputs.cache-hit != 'true'
-          run: |
-            git clone --depth 1 https://github.com/llvm/llvm-project.git
 
-        - name: Configure and build llvm-tblgen
-          if: steps.cache-llvm.outputs.cache-hit != 'true'
-          run: |
-            cmake -S llvm-project/llvm -B build -DCMAKE_BUILD_TYPE=Release
-            cmake --build build --target llvm-tblgen
+  build-llvm:
+    runs-on: ubuntu-latest
 
-        - name: Generate RISC-V JSON
-          run: |
-            ./build/bin/llvm-tblgen \
-              -I llvm-project/llvm/include \
-              -I llvm-project/llvm/lib/Target/RISCV \
-              llvm-project/llvm/lib/Target/RISCV/RISCV.td \
-              --dump-json \
-              -o riscv.json
+    steps:
+      - name: Check out repository (no submodules, shallow fetch)
+        uses: actions/checkout@v4
+        with:
+          submodules: false      # Do NOT auto-checkout submodules
+          fetch-depth: 1         # Shallow checkout for the main repo
 
-        - name: Show riscv.json output
-          run: ls -l riscv.json
+      - name: Get current LLVM submodule commit SHA
+        id: get-llvm-sha
+        run: echo "LLVM_SHA=$(git ls-tree HEAD ext/llvm-project | awk '{print $3}')" >> $GITHUB_ENV
+      - name: Cache RISC-V JSON
+        id: cache-riscv
+        uses: actions/cache@v4
+        with:
+          path: ext/llvm-project/riscv.json
+          key: ${{ runner.os }}-riscv-json-${{ env.LLVM_SHA }}
+          restore-keys: |
+            ${{ runner.os }}-riscv-json-
+      - name: Initialize LLVM submodule (shallow + sparse)
+        run: |
+          git submodule sync --recursive
+          git submodule update --init --recursive --depth=1 ext/llvm-project
+          cd ext/llvm-project
+      - name: Configure and build llvm-tblgen
+        run: |
+          cmake -S ext/llvm-project/llvm -B ext/llvm-project/build -DCMAKE_BUILD_TYPE=Release
+          cmake --build ext/llvm-project/build --target llvm-tblgen
+      - name: Generate RISC-V JSON
+        run: |
+          ./ext/llvm-project/build/bin/llvm-tblgen \
+            -I ext/llvm-project/llvm/include \
+            -I ext/llvm-project/llvm/lib/Target/RISCV \
+            ext/llvm-project/llvm/lib/Target/RISCV/RISCV.td \
+            --dump-json \
+            -o ext/llvm-project/riscv.json
+      - name: Show riscv.json output
+        run: ls -l ext/llvm-project/riscv.json
diff --git a/.gitmodules b/.gitmodules
index 7b74de08dc..b3d64aa488 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,7 @@
 [submodule "ext/riscv-isa-manual"]
 	path = ext/riscv-isa-manual
 	url = https://github.com/riscv/riscv-isa-manual
+[submodule "ext/llvm-project"]
+	path = ext/llvm-project
+	url = https://github.com/llvm/llvm-project.git
+	branch = main
diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 9d9f3fffde..501628f362 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -19,16 +19,15 @@ def safe_get(data, key, default=""):
 
 
 def get_json_path():
-    script_dir = Path(__file__).parent.resolve()
-    repo_root = script_dir.parent.parent  # adjust as needed
-    riscv_json_path = repo_root / "riscv.json"
+    script_dir = Path(__file__).parent.resolve()  # auto-inst directory
+    llvm_json_path = script_dir.parent / "/ext/llvm-project/riscv.json"
 
-    if not riscv_json_path.is_file():
-        print(f"\nNo 'riscv.json' found at {riscv_json_path}.")
+    if not llvm_json_path.is_file():
+        print(f"\nNo 'riscv.json' found at {llvm_json_path}.")
         print("Tests will be skipped.\n")
         pytest.skip("riscv.json does not exist in the repository at the expected path.")
 
-    return riscv_json_path
+    return llvm_json_path
 
 
 def get_yaml_directory():
diff --git a/ext/llvm-project b/ext/llvm-project
new file mode 160000
index 0000000000..8e85b77f6a
--- /dev/null
+++ b/ext/llvm-project
@@ -0,0 +1 @@
+Subproject commit 8e85b77f6a73477ab094acf0dccce61590a29222
diff --git a/ext/riscv-opcodes b/ext/riscv-opcodes
index 9226b0d091..5ce8977a59 160000
--- a/ext/riscv-opcodes
+++ b/ext/riscv-opcodes
@@ -1 +1 @@
-Subproject commit 9226b0d091b0d2ea9ccad6f7f8ca1283a3b15e88
+Subproject commit 5ce8977a5961a6bbfc1638e6676e60489665d882

From 92f9c99485a19764df22a9e8901f2a715c501ba7 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 20 Jan 2025 13:52:53 +0000
Subject: [PATCH 29/33] Add cache and update paths

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .github/workflows/regress.yml | 28 ++++++++++++++++++++++++----
 ext/auto-inst/parsing.py      | 18 ++++++++++++++++--
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/regress.yml b/.github/workflows/regress.yml
index 186f2a8e26..a0e60e2ebc 100755
--- a/.github/workflows/regress.yml
+++ b/.github/workflows/regress.yml
@@ -19,6 +19,17 @@ jobs:
     steps:
       - name: Clone Github Repo Action
         uses: actions/checkout@v4
+      - name: Get current LLVM submodule commit SHA
+        id: get-llvm-sha
+        run: echo "LLVM_SHA=$(git ls-tree HEAD ext/llvm-project | awk '{print $3}')" >> $GITHUB_ENV
+      - name: Restore cache RISC-V JSON
+        id: cache-riscv
+        uses: actions/cache@v4
+        with:
+          path: ext/llvm-project/riscv.json
+          key: ${{ runner.os }}-riscv-json-${{ env.LLVM_SHA }}
+          restore-keys: |
+            ${{ runner.os }}-riscv-json-
       - name: Setup apptainer
         uses: eWaterCycle/setup-apptainer@v2.0.0
       - name: Get container from cache
@@ -170,9 +181,8 @@ jobs:
       - name: Check out repository (no submodules, shallow fetch)
         uses: actions/checkout@v4
         with:
-          submodules: false      # Do NOT auto-checkout submodules
-          fetch-depth: 1         # Shallow checkout for the main repo
-
+          submodules: false
+          fetch-depth: 1
       - name: Get current LLVM submodule commit SHA
         id: get-llvm-sha
         run: echo "LLVM_SHA=$(git ls-tree HEAD ext/llvm-project | awk '{print $3}')" >> $GITHUB_ENV
@@ -185,16 +195,26 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-riscv-json-
       - name: Initialize LLVM submodule (shallow + sparse)
+        if: ${{ steps.cache-riscv.outputs.cache-hit != 'true' }}
         run: |
           git submodule sync --recursive
           git submodule update --init --recursive --depth=1 ext/llvm-project
-          cd ext/llvm-project
+
+      - name: Check for required directories and files
+        if: ${{ steps.cache-riscv.outputs.cache-hit != 'true' }}
+        run: |
+          ls -l ext/llvm-project/llvm/include
+          ls -l ext/llvm-project/llvm/lib/Target/RISCV
+          ls -l ext/llvm-project/llvm/lib/Target/RISCV/RISCV.td
       - name: Configure and build llvm-tblgen
+        if: ${{ steps.cache-riscv.outputs.cache-hit != 'true' }}
         run: |
           cmake -S ext/llvm-project/llvm -B ext/llvm-project/build -DCMAKE_BUILD_TYPE=Release
           cmake --build ext/llvm-project/build --target llvm-tblgen
       - name: Generate RISC-V JSON
+        if: ${{ steps.cache-riscv.outputs.cache-hit != 'true' }}
         run: |
+          chmod +x ./ext/llvm-project/build/bin/llvm-tblgen
           ./ext/llvm-project/build/bin/llvm-tblgen \
             -I ext/llvm-project/llvm/include \
             -I ext/llvm-project/llvm/lib/Target/RISCV \
diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 501628f362..9284ae3239 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -19,8 +19,22 @@ def safe_get(data, key, default=""):
 
 
 def get_json_path():
-    script_dir = Path(__file__).parent.resolve()  # auto-inst directory
-    llvm_json_path = script_dir.parent / "/ext/llvm-project/riscv.json"
+    """
+    Resolves the path to riscv.json in the repository.
+    Returns the Path object if file exists, otherwise skips the test.
+    """
+    # Print current working directory and script location for debugging
+    cwd = Path.cwd()
+    script_dir = Path(__file__).parent.resolve()
+    print(f"Current working directory: {cwd}")
+    print(f"Script directory: {script_dir}")
+
+    # Try to find the repository root
+    repo_root = os.environ.get("GITHUB_WORKSPACE", cwd)
+    repo_root = Path(repo_root)
+
+    llvm_json_path = repo_root / "ext" / "llvm-project" / "riscv.json"
+    print(f"Looking for riscv.json at: {llvm_json_path}")
 
     if not llvm_json_path.is_file():
         print(f"\nNo 'riscv.json' found at {llvm_json_path}.")

From 34e769e5c0681658a82aa51d48ca0eb02c0d3391 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 20 Jan 2025 14:51:39 +0000
Subject: [PATCH 30/33] Add corner case when implementation of LLVM does not
 need to follow the ISA Spec

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .pre-commit-config.yaml       |  0
 ext/auto-inst/parsing.py      |  0
 ext/auto-inst/test_parsing.py | 10 ++++++++++
 3 files changed, 10 insertions(+)
 mode change 100644 => 100755 .pre-commit-config.yaml
 mode change 100644 => 100755 ext/auto-inst/parsing.py
 mode change 100644 => 100755 ext/auto-inst/test_parsing.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
old mode 100644
new mode 100755
diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
old mode 100644
new mode 100755
diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
old mode 100644
new mode 100755
index fdbeb75f70..2af2888807
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -113,6 +113,15 @@ def test_instruction_encoding(self, instr_name):
         if not yaml_data.get("yaml_match"):
             pytest.skip(f"Instruction {instr_name} has no YAML match pattern")
 
+        if (
+            instr_name == "fence.i"
+            or instr_name == "c.nop"
+            or instr_name == "fcvtmod.w.d"
+        ):
+            pytest.skip(
+                f"Instruction {instr_name} is a corner case and implementation should not follow ISA spec"
+            )
+
         # Find matching JSON instruction
         json_key = self._find_matching_instruction(instr_name)
         if not json_key:
@@ -135,6 +144,7 @@ def test_instruction_encoding(self, instr_name):
             "No YAML match field available for comparison."
         ]:
             error_msg = f"\nEncoding mismatch for instruction: {instr_name}\n"
+            error_msg += f"name : {instr_name}\n"
             error_msg += f"JSON key: {json_key}\n"
             error_msg += f"YAML match: {yaml_data['yaml_match']}\n"
             error_msg += f"JSON encoding: {json_encoding}\n"

From af0acaea86b3888e6268123bb92f1c367117b2a4 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 27 Jan 2025 09:31:41 +0000
Subject: [PATCH 31/33] Work around for FENCE. ISA and compiler should treat it
 differently

---
 arch/ext/Zcd.yaml                             | 43 ++++++++++
 arch/inst/F/fadd.s.yaml                       |  5 +-
 arch/inst/F/fleq.s.yaml                       |  2 +-
 arch/inst/F/fli.s.yaml                        |  2 +-
 arch/inst/F/fltq.s.yaml                       |  2 +-
 arch/inst/F/fmaxm.s.yaml                      |  2 +-
 arch/inst/F/fminm.s.yaml                      |  2 +-
 arch/inst/F/fround.s.yaml                     |  2 +-
 arch/inst/F/froundnx.s.yaml                   |  2 +-
 arch/inst/F/fsub.s.yaml                       |  8 +-
 arch/inst/Zbkb/brev8.yaml                     | 24 +++++-
 arch/inst/Zbkb/unzip.yaml                     | 24 +++++-
 arch/inst/Zbkb/zip.yaml                       | 24 +++++-
 arch/inst/Zbkx/xperm4.yaml                    | 35 +++++++-
 arch/inst/Zbkx/xperm8.yaml                    | 35 +++++++-
 arch/inst/Zcmp/cm.mva01s.yaml                 | 33 +++++++
 arch/inst/Zcmp/cm.mvsa01.yaml                 | 35 ++++++++
 arch/inst/Zcmp/cm.pop.yaml                    | 83 ++++++++++++++++++
 arch/inst/Zcmp/cm.popret.yaml                 | 84 ++++++++++++++++++
 arch/inst/Zcmp/cm.popretz.yaml                | 85 +++++++++++++++++++
 arch/inst/Zcmp/cm.push.yaml                   | 84 ++++++++++++++++++
 arch/isa/fp.idl                               | 21 +++++
 arch/isa/globals.isa                          | 46 ++++++++++
 cfgs/qc_iu/arch_overlay/ext/Xqci.yaml         | 35 ++++++++
 cfgs/qc_iu/arch_overlay/ext/Xqcia.yaml        | 13 +++
 cfgs/qc_iu/arch_overlay/ext/Xqciac.yaml       |  2 +-
 cfgs/qc_iu/arch_overlay/ext/Xqcibm.yaml       | 13 +++
 cfgs/qc_iu/arch_overlay/ext/Xqcilsm.yaml      | 12 +++
 .../arch_overlay/inst/Xqci/qc.c.extu.yaml     |  1 +
 .../arch_overlay/inst/Xqci/qc.c.muliadd.yaml  | 11 ++-
 .../arch_overlay/inst/Xqci/qc.c.mveqz.yaml    | 11 ++-
 .../Xqci/{qc.slasat.yaml => qc.shlsat.yaml}   |  4 +-
 .../Xqci/{qc.sllsat.yaml => qc.shlusat.yaml}  |  4 +-
 .../qc_iu/arch_overlay/inst/Xqci/qc.swmi.yaml |  2 +-
 ext/auto-inst/test_parsing.py                 |  1 +
 35 files changed, 748 insertions(+), 44 deletions(-)
 create mode 100644 arch/ext/Zcd.yaml
 create mode 100644 arch/inst/Zcmp/cm.mva01s.yaml
 create mode 100644 arch/inst/Zcmp/cm.mvsa01.yaml
 create mode 100644 arch/inst/Zcmp/cm.pop.yaml
 create mode 100644 arch/inst/Zcmp/cm.popret.yaml
 create mode 100644 arch/inst/Zcmp/cm.popretz.yaml
 create mode 100644 arch/inst/Zcmp/cm.push.yaml
 rename cfgs/qc_iu/arch_overlay/inst/Xqci/{qc.slasat.yaml => qc.shlsat.yaml} (94%)
 rename cfgs/qc_iu/arch_overlay/inst/Xqci/{qc.sllsat.yaml => qc.shlusat.yaml} (94%)

diff --git a/arch/ext/Zcd.yaml b/arch/ext/Zcd.yaml
new file mode 100644
index 0000000000..70cec37698
--- /dev/null
+++ b/arch/ext/Zcd.yaml
@@ -0,0 +1,43 @@
+# yaml-language-server: $schema=../../schemas/ext_schema.json
+
+$schema: "ext_schema.json#"
+kind: extension
+name: Zcd
+long_name: Compressed instructions for double precision floating point
+description: |
+  Zcd is the existing set of compressed double precision floating point loads and stores:
+  `c.fld`, `c.fldsp`, `c.fsd`, `c.fsdsp`.
+
+type: unprivileged
+company:
+  name: RISC-V International
+  url: https://riscv.org
+versions:
+  - version: "1.0.0"
+    state: ratified
+    ratification_date: 2023-04
+    repositories:
+      - url: https://github.com/riscv/riscv-code-size-reduction
+        branch: main
+    contributors:
+      - name: Tariq Kurd
+      - name: Ibrahim Abu Kharmeh
+      - name: Torbjørn Viem Ness
+      - name: Matteo Perotti
+      - name: Nidal Faour
+      - name: Bill Traynor
+      - name: Rafael Sene
+      - name: Xinlong Wu
+      - name: sinan
+      - name: Jeremy Bennett
+      - name: Heda Chen
+      - name: Alasdair Armstrong
+      - name: Graeme Smecher
+      - name: Nicolas Brunie
+      - name: Jiawei
+    requires:
+      allOf:
+        - anyOf:
+          - { name: Zca, version: "= 1.0.0" }
+          - { name: C, version: "= 1.0.0" }
+        - { name: D, version: "~> 2.2.0" }
diff --git a/arch/inst/F/fadd.s.yaml b/arch/inst/F/fadd.s.yaml
index dd64507d50..5553eedffa 100644
--- a/arch/inst/F/fadd.s.yaml
+++ b/arch/inst/F/fadd.s.yaml
@@ -3,9 +3,10 @@
 $schema: "inst_schema.json#"
 kind: instruction
 name: fadd.s
-long_name: No synopsis available.
+long_name: Single-precision floating-point addition
 description: |
-  No description available.
+  Do the single-precision floating-point addition of fs1 and fs2 and store the result in fd.
+  rm is the dynamic Rounding Mode.
 definedBy: F
 assembly: fd, fs1, fs2, rm
 encoding:
diff --git a/arch/inst/F/fleq.s.yaml b/arch/inst/F/fleq.s.yaml
index 72e648b892..63ecf58c9e 100644
--- a/arch/inst/F/fleq.s.yaml
+++ b/arch/inst/F/fleq.s.yaml
@@ -6,7 +6,7 @@ name: fleq.s
 long_name: No synopsis available.
 description: |
   No description available.
-definedBy: { allOf: [F, Zfa] }
+definedBy: Zfa
 assembly: xd, fs1, fs2
 encoding:
   match: 1010000----------100-----1010011
diff --git a/arch/inst/F/fli.s.yaml b/arch/inst/F/fli.s.yaml
index 2d42589203..65838a6ebd 100644
--- a/arch/inst/F/fli.s.yaml
+++ b/arch/inst/F/fli.s.yaml
@@ -6,7 +6,7 @@ name: fli.s
 long_name: No synopsis available.
 description: |
   No description available.
-definedBy: { allOf: [F, Zfa] }
+definedBy: Zfa
 assembly: fd, fs1
 encoding:
   match: 111100000001-----000-----1010011
diff --git a/arch/inst/F/fltq.s.yaml b/arch/inst/F/fltq.s.yaml
index 099bd82310..dce9f1f329 100644
--- a/arch/inst/F/fltq.s.yaml
+++ b/arch/inst/F/fltq.s.yaml
@@ -6,7 +6,7 @@ name: fltq.s
 long_name: No synopsis available.
 description: |
   No description available.
-definedBy: { allOf: [F, Zfa] }
+definedBy: Zfa
 assembly: xd, fs1, fs2
 encoding:
   match: 1010000----------101-----1010011
diff --git a/arch/inst/F/fmaxm.s.yaml b/arch/inst/F/fmaxm.s.yaml
index d57459ac4e..111082155c 100644
--- a/arch/inst/F/fmaxm.s.yaml
+++ b/arch/inst/F/fmaxm.s.yaml
@@ -6,7 +6,7 @@ name: fmaxm.s
 long_name: No synopsis available.
 description: |
   No description available.
-definedBy: { allOf: [F, Zfa] }
+definedBy: Zfa
 assembly: xd, xs1, xs2
 encoding:
   match: 0010100----------011-----1010011
diff --git a/arch/inst/F/fminm.s.yaml b/arch/inst/F/fminm.s.yaml
index 7ae32aab14..2b058ae091 100644
--- a/arch/inst/F/fminm.s.yaml
+++ b/arch/inst/F/fminm.s.yaml
@@ -6,7 +6,7 @@ name: fminm.s
 long_name: No synopsis available.
 description: |
   No description available.
-definedBy: { allOf: [F, Zfa] }
+definedBy: Zfa
 assembly: fd, fs1, fs2
 encoding:
   match: 0010100----------010-----1010011
diff --git a/arch/inst/F/fround.s.yaml b/arch/inst/F/fround.s.yaml
index 5eb132befd..813b35f8c5 100644
--- a/arch/inst/F/fround.s.yaml
+++ b/arch/inst/F/fround.s.yaml
@@ -6,7 +6,7 @@ name: fround.s
 long_name: No synopsis available.
 description: |
   No description available.
-definedBy: { allOf: [F, Zfa] }
+definedBy: Zfa
 assembly: fd, xs1, rm
 encoding:
   match: 010000000100-------------1010011
diff --git a/arch/inst/F/froundnx.s.yaml b/arch/inst/F/froundnx.s.yaml
index e87cf9737e..7e46a90ec7 100644
--- a/arch/inst/F/froundnx.s.yaml
+++ b/arch/inst/F/froundnx.s.yaml
@@ -6,7 +6,7 @@ name: froundnx.s
 long_name: No synopsis available.
 description: |
   No description available.
-definedBy: { allOf: [F, Zfa] }
+definedBy: Zfa
 assembly: fd, rs1, rm
 encoding:
   match: 010000000101-------------1010011
diff --git a/arch/inst/F/fsub.s.yaml b/arch/inst/F/fsub.s.yaml
index e430701033..b0622f32ad 100644
--- a/arch/inst/F/fsub.s.yaml
+++ b/arch/inst/F/fsub.s.yaml
@@ -3,9 +3,10 @@
 $schema: "inst_schema.json#"
 kind: instruction
 name: fsub.s
-long_name: No synopsis available.
+long_name: Single-precision floating-point subtraction
 description: |
-  No description available.
+  Do the single-precision floating-point subtraction of fs2 from fs1 and store the result in fd.
+  rm is the dynamic Rounding Mode.
 definedBy: F
 assembly: fd, fs1, fs2, rm
 encoding:
@@ -26,7 +27,8 @@ access:
   vu: always
 data_independent_timing: true
 operation(): |
-
+  RoundingMode mode = rm_to_mode(X[rm], $encoding);
+  X[fd] = f32_sub(X[fs1], X[fs2], mode);
 sail(): |
   {
     let rs1_val_32b = F_or_X_S(rs1);
diff --git a/arch/inst/Zbkb/brev8.yaml b/arch/inst/Zbkb/brev8.yaml
index 2f3cea1dfc..9f53606b12 100644
--- a/arch/inst/Zbkb/brev8.yaml
+++ b/arch/inst/Zbkb/brev8.yaml
@@ -3,11 +3,10 @@
 $schema: inst_schema.json#
 kind: instruction
 name: brev8
-long_name: No synopsis available.
+long_name: Reverse bits in bytes
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkb, Zk, Zkn, Zks]
+  This instruction reverses the order of the bits in every byte of a register.
+definedBy: Zbkb
 assembly: xd, xs1
 encoding:
   match: 011010000111-----101-----0010011
@@ -23,3 +22,20 @@ access:
   vu: always
 data_independent_timing: false
 operation(): |
+  XReg input = X[rs1];
+  XReg output = 0;
+
+  for(U32 i=0; i<xlen(); i = i+8) {  # i is the bit offset of each byte, top byte included
+    for(U32 j=0; j<8; j = j+1) {
+      output[i+(7-j)] = input[i+j];  # mirror bit j inside the byte that starts at bit i
+    }
+  }
+
+  X[rd] = output;
+
+sail(): |
+  result : xlenbits = EXTZ(0b0);
+  foreach (i from 0 to sizeof(xlen) by 8) {
+    result[i+7..i] = reverse_bits_in_byte(X(rs1)[i+7..i]);
+  };
+  X(rd) = result;
diff --git a/arch/inst/Zbkb/unzip.yaml b/arch/inst/Zbkb/unzip.yaml
index ee1346c116..7ad0a4c237 100644
--- a/arch/inst/Zbkb/unzip.yaml
+++ b/arch/inst/Zbkb/unzip.yaml
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: unzip
-long_name: No synopsis available.
+long_name: Bit deinterleave
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkb, Zk, Zkn, Zks]
+  This instruction gathers bits from the high and low halves of the source word into odd/even bit
+  positions in the destination word. It is the inverse of the zip instruction. This instruction is
+  available only on RV32.
+definedBy: Zbkb
 assembly: xd, xs1
 encoding:
   match: 000010001111-----101-----0010011
@@ -24,3 +25,18 @@ access:
 data_independent_timing: false
 base: 32
 operation(): |
+  XReg input = X[rs1];
+  XReg output = 0;
+
+  for(U32 i=0; i<(xlen()/2); i = i+1) {  # visit every bit pair, i = 0 .. xlen/2-1 inclusive
+    output[i] = input[2*i];
+    output[i+xlen()/2] = input[2*i+1];
+  }
+
+  X[rd] = output;
+
+sail(): |
+  foreach (i from 0 to xlen/2-1) {
+    X(rd)[i] = X(rs1)[2*i];
+    X(rd)[i+xlen/2] = X(rs1)[2*i+1];
+  }
diff --git a/arch/inst/Zbkb/zip.yaml b/arch/inst/Zbkb/zip.yaml
index 17f309379d..6b74dddb6a 100644
--- a/arch/inst/Zbkb/zip.yaml
+++ b/arch/inst/Zbkb/zip.yaml
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: zip
-long_name: No synopsis available.
+long_name: Bit interleave
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkb, Zk, Zkn, Zks]
+  This instruction scatters all of the odd and even bits of a source word into the high and low halves
+  of a destination word. It is the inverse of the unzip instruction. This instruction is available only on
+  RV32.
+definedBy: Zbkb
 assembly: xd, xs1
 encoding:
   match: 000010001111-----001-----0010011
@@ -24,3 +25,18 @@ access:
 data_independent_timing: false
 base: 32
 operation(): |
+  XReg input = X[rs1];
+  XReg output = 0;
+
+  for(U32 i=0; i<(xlen()/2); i = i+1){  # visit every bit pair, i = 0 .. xlen/2-1 inclusive
+    output[2*i] = input[i];
+    output[2*i+1] = input[i+xlen()/2];
+  }
+
+  X[rd] = output;
+
+sail(): |
+  foreach (i from 0 to xlen/2-1) {
+    X(rd)[2*i] = X(rs1)[i];
+    X(rd)[2*i+1] = X(rs1)[i+xlen/2];
+  }
diff --git a/arch/inst/Zbkx/xperm4.yaml b/arch/inst/Zbkx/xperm4.yaml
index 1ff88af984..4d0bebbd4c 100644
--- a/arch/inst/Zbkx/xperm4.yaml
+++ b/arch/inst/Zbkx/xperm4.yaml
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: xperm4
-long_name: No synopsis available.
+long_name: Crossbar permutation (nibbles)
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkx, Zk, Zkn, Zks]
+  The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4 4-bit
+  elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is each element in
+  rs2 replaced by the indexed element in rs1, or zero if the index into rs2 is out of bounds.
+definedBy: Zbkx
 assembly: xd, xs1, xs2
 encoding:
   match: 0010100----------010-----0110011
@@ -25,3 +26,29 @@ access:
   vu: always
 data_independent_timing: false
 operation(): |
+  XReg input1 = X[rs1];
+  XReg input2 = X[rs2];
+  XReg output = 0;
+
+  for(U32 i=0; i<xlen(); i = i+4) {  # i is the bit offset of each nibble, top nibble included
+    XReg index = input2[i+3:i];
+    if(4*index < xlen()) {  # out-of-range indexes leave the nibble at zero
+      output[i+3:i] = input1[4*index+3:4*index];
+    }
+  }
+
+  X[rd] = output;
+
+sail(): |
+  val xperm4_lookup : (bits(4), xlenbits) -> bits(4)
+  function xperm4_lookup (idx, lut) = {
+    (lut >> (idx @ 0b00))[3..0]
+  }
+  function clause execute ( XPERM_4 (rs2,rs1,rd)) = {
+    result : xlenbits = EXTZ(0b0);
+    foreach(i from 0 to xlen by 4) {
+      result[i+3..i] = xperm4_lookup(X(rs2)[i+3..i], X(rs1));
+    };
+    X(rd) = result;
+    RETIRE_SUCCESS
+  }
diff --git a/arch/inst/Zbkx/xperm8.yaml b/arch/inst/Zbkx/xperm8.yaml
index 3968dc5f9e..b9be5932f7 100644
--- a/arch/inst/Zbkx/xperm8.yaml
+++ b/arch/inst/Zbkx/xperm8.yaml
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: xperm8
-long_name: No synopsis available.
+long_name: Crossbar permutation (bytes)
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkx, Zk, Zkn, Zks]
+  The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8 8-bit
+  elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is each element in
+  rs2 replaced by the indexed element in rs1, or zero if the index into rs2 is out of bounds.
+definedBy: Zbkx
 assembly: xd, xs1, xs2
 encoding:
   match: 0010100----------100-----0110011
@@ -25,3 +26,29 @@ access:
   vu: always
 data_independent_timing: false
 operation(): |
+  XReg input1 = X[rs1];
+  XReg input2 = X[rs2];
+  XReg output = 0;
+
+  for(U32 i=0; i<xlen(); i = i+8) {  # i is the bit offset of each byte, top byte included
+    XReg index = input2[i+7:i];
+    if(8*index < xlen()) {  # out-of-range indexes leave the byte at zero
+      output[i+7:i] = input1[8*index+7:8*index];
+    }
+  }
+
+  X[rd] = output;
+
+sail(): |
+  val xperm8_lookup : (bits(8), xlenbits) -> bits(8)
+  function xperm8_lookup (idx, lut) = {
+    (lut >> (idx @ 0b000))[7..0]
+  }
+  function clause execute ( XPERM_8 (rs2,rs1,rd)) = {
+    result : xlenbits = EXTZ(0b0);
+    foreach(i from 0 to xlen by 8) {
+      result[i+7..i] = xperm8_lookup(X(rs2)[i+7..i], X(rs1));
+    };
+    X(rd) = result;
+    RETIRE_SUCCESS
+  }
diff --git a/arch/inst/Zcmp/cm.mva01s.yaml b/arch/inst/Zcmp/cm.mva01s.yaml
new file mode 100644
index 0000000000..47078f179f
--- /dev/null
+++ b/arch/inst/Zcmp/cm.mva01s.yaml
@@ -0,0 +1,33 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.mva01s
+long_name: Move two s0-s7 registers into a0-a1
+description: |
+  This instruction moves r1s' into a0 and r2s' into a1. The execution is atomic, so it is not possible to observe state where only one of a0 or a1 has been updated.
+  The encoding uses sreg number specifiers instead of xreg number specifiers to save encoding space. The mapping between them is specified in the pseudo-code below.
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: r1s, r2s
+encoding:
+  match: 101011---11---10
+  variables:
+    - name: r1s
+      location: 9-7
+    - name: r2s
+      location: 4-2
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+  XReg xreg1 = (r1s[2:1]>0) ? {1,0,r1s[2:0]} : {0,1,r1s[2:0]};
+  XReg xreg2 = (r2s[2:1]>0) ? {1,0,r2s[2:0]} : {0,1,r2s[2:0]};
+  X[10] = X[xreg1];
+  X[11] = X[xreg2];
diff --git a/arch/inst/Zcmp/cm.mvsa01.yaml b/arch/inst/Zcmp/cm.mvsa01.yaml
new file mode 100644
index 0000000000..083242fb67
--- /dev/null
+++ b/arch/inst/Zcmp/cm.mvsa01.yaml
@@ -0,0 +1,35 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.mvsa01
+long_name: Move a0-a1 into two registers of s0-s7
+description: |
+  This instruction moves a0 into r1s' and a1 into r2s'. r1s' and r2s' must be different.
+  The execution is atomic, so it is not possible to observe state where only one of r1s' or r2s' has been updated.
+  The encoding uses sreg number specifiers instead of xreg number specifiers to save encoding space.
+  The mapping between them is specified in the pseudo-code below.
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: r1s, r2s
+encoding:
+  match: 101011---01---10
+  variables:
+    - name: r1s
+      location: 9-7
+    - name: r2s
+      location: 4-2
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+  XReg xreg1 = (r1s[2:1]>0) ? {1,0,r1s[2:0]} : {0,1,r1s[2:0]};
+  XReg xreg2 = (r2s[2:1]>0) ? {1,0,r2s[2:0]} : {0,1,r2s[2:0]};
+  X[xreg1] = X[10];
+  X[xreg2] = X[11];
diff --git a/arch/inst/Zcmp/cm.pop.yaml b/arch/inst/Zcmp/cm.pop.yaml
new file mode 100644
index 0000000000..8063822a07
--- /dev/null
+++ b/arch/inst/Zcmp/cm.pop.yaml
@@ -0,0 +1,83 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.pop
+long_name: Destroy function call stack frame
+description: |
+  Destroy stack frame: load `ra` and 0 to 12 saved registers from the stack frame, deallocate the stack frame.
+  This instruction pops (loads) the registers in `reg_list` from stack memory, and then adjusts the stack pointer by `stack_adj`.
+
+  Restrictions on stack_adj:
+
+  * it must be enough to store all of the listed registers
+  * it must be a multiple of 16 (bytes):
+  ** for RV32 the allowed values are: 16, 32, 48, 64, 80, 96, 112
+  ** for RV64 the allowed values are: 16, 32, 48, 64, 80, 96, 112, 128, 144, 160
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: reg_list, stack_adj
+encoding:
+  match: 10111010------10
+  variables:
+    - name: rlist
+      location: 7-4
+      not: [0, 1, 2, 3]
+    - name: spimm
+      location: 3-2
+      left_shift: 4
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+
+  XReg size = xlen()/8;  # bytes per saved register; the offsets below are byte addresses
+  XReg nreg = (rlist == 15) ? 13 : (rlist - 3);
+  XReg stack_aligned_adj = (nreg * size + 15) & ~0xF;  # register save area rounded up to 16 bytes
+  XReg virtual_address_sp = X[2];
+  XReg virtual_address_new_sp = virtual_address_sp + stack_aligned_adj + spimm;
+  XReg virtual_address_base = virtual_address_new_sp - (nreg * size);
+
+  X[ 1] = read_memory_xlen(virtual_address_base +  0*size, $encoding);
+  if (nreg > 1) {
+    X[ 8] = read_memory_xlen(virtual_address_base +  1*size, $encoding);
+  }
+  if (nreg > 2) {
+    X[ 9] = read_memory_xlen(virtual_address_base +  2*size, $encoding);
+  }
+  if (nreg > 3) {
+    X[18] = read_memory_xlen(virtual_address_base +  3*size, $encoding);
+  }
+  if (nreg > 4) {
+    X[19] = read_memory_xlen(virtual_address_base +  4*size, $encoding);
+  }
+  if (nreg > 5) {
+    X[20] = read_memory_xlen(virtual_address_base +  5*size, $encoding);
+  }
+  if (nreg > 6) {
+    X[21] = read_memory_xlen(virtual_address_base +  6*size, $encoding);
+  }
+  if (nreg > 7) {
+    X[22] = read_memory_xlen(virtual_address_base +  7*size, $encoding);
+  }
+  if (nreg > 8) {
+    X[23] = read_memory_xlen(virtual_address_base +  8*size, $encoding);
+  }
+  if (nreg > 9) {
+    X[24] = read_memory_xlen(virtual_address_base +  9*size, $encoding);
+  }
+  if (nreg > 10) {
+    X[25] = read_memory_xlen(virtual_address_base + 10*size, $encoding);
+  }
+  if (nreg > 11) {
+    X[26] = read_memory_xlen(virtual_address_base + 11*size, $encoding);
+    X[27] = read_memory_xlen(virtual_address_base + 12*size, $encoding);
+  }
+
+  X[2] = virtual_address_new_sp;
diff --git a/arch/inst/Zcmp/cm.popret.yaml b/arch/inst/Zcmp/cm.popret.yaml
new file mode 100644
index 0000000000..77f63bcac5
--- /dev/null
+++ b/arch/inst/Zcmp/cm.popret.yaml
@@ -0,0 +1,84 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.popret
+long_name: Destroy function call stack frame and return to `ra`.
+description: |
+  Destroy stack frame: load `ra` and 0 to 12 saved registers from the stack frame, deallocate the stack frame, return to `ra`.
+  This instruction pops (loads) the registers in `reg_list` from stack memory, adjusts the stack pointer by `stack_adj`, and then returns to `ra`.
+
+  Restrictions on stack_adj:
+
+  * it must be enough to store all of the listed registers
+  * it must be a multiple of 16 (bytes):
+  ** for RV32 the allowed values are: 16, 32, 48, 64, 80, 96, 112
+  ** for RV64 the allowed values are: 16, 32, 48, 64, 80, 96, 112, 128, 144, 160
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: reg_list, stack_adj
+encoding:
+  match: 10111110------10
+  variables:
+    - name: rlist
+      location: 7-4
+      not: [0, 1, 2, 3]
+    - name: spimm
+      location: 3-2
+      left_shift: 4
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+
+  XReg size = xlen()/8;  # bytes per saved register; the offsets below are byte addresses
+  XReg nreg = (rlist == 15) ? 13 : (rlist - 3);
+  XReg stack_aligned_adj = (nreg * size + 15) & ~0xF;  # register save area rounded up to 16 bytes
+  XReg virtual_address_sp = X[2];
+  XReg virtual_address_new_sp = virtual_address_sp + stack_aligned_adj + spimm;
+  XReg virtual_address_base = virtual_address_new_sp - (nreg * size);
+
+  X[ 1] = read_memory_xlen(virtual_address_base +  0*size, $encoding);
+  if (nreg > 1) {
+    X[ 8] = read_memory_xlen(virtual_address_base +  1*size, $encoding);
+  }
+  if (nreg > 2) {
+    X[ 9] = read_memory_xlen(virtual_address_base +  2*size, $encoding);
+  }
+  if (nreg > 3) {
+    X[18] = read_memory_xlen(virtual_address_base +  3*size, $encoding);
+  }
+  if (nreg > 4) {
+    X[19] = read_memory_xlen(virtual_address_base +  4*size, $encoding);
+  }
+  if (nreg > 5) {
+    X[20] = read_memory_xlen(virtual_address_base +  5*size, $encoding);
+  }
+  if (nreg > 6) {
+    X[21] = read_memory_xlen(virtual_address_base +  6*size, $encoding);
+  }
+  if (nreg > 7) {
+    X[22] = read_memory_xlen(virtual_address_base +  7*size, $encoding);
+  }
+  if (nreg > 8) {
+    X[23] = read_memory_xlen(virtual_address_base +  8*size, $encoding);
+  }
+  if (nreg > 9) {
+    X[24] = read_memory_xlen(virtual_address_base +  9*size, $encoding);
+  }
+  if (nreg > 10) {
+    X[25] = read_memory_xlen(virtual_address_base + 10*size, $encoding);
+  }
+  if (nreg > 11) {
+    X[26] = read_memory_xlen(virtual_address_base + 11*size, $encoding);
+    X[27] = read_memory_xlen(virtual_address_base + 12*size, $encoding);
+  }
+
+  X[2] = virtual_address_new_sp;
+  jump(X[1]);
diff --git a/arch/inst/Zcmp/cm.popretz.yaml b/arch/inst/Zcmp/cm.popretz.yaml
new file mode 100644
index 0000000000..d38ccde532
--- /dev/null
+++ b/arch/inst/Zcmp/cm.popretz.yaml
@@ -0,0 +1,85 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.popretz
+long_name: Destroy function call stack frame, move zero to `a0` and return to `ra`.
+description: |
+  Destroy stack frame: load `ra` and 0 to 12 saved registers from the stack frame, deallocate the stack frame, move zero to `a0`, return to `ra`.
+  This instruction pops (loads) the registers in `reg_list` from stack memory, adjusts the stack pointer by `stack_adj`, moves zero to `a0`, and then returns to `ra`.
+
+  Restrictions on stack_adj:
+
+  * it must be enough to store all of the listed registers
+  * it must be a multiple of 16 (bytes):
+  ** for RV32 the allowed values are: 16, 32, 48, 64, 80, 96, 112
+  ** for RV64 the allowed values are: 16, 32, 48, 64, 80, 96, 112, 128, 144, 160
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: reg_list, stack_adj
+encoding:
+  match: 10111100------10
+  variables:
+    - name: rlist
+      location: 7-4
+      not: [0, 1, 2, 3]
+    - name: spimm
+      location: 3-2
+      left_shift: 4
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+
+  XReg size = xlen()/8;  # bytes per saved register; the offsets below are byte addresses
+  XReg nreg = (rlist == 15) ? 13 : (rlist - 3);
+  XReg stack_aligned_adj = (nreg * size + 15) & ~0xF;  # register save area rounded up to 16 bytes
+  XReg virtual_address_sp = X[2];
+  XReg virtual_address_new_sp = virtual_address_sp + stack_aligned_adj + spimm;
+  XReg virtual_address_base = virtual_address_new_sp - (nreg * size);
+
+  X[ 1] = read_memory_xlen(virtual_address_base +  0*size, $encoding);
+  if (nreg > 1) {
+    X[ 8] = read_memory_xlen(virtual_address_base +  1*size, $encoding);
+  }
+  if (nreg > 2) {
+    X[ 9] = read_memory_xlen(virtual_address_base +  2*size, $encoding);
+  }
+  if (nreg > 3) {
+    X[18] = read_memory_xlen(virtual_address_base +  3*size, $encoding);
+  }
+  if (nreg > 4) {
+    X[19] = read_memory_xlen(virtual_address_base +  4*size, $encoding);
+  }
+  if (nreg > 5) {
+    X[20] = read_memory_xlen(virtual_address_base +  5*size, $encoding);
+  }
+  if (nreg > 6) {
+    X[21] = read_memory_xlen(virtual_address_base +  6*size, $encoding);
+  }
+  if (nreg > 7) {
+    X[22] = read_memory_xlen(virtual_address_base +  7*size, $encoding);
+  }
+  if (nreg > 8) {
+    X[23] = read_memory_xlen(virtual_address_base +  8*size, $encoding);
+  }
+  if (nreg > 9) {
+    X[24] = read_memory_xlen(virtual_address_base +  9*size, $encoding);
+  }
+  if (nreg > 10) {
+    X[25] = read_memory_xlen(virtual_address_base + 10*size, $encoding);
+  }
+  if (nreg > 11) {
+    X[26] = read_memory_xlen(virtual_address_base + 11*size, $encoding);
+    X[27] = read_memory_xlen(virtual_address_base + 12*size, $encoding);
+  }
+
+  X[2] = virtual_address_new_sp;
+  X[10] = 0;
+  jump(X[1]);
diff --git a/arch/inst/Zcmp/cm.push.yaml b/arch/inst/Zcmp/cm.push.yaml
new file mode 100644
index 0000000000..dd9b840cf6
--- /dev/null
+++ b/arch/inst/Zcmp/cm.push.yaml
@@ -0,0 +1,84 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.push
+long_name: Create function call stack frame
+description: |
+  Create stack frame: store `ra` and 0 to 12 saved registers to the stack frame, optionally allocate additional stack space.
+  This instruction pushes (stores) the registers in `reg_list` to the memory below the stack pointer,
+  and then creates the stack frame by decrementing the stack pointer by `stack_adj`.
+
+  Restrictions on stack_adj:
+
+  * it must be enough to store all of the listed registers
+  * it must be a multiple of 16 (bytes):
+  ** for RV32 the allowed values are: 16, 32, 48, 64, 80, 96, 112
+  ** for RV64 the allowed values are: 16, 32, 48, 64, 80, 96, 112, 128, 144, 160
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: reg_list, -stack_adj
+encoding:
+  match: 10111000------10
+  variables:
+    - name: rlist
+      location: 7-4
+      not: [0, 1, 2, 3]
+    - name: spimm
+      location: 3-2
+      left_shift: 4
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+
+  XReg size = xlen()/8;  # bytes per saved register; the offsets below are byte addresses
+  XReg nreg = (rlist == 15) ? 13 : (rlist - 3);
+  XReg stack_aligned_adj = (nreg * size + 15) & ~0xF;  # register save area rounded up to 16 bytes
+  XReg virtual_address_sp = X[2];
+  XReg virtual_address_new_sp = virtual_address_sp - stack_aligned_adj - spimm;
+  XReg virtual_address_base = virtual_address_sp - (nreg * size);
+
+  write_memory_xlen(virtual_address_base +  0*size, X[ 1], $encoding);
+  if (nreg > 1) {
+    write_memory_xlen(virtual_address_base +  1*size, X[ 8], $encoding);
+  }
+  if (nreg > 2) {
+    write_memory_xlen(virtual_address_base +  2*size, X[ 9], $encoding);
+  }
+  if (nreg > 3) {
+    write_memory_xlen(virtual_address_base +  3*size, X[18], $encoding);
+  }
+  if (nreg > 4) {
+    write_memory_xlen(virtual_address_base +  4*size, X[19], $encoding);
+  }
+  if (nreg > 5) {
+    write_memory_xlen(virtual_address_base +  5*size, X[20], $encoding);
+  }
+  if (nreg > 6) {
+    write_memory_xlen(virtual_address_base +  6*size, X[21], $encoding);
+  }
+  if (nreg > 7) {
+    write_memory_xlen(virtual_address_base +  7*size, X[22], $encoding);
+  }
+  if (nreg > 8) {
+    write_memory_xlen(virtual_address_base +  8*size, X[23], $encoding);
+  }
+  if (nreg > 9) {
+    write_memory_xlen(virtual_address_base +  9*size, X[24], $encoding);
+  }
+  if (nreg > 10) {
+    write_memory_xlen(virtual_address_base + 10*size, X[25], $encoding);
+  }
+  if (nreg > 11) {
+    write_memory_xlen(virtual_address_base + 11*size, X[26], $encoding);
+    write_memory_xlen(virtual_address_base + 12*size, X[27], $encoding);
+  }
+
+  X[2] = virtual_address_new_sp;
diff --git a/arch/isa/fp.idl b/arch/isa/fp.idl
index 2484168101..c5386b455d 100644
--- a/arch/isa/fp.idl
+++ b/arch/isa/fp.idl
@@ -861,3 +861,24 @@ function f32_add {
     }
   }
 }
+
+function f32_sub {
+  returns U32
+  arguments
+    U32 a,
+    U32 b,
+    RoundingMode mode
+  description {
+    Returns difference of 2 floating point numbers
+  }
+  body {
+    U32 a_xor_b = a ^ b;
+    if (signF32UI(a_xor_b) == 1) {
+      # add if signs are different
+      return softfloat_addMagsF32(a,b,mode);
+    } else {
+      # subtract if signs are the same
+      return softfloat_subMagsF32(a,b,mode);
+    }
+  }
+}
diff --git a/arch/isa/globals.isa b/arch/isa/globals.isa
index 99517ba26d..c5aab80051 100644
--- a/arch/isa/globals.isa
+++ b/arch/isa/globals.isa
@@ -2477,6 +2477,30 @@ function read_memory {
   }
 }
 
+function read_memory_xlen {
+  returns Bits<XLEN>
+  arguments
+    XReg virtual_address,
+    Bits<INSTR_ENC_SIZE> encoding   # the encoding of an instruction causing this access, or 0 if a fetch
+  description {
+    Read from virtual memory XLEN bits using a known aligned address.
+  }
+  body {
+    TranslationResult result;
+
+    if (CSR[misa].S == 1) {
+      result = translate(virtual_address, MemoryOperation::Read, effective_ldst_mode(), encoding);
+    } else {
+      result.paddr = virtual_address;
+    }
+
+    # may raise an exception
+    access_check(result.paddr, XLEN, virtual_address, MemoryOperation::Read, ExceptionCode::LoadAccessFault, effective_ldst_mode());
+
+    return read_physical_memory<XLEN>(result.paddr);
+  }
+}
+
 # hart-global state to track the local reservation set
 Boolean    reservation_set_valid = false;
 XReg       reservation_set_address;
@@ -2798,6 +2822,28 @@ function write_memory {
   }
 }
 
+function write_memory_xlen {
+  arguments
+    XReg virtual_address,
+    Bits<XLEN> value,
+    Bits<INSTR_ENC_SIZE> encoding # encoding of the instruction causing this access
+  description {
+    Write to virtual memory XLEN bits using a known aligned address.
+  }
+  body {
+    XReg physical_address;
+
+    physical_address = (CSR[misa].S == 1)
+      ? translate(virtual_address, MemoryOperation::Write, effective_ldst_mode(), encoding).paddr
+      : virtual_address;
+
+    # may raise an exception
+    access_check(physical_address, XLEN, virtual_address, MemoryOperation::Write, ExceptionCode::StoreAmoAccessFault, effective_ldst_mode());
+
+    write_physical_memory<XLEN>(physical_address, value);
+  }
+}
+
 function mstatus_sd_has_known_reset
 {
   returns Boolean
diff --git a/cfgs/qc_iu/arch_overlay/ext/Xqci.yaml b/cfgs/qc_iu/arch_overlay/ext/Xqci.yaml
index 4615faf2a9..8b50c720a7 100644
--- a/cfgs/qc_iu/arch_overlay/ext/Xqci.yaml
+++ b/cfgs/qc_iu/arch_overlay/ext/Xqci.yaml
@@ -136,6 +136,40 @@ versions:
   requires:
     name: Zca
     version: ">= 1.0.0"
+- version: "0.6.0"
+  state: frozen
+  ratification_date: null
+  contributors:
+  - name: Albert Yosher
+    company: Qualcomm Technologies, Inc.
+    email: ayosher@qti.qualcomm.com
+  - name: Derek Hower
+    company: Qualcomm Technologies, Inc.
+    email: dhower@qti.qualcomm.com
+  changes:
+    - Fix encoding of qc.c.extu instruction
+    - Fix encoding of qc.swmi instruction
+    - Rename qc.slasat -> qc.shlsat
+    - Rename qc.sllsat -> qc.shlusat
+  implies:
+  - [Xqcia, "0.4.0"]
+  - [Xqciac, "0.2.0"]
+  - [Xqcibi, "0.2.0"]
+  - [Xqcibm, "0.4.0"]
+  - [Xqcicli, "0.2.0"]
+  - [Xqcicm, "0.2.0"]
+  - [Xqcics, "0.2.0"]
+  - [Xqcicsr, "0.2.0"]
+  - [Xqciint, "0.2.0"]
+  - [Xqcilb, "0.2.0"]
+  - [Xqcili, "0.2.0"]
+  - [Xqcilia, "0.2.0"]
+  - [Xqcilo, "0.2.0"]
+  - [Xqcilsm, "0.4.0"]
+  - [Xqcisls, "0.2.0"]
+  requires:
+    name: Zca
+    version: ">= 1.0.0"
 description: |
   The Xqci extension includes a set of instructions that improve RISC-V code density and
   performance in microontrollers. It fills several gaps:
@@ -431,3 +465,4 @@ doc_license:
 company:
   name: Qualcomm Technologies, Inc.
   url: https://qualcomm.com
+conflicts: [D]
diff --git a/cfgs/qc_iu/arch_overlay/ext/Xqcia.yaml b/cfgs/qc_iu/arch_overlay/ext/Xqcia.yaml
index c098ca7ffa..90254a4adf 100644
--- a/cfgs/qc_iu/arch_overlay/ext/Xqcia.yaml
+++ b/cfgs/qc_iu/arch_overlay/ext/Xqcia.yaml
@@ -40,6 +40,19 @@ versions:
     email: dhower@qti.qualcomm.com
   changes:
     - Fix description and functionality of qc.wrapi instruction
+- version: "0.4.0"
+  state: frozen
+  ratification_date: null
+  contributors:
+  - name: Albert Yosher
+    company: Qualcomm Technologies, Inc.
+    email: ayosher@qti.qualcomm.com
+  - name: Derek Hower
+    company: Qualcomm Technologies, Inc.
+    email: dhower@qti.qualcomm.com
+  changes:
+    - Rename qc.slasat -> qc.shlsat
+    - Rename qc.sllsat -> qc.shlusat
 description: |
   The Xqcia extension includes eleven instructions to perform integer arithmetic.
 
diff --git a/cfgs/qc_iu/arch_overlay/ext/Xqciac.yaml b/cfgs/qc_iu/arch_overlay/ext/Xqciac.yaml
index f1c0791a16..7caa649484 100644
--- a/cfgs/qc_iu/arch_overlay/ext/Xqciac.yaml
+++ b/cfgs/qc_iu/arch_overlay/ext/Xqciac.yaml
@@ -47,7 +47,7 @@ versions:
 description: |
   The Xqciac extension includes three instructions to accelerate common
   address calculations.
-
+conflicts: [D]
 doc_license:
   name: Creative Commons Attribution 4.0 International License
   url: https://creativecommons.org/licenses/by/4.0/
diff --git a/cfgs/qc_iu/arch_overlay/ext/Xqcibm.yaml b/cfgs/qc_iu/arch_overlay/ext/Xqcibm.yaml
index 1f64d90b03..dad4006a16 100644
--- a/cfgs/qc_iu/arch_overlay/ext/Xqcibm.yaml
+++ b/cfgs/qc_iu/arch_overlay/ext/Xqcibm.yaml
@@ -43,6 +43,19 @@ versions:
   changes:
     - Fix description and functionality of qc.c.extu instruction
   requires: { name: Zca, version: ">= 1.0.0" }
+- version: "0.4.0"
+  state: frozen
+  ratification_date: null
+  contributors:
+  - name: Albert Yosher
+    company: Qualcomm Technologies, Inc.
+    email: ayosher@qti.qualcomm.com
+  - name: Derek Hower
+    company: Qualcomm Technologies, Inc.
+    email: dhower@qti.qualcomm.com
+  changes:
+    - Fix encoding for qc.c.extu
+  requires: { name: Zca, version: ">= 1.0.0" }
 description: |
   The Xqcibm extension includes thirty eight instructions that perform bit manipulation,
   include insertion and extraction.
diff --git a/cfgs/qc_iu/arch_overlay/ext/Xqcilsm.yaml b/cfgs/qc_iu/arch_overlay/ext/Xqcilsm.yaml
index 9a97f0ed6e..5347c9e7ea 100644
--- a/cfgs/qc_iu/arch_overlay/ext/Xqcilsm.yaml
+++ b/cfgs/qc_iu/arch_overlay/ext/Xqcilsm.yaml
@@ -40,6 +40,18 @@ versions:
     email: dhower@qti.qualcomm.com
   changes:
     - Fix description of qc.swmi, qc.lwmi and qc.setwmi instructions
+- version: "0.4.0"
+  state: frozen
+  ratification_date: null
+  contributors:
+  - name: Albert Yosher
+    company: Qualcomm Technologies, Inc.
+    email: ayosher@qti.qualcomm.com
+  - name: Derek Hower
+    company: Qualcomm Technologies, Inc.
+    email: dhower@qti.qualcomm.com
+  changes:
+    - Fix encoding of qc.swmi
 description: |
   The Xqcilsm extension includes six instructions that transfer multiple values
   between registers and memory.
diff --git a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.extu.yaml b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.extu.yaml
index e73a915ee5..a7bacd798b 100644
--- a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.extu.yaml
+++ b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.extu.yaml
@@ -22,6 +22,7 @@ encoding:
       not: [0, 1, 2, 3, 4]
     - name: rd
       location: 11-7
+      not: 0
 access:
   s: always
   u: always
diff --git a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.muliadd.yaml b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.muliadd.yaml
index da1af4692a..76253f546b 100644
--- a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.muliadd.yaml
+++ b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.muliadd.yaml
@@ -8,9 +8,14 @@ description: |
   Increments `rd` by the multiplication of `rs1` and an unsigned immediate
   Instruction encoded in CL instruction format.
 definedBy:
-  anyOf:
-    - Xqci
-    - Xqciac
+  allOf:
+    - not:
+        anyOf:
+          - allOf: [C, D]
+          - Zcd
+    - anyOf:
+        - Xqci
+        - Xqciac
 base: 32
 encoding:
   match: 001-----------10
diff --git a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.mveqz.yaml b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.mveqz.yaml
index 054f50ac27..ba619a8e73 100644
--- a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.mveqz.yaml
+++ b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.c.mveqz.yaml
@@ -8,9 +8,14 @@ description: |
   Move `rs1` to `rd` if `rd` == 0, keep `rd` value otherwise
   Instruction encoded in CL instruction format.
 definedBy:
-  anyOf:
-    - Xqci
-    - Xqcicm
+  allOf:
+    - anyOf:
+        - Xqci
+        - Xqcicm
+    - not:
+        anyOf:
+          - allOf: [C, D]
+          - Zcd
 base: 32
 encoding:
   match: 101011---00---10
diff --git a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.slasat.yaml b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.shlsat.yaml
similarity index 94%
rename from cfgs/qc_iu/arch_overlay/inst/Xqci/qc.slasat.yaml
rename to cfgs/qc_iu/arch_overlay/inst/Xqci/qc.shlsat.yaml
index 3d4bc09ed9..88d1482418 100644
--- a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.slasat.yaml
+++ b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.shlsat.yaml
@@ -2,8 +2,8 @@
 
 $schema: inst_schema.json#
 kind: instruction
-name: qc.slasat
-long_name: Saturating arithmetic left shift
+name: qc.shlsat
+long_name: Saturating signed left shift
 description: |
   Left shift `rs1` by the value of `rs2`, and saturate the signed result.
   The number of words is in `length`.
diff --git a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.sllsat.yaml b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.shlusat.yaml
similarity index 94%
rename from cfgs/qc_iu/arch_overlay/inst/Xqci/qc.sllsat.yaml
rename to cfgs/qc_iu/arch_overlay/inst/Xqci/qc.shlusat.yaml
index 6c033af65d..691da04252 100644
--- a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.sllsat.yaml
+++ b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.shlusat.yaml
@@ -2,8 +2,8 @@
 
 $schema: inst_schema.json#
 kind: instruction
-name: qc.sllsat
-long_name: Saturating logical left shift
+name: qc.shlusat
+long_name: Saturating unsigned left shift
 description: |
   Left shift `rs1` by the value of `rs2`, and saturate the unsigned result.
   The number of words is in `length`.
diff --git a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.swmi.yaml b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.swmi.yaml
index b7dbc428cd..5e0f10f392 100644
--- a/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.swmi.yaml
+++ b/cfgs/qc_iu/arch_overlay/inst/Xqci/qc.swmi.yaml
@@ -14,7 +14,7 @@ definedBy:
     - Xqcilsm
 base: 32
 encoding:
-  match: 00---------------111-----0101011
+  match: 01---------------111-----0101011
   variables:
     - name: imm
       location: 29-25
diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index 2af2888807..e98ce14596 100755
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -117,6 +117,7 @@ def test_instruction_encoding(self, instr_name):
             instr_name == "fence.i"
             or instr_name == "c.nop"
             or instr_name == "fcvtmod.w.d"
+            or instr_name == "fence"
         ):
             pytest.skip(
                 f"Instruction {instr_name} is a corner case and implementation should not follow ISA spec"

From b1e0ee66d0d5ef441ae2d8ee502bee1e624cb23f Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Mon, 27 Jan 2025 09:43:02 +0000
Subject: [PATCH 32/33] Set CM instruction length 16 bit instead of 32

---
 ext/auto-inst/parsing.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 9284ae3239..975f32c3b4 100755
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -149,7 +149,9 @@ def compare_yaml_json_encoding(
     if not json_encoding_str:
         return ["No JSON encoding available for comparison."]
 
-    expected_length = 16 if instr_name.lower().startswith(("c_", "c.")) else 32
+    expected_length = (
+        16 if instr_name.lower().startswith(("c_", "c.", "cm_", "cm.")) else 32
+    )
 
     yaml_pattern_str = yaml_match.replace("-", ".")
     if len(yaml_pattern_str) != expected_length:

From b2f102eff2c072cddcf5693066328da1a48a3070 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 24 Apr 2025 16:45:24 +0100
Subject: [PATCH 33/33] Add license headers compliant with UDB native files.

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py      | 4 ++++
 ext/auto-inst/test_parsing.py | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 975f32c3b4..605b1e511b 100755
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-FileCopyrightText: 2024-2025 Contributors to the RISCV UnifiedDB <https://github.com/riscv-software-src/riscv-unified-db>
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+
 import os
 import re
 import yaml
diff --git a/ext/auto-inst/test_parsing.py b/ext/auto-inst/test_parsing.py
index e98ce14596..dde64d41fe 100755
--- a/ext/auto-inst/test_parsing.py
+++ b/ext/auto-inst/test_parsing.py
@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-FileCopyrightText: 2024-2025 Contributors to the RISCV UnifiedDB <https://github.com/riscv-software-src/riscv-unified-db>
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+
 import pytest
 import json
 import os