Add type.py with all formats except Vector

AFOliveira · AFOliveira · commit 087153d8c61e · 2025-02-10T15:18:16.000Z
Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
diff --git a/type.py b/type.py
@@ -17,9 +17,12 @@
    else, set the type to "Unknown".
 5. Finally, force instructions whose names start with specific prefixes:
       - Names starting with "fcvt" or "fmv" are forced to R-type.
-      - **Loads** (names starting with "lb", "ld", "lh", or "lw") are forced to I-type.
+      - Names starting with "lb", "ld", "lh", "lw", "lr", or "li" are forced to I-type; names starting with "sw.", "sh.", "sd.", or "sb." are forced to S-type.
+      - Any instruction whose name contains "fence", or starts with "cbo." or "ssrdp", is forced to I-type.
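+6. As a fallback, if all tests have failed (the type is still "Unknown") and at least two of the suffixes "s1", "s2", and "d"
+   occur among the encoding's variable names (e.g. "rs1" and "rd"), the type is set to R-type; otherwise, if the names include "csr", "rd", and either "imm" or "uimm", it is set to I-type.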
 
-Once determined, the script inserts (or updates) a new field named `type:`
+Once determined, the script inserts (or updates) a new field named `format`
 immediately after the `long_name:` field.
 """
 
@@ -29,15 +32,15 @@
 from ruamel.yaml.scalarstring import PlainScalarString
 from ruamel.yaml.representer import RoundTripRepresenter
 
-
-yaml = YAML(typ="rt")  # Use round-trip mode
+# Use round-trip mode to preserve as much of the original formatting as possible.
+yaml = YAML(typ="rt")
 yaml.preserve_quotes = True  # Preserve original quoting
 yaml.indent(mapping=2, sequence=4, offset=2)
 yaml.width = 4096  # Prevent line wrapping
 
 
 def represent_plain_str(dumper, data):
-    # Force plain style (empty string) regardless of the content.
+    # Force plain style (no quotes) for PlainScalarString instances.
     return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="")
 
 
@@ -51,13 +54,12 @@ def parse_location(location):
     - If the location is an integer, it is treated as a single bit
       (e.g. 7 becomes [(7, 7)]).
     - If it is a string (e.g. "31|7|30-25|11-8"), it is assumed to be delimited by '|'
-      characters. Each segment is either a single bit (e.g. "7") or a range (e.g. "30-25").
+      characters. Each segment is either a single bit or a range.
 
     Returns a list of tuples in the form (high_bit, low_bit).
     """
     if isinstance(location, int):
         return [(location, location)]
-
     segments = [seg.strip() for seg in location.split("|") if seg.strip()]
     parsed = []
     for seg in segments:
@@ -89,7 +91,6 @@ def identify_immediate_type(imm_location):
     """
     segments = parse_location(imm_location)
     seg_set = set(segments)
-
     if len(segments) == 1 and segments[0] == (31, 20):
         return "I-type"
     if len(segments) == 2 and seg_set == {(31, 25), (11, 7)}:
@@ -100,7 +101,6 @@ def identify_immediate_type(imm_location):
         return "U-type"
     if len(segments) == 4 and seg_set == {(31, 31), (30, 21), (20, 20), (19, 12)}:
         return "J-type"
-
     return "Unknown"
 
 
@@ -115,7 +115,6 @@ def check_rtype_registers(variables):
     found_source1 = False
     found_source2 = False
     found_dest = False
-
     for var in variables:
         loc_str = var.get("location", "")
         segments = parse_location(loc_str)
@@ -127,7 +126,6 @@ def check_rtype_registers(variables):
                 found_source2 = True
             elif seg == (11, 7):
                 found_dest = True
-
     return found_source1 and found_source2 and found_dest
 
 
@@ -142,19 +140,12 @@ def classify_compressed(match_field):
     Returns one of:
       "CIW", "CL", "CS", "CI", "CR", "CB", "CJ"
     or falls back to "C-type" if none match.
-
-    Note: This mapping is a simplification and may not cover all cases.
     """
     if len(match_field) != 16:
         return "C-type"
-    # In our assumed representation, bit15 is match_field[0] and bit0 is match_field[15]
-    # Extract the two least-significant bits (bits 1:0):
     group = match_field[-2:]
-    # Extract bits 15:13 as funct3:
     funct3 = match_field[0:3]
-
     if group == "00":
-        # Group 0: usually CIW, CL, or CS.
         if funct3 == "000":
             return "CIW"  # e.g., C.ADDI4SPN
         elif funct3 == "010":
@@ -164,7 +155,6 @@ def classify_compressed(match_field):
         else:
             return "C-type"
     elif group == "01":
-        # Group 1: often CI, CR, or CB.
         if funct3 in ["000", "010", "011"]:
             return "CI"  # e.g., C.ADDI, C.LI, C.ADDI16SP
         elif funct3 == "100":
@@ -176,7 +166,6 @@ def classify_compressed(match_field):
         else:
             return "C-type"
     elif group == "10":
-        # Group 2: similar to group 1.
         if funct3 in ["000", "010", "011"]:
             return "CI"
         elif funct3 == "100":
@@ -194,7 +183,7 @@ def classify_compressed(match_field):
 def ensure_plain_match(enc):
     """
     Ensure that if the given encoding dict has a 'match' field that is a string,
-    it is wrapped in PlainScalarString so that it is output without quotes.
+    it is wrapped in PlainScalarString so that it is output without added quotes.
     """
     if isinstance(enc, dict) and "match" in enc:
         match_value = enc["match"]
@@ -208,15 +197,20 @@ def process_file(filepath):
       - Determine the instruction type using the encoding section.
       - If the chosen encoding's "match" field is 16 characters long,
         classify the instruction using classify_compressed().
+      - If the match field is entirely hardcoded (only "0" and "1"), force the instruction
+        type to I-type; this check runs first and takes precedence over the 16-character test above.
       - Otherwise, if an "imm" variable is present, use it to classify the instruction.
       - Else if a "shamt" variable is present, classify the instruction as I-type.
       - Otherwise, if registers appear as expected for R-type, classify as R-type;
         else, set the type to "Unknown".
       - Finally, force instructions whose names start with specific prefixes:
             - Names starting with "fcvt" or "fmv" are forced to R-type.
-            - **Load instructions** (names starting with "lb", "ld", "lh", or "lw") are forced to I-type.
+            - Names starting with "lb", "ld", "lh", "lw", "lr", or "li" are forced to I-type; names starting with "sw.", "sh.", "sd.", or "sb." are forced to S-type.
+            - If the instruction name contains "fence", or starts with "cbo." or "ssrdp", force it to I-type.
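+      - As a fallback: if all tests have failed (type is "Unknown") and at least two of the suffixes "s1", "s2", and "d" occur among
+        the encoding's variable names, force the type to R-type; otherwise, if the names include "csr", "rd", and either "imm" or "uimm", force it to I-type.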
       - Insert (or update) a new field "type:" immediately after "long_name:".
-      - Ensure that the 'match' field remains unquoted by wrapping it in PlainScalarString.
+      - Ensure that the 'match' field remains unquoted.
       - Write the updated YAML back to the same file.
     """
     try:
@@ -228,29 +222,26 @@ def process_file(filepath):
 
     # Handle nested encoding (e.g., RV32, RV64) versus flat encoding.
     encoding = data.get("encoding", {})
-    chosen_encoding = {}
     if isinstance(encoding, dict) and ("RV32" in encoding or "RV64" in encoding):
-        # Prefer RV32 if available; otherwise use RV64.
         chosen_encoding = encoding.get("RV32", encoding.get("RV64", {}))
     else:
         chosen_encoding = encoding
 
-    # First, if the match field is 16 characters long, classify as a specific C-type.
     match_field = chosen_encoding.get("match", "")
-    if isinstance(match_field, str) and len(match_field) == 16:
+    # If the match field is entirely hardcoded (only "0" and "1"), force I-type.
+    if isinstance(match_field, str) and match_field and set(match_field) <= {"0", "1"}:
+        inst_type = "I-type"
+    elif isinstance(match_field, str) and len(match_field) == 16:
         inst_type = classify_compressed(match_field)
     else:
-        # Otherwise, use our usual tests.
         variables = chosen_encoding.get("variables", [])
         imm_location = None
         shamt_exists = False
-
         for var in variables:
             if var.get("name") == "imm":
                 imm_location = var.get("location")
             if var.get("name") == "shamt":
                 shamt_exists = True
-
         if imm_location is not None:
             inst_type = identify_immediate_type(imm_location)
         elif shamt_exists:
@@ -262,18 +253,51 @@ def process_file(filepath):
 
     # Force specific instruction types based on the instruction name.
     inst_name = data.get("name", "").lower()
-    # Force instructions starting with "fcvt" or "fmv" to be R-type.
     if inst_name.startswith("fcvt") or inst_name.startswith("fmv"):
         inst_type = "R-type"
-    # Force load instructions (lb, ld, lh, lw) to be I-type.
-    elif inst_name.startswith(("lb", "ld", "lh", "lw", "lr")):
-        # Loads are I-type
+    elif inst_name.startswith(("lb", "ld", "lh", "lw", "lr", "li")):
         inst_type = "I-type"
+    elif inst_name.startswith(("sw.", "sh.", "sd.", "sb.")):
+        inst_type = "S-type"
+    elif (
+        "fence" in inst_name
+        or inst_name.startswith("cbo.")
+        or inst_name.startswith("ssrdp")
+    ):
+        inst_type = "I-type"
+
+    # Fallback: if inst_type is still "Unknown", infer it from the encoding's register/CSR variable names.
+    if inst_type == "Unknown":
+        var_names = [
+            var.get("name", "").lower() for var in chosen_encoding.get("variables", [])
+        ]
+        # Remove empty names if any.
+        var_names = [name for name in var_names if name]
+        if (
+            any(name.endswith("s1") for name in var_names)
+            and any(name.endswith("d") for name in var_names)
+            or (
+                any(name.endswith("s1") for name in var_names)
+                and any(name.endswith("s2") for name in var_names)
+            )
+            or (
+                any(name.endswith("s2") for name in var_names)
+                and any(name.endswith("d") for name in var_names)
+            )
+        ):
+            inst_type = "R-type"
+        elif {"csr", "imm", "rd"}.issubset(set(var_names)) or {
+            "csr",
+            "uimm",
+            "rd",
+        }.issubset(set(var_names)):
+            inst_type = "I-type"
 
-    # Insert or update the new field "type:" immediately after "long_name:"
+    # Insert (or update) the "format" field immediately after "long_name:".
     if "long_name" in data:
         keys = list(data.keys())
         idx = keys.index("long_name")
+        # Use "format" as the key (change to "type" if desired)
         if "format" in data:
             data["format"] = inst_type
         else:
@@ -303,9 +327,7 @@ def main():
     if len(sys.argv) < 2:
         print("Usage: {} <file_or_directory>".format(sys.argv[0]))
         sys.exit(1)
-
     path = Path(sys.argv[1])
-
     if path.is_file() and path.suffix == ".yaml":
         process_file(path)
     elif path.is_dir():