Skip to content

Commit 087153d

Browse files
committed
add type.py with all formats, but Vector
Signed-off-by: Afonso Oliveira <[email protected]>
1 parent beeecc1 commit 087153d

File tree

1 file changed

+59
-37
lines changed

1 file changed

+59
-37
lines changed

type.py

100644100755
Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@
1717
else, set the type to "Unknown".
1818
5. Finally, force instructions whose names start with specific prefixes:
1919
- Names starting with "fcvt" or "fmv" are forced to R-type.
20-
- **Loads** (names starting with "lb", "ld", "lh", or "lw") are forced to I-type.
20+
- Load instructions (names starting with "lb", "ld", "lh", "lw", "lr", or "li") are forced to I-type.
21+
- Any instruction whose name contains "fence" is forced to I-type.
22+
6. As a fallback, if all tests have failed (the type is still "Unknown")
23+
and the encoding’s variables contain only "rd" and "rs1", then the type is set to R-type.
2124
22-
Once determined, the script inserts (or updates) a new field named `type:`
25+
Once determined, the script inserts (or updates) a new field named `format`
2326
immediately after the `long_name:` field.
2427
"""
2528

@@ -29,15 +32,15 @@
2932
from ruamel.yaml.scalarstring import PlainScalarString
3033
from ruamel.yaml.representer import RoundTripRepresenter
3134

32-
33-
yaml = YAML(typ="rt") # Use round-trip mode
35+
# Use round-trip mode to preserve as much of the original formatting as possible.
36+
yaml = YAML(typ="rt")
3437
yaml.preserve_quotes = True # Preserve original quoting
3538
yaml.indent(mapping=2, sequence=4, offset=2)
3639
yaml.width = 4096 # Prevent line wrapping
3740

3841

3942
def represent_plain_str(dumper, data):
40-
# Force plain style (empty string) regardless of the content.
43+
# Force plain style (no quotes) for PlainScalarString instances.
4144
return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="")
4245

4346

@@ -51,13 +54,12 @@ def parse_location(location):
5154
- If the location is an integer, it is treated as a single bit
5255
(e.g. 7 becomes [(7, 7)]).
5356
- If it is a string (e.g. "31|7|30-25|11-8"), it is assumed to be delimited by '|'
54-
characters. Each segment is either a single bit (e.g. "7") or a range (e.g. "30-25").
57+
characters. Each segment is either a single bit or a range.
5558
5659
Returns a list of tuples in the form (high_bit, low_bit).
5760
"""
5861
if isinstance(location, int):
5962
return [(location, location)]
60-
6163
segments = [seg.strip() for seg in location.split("|") if seg.strip()]
6264
parsed = []
6365
for seg in segments:
@@ -89,7 +91,6 @@ def identify_immediate_type(imm_location):
8991
"""
9092
segments = parse_location(imm_location)
9193
seg_set = set(segments)
92-
9394
if len(segments) == 1 and segments[0] == (31, 20):
9495
return "I-type"
9596
if len(segments) == 2 and seg_set == {(31, 25), (11, 7)}:
@@ -100,7 +101,6 @@ def identify_immediate_type(imm_location):
100101
return "U-type"
101102
if len(segments) == 4 and seg_set == {(31, 31), (30, 21), (20, 20), (19, 12)}:
102103
return "J-type"
103-
104104
return "Unknown"
105105

106106

@@ -115,7 +115,6 @@ def check_rtype_registers(variables):
115115
found_source1 = False
116116
found_source2 = False
117117
found_dest = False
118-
119118
for var in variables:
120119
loc_str = var.get("location", "")
121120
segments = parse_location(loc_str)
@@ -127,7 +126,6 @@ def check_rtype_registers(variables):
127126
found_source2 = True
128127
elif seg == (11, 7):
129128
found_dest = True
130-
131129
return found_source1 and found_source2 and found_dest
132130

133131

@@ -142,19 +140,12 @@ def classify_compressed(match_field):
142140
Returns one of:
143141
"CIW", "CL", "CS", "CI", "CR", "CB", "CJ"
144142
or falls back to "C-type" if none match.
145-
146-
Note: This mapping is a simplification and may not cover all cases.
147143
"""
148144
if len(match_field) != 16:
149145
return "C-type"
150-
# In our assumed representation, bit15 is match_field[0] and bit0 is match_field[15]
151-
# Extract the two least-significant bits (bits 1:0):
152146
group = match_field[-2:]
153-
# Extract bits 15:13 as funct3:
154147
funct3 = match_field[0:3]
155-
156148
if group == "00":
157-
# Group 0: usually CIW, CL, or CS.
158149
if funct3 == "000":
159150
return "CIW" # e.g., C.ADDI4SPN
160151
elif funct3 == "010":
@@ -164,7 +155,6 @@ def classify_compressed(match_field):
164155
else:
165156
return "C-type"
166157
elif group == "01":
167-
# Group 1: often CI, CR, or CB.
168158
if funct3 in ["000", "010", "011"]:
169159
return "CI" # e.g., C.ADDI, C.LI, C.ADDI16SP
170160
elif funct3 == "100":
@@ -176,7 +166,6 @@ def classify_compressed(match_field):
176166
else:
177167
return "C-type"
178168
elif group == "10":
179-
# Group 2: similar to group 1.
180169
if funct3 in ["000", "010", "011"]:
181170
return "CI"
182171
elif funct3 == "100":
@@ -194,7 +183,7 @@ def classify_compressed(match_field):
194183
def ensure_plain_match(enc):
195184
"""
196185
Ensure that if the given encoding dict has a 'match' field that is a string,
197-
it is wrapped in PlainScalarString so that it is output without quotes.
186+
it is wrapped in PlainScalarString so that it is output without added quotes.
198187
"""
199188
if isinstance(enc, dict) and "match" in enc:
200189
match_value = enc["match"]
@@ -208,15 +197,20 @@ def process_file(filepath):
208197
- Determine the instruction type using the encoding section.
209198
- If the chosen encoding's "match" field is 16 characters long,
210199
classify the instruction using classify_compressed().
200+
- Otherwise, if the match field is entirely hardcoded (only "0" and "1"),
201+
force the instruction type to I-type.
211202
- Otherwise, if an "imm" variable is present, use it to classify the instruction.
212203
- Else if a "shamt" variable is present, classify the instruction as I-type.
213204
- Otherwise, if registers appear as expected for R-type, classify as R-type;
214205
else, set the type to "Unknown".
215206
- Finally, force instructions whose names start with specific prefixes:
216207
- Names starting with "fcvt" or "fmv" are forced to R-type.
217-
- **Load instructions** (names starting with "lb", "ld", "lh", or "lw") are forced to I-type.
208+
- Load instructions (names starting with "lb", "ld", "lh", "lw", "lr", or "li") are forced to I-type.
209+
- If the instruction name contains "fence", force it to I-type.
210+
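- As a fallback: if all tests have failed (type is "Unknown") and the encoding's variable
211+
names include at least two of a name ending in "s1", a name ending in "s2", and a name ending in "d", force the type to R-type; otherwise, if they include "csr", "rd", and either "imm" or "uimm", force it to I-type.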
218212
- Insert (or update) a new field "format" immediately after "long_name:".
219-
- Ensure that the 'match' field remains unquoted by wrapping it in PlainScalarString.
213+
- Ensure that the 'match' field remains unquoted.
220214
- Write the updated YAML back to the same file.
221215
"""
222216
try:
@@ -228,29 +222,26 @@ def process_file(filepath):
228222

229223
# Handle nested encoding (e.g., RV32, RV64) versus flat encoding.
230224
encoding = data.get("encoding", {})
231-
chosen_encoding = {}
232225
if isinstance(encoding, dict) and ("RV32" in encoding or "RV64" in encoding):
233-
# Prefer RV32 if available; otherwise use RV64.
234226
chosen_encoding = encoding.get("RV32", encoding.get("RV64", {}))
235227
else:
236228
chosen_encoding = encoding
237229

238-
# First, if the match field is 16 characters long, classify as a specific C-type.
239230
match_field = chosen_encoding.get("match", "")
240-
if isinstance(match_field, str) and len(match_field) == 16:
231+
# If the match field is entirely hardcoded (only "0" and "1"), force I-type.
232+
if isinstance(match_field, str) and match_field and set(match_field) <= {"0", "1"}:
233+
inst_type = "I-type"
234+
elif isinstance(match_field, str) and len(match_field) == 16:
241235
inst_type = classify_compressed(match_field)
242236
else:
243-
# Otherwise, use our usual tests.
244237
variables = chosen_encoding.get("variables", [])
245238
imm_location = None
246239
shamt_exists = False
247-
248240
for var in variables:
249241
if var.get("name") == "imm":
250242
imm_location = var.get("location")
251243
if var.get("name") == "shamt":
252244
shamt_exists = True
253-
254245
if imm_location is not None:
255246
inst_type = identify_immediate_type(imm_location)
256247
elif shamt_exists:
@@ -262,18 +253,51 @@ def process_file(filepath):
262253

263254
# Force specific instruction types based on the instruction name.
264255
inst_name = data.get("name", "").lower()
265-
# Force instructions starting with "fcvt" or "fmv" to be R-type.
266256
if inst_name.startswith("fcvt") or inst_name.startswith("fmv"):
267257
inst_type = "R-type"
268-
# Force load instructions (lb, ld, lh, lw) to be I-type.
269-
elif inst_name.startswith(("lb", "ld", "lh", "lw", "lr")):
270-
# Loads are I-type
258+
elif inst_name.startswith(("lb", "ld", "lh", "lw", "lr", "li")):
271259
inst_type = "I-type"
260+
elif inst_name.startswith(("sw.", "sh.", "sd.", "sb.")):
261+
inst_type = "S-type"
262+
elif (
263+
"fence" in inst_name
264+
or inst_name.startswith("cbo.")
265+
or inst_name.startswith("ssrdp")
266+
):
267+
inst_type = "I-type"
268+
269+
# Fallback: if inst_type is still "Unknown", check if there are "rd" and "rs1".
270+
if inst_type == "Unknown":
271+
var_names = [
272+
var.get("name", "").lower() for var in chosen_encoding.get("variables", [])
273+
]
274+
# Remove empty names if any.
275+
var_names = [name for name in var_names if name]
276+
if (
277+
any(name.endswith("s1") for name in var_names)
278+
and any(name.endswith("d") for name in var_names)
279+
or (
280+
any(name.endswith("s1") for name in var_names)
281+
and any(name.endswith("s2") for name in var_names)
282+
)
283+
or (
284+
any(name.endswith("s2") for name in var_names)
285+
and any(name.endswith("d") for name in var_names)
286+
)
287+
):
288+
inst_type = "R-type"
289+
elif {"csr", "imm", "rd"}.issubset(set(var_names)) or {
290+
"csr",
291+
"uimm",
292+
"rd",
293+
}.issubset(set(var_names)):
294+
inst_type = "I-type"
272295

273-
# Insert or update the new field "type:" immediately after "long_name:"
296+
# Insert (or update) a new field "format" immediately after "long_name:".
274297
if "long_name" in data:
275298
keys = list(data.keys())
276299
idx = keys.index("long_name")
300+
# Use "format" as the key (change to "type" if desired)
277301
if "format" in data:
278302
data["format"] = inst_type
279303
else:
@@ -303,9 +327,7 @@ def main():
303327
if len(sys.argv) < 2:
304328
print("Usage: {} <file_or_directory>".format(sys.argv[0]))
305329
sys.exit(1)
306-
307330
path = Path(sys.argv[1])
308-
309331
if path.is_file() and path.suffix == ".yaml":
310332
process_file(path)
311333
elif path.is_dir():

0 commit comments

Comments
 (0)