2020
2121class OTBNProgram :
2222 def __init__ (self , symbols : Dict [str , int ], insns : Dict [int , int ],
23- data : Dict [int , int ]):
23+ data : Dict [int , int ], nop_subfuncs : List [ str ] ):
2424 self .symbols = symbols # label -> PC
25- self .data = data # addr -> data (32b word)
25+ self .data = data # addr -> data (32b word)
26+
27+ # For each function name in nop_subfuncs, we assume that it will have
28+ # control flow limited in the following way. If the function starts at
29+ # address A and the first JALR instruction after A is at address B,
30+ # then execution will not leave the interval [A, B] until the function
31+ # returns by executing the JALR at B.
32+ #
33+ # With this assumption, we can imagine replacing the instructions in
34+ that interval with a "nop sled". The ranges of these sleds are
35+ # stored in the nop_ranges list, as pairs (A, B).
36+ nop_ranges : list [tuple [int , int ]] = []
37+
38+ for symbol in nop_subfuncs :
39+ # Get the start address of the function. If the function doesn't
40+ # appear in the list of symbols, there's nothing to do for it.
41+ start_addr = symbols .get (symbol )
42+ if start_addr is None :
43+ continue
44+
45+ for pc in range (start_addr , 1 << 32 , 4 ):
46+ opcode = insns .get (pc )
47+ if opcode is None :
48+ raise RuntimeError ("Fell off the end of the binary "
49+ "when searching for a JALR for a "
50+ f"function with symbol { symbol } , "
51+ f"starting at { start_addr :#x} " )
52+
53+ # Check whether we just found 'B' (see note above nop_ranges)
54+ if INSNS_FILE .mnem_for_word (opcode ) == 'jalr' :
55+ nop_ranges .append ((start_addr , pc ))
56+ break
2657
2758 self .insns = {}
2859 for pc , opcode in insns .items ():
29- mnem = INSNS_FILE .mnem_for_word (opcode )
30- if mnem is None :
31- raise ValueError (
32- 'No legal decoding for mnemonic: {}' .format (mnem ))
33- insn = INSNS_FILE .mnemonic_to_insn [mnem ]
34- assert insn .encoding is not None
35- enc_vals = insn .encoding .extract_operands (opcode )
60+ # Check if PC lies within one of the NOP ranges (equal to or after
61+ # the start of a nop subfunc and strictly before the JALR
62+ # instruction at the end).
63+ in_nop_region = any (a <= pc < b for a , b in nop_ranges )
64+
65+ # If the PC *is* in a NOP region, interpret the opcode at PC as a
66+ # NOP (addi x0, x0, 0). If not, decode the opcode and find the
67+ # appropriate instruction and operands.
68+ if in_nop_region :
69+ insn = INSNS_FILE .mnemonic_to_insn ["addi" ]
70+ enc_vals = {'imm' : 0 , 'grs1' : 0 , 'grd' : 0 }
71+ else :
72+ mnem = INSNS_FILE .mnem_for_word (opcode )
73+ if mnem is None :
74+ raise ValueError (f'No mnemonic for opcode { opcode :#08x} ' )
75+
76+ insn = INSNS_FILE .mnemonic_to_insn [mnem ]
77+ assert insn .encoding is not None
78+ enc_vals = insn .encoding .extract_operands (opcode )
79+
3680 op_vals = insn .enc_vals_to_op_vals (pc , enc_vals )
3781 self .insns [pc ] = (insn , op_vals )
3882
@@ -69,7 +113,7 @@ def _decode_mem(base_addr: int, data: bytes) -> Dict[int, int]:
69113 for offset , int_val in enumerate (struct .iter_unpack ('<I' , data ))}
70114
71115
72- def decode_elf (path : str ) -> OTBNProgram :
116+ def decode_elf (path : str , nop_subfuncs : List [ str ] ) -> OTBNProgram :
73117 '''Read ELF file at path and decode contents into an OTBNProgram instance
74118
75119 Returns the OTBNProgram instance representing the program in the ELF file.
@@ -79,4 +123,4 @@ def decode_elf(path: str) -> OTBNProgram:
79123 insns = _decode_mem (0 , imem_bytes )
80124 data = _decode_mem (0 , dmem_bytes )
81125
82- return OTBNProgram (symbols , insns , data )
126+ return OTBNProgram (symbols , insns , data , nop_subfuncs )
0 commit comments