@@ -208,13 +208,71 @@ def _find_reusable_zero_reg(start_idx: int) -> Optional[str]:
208208 return out
209209
210210
211- def _x86_mov_reg_imm8 (lines : List [str ]) -> List [str ]:
211+ def _x86_mov_reg_imm_narrow (lines : List [str ], arch : str = "x86_64" ) -> List [str ]:
212+ """Narrow MOV destination to the smallest register that can hold the immediate.
213+
214+ Risky: using 8/16-bit forms will not zero upper bits of the 32/64-bit register.
215+ """
212216 out : List [str ] = []
217+
218+ def _arch_is_64 (a : str ) -> bool :
219+ al = (a or '' ).lower ()
220+ return 'x86_64' in al or 'amd64' in al or 'x64' in al
221+
222+ is64 = _arch_is_64 (arch )
223+
213224 # Capture indentation, spacing, register and immediate
214225 mov_re = re .compile (
215226 r"^(?P<indent>\s*)mov(?P<sep>\s+)(?P<reg>r(?:[0-9]{1,2}|[abcd]x|[sb]p|si|di)|e[abcd]x|e[sb]p|e[sd]i)\s*,\s*(?P<imm>0x[0-9a-fA-F]+|\d+)\s*(?P<cmt>;.*)?$" ,
216227 re .I ,
217228 )
229+
def _narrow_reg(reg: str, bits: int) -> Optional[str]:
    """Return the ``bits``-wide alias of general-purpose register ``reg``.

    ``reg`` is matched case-insensitively. Accepted inputs are the
    ``e``-prefixed 32-bit names and, when the enclosing ``is64`` flag is
    set, the ``r``-prefixed 64-bit names including ``r8``..``r15``.
    ``bits`` must be 8, 16 or 32.

    Returns ``None`` when no alias exists: an unknown register, an
    unsupported width, a 64-bit name outside 64-bit mode, or an 8-bit
    form of si/di/bp/sp outside 64-bit mode (``sil``/``dil``/``bpl``/
    ``spl`` exist only in 64-bit mode).
    """
    name = reg.lower()
    prefix, stem = name[:1], name[1:]

    # 8-bit low aliases keyed by the 16-bit register name.
    low8 = {'ax': 'al', 'bx': 'bl', 'cx': 'cl', 'dx': 'dl'}
    # These 8-bit aliases are only offered in 64-bit mode.
    low8_64_only = {'si': 'sil', 'di': 'dil', 'bp': 'bpl', 'sp': 'spl'}

    if stem in low8 or stem in low8_64_only:
        # Legacy families: eXX is always accepted, rXX only in 64-bit mode.
        if prefix != 'e' and not (prefix == 'r' and is64):
            return None
        if bits == 32:
            return 'e' + stem
        if bits == 16:
            return stem
        if bits == 8:
            if stem in low8:
                return low8[stem]
            return low8_64_only[stem] if is64 else None
        return None

    # Numbered registers r8..r15 exist only in 64-bit mode.
    if is64 and prefix == 'r' and stem.isdecimal():
        number = int(stem)
        if 8 <= number <= 15:
            # Unsupported widths yield None instead of raising KeyError,
            # matching the behaviour of the legacy-register branch.
            suffix = {8: 'b', 16: 'w', 32: 'd'}.get(bits)
            if suffix is not None:
                return f"r{number}{suffix}"
    return None
275+
218276 for ln in lines :
219277 m = mov_re .match (ln )
220278 if not m :
@@ -230,30 +288,14 @@ def _x86_mov_reg_imm8(lines: List[str]) -> List[str]:
230288 except Exception :
231289 out .append (ln )
232290 continue
233- if 0 <= imm <= 0xFF :
234- # Map 64-bit regs to 8-bit low regs (x86_64)
235- reg8_map = {
236- 'rax' : 'al' , 'rbx' : 'bl' , 'rcx' : 'cl' , 'rdx' : 'dl' ,
237- 'rsi' : 'sil' , 'rdi' : 'dil' , 'rbp' : 'bpl' , 'rsp' : 'spl' ,
238- 'eax' : 'al' , 'ebx' : 'bl' , 'ecx' : 'cl' , 'edx' : 'dl' ,
239- 'esi' : 'sil' , 'edi' : 'dil' , 'ebp' : 'bpl' , 'esp' : 'spl' ,
240- }
241- reg8 = None
242- if reg in reg8_map :
243- reg8 = reg8_map [reg ]
244- elif reg .startswith ('r' ) and reg [1 :].isdigit ():
245- # r8-r15 family
246- try :
247- num = int (reg [1 :])
248- if 8 <= num <= 15 :
249- reg8 = f"r{ num } b"
250- except Exception :
251- reg8 = None
252- # If we found a valid 8-bit register, emit replacement
253- if reg8 :
254- out .append (f"{ indent } mov{ sep } { reg8 } , { imm_s } { cmt } " )
255- else :
256- out .append (ln )
291+ # Determine minimal width that fits the immediate
292+ width = 8 if 0 <= imm <= 0xFF else (16 if imm <= 0xFFFF else (32 if imm <= 0xFFFFFFFF else None ))
293+ if width is None :
294+ out .append (ln )
295+ continue
296+ narrow = _narrow_reg (reg , int (width ))
297+ if narrow :
298+ out .append (f"{ indent } mov{ sep } { narrow } , { imm_s } { cmt } " )
257299 else :
258300 out .append (ln )
259301 return out
@@ -275,10 +317,10 @@ def default_rules_for_arch(arch: str) -> List[TransformRule]:
# Width-narrowing rule for `mov reg, imm`. It is disabled by default because
# it is risky: 8/16-bit destination forms do not zero the upper bits of the
# full 32/64-bit register.
rules.append(
    TransformRule(
        name="mov-reg-imm8-to-mov-reg8",
        description="Use minimal-width destination for immediate (risky)",
        # Architecture names carry no padding; a stray leading space
        # (" x86_64") would never equal the real architecture string.
        archs=["x86", "x86_64"],
        enabled=False,
        # `a` is bound as a default argument so the lambda captures its value
        # at rule-creation time rather than at call time.
        # NOTE(review): `a` is presumably the architecture name in the
        # enclosing scope -- confirm against the surrounding function.
        apply=lambda asm, arch=a: _join(_x86_mov_reg_imm_narrow(_normalize_asm(asm), arch)),
    )
)
284326 return rules
0 commit comments