Skip to content

Commit d6e13c7

Browse files
committed
[armv7/thumb2] Fix #6947: lifting for the PC-relative vldr instruction does not align PC when calculating the address
Updated the thumb2 pcode parser used by the disassembler generator to use TatSu instead of the deprecated Grako
1 parent ce38990 commit d6e13c7

File tree

8 files changed

+343
-274
lines changed

8 files changed

+343
-274
lines changed

arch/armv7/il.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4933,6 +4933,30 @@ bool GetLowLevelILForArmInstruction(Architecture* arch, uint64_t addr, LowLevelI
49334933
ConditionExecute(il, instr.cond, SetRegisterOrBranch(il, op1.reg,
49344934
il.DivUnsigned(get_register_size(op2.reg), ReadRegisterOrPointer(il, op2, addr), ReadRegisterOrPointer(il, op3, addr))));
49354935
break;
4936+
case ARMV7_VCVT:
4937+
switch (instr.dataType)
4938+
{
4939+
case DT_S32:
4940+
case DT_U32:
4941+
switch (instr.dataType2)
4942+
{
4943+
case DT_F32:
4944+
case DT_F64:
4945+
// ConditionExecute(il, instr.cond,
4946+
// il.SetRegister(get_register_size(op1.reg), op1.reg,
4947+
// il.FloatToInt(4, il.Register(get_register_size(op2.reg), op2.reg))));
4948+
ConditionExecute(il, instr.cond,
4949+
il.SetRegister(get_register_size(op1.reg), op1.reg,
4950+
il.FloatToInt(get_register_size(op1.reg), il.Register(get_register_size(op2.reg), op2.reg))));
4951+
break;
4952+
default:
4953+
break;
4954+
}
4955+
break;
4956+
default:
4957+
break;
4958+
}
4959+
break;
49364960
case ARMV7_VADD:
49374961
if((instr.dataType != DT_F32) && (instr.dataType != DT_F64))
49384962
break;

arch/armv7/thumb2_disasm/arm_pcode_parser/README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@ translate the pseudocode for arm instructions (given in the docs) to target lang
44
once the pcode is extracted, automatic generation of ultra-accurate disassemblers should become possible
55

66
# how
7-
use Grako parser generator, describe the language (pcode.ebnf) and write code generator (codegen.py)
7+
~~use Grako parser generator, describe the language (pcode.ebnf) and write code generator (codegen.py)~~
8+
use the TatSu parser generator: describe the language (pcode.ebnf), write the code generator (codegen.py), and regenerate the parser (parse.py) with:
9+
10+
```
11+
python3 -m tatsu --name pcode pcode.ebnf -o parse.py
12+
```
813

914
# example
1015
input statement:

arch/armv7/thumb2_disasm/arm_pcode_parser/codegencpp.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@
33
import re
44
import os
55
import sys
6+
from collections.abc import Sequence
7+
from pprint import pformat
68

7-
from parse import pcodeParser, pcodeSemantics
9+
from parse import pcodeParser
810

911
DEBUG = 0
1012

1113
###############################################################################
1214
# misc utils
1315
###############################################################################
1416

17+
def is_seq(x):
18+
return type(x) not in [str, bytes] and isinstance(x, Sequence)
19+
1520
# convert "MOV (register)" text to the function that handles it
1621
# ->"mov_register"
1722
def convertHandlerName(name):
@@ -43,10 +48,20 @@ def applyIndent(text, level=0):
4348
class BetterNode(object):
4449
def __init__(self, name, children=[], semicolon=False):
4550
self.name = name
46-
self.children = children
51+
if isinstance(children, filter):
52+
self.children = list(children)
53+
else:
54+
self.children = children
4755
self.semicolon = semicolon
4856

57+
def __repr__(self):
58+
if is_seq(self.children):
59+
return 'BetterNode(%s)[%d]%s{\n %s\n}' % (self.name, len(self.children), ';' if self.semicolon else '', pformat(list(map(repr, self.children)), indent=4))
60+
else:
61+
return 'BetterNode(%s)[%r]' % (self.name, self.children)
62+
4963
def gen(self, extra=''):
64+
code = 'BOGUS'
5065
# leaf nodes (no possible descent)
5166
if self.name == 'ident':
5267
tmp = (self.children[0] + extra).replace('.', '_')
@@ -64,8 +79,17 @@ def gen(self, extra=''):
6479
code = '\nreturn %s(req, res);' % self.children[0]
6580
self.semicolon = 0
6681
else:
67-
subCode = map(lambda x: x.gen(), self.children)
68-
subCode = tuple(subCode)
82+
83+
def c_gen(c):
84+
if type(c) is str:
85+
return c
86+
elif is_seq(c):
87+
c = tuple(map(c_gen, c))
88+
return c
89+
else:
90+
return ''.join(c.gen())
91+
92+
subCode = c_gen(self.children)
6993

7094
# binary operations translate directly to C
7195
if self.name == 'xor':
@@ -181,7 +205,7 @@ def gen(self, extra=''):
181205
if shamt:
182206
code = '((%s >> %d) & 1)' % (subCode[0], shamt)
183207
else:
184-
code = '(%s & 1)' % subCode[0]
208+
code = '(%s & 1)' % subCode[0]
185209
else:
186210
# there is a bit range to extract, [hi,lo]
187211
hi = int(subCode[1])
@@ -192,7 +216,6 @@ def gen(self, extra=''):
192216
code = '((%s >> %d) & 0x%X)' % (subCode[0], lo, 2**width-1)
193217
else:
194218
code = '(%s & 0x%X)' % (subCode[0], 2**width-1)
195-
196219
# if else
197220
elif self.name == 'if':
198221
if len(subCode) == 2:
@@ -467,7 +490,7 @@ def tuple(self, ast):
467490
def expr0(self, ast):
468491
rv = None
469492

470-
if type(ast) == type([]):
493+
if is_seq(ast):
471494
lookup = {'EOR':'xor', '+':'add', '-':'sub',
472495
'&&':'log_and', '||':'log_or' }
473496

@@ -493,7 +516,7 @@ def expr0(self, ast):
493516
def expr1(self, ast):
494517
rv = ast
495518

496-
if type(ast) == type([]):
519+
if is_seq(ast):
497520
lookup = {'*':'mul', '/':'div', 'XOR':'xor', 'DIV':'div', '==':'equals', '!=':'not_equals',
498521
'<':'less_than', '>':'greater_than', '<<':'shl', '>>':'rshl',
499522
'>=':'greater_than_or_equals', '<=':'less_than_or_equals'}
@@ -529,7 +552,7 @@ def expr2(self, ast):
529552
def expr3(self, ast):
530553
rv = 'BLUNDER'
531554

532-
if type(ast) == type([]):
555+
if is_seq(ast):
533556
#print('ast is: ', ast)
534557

535558
# empty closure, return original
@@ -540,7 +563,7 @@ def expr3(self, ast):
540563
rv = BetterNode('group', [ast[1]])
541564
elif ast[0] == '!':
542565
rv = BetterNode('log_not', [ast[1]])
543-
elif type(ast[1]==[]):
566+
elif is_seq(ast[1]):
544567
closure = ast[1]
545568
assert closure[0][0] == ':'
546569
bn = BetterNode('concat', [ast[0], closure[0][1]])
@@ -657,7 +680,7 @@ def func_call(self, ast):
657680
if type(ast) == type(u'x'):
658681
funcName = ast[:-2]
659682
# function with arguments
660-
elif type(ast) == type([]):
683+
elif is_seq(ast):
661684
funcName = str(ast[0][:-1])
662685
args = filter(lambda x: x!=',', ast[1:-1])
663686

@@ -725,7 +748,7 @@ def genBlock(pcode, comments=True):
725748
(caseVar, indent) = (None, 0)
726749

727750
for l in lines:
728-
#print('line is: -%s-' % l)
751+
# print('line is: -%s-' % l, file=sys.stderr)
729752
if l[0:5] == 'case ':
730753
m = re.match(r'^case (.*) of', l)
731754
result.append('/* pcode: %s */' % l.lstrip())
@@ -763,6 +786,8 @@ def genBlock(pcode, comments=True):
763786
result.append('}')
764787
(caseVar, indent) = (None, 0)
765788
code = gen(l)
789+
# print(f'code is: -{code}-', file=sys.stderr)
790+
766791
result.append(code)
767792

768793
return '\n'.join(result)

arch/armv7/thumb2_disasm/arm_pcode_parser/filter.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,20 @@
1010
import os
1111
import sys
1212

13-
print "filtering %s" % sys.argv[1]
13+
print("filtering %s" % sys.argv[1])
1414
fp = open(sys.argv[1],'rb')
1515
buf = fp.read()
1616
fp.close()
1717

1818
len0 = len(buf)
19-
print "file size before: %d\n" % len0
19+
print("file size before: %d\n" % len0)
2020
buf = buf.replace("\xe2\x80\x98", "'")
2121
buf = buf.replace("\xe2\x80\x99", "'")
2222
buf = buf.replace("\xe2\x80\x9C", '"')
2323
buf = buf.replace("\xe2\x80\x9D", '"')
2424
len1 = len(buf)
25-
print "file size after: %d\n" % len1
26-
print "(%d stupid quotes replaced)" % ((len0-len1)/3)
25+
print("file size after: %d\n" % len1)
26+
print("(%d stupid quotes replaced)" % ((len0-len1)/3))
2727

2828
fp = open(sys.argv[1],'wb')
2929
fp.write(buf)

0 commit comments

Comments
 (0)