Skip to content

Commit 972d4bb

Browse files
haeter525 authored and sidra-asa committed
Update parser for Rizin 0.4.x
1 parent 1c29381 commit 972d4bb

File tree

2 files changed

+137
-140
lines changed

2 files changed

+137
-140
lines changed

quark/core/rzapkinfo.py

Lines changed: 136 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,6 @@
3535
"long": "J",
3636
"float": "F",
3737
"double": "D",
38-
"Boolean": "Ljava/lang/Boolean;",
39-
"Byte": "Ljava/lang/Byte;",
40-
"Character": "Ljava/lang/Character;",
41-
"Short": "Ljava/lang/Short;",
42-
"Integer": "Ljava/lang/Integer;",
43-
"Long": "Ljava/lang/Long;",
44-
"Float": "Ljava/lang/Float;",
45-
"Double": "Ljava/lang/Double;",
46-
"String": "Ljava/lang/String;",
4738
}
4839

4940
RIZIN_ESCAPE_CHAR_LIST = ["<", ">", "$"]
@@ -92,12 +83,19 @@ def _get_rz(self, index):
9283
return rz
9384

9485
def _convert_type_to_type_signature(self, raw_type: str):
86+
if not raw_type:
87+
return raw_type
88+
9589
if raw_type.endswith("[]"):
9690
return "[" + self._convert_type_to_type_signature(raw_type[:-2])
9791

9892
if raw_type.startswith("["):
9993
return "[" + self._convert_type_to_type_signature(raw_type[1:])
10094

95+
if "..." in raw_type:
96+
index = raw_type.index("...")
97+
return "[" + self._convert_type_to_type_signature(raw_type[:index])
98+
10199
if raw_type in PRIMITIVE_TYPE_MAPPING:
102100
return PRIMITIVE_TYPE_MAPPING[raw_type]
103101

@@ -106,121 +104,126 @@ def _convert_type_to_type_signature(self, raw_type: str):
106104
raw_type = raw_type.replace("_", "$")
107105
return "L" + raw_type + ";"
108106

109-
return raw_type
107+
return "Ljava/lang/" + raw_type + ";"
110108

111109
@staticmethod
112110
def _escape_str_in_rizin_manner(raw_str: str):
113111
for c in RIZIN_ESCAPE_CHAR_LIST:
114112
raw_str = raw_str.replace(c, "_")
115113
return raw_str
116114

117-
@functools.lru_cache
118-
def _get_methods_classified(self, dexindex):
119-
rz = self._get_rz(dexindex)
115+
def _parse_method_from_isj_obj(self, json_obj, dexindex):
116+
if json_obj.get("type") not in ["FUNC", "METH"]:
117+
return None
120118

121-
method_json_list = rz.cmdj("isj")
122-
method_dict = defaultdict(list)
123-
for json_obj in method_json_list:
124-
if json_obj.get("type") not in ["FUNC", "METH"]:
125-
continue
119+
# -- Descriptor --
120+
full_method_name = json_obj["name"]
121+
raw_argument_str = next(
122+
re.finditer("\\(.*\\).*", full_method_name), None
123+
)
124+
if raw_argument_str is None:
125+
return None
126+
127+
raw_argument_str = raw_argument_str.group(0)
126128

127-
# -- Descriptor --
128-
full_method_name = json_obj["name"]
129-
raw_argument_str = next(
130-
re.finditer("\\(.*\\).*", full_method_name), None
129+
if raw_argument_str.endswith(")"):
130+
# Convert Java language type to JVM type signature
131+
132+
# Parse the arguments
133+
raw_argument_str = raw_argument_str[1:-1]
134+
arguments = [
135+
self._convert_type_to_type_signature(arg)
136+
for arg in raw_argument_str.split(", ")
137+
]
138+
139+
# Parse the return type
140+
return_type = next(
141+
re.finditer(
142+
"[A-Za-zL][A-Za-z0-9L/\\;[\\]$.]+ ", full_method_name
143+
),
144+
None,
145+
)
146+
if return_type is None:
147+
print(f"Unresolved method signature: {full_method_name}")
148+
return None
149+
return_type = return_type.group(0).strip()
150+
151+
# Convert
152+
raw_argument_str = (
153+
"("
154+
+ " ".join(arguments)
155+
+ ")"
156+
+ self._convert_type_to_type_signature(return_type)
131157
)
132-
if raw_argument_str is None:
133-
continue
134-
raw_argument_str = raw_argument_str.group(0)
135158

136-
if raw_argument_str.endswith(")"):
137-
# Convert Java language type to JVM type signature
159+
descriptor = descriptor_to_androguard_format(raw_argument_str)
138160

139-
# Parse the arguments
140-
raw_argument_str = raw_argument_str[1:-1]
141-
arguments = [
142-
self._convert_type_to_type_signature(arg)
143-
for arg in raw_argument_str.split(", ")
144-
]
161+
# -- Method name --
162+
method_name = json_obj["realname"]
145163

146-
# Parse the return type
147-
return_type = next(
148-
re.finditer(
149-
"[A-Za-zL][A-Za-z0-9L/\\;[\\]$.]+ ", full_method_name
150-
),
151-
None,
152-
)
153-
if return_type is None:
154-
print(f"Unresolved method signature: {full_method_name}")
155-
continue
156-
return_type = return_type.group(0).strip()
157-
158-
# Convert
159-
raw_argument_str = (
160-
"("
161-
+ " ".join(arguments)
162-
+ ")"
163-
+ self._convert_type_to_type_signature(return_type)
164-
)
164+
# -- Is imported --
165+
is_imported = json_obj["is_imported"]
165166

166-
descriptor = descriptor_to_androguard_format(raw_argument_str)
167+
# -- Class name --
168+
# Test if the class name is truncated
169+
escaped_method_name = self._escape_str_in_rizin_manner(method_name)
170+
if escaped_method_name.endswith("_"):
171+
escaped_method_name = escaped_method_name[:-1]
167172

168-
# -- Method name --
169-
method_name = json_obj["realname"]
173+
flag_name = json_obj["flagname"]
170174

171-
# -- Is imported --
172-
is_imported = json_obj["is_imported"]
175+
# sym.imp.clone doesn't belong to a class
176+
if flag_name == "sym.imp.clone":
177+
method = MethodObject(
178+
class_name="",
179+
name="clone",
180+
descriptor="()Ljava/lang/Object;",
181+
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
182+
)
183+
return method
173184

174-
# -- Class name --
175-
# Test if the class name is truncated
176-
escaped_method_name = self._escape_str_in_rizin_manner(method_name)
177-
if escaped_method_name.endswith("_"):
178-
escaped_method_name = escaped_method_name[:-1]
185+
if escaped_method_name not in flag_name:
186+
logging.warning(
187+
f"The class name may be truncated: {json_obj['flagname']}"
188+
)
179189

180-
flag_name = json_obj["flagname"]
190+
# Drop the method name
191+
match = None
192+
for match in re.finditer("_+[A-Za-z]+", flag_name):
193+
pass
194+
if match is None:
195+
logging.warning(f"Skip the damaged flag: {json_obj['flagname']}")
196+
return None
197+
match = match.group(0)
198+
flag_name = flag_name[: flag_name.rfind(match)]
181199

182-
# sym.imp.clone doesn't belong to a class
183-
if flag_name == "sym.imp.clone":
184-
method = MethodObject(
185-
class_name="",
186-
name="clone",
187-
descriptor="()Ljava/lang/Object;",
188-
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
189-
)
190-
method_dict[""].append(method)
191-
continue
200+
# Drop the prefixes sym. and imp.
201+
while flag_name.startswith("sym.") or flag_name.startswith("imp."):
202+
flag_name = flag_name[4:]
192203

193-
if escaped_method_name not in flag_name:
194-
logging.warning(
195-
f"The class name may be truncated: {json_obj['flagname']}"
196-
)
204+
class_name = self._convert_type_to_type_signature(flag_name)
197205

198-
# Drop the method name
199-
match = None
200-
for match in re.finditer("_+[A-Za-z]+", flag_name):
201-
pass
202-
if match is None:
203-
logging.warning(
204-
f"Skip the damaged flag: {json_obj['flagname']}"
205-
)
206-
continue
207-
match = match.group(0)
208-
flag_name = flag_name[: flag_name.rfind(match)]
206+
# Append the method
207+
method = MethodObject(
208+
class_name=class_name,
209+
name=method_name,
210+
descriptor=descriptor,
211+
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
212+
)
209213

210-
# Drop the prefixes sym. and imp.
211-
while flag_name.startswith("sym.") or flag_name.startswith("imp."):
212-
flag_name = flag_name[4:]
214+
return method
213215

214-
class_name = self._convert_type_to_type_signature(flag_name)
216+
@functools.lru_cache
217+
def _get_methods_classified(self, dexindex):
218+
rz = self._get_rz(dexindex)
215219

216-
# Append the method
217-
method = MethodObject(
218-
class_name=class_name,
219-
name=method_name,
220-
descriptor=descriptor,
221-
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
222-
)
223-
method_dict[class_name].append(method)
220+
method_json_list = rz.cmdj("isj")
221+
method_dict = defaultdict(list)
222+
for json_obj in method_json_list:
223+
method = self._parse_method_from_isj_obj(json_obj, dexindex)
224+
225+
if method:
226+
method_dict[method.class_name].append(method)
224227

225228
# Remove duplicates
226229
for class_name, method_list in method_dict.items():
@@ -347,19 +350,19 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
347350
if xref["type"] != "CALL":
348351
continue
349352

350-
if "fcn_addr" in xref:
351-
matched_method = self._get_method_by_address(xref["fcn_addr"])
353+
if "from" in xref:
354+
matched_method = self._get_method_by_address(xref["from"])
352355
if not matched_method:
353356
logging.debug(
354-
f"Cannot identify function at {xref['fcn_addr']}."
357+
f"Cannot identify function at {xref['from']}."
355358
)
356359
continue
357360

358361
upperfunc_set.add(matched_method)
359362
else:
360363
logging.debug(
361-
f"Key from was not found at searching"
362-
f" upper methods for {method_object}."
364+
f"Key from was not found when trying to search"
365+
f" upper methods of {method_object}."
363366
)
364367

365368
return upperfunc_set
@@ -368,41 +371,32 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
368371
def lowerfunc(self, method_object: MethodObject) -> Set[MethodObject]:
369372
cache = method_object.cache
370373

371-
r2 = self._get_rz(cache.dexindex)
372-
373-
xrefs = r2.cmdj(f"axffj @ {cache.address}")
374+
rz = self._get_rz(cache.dexindex)
374375

375-
if not xrefs:
376-
return set()
376+
instruct_flow = rz.cmdj(f"pdfj @ {cache.address}")["ops"]
377377

378-
lowerfunc_set = set()
379-
for xref in xrefs:
380-
if xref["type"] != "CALL":
381-
continue
378+
lowerfunc_list = []
379+
for ins in instruct_flow:
380+
if "xrefs_from" in ins:
381+
call_xrefs = (
382+
xref
383+
for xref in ins["xrefs_from"]
384+
if xref["type"] == "CALL"
385+
)
382386

383-
if "to" in xref:
384-
matched_method = self._get_method_by_address(xref["to"])
385-
if not matched_method:
386-
logging.debug(
387-
f"Cannot identify function at {xref['fcn_addr']}."
388-
)
389-
continue
387+
for call_xref in call_xrefs:
388+
lowerfunc = self._get_method_by_address(call_xref["addr"])
389+
if not lowerfunc:
390+
logging.debug(
391+
f"Cannot identify function at {call_xref['addr']}."
392+
)
393+
continue
390394

391-
offset = xref["from"] - cache.address
395+
offset = ins["offset"] - cache.address
392396

393-
lowerfunc_set.add(
394-
(
395-
matched_method,
396-
offset,
397-
)
398-
)
399-
else:
400-
logging.debug(
401-
f"Key from was not found at searching"
402-
f" upper methods for {method_object}."
403-
)
397+
lowerfunc_list.append((lowerfunc, offset))
404398

405-
return lowerfunc_set
399+
return lowerfunc_list
406400

407401
def get_method_bytecode(
408402
self, method_object: MethodObject
@@ -533,12 +527,15 @@ def subclass_relationships(self) -> Dict[str, Set[str]]:
533527
return hierarchy_dict
534528

535529
def _get_method_by_address(self, address: int) -> MethodObject:
536-
if address < 0:
537-
return None
530+
dexindex = 0
538531

539-
for method in self.all_methods:
540-
if method.cache.address == address:
541-
return method
532+
rz = self._get_rz(dexindex)
533+
json_array = rz.cmdj(f"is.j @ {address}")
534+
535+
if json_array:
536+
return self._parse_method_from_isj_obj(json_array[0], dexindex)
537+
else:
538+
return None
542539

543540
@staticmethod
544541
def _parse_parameter(mnemonic: str, parameter: str) -> Any:

quark/utils/tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def descriptor_to_androguard_format(descriptor):
4949

5050
delimiter = descriptor.index(")")
5151

52-
arg_str = descriptor[:delimiter]
52+
arg_str = descriptor[1:delimiter]
5353
args = re.findall(r"L.+?;|[ZBCSIJFD]|\[", arg_str)
5454

5555
new_descriptor = "(" + " ".join(args) + descriptor[delimiter:]

0 commit comments

Comments
 (0)