Skip to content

Commit 4ccedcd

Browse files
committed
Update parser for Rizin 0.4.x
1 parent 155e76c commit 4ccedcd

File tree

2 files changed

+137
-140
lines changed

2 files changed

+137
-140
lines changed

quark/core/rzapkinfo.py

Lines changed: 136 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,6 @@
3232
"long": "J",
3333
"float": "F",
3434
"double": "D",
35-
"Boolean": "Ljava/lang/Boolean;",
36-
"Byte": "Ljava/lang/Byte;",
37-
"Character": "Ljava/lang/Character;",
38-
"Short": "Ljava/lang/Short;",
39-
"Integer": "Ljava/lang/Integer;",
40-
"Long": "Ljava/lang/Long;",
41-
"Float": "Ljava/lang/Float;",
42-
"Double": "Ljava/lang/Double;",
43-
"String": "Ljava/lang/String;",
4435
}
4536

4637
RIZIN_ESCAPE_CHAR_LIST = ["<", ">", "$"]
@@ -89,12 +80,19 @@ def _get_rz(self, index):
8980
return rz
9081

9182
def _convert_type_to_type_signature(self, raw_type: str):
83+
if not raw_type:
84+
return raw_type
85+
9286
if raw_type.endswith("[]"):
9387
return "[" + self._convert_type_to_type_signature(raw_type[:-2])
9488

9589
if raw_type.startswith("["):
9690
return "[" + self._convert_type_to_type_signature(raw_type[1:])
9791

92+
if "..." in raw_type:
93+
index = raw_type.index("...")
94+
return "[" + self._convert_type_to_type_signature(raw_type[:index])
95+
9896
if raw_type in PRIMITIVE_TYPE_MAPPING:
9997
return PRIMITIVE_TYPE_MAPPING[raw_type]
10098

@@ -103,121 +101,126 @@ def _convert_type_to_type_signature(self, raw_type: str):
103101
raw_type = raw_type.replace("_", "$")
104102
return "L" + raw_type + ";"
105103

106-
return raw_type
104+
return "Ljava/lang/" + raw_type + ";"
107105

108106
@staticmethod
109107
def _escape_str_in_rizin_manner(raw_str: str):
110108
for c in RIZIN_ESCAPE_CHAR_LIST:
111109
raw_str = raw_str.replace(c, "_")
112110
return raw_str
113111

114-
@functools.lru_cache
115-
def _get_methods_classified(self, dexindex):
116-
rz = self._get_rz(dexindex)
112+
def _parse_method_from_isj_obj(self, json_obj, dexindex):
113+
if json_obj.get("type") not in ["FUNC", "METH"]:
114+
return None
117115

118-
method_json_list = rz.cmdj("isj")
119-
method_dict = defaultdict(list)
120-
for json_obj in method_json_list:
121-
if json_obj.get("type") not in ["FUNC", "METH"]:
122-
continue
116+
# -- Descriptor --
117+
full_method_name = json_obj["name"]
118+
raw_argument_str = next(
119+
re.finditer("\\(.*\\).*", full_method_name), None
120+
)
121+
if raw_argument_str is None:
122+
return None
123+
124+
raw_argument_str = raw_argument_str.group(0)
123125

124-
# -- Descriptor --
125-
full_method_name = json_obj["name"]
126-
raw_argument_str = next(
127-
re.finditer("\\(.*\\).*", full_method_name), None
126+
if raw_argument_str.endswith(")"):
127+
# Convert Java language type to JVM type signature
128+
129+
# Parse the arguments
130+
raw_argument_str = raw_argument_str[1:-1]
131+
arguments = [
132+
self._convert_type_to_type_signature(arg)
133+
for arg in raw_argument_str.split(", ")
134+
]
135+
136+
# Parse the return type
137+
return_type = next(
138+
re.finditer(
139+
"[A-Za-zL][A-Za-z0-9L/\\;[\\]$.]+ ", full_method_name
140+
),
141+
None,
142+
)
143+
if return_type is None:
144+
print(f"Unresolved method signature: {full_method_name}")
145+
return None
146+
return_type = return_type.group(0).strip()
147+
148+
# Convert
149+
raw_argument_str = (
150+
"("
151+
+ " ".join(arguments)
152+
+ ")"
153+
+ self._convert_type_to_type_signature(return_type)
128154
)
129-
if raw_argument_str is None:
130-
continue
131-
raw_argument_str = raw_argument_str.group(0)
132155

133-
if raw_argument_str.endswith(")"):
134-
# Convert Java language type to JVM type signature
156+
descriptor = descriptor_to_androguard_format(raw_argument_str)
135157

136-
# Parse the arguments
137-
raw_argument_str = raw_argument_str[1:-1]
138-
arguments = [
139-
self._convert_type_to_type_signature(arg)
140-
for arg in raw_argument_str.split(", ")
141-
]
158+
# -- Method name --
159+
method_name = json_obj["realname"]
142160

143-
# Parse the return type
144-
return_type = next(
145-
re.finditer(
146-
"[A-Za-zL][A-Za-z0-9L/\\;[\\]$.]+ ", full_method_name
147-
),
148-
None,
149-
)
150-
if return_type is None:
151-
print(f"Unresolved method signature: {full_method_name}")
152-
continue
153-
return_type = return_type.group(0).strip()
154-
155-
# Convert
156-
raw_argument_str = (
157-
"("
158-
+ " ".join(arguments)
159-
+ ")"
160-
+ self._convert_type_to_type_signature(return_type)
161-
)
161+
# -- Is imported --
162+
is_imported = json_obj["is_imported"]
162163

163-
descriptor = descriptor_to_androguard_format(raw_argument_str)
164+
# -- Class name --
165+
# Test if the class name is truncated
166+
escaped_method_name = self._escape_str_in_rizin_manner(method_name)
167+
if escaped_method_name.endswith("_"):
168+
escaped_method_name = escaped_method_name[:-1]
164169

165-
# -- Method name --
166-
method_name = json_obj["realname"]
170+
flag_name = json_obj["flagname"]
167171

168-
# -- Is imported --
169-
is_imported = json_obj["is_imported"]
172+
# sym.imp.clone doesn't belong to a class
173+
if flag_name == "sym.imp.clone":
174+
method = MethodObject(
175+
class_name="",
176+
name="clone",
177+
descriptor="()Ljava/lang/Object;",
178+
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
179+
)
180+
return method
170181

171-
# -- Class name --
172-
# Test if the class name is truncated
173-
escaped_method_name = self._escape_str_in_rizin_manner(method_name)
174-
if escaped_method_name.endswith("_"):
175-
escaped_method_name = escaped_method_name[:-1]
182+
if escaped_method_name not in flag_name:
183+
logging.warning(
184+
f"The class name may be truncated: {json_obj['flagname']}"
185+
)
176186

177-
flag_name = json_obj["flagname"]
187+
# Drop the method name
188+
match = None
189+
for match in re.finditer("_+[A-Za-z]+", flag_name):
190+
pass
191+
if match is None:
192+
logging.warning(f"Skip the damaged flag: {json_obj['flagname']}")
193+
return None
194+
match = match.group(0)
195+
flag_name = flag_name[: flag_name.rfind(match)]
178196

179-
# sym.imp.clone doesn't belong to a class
180-
if flag_name == "sym.imp.clone":
181-
method = MethodObject(
182-
class_name="",
183-
name="clone",
184-
descriptor="()Ljava/lang/Object;",
185-
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
186-
)
187-
method_dict[""].append(method)
188-
continue
197+
# Drop the prefixes sym. and imp.
198+
while flag_name.startswith("sym.") or flag_name.startswith("imp."):
199+
flag_name = flag_name[4:]
189200

190-
if escaped_method_name not in flag_name:
191-
logging.warning(
192-
f"The class name may be truncated: {json_obj['flagname']}"
193-
)
201+
class_name = self._convert_type_to_type_signature(flag_name)
194202

195-
# Drop the method name
196-
match = None
197-
for match in re.finditer("_+[A-Za-z]+", flag_name):
198-
pass
199-
if match is None:
200-
logging.warning(
201-
f"Skip the damaged flag: {json_obj['flagname']}"
202-
)
203-
continue
204-
match = match.group(0)
205-
flag_name = flag_name[: flag_name.rfind(match)]
203+
# Append the method
204+
method = MethodObject(
205+
class_name=class_name,
206+
name=method_name,
207+
descriptor=descriptor,
208+
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
209+
)
206210

207-
# Drop the prefixes sym. and imp.
208-
while flag_name.startswith("sym.") or flag_name.startswith("imp."):
209-
flag_name = flag_name[4:]
211+
return method
210212

211-
class_name = self._convert_type_to_type_signature(flag_name)
213+
@functools.lru_cache
214+
def _get_methods_classified(self, dexindex):
215+
rz = self._get_rz(dexindex)
212216

213-
# Append the method
214-
method = MethodObject(
215-
class_name=class_name,
216-
name=method_name,
217-
descriptor=descriptor,
218-
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
219-
)
220-
method_dict[class_name].append(method)
217+
method_json_list = rz.cmdj("isj")
218+
method_dict = defaultdict(list)
219+
for json_obj in method_json_list:
220+
method = self._parse_method_from_isj_obj(json_obj, dexindex)
221+
222+
if method:
223+
method_dict[method.class_name].append(method)
221224

222225
# Remove duplicates
223226
for class_name, method_list in method_dict.items():
@@ -296,19 +299,19 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
296299
if xref["type"] != "CALL":
297300
continue
298301

299-
if "fcn_addr" in xref:
300-
matched_method = self._get_method_by_address(xref["fcn_addr"])
302+
if "from" in xref:
303+
matched_method = self._get_method_by_address(xref["from"])
301304
if not matched_method:
302305
logging.debug(
303-
f"Cannot identify function at {xref['fcn_addr']}."
306+
f"Cannot identify function at {xref['from']}."
304307
)
305308
continue
306309

307310
upperfunc_set.add(matched_method)
308311
else:
309312
logging.debug(
310-
f"Key from was not found at searching"
311-
f" upper methods for {method_object}."
313+
f"Key from was not found when trying to search"
314+
f" upper methods of {method_object}."
312315
)
313316

314317
return upperfunc_set
@@ -317,41 +320,32 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
317320
def lowerfunc(self, method_object: MethodObject) -> Set[MethodObject]:
318321
cache = method_object.cache
319322

320-
r2 = self._get_rz(cache.dexindex)
321-
322-
xrefs = r2.cmdj(f"axffj @ {cache.address}")
323+
rz = self._get_rz(cache.dexindex)
323324

324-
if not xrefs:
325-
return set()
325+
instruct_flow = rz.cmdj(f"pdfj @ {cache.address}")["ops"]
326326

327-
lowerfunc_set = set()
328-
for xref in xrefs:
329-
if xref["type"] != "CALL":
330-
continue
327+
lowerfunc_list = []
328+
for ins in instruct_flow:
329+
if "xrefs_from" in ins:
330+
call_xrefs = (
331+
xref
332+
for xref in ins["xrefs_from"]
333+
if xref["type"] == "CALL"
334+
)
331335

332-
if "to" in xref:
333-
matched_method = self._get_method_by_address(xref["to"])
334-
if not matched_method:
335-
logging.debug(
336-
f"Cannot identify function at {xref['fcn_addr']}."
337-
)
338-
continue
336+
for call_xref in call_xrefs:
337+
lowerfunc = self._get_method_by_address(call_xref["addr"])
338+
if not lowerfunc:
339+
logging.debug(
340+
f"Cannot identify function at {call_xref['addr']}."
341+
)
342+
continue
339343

340-
offset = xref["from"] - cache.address
344+
offset = ins["offset"] - cache.address
341345

342-
lowerfunc_set.add(
343-
(
344-
matched_method,
345-
offset,
346-
)
347-
)
348-
else:
349-
logging.debug(
350-
f"Key from was not found at searching"
351-
f" upper methods for {method_object}."
352-
)
346+
lowerfunc_list.append((lowerfunc, offset))
353347

354-
return lowerfunc_set
348+
return lowerfunc_list
355349

356350
def get_method_bytecode(
357351
self, method_object: MethodObject
@@ -482,12 +476,15 @@ def subclass_relationships(self) -> Dict[str, Set[str]]:
482476
return hierarchy_dict
483477

484478
def _get_method_by_address(self, address: int) -> MethodObject:
485-
if address < 0:
486-
return None
479+
dexindex = 0
487480

488-
for method in self.all_methods:
489-
if method.cache.address == address:
490-
return method
481+
rz = self._get_rz(dexindex)
482+
json_array = rz.cmdj(f"is.j @ {address}")
483+
484+
if json_array:
485+
return self._parse_method_from_isj_obj(json_array[0], dexindex)
486+
else:
487+
return None
491488

492489
@staticmethod
493490
def _parse_smali(smali: str) -> BytecodeObject:

quark/utils/tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def descriptor_to_androguard_format(descriptor):
4848

4949
delimiter = descriptor.index(")")
5050

51-
arg_str = descriptor[:delimiter]
51+
arg_str = descriptor[1:delimiter]
5252
args = re.findall(r"L.+?;|[ZBCSIJFD]|\[", arg_str)
5353

5454
new_descriptor = "(" + " ".join(args) + descriptor[delimiter:]

0 commit comments

Comments
 (0)