Skip to content

Commit 73db8ff

Browse files
authored
Merge pull request #318 from haeter525/update_parser_for_rizin_v0.3.x
Update the analysis library for Rizin v0.3.x
2 parents deafc76 + 66c1b92 commit 73db8ff

File tree

5 files changed

+193
-55
lines changed

5 files changed

+193
-55
lines changed

.github/workflows/pytest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
3232
3333
# Install Rizin
34-
sudo git clone --branch v0.2.1 https://github.com/rizinorg/rizin /opt/rizin/
34+
sudo git clone --branch v0.3.4 https://github.com/rizinorg/rizin /opt/rizin/
3535
cd /opt/rizin/
3636
meson build
3737
ninja -C build

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ quark -a Ahmyth.apk -s --multi-process 4
150150
```
151151

152152
### Upcoming unstable feature
153-
Now Quark also supports [Rizin](https://github.com/rizinorg/rizin) (v0.2.0 or v0.2.1, please see [#305](https://github.com/quark-engine/quark-engine/issues/305) for more detail) as one of our Android analysis frameworks. You can use option `--core-library` with `rizin` to enable the Rizin-based analysis library.
153+
Now Quark also supports [Rizin](https://github.com/rizinorg/rizin) as one of our Android analysis frameworks. You can use the `--core-library` option with `rizin` to enable the Rizin-based analysis library.
154154
```bash
155155
quark -a Ahmyth.apk -s --core-library rizin
156156
```

quark/core/quark.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
output_parent_function_json,
3030
output_parent_function_table,
3131
)
32-
from quark.utils.pprint import print_info, print_success
32+
from quark.utils.pprint import print_info, print_success, print_warning
3333
from quark.utils.weight import Weight
3434

3535
MAX_SEARCH_LAYER = 3
@@ -334,7 +334,7 @@ def find_api_usage(self, class_name, method_name, descriptor_name):
334334
for method in potential_method_list:
335335
current_class_set = {method.class_name}
336336

337-
while not current_class_set.intersection(
337+
while current_class_set and not current_class_set.intersection(
338338
{class_name, "Ljava/lang/Object;"}
339339
):
340340
next_class_set = set()
@@ -423,6 +423,17 @@ def run(self, rule_obj):
423423
second_api_xref_from
424424
)
425425

426+
if not first_api_xref_from:
427+
print_warning(
428+
f"Unable to find the upperfunc of {first_api}"
429+
)
430+
continue
431+
if not second_api_xref_from:
432+
print_warning(
433+
f"Unable to find the upperfunc of{second_api}"
434+
)
435+
continue
436+
426437
mutual_parent_function_list = self.find_intersection(
427438
first_api_xref_from, second_api_xref_from
428439
)

quark/core/rzapkinfo.py

Lines changed: 151 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,33 @@
1818
from quark.core.interface.baseapkinfo import BaseApkinfo
1919
from quark.core.struct.bytecodeobject import BytecodeObject
2020
from quark.core.struct.methodobject import MethodObject
21-
from quark.utils.tools import descriptor_to_androguard_format
21+
from quark.utils.tools import descriptor_to_androguard_format, remove_dup_list
2222

2323
RizinCache = namedtuple("rizin_cache", "address dexindex is_imported")
2424

25+
PRIMITIVE_TYPE_MAPPING = {
26+
"void": "V",
27+
"boolean": "Z",
28+
"byte": "B",
29+
"char": "C",
30+
"short": "S",
31+
"int": "I",
32+
"long": "J",
33+
"float": "F",
34+
"double": "D",
35+
"Boolean": "Ljava/lang/Boolean;",
36+
"Byte": "Ljava/lang/Byte;",
37+
"Character": "Ljava/lang/Character;",
38+
"Short": "Ljava/lang/Short;",
39+
"Integer": "Ljava/lang/Integer;",
40+
"Long": "Ljava/lang/Long;",
41+
"Float": "Ljava/lang/Float;",
42+
"Double": "Ljava/lang/Double;",
43+
"String": "Ljava/lang/String;",
44+
}
45+
46+
RIZIN_ESCAPE_CHAR_LIST = ["<", ">", "$"]
47+
2548

2649
class RizinImp(BaseApkinfo):
2750
def __init__(
@@ -65,6 +88,29 @@ def _get_rz(self, index):
6588
rz.cmd("aa")
6689
return rz
6790

91+
def _convert_type_to_type_signature(self, raw_type: str):
92+
if raw_type.endswith("[]"):
93+
return "[" + self._convert_type_to_type_signature(raw_type[:-2])
94+
95+
if raw_type.startswith("["):
96+
return "[" + self._convert_type_to_type_signature(raw_type[1:])
97+
98+
if raw_type in PRIMITIVE_TYPE_MAPPING:
99+
return PRIMITIVE_TYPE_MAPPING[raw_type]
100+
101+
if "." in raw_type or "_" in raw_type:
102+
raw_type = raw_type.replace(".", "/")
103+
raw_type = raw_type.replace("_", "$")
104+
return "L" + raw_type + ";"
105+
106+
return raw_type
107+
108+
@staticmethod
109+
def _escape_str_in_rizin_manner(raw_str: str):
110+
for c in RIZIN_ESCAPE_CHAR_LIST:
111+
raw_str = raw_str.replace(c, "_")
112+
return raw_str
113+
68114
@functools.lru_cache
69115
def _get_methods_classified(self, dexindex):
70116
rz = self._get_rz(dexindex)
@@ -75,25 +121,108 @@ def _get_methods_classified(self, dexindex):
75121
if json_obj.get("type") not in ["FUNC", "METH"]:
76122
continue
77123

78-
full_name = json_obj["realname"]
79-
class_name, method_descriptor = full_name.split(".method.", maxsplit=1)
80-
class_name = class_name + ";"
124+
# -- Descriptor --
125+
full_method_name = json_obj["name"]
126+
raw_argument_str = next(
127+
re.finditer("\\(.*\\).*", full_method_name), None
128+
)
129+
if raw_argument_str is None:
130+
continue
131+
raw_argument_str = raw_argument_str.group(0)
132+
133+
if raw_argument_str.endswith(")"):
134+
# Convert Java language type to JVM type signature
135+
136+
# Parse the arguments
137+
raw_argument_str = raw_argument_str[1:-1]
138+
arguments = [
139+
self._convert_type_to_type_signature(arg)
140+
for arg in raw_argument_str.split(", ")
141+
]
81142

82-
delimiter = method_descriptor.index('(')
83-
methodname = method_descriptor[:delimiter]
84-
descriptor = method_descriptor[delimiter:]
85-
descriptor = descriptor_to_androguard_format(descriptor)
143+
# Parse the return type
144+
return_type = next(
145+
re.finditer(
146+
"[A-Za-zL][A-Za-z0-9L/\\;[\\]$.]+ ", full_method_name
147+
),
148+
None,
149+
)
150+
if return_type is None:
151+
print(f"Unresolved method signature: {full_method_name}")
152+
continue
153+
return_type = return_type.group(0).strip()
154+
155+
# Convert
156+
raw_argument_str = (
157+
"("
158+
+ " ".join(arguments)
159+
+ ")"
160+
+ self._convert_type_to_type_signature(return_type)
161+
)
86162

163+
descriptor = descriptor_to_androguard_format(raw_argument_str)
164+
165+
# -- Method name --
166+
method_name = json_obj["realname"]
167+
168+
# -- Is imported --
87169
is_imported = json_obj["is_imported"]
88170

171+
# -- Class name --
172+
# Test if the class name is truncated
173+
escaped_method_name = self._escape_str_in_rizin_manner(method_name)
174+
if escaped_method_name.endswith("_"):
175+
escaped_method_name = escaped_method_name[:-1]
176+
177+
flag_name = json_obj["flagname"]
178+
179+
# sym.imp.clone doesn't belong to a class
180+
if flag_name == "sym.imp.clone":
181+
method = MethodObject(
182+
class_name="",
183+
name="clone",
184+
descriptor="()Ljava/lang/Object;",
185+
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
186+
)
187+
method_dict[""].append(method)
188+
continue
189+
190+
if escaped_method_name not in flag_name:
191+
logging.warning(
192+
f"The class name may be truncated: {json_obj['flagname']}"
193+
)
194+
195+
# Drop the method name
196+
match = None
197+
for match in re.finditer("_+[A-Za-z]+", flag_name):
198+
pass
199+
if match is None:
200+
logging.warning(
201+
f"Skip the damaged flag: {json_obj['flagname']}"
202+
)
203+
continue
204+
match = match.group(0)
205+
flag_name = flag_name[: flag_name.rfind(match)]
206+
207+
# Drop the prefixes sym. and imp.
208+
while flag_name.startswith("sym.") or flag_name.startswith("imp."):
209+
flag_name = flag_name[4:]
210+
211+
class_name = self._convert_type_to_type_signature(flag_name)
212+
213+
# Append the method
89214
method = MethodObject(
90215
class_name=class_name,
91-
name=methodname,
216+
name=method_name,
92217
descriptor=descriptor,
93218
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
94219
)
95220
method_dict[class_name].append(method)
96221

222+
# Remove duplicates
223+
for class_name, method_list in method_dict.items():
224+
method_dict[class_name] = remove_dup_list(method_list)
225+
97226
return method_dict
98227

99228
@functools.cached_property
@@ -326,17 +455,12 @@ def superclass_relationships(self) -> Dict[str, Set[str]]:
326455

327456
rz = self._get_rz(dex_index)
328457

329-
hierarchy_graph = rz.cmd("icg").split("\n")
330-
331-
for element in hierarchy_graph:
332-
if element.startswith("age"):
333-
element_part = element.split()
334-
for index, class_name in enumerate(element_part):
335-
if not class_name.endswith(";"):
336-
element_part[index] = class_name + ";"
458+
class_info_list = rz.cmdj("icj")
459+
for class_info in class_info_list:
460+
class_name = class_info["classname"]
461+
super_class = class_info["super"]
337462

338-
for subclass in element_part[2:]:
339-
hierarchy_dict[subclass].add(element_part[1])
463+
hierarchy_dict[class_name].add(super_class)
340464

341465
return hierarchy_dict
342466

@@ -348,16 +472,12 @@ def subclass_relationships(self) -> Dict[str, Set[str]]:
348472

349473
rz = self._get_rz(dex_index)
350474

351-
hierarchy_graph = rz.cmd("icg").split("\n")
352-
353-
for element in hierarchy_graph:
354-
if element.startswith("age"):
355-
element_part = element.split()
356-
for index, class_name in enumerate(element_part):
357-
if not class_name.endswith(";"):
358-
element_part[index] = class_name + ";"
475+
class_info_list = rz.cmdj("icj")
476+
for class_info in class_info_list:
477+
class_name = class_info["classname"]
478+
super_class = class_info["super"]
359479

360-
hierarchy_dict[element_part[1]].update(element_part[2:])
480+
hierarchy_dict[super_class].add(class_name)
361481

362482
return hierarchy_dict
363483

@@ -380,8 +500,8 @@ def _parse_smali(smali: str) -> BytecodeObject:
380500
mnemonic, args = smali.split(maxsplit=1) # Split into two parts
381501

382502
# An invoke-kind instruction may leave the method index at the end
383-
if mnemonic.startswith("invoke"):
384-
args = args[: args.rfind(" ;")]
503+
# if mnemonic.startswith("invoke"):
504+
# args = args[: args.rfind(" ;")]
385505

386506
args = [arg.strip() for arg in re.split("[{},]+", args) if arg]
387507

@@ -392,7 +512,7 @@ def _parse_smali(smali: str) -> BytecodeObject:
392512
args = args[:-1]
393513

394514
if mnemonic.startswith("invoke"):
395-
parameter = re.sub(r"\.", ";->", parameter, count=1)
515+
parameter = re.sub(r"\.", "->", parameter, count=1)
396516

397517
register_list = []
398518
# Ranged registers

tests/core/test_apkinfo.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,10 @@ def test_android_apis(self, apkinfo):
117117
),
118118
}
119119

120-
assert len(apkinfo.android_apis) == 1270
120+
if apkinfo.core_library == "androguard":
121+
assert len(apkinfo.android_apis) == 1270
122+
elif apkinfo.core_library == "rizin":
123+
assert len(apkinfo.android_apis) == 1269
121124
assert api.issubset(apkinfo.android_apis)
122125

123126
def test_custom_methods(self, apkinfo):
@@ -133,7 +136,10 @@ def test_custom_methods(self, apkinfo):
133136
"()V",
134137
),
135138
}
136-
assert len(apkinfo.custom_methods) == 3999
139+
if apkinfo.core_library == "androguard":
140+
assert len(apkinfo.custom_methods) == 3999
141+
elif apkinfo.core_library == "rizin":
142+
assert len(apkinfo.custom_methods) == 3990
137143
assert test_custom_method.issubset(apkinfo.custom_methods)
138144

139145
def test_all_methods(self, apkinfo):
@@ -153,7 +159,7 @@ def test_all_methods(self, apkinfo):
153159
if apkinfo.core_library == "androguard":
154160
assert len(apkinfo.all_methods) == 5452
155161
elif apkinfo.core_library == "rizin":
156-
assert len(apkinfo.all_methods) == 5273
162+
assert len(apkinfo.all_methods) == 5260
157163

158164
assert test_custom_method.issubset(apkinfo.all_methods)
159165

@@ -168,20 +174,21 @@ def test_find_method(self, apkinfo):
168174
assert str(result.descriptor) == "(Z)V"
169175

170176
def test_upperfunc(self, apkinfo):
171-
api = apkinfo.find_method("Ljava/lang/reflect/Field;", "setAccessible", "(Z)V")
172-
173-
expect_function = MethodObject(
174-
(
175-
"Landroid/support/v4/widget/SlidingPaneLayout$"
176-
"SlidingPanelLayoutImplJB;"
177-
),
177+
api = apkinfo.find_method(
178+
"Lcom/example/google/service/ContactsHelper;",
178179
"<init>",
179-
"()V",
180+
"(Landroid/content/Context;)V",
181+
)
182+
183+
expect_function = apkinfo.find_method(
184+
"Lcom/example/google/service/SMSReceiver;",
185+
"isContact",
186+
"(Ljava/lang/String;)Ljava/lang/Boolean;",
180187
)
181188

182-
upper = list(apkinfo.upperfunc(api))[0]
189+
upper_methods = list(apkinfo.upperfunc(api))
183190

184-
assert upper == expect_function
191+
assert expect_function in upper_methods
185192

186193
def test_lowerfunc(self, apkinfo):
187194
method = apkinfo.find_method(
@@ -238,17 +245,17 @@ def test_get_method_bytecode(self, apkinfo):
238245

239246
def test_lowerfunc(self, apkinfo):
240247
method = apkinfo.find_method(
241-
"Lcom/example/google/service/WebServiceCalling;",
242-
"Send",
243-
"(Landroid/os/Handler; Ljava/lang/String;)V",
248+
"Lcom/example/google/service/SMSReceiver;",
249+
"isContact",
250+
"(Ljava/lang/String;)Ljava/lang/Boolean;",
244251
)
245252

246253
expect_method = apkinfo.find_method(
247-
"Ljava/lang/StringBuilder;",
248-
"append",
249-
"(Ljava/lang/String;)Ljava/lang/StringBuilder;",
254+
"Lcom/example/google/service/ContactsHelper;",
255+
"<init>",
256+
"(Landroid/content/Context;)V",
250257
)
251-
expect_offset = 42
258+
expect_offset = 10
252259

253260
upper_methods = apkinfo.lowerfunc(method)
254261

0 commit comments

Comments
 (0)