Skip to content

Commit ad430bc

Browse files
committed
Make APK file format available with Rizin lib and remove unused code
1 parent 136d0b6 commit ad430bc

File tree

1 file changed

+48
-99
lines changed

1 file changed

+48
-99
lines changed

quark/core/rzapkinfo.py

Lines changed: 48 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
remove_dup_list,
2424
)
2525

26-
RizinCache = namedtuple("rizin_cache", "address dexindex is_imported")
26+
RizinCache = namedtuple("rizin_cache", "address is_imported")
2727

2828
PRIMITIVE_TYPE_MAPPING = {
2929
"void": "V",
@@ -50,7 +50,6 @@ def __init__(
5050

5151
if self.ret_type == "DEX":
5252
self._tmp_dir = None
53-
self._dex_list = [apk_filepath]
5453

5554
elif self.ret_type == "APK":
5655
self._tmp_dir = tempfile.mkdtemp() if tmp_dir is None else tmp_dir
@@ -60,32 +59,21 @@ def __init__(
6059

6160
self._manifest = os.path.join(self._tmp_dir, "AndroidManifest.xml")
6261

63-
dex_files = [
64-
file
65-
for file in apk.namelist()
66-
if file.startswith("classes") and file.endswith(".dex")
67-
]
68-
69-
for dex in dex_files:
70-
apk.extract(dex, path=self._tmp_dir)
71-
72-
self._dex_list = [os.path.join(self._tmp_dir, dex) for dex in dex_files]
73-
7462
else:
7563
raise ValueError("Unsupported File type.")
7664

77-
self._number_of_dex = len(self._dex_list)
78-
79-
@functools.lru_cache
80-
def _get_rz(self, index):
65+
@functools.cached_property
66+
def _rz(self):
8167
"""
82-
Return a Rizin object that opens the specified Dex file.
68+
Return a Rizin object that opens the specified Dex file or APK file.
8369
84-
:param index: an index indicating which Dex file should the returned
85-
object open
8670
:return: a Rizin object opening the specified Dex file or APK file
8771
"""
88-
rz = rzpipe.open(self._dex_list[index])
72+
if self.ret_type == "DEX":
73+
rz = rzpipe.open(f"{self.apk_filepath}")
74+
elif self.ret_type == "APK":
75+
rz = rzpipe.open(f"apk://{self.apk_filepath}")
76+
8977
rz.cmd("aa")
9078
return rz
9179

@@ -140,15 +128,13 @@ def _escape_str_in_rizin_manner(raw_str: str):
140128
raw_str = raw_str.replace(c, "_")
141129
return raw_str
142130

143-
def _parse_method_from_isj_obj(self, json_obj, dexindex):
131+
def _parse_method_from_isj_obj(self, json_obj):
144132
"""
145133
Parse a JSON object provided by the Rizin command `isj` or `is.j` into
146134
an instance of MethodObject.
147135
148136
:param json_obj: a JSON object provided by the Rizin command `isj` or
149137
`is.j`
150-
:param dexindex: an index indicating from which Dex file the JSON
151-
object is generated
152138
:return: an instance of MethodObject
153139
"""
154140
if json_obj.get("type") not in ["FUNC", "METH"]:
@@ -220,7 +206,7 @@ def _parse_method_from_isj_obj(self, json_obj, dexindex):
220206
class_name="",
221207
name="clone",
222208
descriptor="()Ljava/lang/Object;",
223-
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
209+
cache=RizinCache(json_obj["vaddr"], is_imported),
224210
)
225211
return method
226212

@@ -250,28 +236,23 @@ def _parse_method_from_isj_obj(self, json_obj, dexindex):
250236
class_name=class_name,
251237
name=method_name,
252238
descriptor=descriptor,
253-
cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
239+
cache=RizinCache(json_obj["vaddr"], is_imported),
254240
)
255241

256242
return method
257243

258244
@functools.lru_cache
259-
def _get_methods_classified(
260-
self, dex_index: int
261-
) -> Dict[str, List[MethodObject]]:
245+
def _get_methods_classified(self) -> Dict[str, List[MethodObject]]:
262246
"""
263247
Use command isj to get all the methods and categorize them into
264248
a dictionary.
265249
266-
:param dex_index: an index to the Dex file that need to be parsed.
267250
:return: a dict that holds methods categorized by their class name
268251
"""
269-
rz = self._get_rz(dex_index)
270-
271-
method_json_list = rz.cmdj("isj")
252+
method_json_list = self._rz.cmdj("isj")
272253
method_dict = defaultdict(list)
273254
for json_obj in method_json_list:
274-
method = self._parse_method_from_isj_obj(json_obj, dex_index)
255+
method = self._parse_method_from_isj_obj(json_obj)
275256
if method:
276257
method_dict[method.class_name].append(method)
277258

@@ -377,9 +358,8 @@ def all_methods(self) -> Set[MethodObject]:
377358
:return: a set of MethodObjects
378359
"""
379360
method_set = set()
380-
for dex_index in range(self._number_of_dex):
381-
for method_list in self._get_methods_classified(dex_index).values():
382-
method_set.update(method_list)
361+
for method_list in self._get_methods_classified().values():
362+
method_set.update(method_list)
383363

384364
return method_set
385365

@@ -421,22 +401,19 @@ def method_filter(method):
421401
descriptor, method.descriptor
422402
)
423403

424-
dex_list = range(self._number_of_dex)
425404
filtered_methods = list()
426405

427406
if class_name != ".*":
428-
for dex_index in dex_list:
429-
method_dict = self._get_methods_classified(dex_index)
407+
method_dict = self._get_methods_classified()
408+
filtered_methods += list(
409+
filter(method_filter, method_dict[class_name])
410+
)
411+
else:
412+
method_dict = self._get_methods_classified()
413+
for key_name in method_dict:
430414
filtered_methods += list(
431-
filter(method_filter, method_dict[class_name])
415+
filter(method_filter, method_dict[key_name])
432416
)
433-
else:
434-
for dex_index in dex_list:
435-
method_dict = self._get_methods_classified(dex_index)
436-
for key_name in method_dict:
437-
filtered_methods += list(
438-
filter(method_filter, method_dict[key_name])
439-
)
440417

441418
return filtered_methods
442419

@@ -452,10 +429,7 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
452429
"""
453430
cache = method_object.cache
454431

455-
r2 = self._get_rz(cache.dexindex)
456-
457-
xrefs = r2.cmdj(f"axtj @ {cache.address}")
458-
432+
xrefs = self._rz.cmdj(f"axtj @ {cache.address}")
459433
upperfunc_set = set()
460434
for xref in xrefs:
461435
if xref["type"] != "CALL":
@@ -493,9 +467,7 @@ def lowerfunc(
493467
"""
494468
cache = method_object.cache
495469

496-
rz = self._get_rz(cache.dexindex)
497-
498-
instruct_flow = rz.cmdj(f"pdfj @ {cache.address}")["ops"]
470+
instruct_flow = self._rz.cmdj(f"pdfj @ {cache.address}")["ops"]
499471

500472
lowerfunc_list = []
501473
for ins in instruct_flow:
@@ -532,13 +504,9 @@ def get_method_bytecode(
532504
:yield: a generator of BytecodeObjects
533505
"""
534506
cache = method_object.cache
535-
536507
if not cache.is_imported:
537508

538-
rz = self._get_rz(cache.dexindex)
539-
540-
instruct_flow = rz.cmdj(f"pdfj @ {cache.address}")["ops"]
541-
509+
instruct_flow = self._rz.cmdj(f"pdfj @ {cache.address}")["ops"]
542510
if instruct_flow:
543511
for ins in instruct_flow:
544512
if "disasm" not in ins:
@@ -554,13 +522,10 @@ def get_strings(self) -> Set[str]:
554522
:return: a set of strings
555523
"""
556524
strings = set()
557-
for dex_index in range(self._number_of_dex):
558-
rz = self._get_rz(dex_index)
559-
560-
string_detail_list = rz.cmdj("izzj")
561-
strings.update(
562-
[string_detail["string"] for string_detail in string_detail_list]
563-
)
525+
string_detail_list = self._rz.cmdj("izzj")
526+
strings.update(
527+
[string_detail["string"] for string_detail in string_detail_list]
528+
)
564529

565530
return strings
566531

@@ -610,9 +575,7 @@ def convert_bytecode_to_list(bytecode):
610575
if cache.is_imported:
611576
return {}
612577

613-
rz = self._get_rz(cache.dexindex)
614-
615-
instruction_flow = rz.cmdj(f"pdfj @ {cache.address}")["ops"]
578+
instruction_flow = self._rz.cmdj(f"pdfj @ {cache.address}")["ops"]
616579

617580
if instruction_flow:
618581
for ins in instruction_flow:
@@ -661,18 +624,14 @@ def superclass_relationships(self) -> Dict[str, Set[str]]:
661624
"""
662625
hierarchy_dict = defaultdict(set)
663626

664-
for dex_index in range(self._number_of_dex):
665-
666-
rz = self._get_rz(dex_index)
627+
class_info_list = self._rz.cmdj("icj")
628+
for class_info in class_info_list:
629+
class_name = class_info["classname"]
630+
class_name = self._convert_type_to_type_signature(class_name)
631+
super_class = class_info["super"]
632+
super_class = self._convert_type_to_type_signature(super_class)
667633

668-
class_info_list = rz.cmdj("icj")
669-
for class_info in class_info_list:
670-
class_name = class_info["classname"]
671-
class_name = self._convert_type_to_type_signature(class_name)
672-
super_class = class_info["super"]
673-
super_class = self._convert_type_to_type_signature(super_class)
674-
675-
hierarchy_dict[class_name].add(super_class)
634+
hierarchy_dict[class_name].add(super_class)
676635

677636
return hierarchy_dict
678637

@@ -690,16 +649,12 @@ def subclass_relationships(self) -> Dict[str, Set[str]]:
690649
"""
691650
hierarchy_dict = defaultdict(set)
692651

693-
for dex_index in range(self._number_of_dex):
694-
695-
rz = self._get_rz(dex_index)
652+
class_info_list = self._rz.cmdj("icj")
653+
for class_info in class_info_list:
654+
class_name = class_info["classname"]
655+
super_class = class_info["super"]
696656

697-
class_info_list = rz.cmdj("icj")
698-
for class_info in class_info_list:
699-
class_name = class_info["classname"]
700-
super_class = class_info["super"]
701-
702-
hierarchy_dict[super_class].add(class_name)
657+
hierarchy_dict[super_class].add(class_name)
703658

704659
return hierarchy_dict
705660

@@ -710,13 +665,10 @@ def _get_method_by_address(self, address: int) -> MethodObject:
710665
:param address: an address used to find the corresponding method
711666
:return: the MethodObject of the method in the given address
712667
"""
713-
dexindex = 0
714-
715-
rz = self._get_rz(dexindex)
716-
json_array = rz.cmdj(f"is.j @ {address}")
668+
json_array = self._rz.cmdj(f"is.j @ {address}")
717669

718670
if json_array:
719-
return self._parse_method_from_isj_obj(json_array[0], dexindex)
671+
return self._parse_method_from_isj_obj(json_array[0])
720672
else:
721673
return None
722674

@@ -727,10 +679,7 @@ def _get_string_by_address(self, address: str) -> str:
727679
:param address: an address used to find the corresponding string content
728680
:return: the content in the given address
729681
"""
730-
dexindex = 0
731-
732-
rz = self._get_rz(dexindex)
733-
content = rz.cmd(f"pr @ {int(address, 16)}")
682+
content = self._rz.cmd(f"pr @ {int(address, 16)}")
734683
return content
735684

736685
@staticmethod

0 commit comments

Comments
 (0)