18
18
from quark .core .interface .baseapkinfo import BaseApkinfo
19
19
from quark .core .struct .bytecodeobject import BytecodeObject
20
20
from quark .core .struct .methodobject import MethodObject
21
- from quark .utils .tools import descriptor_to_androguard_format
21
+ from quark .utils .tools import descriptor_to_androguard_format , remove_dup_list
22
22
23
23
# Per-method cache record: address, index of the owning dex, and the
# is_imported flag.
RizinCache = namedtuple("rizin_cache", "address dexindex is_imported")


# Java type names mapped to their JVM type signatures. Despite the name, the
# table also covers the java.lang wrapper classes and String.
PRIMITIVE_TYPE_MAPPING = dict(
    void="V",
    boolean="Z",
    byte="B",
    char="C",
    short="S",
    int="I",
    long="J",
    float="F",
    double="D",
    **{
        simple_name: f"Ljava/lang/{simple_name};"
        for simple_name in (
            "Boolean",
            "Byte",
            "Character",
            "Short",
            "Integer",
            "Long",
            "Float",
            "Double",
            "String",
        )
    },
)

# Characters that get replaced with "_" when a name is escaped.
RIZIN_ESCAPE_CHAR_LIST = list("<>$")
47
+
25
48
26
49
class RizinImp (BaseApkinfo ):
27
50
def __init__ (
@@ -65,6 +88,29 @@ def _get_rz(self, index):
65
88
rz .cmd ("aa" )
66
89
return rz
67
90
91
def _convert_type_to_type_signature(self, raw_type: str):
    """Translate a Java-style type name into a JVM type signature.

    Every array marker, whether a trailing ``[]`` or a leading ``[``,
    contributes one ``[`` to the front of the result. The remaining element
    type is resolved as follows:

    * a key of ``PRIMITIVE_TYPE_MAPPING`` is replaced by its mapped value;
    * a name containing ``.`` or ``_`` is treated as a class name: ``.``
      becomes ``/``, ``_`` becomes ``$`` and the result is wrapped as
      ``L<name>;``;
    * anything else is passed through unchanged.

    :param raw_type: the type name to convert
    :return: the converted type signature string
    """
    # Peel array markers off one at a time; a trailing "[]" is checked
    # before a leading "[" on every pass.
    element_type = raw_type
    array_depth = 0
    while True:
        if element_type.endswith("[]"):
            element_type = element_type[:-2]
        elif element_type.startswith("["):
            element_type = element_type[1:]
        else:
            break
        array_depth += 1

    array_prefix = "[" * array_depth

    mapped = PRIMITIVE_TYPE_MAPPING.get(element_type)
    if mapped is not None:
        return array_prefix + mapped

    # No package separator and no underscore: leave the name as it is.
    if "." not in element_type and "_" not in element_type:
        return array_prefix + element_type

    class_path = element_type.replace(".", "/").replace("_", "$")
    return f"{array_prefix}L{class_path};"
107
+
108
@staticmethod
def _escape_str_in_rizin_manner(raw_str: str):
    """Replace every entry of ``RIZIN_ESCAPE_CHAR_LIST`` in a string with ``_``.

    :param raw_str: the string to escape
    :return: a copy of ``raw_str`` with each listed character replaced by
        an underscore
    """
    # Fold the replacements over the list, in list order.
    return functools.reduce(
        lambda text, special: text.replace(special, "_"),
        RIZIN_ESCAPE_CHAR_LIST,
        raw_str,
    )
113
+
68
114
@functools .lru_cache
69
115
def _get_methods_classified (self , dexindex ):
70
116
rz = self ._get_rz (dexindex )
@@ -75,25 +121,108 @@ def _get_methods_classified(self, dexindex):
75
121
if json_obj .get ("type" ) not in ["FUNC" , "METH" ]:
76
122
continue
77
123
78
- full_name = json_obj ["realname" ]
79
- class_name , method_descriptor = full_name .split (".method." , maxsplit = 1 )
80
- class_name = class_name + ";"
124
+ # -- Descriptor --
125
+ full_method_name = json_obj ["name" ]
126
+ raw_argument_str = next (
127
+ re .finditer ("\\ (.*\\ ).*" , full_method_name ), None
128
+ )
129
+ if raw_argument_str is None :
130
+ continue
131
+ raw_argument_str = raw_argument_str .group (0 )
132
+
133
+ if raw_argument_str .endswith (")" ):
134
+ # Convert Java language type to JVM type signature
135
+
136
+ # Parse the arguments
137
+ raw_argument_str = raw_argument_str [1 :- 1 ]
138
+ arguments = [
139
+ self ._convert_type_to_type_signature (arg )
140
+ for arg in raw_argument_str .split (", " )
141
+ ]
81
142
82
- delimiter = method_descriptor .index ('(' )
83
- methodname = method_descriptor [:delimiter ]
84
- descriptor = method_descriptor [delimiter :]
85
- descriptor = descriptor_to_androguard_format (descriptor )
143
+ # Parse the return type
144
+ return_type = next (
145
+ re .finditer (
146
+ "[A-Za-zL][A-Za-z0-9L/\\ ;[\\ ]$.]+ " , full_method_name
147
+ ),
148
+ None ,
149
+ )
150
+ if return_type is None :
151
+ print (f"Unresolved method signature: { full_method_name } " )
152
+ continue
153
+ return_type = return_type .group (0 ).strip ()
154
+
155
+ # Convert
156
+ raw_argument_str = (
157
+ "("
158
+ + " " .join (arguments )
159
+ + ")"
160
+ + self ._convert_type_to_type_signature (return_type )
161
+ )
86
162
163
+ descriptor = descriptor_to_androguard_format (raw_argument_str )
164
+
165
+ # -- Method name --
166
+ method_name = json_obj ["realname" ]
167
+
168
+ # -- Is imported --
87
169
is_imported = json_obj ["is_imported" ]
88
170
171
+ # -- Class name --
172
+ # Test if the class name is truncated
173
+ escaped_method_name = self ._escape_str_in_rizin_manner (method_name )
174
+ if escaped_method_name .endswith ("_" ):
175
+ escaped_method_name = escaped_method_name [:- 1 ]
176
+
177
+ flag_name = json_obj ["flagname" ]
178
+
179
+ # sym.imp.clone doesn't belong to a class
180
+ if flag_name == "sym.imp.clone" :
181
+ method = MethodObject (
182
+ class_name = "" ,
183
+ name = "clone" ,
184
+ descriptor = "()Ljava/lang/Object;" ,
185
+ cache = RizinCache (json_obj ["vaddr" ], dexindex , is_imported ),
186
+ )
187
+ method_dict ["" ].append (method )
188
+ continue
189
+
190
+ if escaped_method_name not in flag_name :
191
+ logging .warning (
192
+ f"The class name may be truncated: { json_obj ['flagname' ]} "
193
+ )
194
+
195
+ # Drop the method name
196
+ match = None
197
+ for match in re .finditer ("_+[A-Za-z]+" , flag_name ):
198
+ pass
199
+ if match is None :
200
+ logging .warning (
201
+ f"Skip the damaged flag: { json_obj ['flagname' ]} "
202
+ )
203
+ continue
204
+ match = match .group (0 )
205
+ flag_name = flag_name [: flag_name .rfind (match )]
206
+
207
+ # Drop the prefixes sym. and imp.
208
+ while flag_name .startswith ("sym." ) or flag_name .startswith ("imp." ):
209
+ flag_name = flag_name [4 :]
210
+
211
+ class_name = self ._convert_type_to_type_signature (flag_name )
212
+
213
+ # Append the method
89
214
method = MethodObject (
90
215
class_name = class_name ,
91
- name = methodname ,
216
+ name = method_name ,
92
217
descriptor = descriptor ,
93
218
cache = RizinCache (json_obj ["vaddr" ], dexindex , is_imported ),
94
219
)
95
220
method_dict [class_name ].append (method )
96
221
222
+ # Remove duplicates
223
+ for class_name , method_list in method_dict .items ():
224
+ method_dict [class_name ] = remove_dup_list (method_list )
225
+
97
226
return method_dict
98
227
99
228
@functools .cached_property
@@ -326,17 +455,12 @@ def superclass_relationships(self) -> Dict[str, Set[str]]:
326
455
327
456
rz = self ._get_rz (dex_index )
328
457
329
- hierarchy_graph = rz .cmd ("icg" ).split ("\n " )
330
-
331
- for element in hierarchy_graph :
332
- if element .startswith ("age" ):
333
- element_part = element .split ()
334
- for index , class_name in enumerate (element_part ):
335
- if not class_name .endswith (";" ):
336
- element_part [index ] = class_name + ";"
458
+ class_info_list = rz .cmdj ("icj" )
459
+ for class_info in class_info_list :
460
+ class_name = class_info ["classname" ]
461
+ super_class = class_info ["super" ]
337
462
338
- for subclass in element_part [2 :]:
339
- hierarchy_dict [subclass ].add (element_part [1 ])
463
+ hierarchy_dict [class_name ].add (super_class )
340
464
341
465
return hierarchy_dict
342
466
@@ -348,16 +472,12 @@ def subclass_relationships(self) -> Dict[str, Set[str]]:
348
472
349
473
rz = self ._get_rz (dex_index )
350
474
351
- hierarchy_graph = rz .cmd ("icg" ).split ("\n " )
352
-
353
- for element in hierarchy_graph :
354
- if element .startswith ("age" ):
355
- element_part = element .split ()
356
- for index , class_name in enumerate (element_part ):
357
- if not class_name .endswith (";" ):
358
- element_part [index ] = class_name + ";"
475
+ class_info_list = rz .cmdj ("icj" )
476
+ for class_info in class_info_list :
477
+ class_name = class_info ["classname" ]
478
+ super_class = class_info ["super" ]
359
479
360
- hierarchy_dict [element_part [ 1 ]]. update ( element_part [ 2 :] )
480
+ hierarchy_dict [super_class ]. add ( class_name )
361
481
362
482
return hierarchy_dict
363
483
@@ -380,8 +500,8 @@ def _parse_smali(smali: str) -> BytecodeObject:
380
500
mnemonic , args = smali .split (maxsplit = 1 ) # Split into twe parts
381
501
382
502
# An invoke-kind instruction may leave the method index at the end
383
- if mnemonic .startswith ("invoke" ):
384
- args = args [: args .rfind (" ;" )]
503
+ # if mnemonic.startswith("invoke"):
504
+ # args = args[: args.rfind(" ;")]
385
505
386
506
args = [arg .strip () for arg in re .split ("[{},]+" , args ) if arg ]
387
507
@@ -392,7 +512,7 @@ def _parse_smali(smali: str) -> BytecodeObject:
392
512
args = args [:- 1 ]
393
513
394
514
if mnemonic .startswith ("invoke" ):
395
- parameter = re .sub (r"\." , "; ->" , parameter , count = 1 )
515
+ parameter = re .sub (r"\." , "->" , parameter , count = 1 )
396
516
397
517
register_list = []
398
518
# Ranged registers
0 commit comments