32
32
"long" : "J" ,
33
33
"float" : "F" ,
34
34
"double" : "D" ,
35
- "Boolean" : "Ljava/lang/Boolean;" ,
36
- "Byte" : "Ljava/lang/Byte;" ,
37
- "Character" : "Ljava/lang/Character;" ,
38
- "Short" : "Ljava/lang/Short;" ,
39
- "Integer" : "Ljava/lang/Integer;" ,
40
- "Long" : "Ljava/lang/Long;" ,
41
- "Float" : "Ljava/lang/Float;" ,
42
- "Double" : "Ljava/lang/Double;" ,
43
- "String" : "Ljava/lang/String;" ,
44
35
}
45
36
46
37
# Characters that `_escape_str_in_rizin_manner` replaces with "_".
# NOTE(review): presumably mirrors how Rizin sanitizes flag names - confirm.
RIZIN_ESCAPE_CHAR_LIST = ["<" , ">" , "$" ]
@@ -89,12 +80,19 @@ def _get_rz(self, index):
89
80
return rz
90
81
91
82
def _convert_type_to_type_signature (self , raw_type : str ):
83
+ if not raw_type :
84
+ return raw_type
85
+
92
86
if raw_type .endswith ("[]" ):
93
87
return "[" + self ._convert_type_to_type_signature (raw_type [:- 2 ])
94
88
95
89
if raw_type .startswith ("[" ):
96
90
return "[" + self ._convert_type_to_type_signature (raw_type [1 :])
97
91
92
+ if "..." in raw_type :
93
+ index = raw_type .index ("..." )
94
+ return "[" + self ._convert_type_to_type_signature (raw_type [:index ])
95
+
98
96
if raw_type in PRIMITIVE_TYPE_MAPPING :
99
97
return PRIMITIVE_TYPE_MAPPING [raw_type ]
100
98
@@ -103,121 +101,126 @@ def _convert_type_to_type_signature(self, raw_type: str):
103
101
raw_type = raw_type .replace ("_" , "$" )
104
102
return "L" + raw_type + ";"
105
103
106
- return raw_type
104
+ return "Ljava/lang/" + raw_type + ";"
107
105
108
106
@staticmethod
def _escape_str_in_rizin_manner(raw_str: str):
    """
    Replace every character listed in RIZIN_ESCAPE_CHAR_LIST with "_".

    :param raw_str: the string to escape
    :return: a copy of raw_str with each listed character replaced by "_"
    """
    # Fold the replacements over the list, in list order.
    return functools.reduce(
        lambda text, special: text.replace(special, "_"),
        RIZIN_ESCAPE_CHAR_LIST,
        raw_str,
    )
113
111
114
- @ functools . lru_cache
115
- def _get_methods_classified ( self , dexindex ) :
116
- rz = self . _get_rz ( dexindex )
112
+ def _parse_method_from_isj_obj ( self , json_obj , dexindex ):
113
+ if json_obj . get ( "type" ) not in [ "FUNC" , "METH" ] :
114
+ return None
117
115
118
- method_json_list = rz .cmdj ("isj" )
119
- method_dict = defaultdict (list )
120
- for json_obj in method_json_list :
121
- if json_obj .get ("type" ) not in ["FUNC" , "METH" ]:
122
- continue
116
+ # -- Descriptor --
117
+ full_method_name = json_obj ["name" ]
118
+ raw_argument_str = next (
119
+ re .finditer ("\\ (.*\\ ).*" , full_method_name ), None
120
+ )
121
+ if raw_argument_str is None :
122
+ return None
123
+
124
+ raw_argument_str = raw_argument_str .group (0 )
123
125
124
- # -- Descriptor --
125
- full_method_name = json_obj ["name" ]
126
- raw_argument_str = next (
127
- re .finditer ("\\ (.*\\ ).*" , full_method_name ), None
126
+ if raw_argument_str .endswith (")" ):
127
+ # Convert Java lauguage type to JVM type signature
128
+
129
+ # Parse the arguments
130
+ raw_argument_str = raw_argument_str [1 :- 1 ]
131
+ arguments = [
132
+ self ._convert_type_to_type_signature (arg )
133
+ for arg in raw_argument_str .split (", " )
134
+ ]
135
+
136
+ # Parse the return type
137
+ return_type = next (
138
+ re .finditer (
139
+ "[A-Za-zL][A-Za-z0-9L/\\ ;[\\ ]$.]+ " , full_method_name
140
+ ),
141
+ None ,
142
+ )
143
+ if return_type is None :
144
+ print (f"Unresolved method signature: { full_method_name } " )
145
+ return None
146
+ return_type = return_type .group (0 ).strip ()
147
+
148
+ # Convert
149
+ raw_argument_str = (
150
+ "("
151
+ + " " .join (arguments )
152
+ + ")"
153
+ + self ._convert_type_to_type_signature (return_type )
128
154
)
129
- if raw_argument_str is None :
130
- continue
131
- raw_argument_str = raw_argument_str .group (0 )
132
155
133
- if raw_argument_str .endswith (")" ):
134
- # Convert Java lauguage type to JVM type signature
156
+ descriptor = descriptor_to_androguard_format (raw_argument_str )
135
157
136
- # Parse the arguments
137
- raw_argument_str = raw_argument_str [1 :- 1 ]
138
- arguments = [
139
- self ._convert_type_to_type_signature (arg )
140
- for arg in raw_argument_str .split (", " )
141
- ]
158
+ # -- Method name --
159
+ method_name = json_obj ["realname" ]
142
160
143
- # Parse the return type
144
- return_type = next (
145
- re .finditer (
146
- "[A-Za-zL][A-Za-z0-9L/\\ ;[\\ ]$.]+ " , full_method_name
147
- ),
148
- None ,
149
- )
150
- if return_type is None :
151
- print (f"Unresolved method signature: { full_method_name } " )
152
- continue
153
- return_type = return_type .group (0 ).strip ()
154
-
155
- # Convert
156
- raw_argument_str = (
157
- "("
158
- + " " .join (arguments )
159
- + ")"
160
- + self ._convert_type_to_type_signature (return_type )
161
- )
161
+ # -- Is imported --
162
+ is_imported = json_obj ["is_imported" ]
162
163
163
- descriptor = descriptor_to_androguard_format (raw_argument_str )
164
+ # -- Class name --
165
+ # Test if the class name is truncated
166
+ escaped_method_name = self ._escape_str_in_rizin_manner (method_name )
167
+ if escaped_method_name .endswith ("_" ):
168
+ escaped_method_name = escaped_method_name [:- 1 ]
164
169
165
- # -- Method name --
166
- method_name = json_obj ["realname" ]
170
+ flag_name = json_obj ["flagname" ]
167
171
168
- # -- Is imported --
169
- is_imported = json_obj ["is_imported" ]
172
+ # sym.imp.clone doesn't belong to a class
173
+ if flag_name == "sym.imp.clone" :
174
+ method = MethodObject (
175
+ class_name = "" ,
176
+ name = "clone" ,
177
+ descriptor = "()Ljava/lang/Object;" ,
178
+ cache = RizinCache (json_obj ["vaddr" ], dexindex , is_imported ),
179
+ )
180
+ return method
170
181
171
- # -- Class name --
172
- # Test if the class name is truncated
173
- escaped_method_name = self ._escape_str_in_rizin_manner (method_name )
174
- if escaped_method_name .endswith ("_" ):
175
- escaped_method_name = escaped_method_name [:- 1 ]
182
+ if escaped_method_name not in flag_name :
183
+ logging .warning (
184
+ f"The class name may be truncated: { json_obj ['flagname' ]} "
185
+ )
176
186
177
- flag_name = json_obj ["flagname" ]
187
+ # Drop the method name
188
+ match = None
189
+ for match in re .finditer ("_+[A-Za-z]+" , flag_name ):
190
+ pass
191
+ if match is None :
192
+ logging .warning (f"Skip the damaged flag: { json_obj ['flagname' ]} " )
193
+ return None
194
+ match = match .group (0 )
195
+ flag_name = flag_name [: flag_name .rfind (match )]
178
196
179
- # sym.imp.clone doesn't belong to a class
180
- if flag_name == "sym.imp.clone" :
181
- method = MethodObject (
182
- class_name = "" ,
183
- name = "clone" ,
184
- descriptor = "()Ljava/lang/Object;" ,
185
- cache = RizinCache (json_obj ["vaddr" ], dexindex , is_imported ),
186
- )
187
- method_dict ["" ].append (method )
188
- continue
197
+ # Drop the prefixes sym. and imp.
198
+ while flag_name .startswith ("sym." ) or flag_name .startswith ("imp." ):
199
+ flag_name = flag_name [4 :]
189
200
190
- if escaped_method_name not in flag_name :
191
- logging .warning (
192
- f"The class name may be truncated: { json_obj ['flagname' ]} "
193
- )
201
+ class_name = self ._convert_type_to_type_signature (flag_name )
194
202
195
- # Drop the method name
196
- match = None
197
- for match in re .finditer ("_+[A-Za-z]+" , flag_name ):
198
- pass
199
- if match is None :
200
- logging .warning (
201
- f"Skip the damaged flag: { json_obj ['flagname' ]} "
202
- )
203
- continue
204
- match = match .group (0 )
205
- flag_name = flag_name [: flag_name .rfind (match )]
203
+ # Append the method
204
+ method = MethodObject (
205
+ class_name = class_name ,
206
+ name = method_name ,
207
+ descriptor = descriptor ,
208
+ cache = RizinCache (json_obj ["vaddr" ], dexindex , is_imported ),
209
+ )
206
210
207
- # Drop the prefixes sym. and imp.
208
- while flag_name .startswith ("sym." ) or flag_name .startswith ("imp." ):
209
- flag_name = flag_name [4 :]
211
+ return method
210
212
211
- class_name = self ._convert_type_to_type_signature (flag_name )
213
+ @functools .lru_cache
214
+ def _get_methods_classified (self , dexindex ):
215
+ rz = self ._get_rz (dexindex )
212
216
213
- # Append the method
214
- method = MethodObject (
215
- class_name = class_name ,
216
- name = method_name ,
217
- descriptor = descriptor ,
218
- cache = RizinCache (json_obj ["vaddr" ], dexindex , is_imported ),
219
- )
220
- method_dict [class_name ].append (method )
217
+ method_json_list = rz .cmdj ("isj" )
218
+ method_dict = defaultdict (list )
219
+ for json_obj in method_json_list :
220
+ method = self ._parse_method_from_isj_obj (json_obj , dexindex )
221
+
222
+ if method :
223
+ method_dict [method .class_name ].append (method )
221
224
222
225
# Remove duplicates
223
226
for class_name , method_list in method_dict .items ():
@@ -296,19 +299,19 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
296
299
if xref ["type" ] != "CALL" :
297
300
continue
298
301
299
- if "fcn_addr " in xref :
300
- matched_method = self ._get_method_by_address (xref ["fcn_addr " ])
302
+ if "from " in xref :
303
+ matched_method = self ._get_method_by_address (xref ["from " ])
301
304
if not matched_method :
302
305
logging .debug (
303
- f"Cannot identify function at { xref ['fcn_addr ' ]} ."
306
+ f"Cannot identify function at { xref ['from ' ]} ."
304
307
)
305
308
continue
306
309
307
310
upperfunc_set .add (matched_method )
308
311
else :
309
312
logging .debug (
310
- f"Key from was not found at searching "
311
- f" upper methods for { method_object } ."
313
+ f"Key from was not found when trying to search "
314
+ f" upper methods of { method_object } ."
312
315
)
313
316
314
317
return upperfunc_set
@@ -317,41 +320,32 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
317
320
def lowerfunc(self, method_object: MethodObject) -> Set[MethodObject]:
    """
    Find the methods called from inside the given method.

    The method is disassembled with Rizin's `pdfj` and every CALL
    cross-reference attached to an instruction is resolved to a method.

    NOTE(review): the value returned is a list of
    `(callee, offset)` tuples, which does not match the declared
    return annotation - confirm which one callers rely on.

    :param method_object: the method to inspect; its `cache` supplies the
        DEX index and the start address
    :return: a list of `(callee, offset)` tuples, where `offset` is the
        calling instruction's offset minus the method's start address;
        empty when Rizin yields no disassembly
    """
    cache = method_object.cache
    rz = self._get_rz(cache.dexindex)

    # cmdj may yield nothing (e.g. the address is not a function);
    # treat that as "no callees" instead of failing on the subscript.
    disassembly = rz.cmdj(f"pdfj @ {cache.address}")
    if not disassembly:
        return []

    lowerfunc_list = []
    for ins in disassembly.get("ops") or ():
        for xref in ins.get("xrefs_from") or ():
            if xref["type"] != "CALL":
                continue

            callee = self._get_method_by_address(xref["addr"])
            if not callee:
                logging.debug(
                    f"Cannot identify function at {xref['addr']}."
                )
                continue

            lowerfunc_list.append((callee, ins["offset"] - cache.address))

    return lowerfunc_list
355
349
356
350
def get_method_bytecode (
357
351
self , method_object : MethodObject
@@ -482,12 +476,15 @@ def subclass_relationships(self) -> Dict[str, Set[str]]:
482
476
return hierarchy_dict
483
477
484
478
def _get_method_by_address (self , address : int ) -> MethodObject :
485
- if address < 0 :
486
- return None
479
+ dexindex = 0
487
480
488
- for method in self .all_methods :
489
- if method .cache .address == address :
490
- return method
481
+ rz = self ._get_rz (dexindex )
482
+ json_array = rz .cmdj (f"is.j @ { address } " )
483
+
484
+ if json_array :
485
+ return self ._parse_method_from_isj_obj (json_array [0 ], dexindex )
486
+ else :
487
+ return None
491
488
492
489
@staticmethod
493
490
def _parse_smali (smali : str ) -> BytecodeObject :
0 commit comments