3535 "long" : "J" ,
3636 "float" : "F" ,
3737 "double" : "D" ,
38- "Boolean" : "Ljava/lang/Boolean;" ,
39- "Byte" : "Ljava/lang/Byte;" ,
40- "Character" : "Ljava/lang/Character;" ,
41- "Short" : "Ljava/lang/Short;" ,
42- "Integer" : "Ljava/lang/Integer;" ,
43- "Long" : "Ljava/lang/Long;" ,
44- "Float" : "Ljava/lang/Float;" ,
45- "Double" : "Ljava/lang/Double;" ,
46- "String" : "Ljava/lang/String;" ,
4738}
4839
# Characters that _escape_str_in_rizin_manner() replaces with "_".
# NOTE(review): presumably these are the characters Rizin itself rewrites
# when it derives a flag name from a symbol name -- confirm against Rizin.
RIZIN_ESCAPE_CHAR_LIST = ["<", ">", "$"]
@@ -92,12 +83,19 @@ def _get_rz(self, index):
9283 return rz
9384
9485 def _convert_type_to_type_signature (self , raw_type : str ):
86+ if not raw_type :
87+ return raw_type
88+
9589 if raw_type .endswith ("[]" ):
9690 return "[" + self ._convert_type_to_type_signature (raw_type [:- 2 ])
9791
9892 if raw_type .startswith ("[" ):
9993 return "[" + self ._convert_type_to_type_signature (raw_type [1 :])
10094
95+ if "..." in raw_type :
96+ index = raw_type .index ("..." )
97+ return "[" + self ._convert_type_to_type_signature (raw_type [:index ])
98+
10199 if raw_type in PRIMITIVE_TYPE_MAPPING :
102100 return PRIMITIVE_TYPE_MAPPING [raw_type ]
103101
@@ -106,121 +104,126 @@ def _convert_type_to_type_signature(self, raw_type: str):
106104 raw_type = raw_type .replace ("_" , "$" )
107105 return "L" + raw_type + ";"
108106
109- return raw_type
107+ return "Ljava/lang/" + raw_type + ";"
110108
@staticmethod
def _escape_str_in_rizin_manner(raw_str: str):
    """Return raw_str with every character listed in
    RIZIN_ESCAPE_CHAR_LIST replaced by an underscore.

    :param raw_str: the string to escape
    :return: the escaped string
    """
    # Fold the replacements over the list, one escape character at a time.
    return functools.reduce(
        lambda text, special_char: text.replace(special_char, "_"),
        RIZIN_ESCAPE_CHAR_LIST,
        raw_str,
    )
116114
117- @ functools . lru_cache
118- def _get_methods_classified ( self , dexindex ) :
119- rz = self . _get_rz ( dexindex )
def _parse_method_from_isj_obj(self, json_obj, dexindex):
    """Build a MethodObject from one entry of Rizin's ``isj`` output.

    :param json_obj: a symbol entry; the keys ``type``, ``name``,
        ``realname``, ``flagname``, ``vaddr`` and ``is_imported`` are read
    :param dexindex: index of the DEX file the entry came from; it is
        stored in the RizinCache attached to the returned method
    :return: the parsed MethodObject, or None when the entry is not a
        function/method, or when its signature or flag name cannot be
        parsed
    """
    if json_obj.get("type") not in ["FUNC", "METH"]:
        return None

    # -- Descriptor --
    full_method_name = json_obj["name"]
    signature_match = re.search(r"\(.*\).*", full_method_name)
    if signature_match is None:
        return None

    raw_argument_str = signature_match.group(0)

    if raw_argument_str.endswith(")"):
        # Nothing follows the closing parenthesis, so the return type is
        # not part of the matched text.
        # Convert Java language type to JVM type signature

        # Parse the arguments
        raw_argument_str = raw_argument_str[1:-1]
        arguments = [
            self._convert_type_to_type_signature(arg)
            for arg in raw_argument_str.split(", ")
        ]

        # Parse the return type: the first type-like token that is
        # followed by a space in the full name.
        return_type_match = re.search(
            r"[A-Za-zL][A-Za-z0-9L/\;[\]$.]+ ", full_method_name
        )
        if return_type_match is None:
            # Reported through logging, like the other skip paths below.
            logging.warning(
                f"Unresolved method signature: {full_method_name}"
            )
            return None
        return_type = return_type_match.group(0).strip()

        # Convert
        raw_argument_str = (
            "("
            + " ".join(arguments)
            + ")"
            + self._convert_type_to_type_signature(return_type)
        )

    descriptor = descriptor_to_androguard_format(raw_argument_str)

    # -- Method name --
    method_name = json_obj["realname"]

    # -- Is imported --
    is_imported = json_obj["is_imported"]

    # -- Class name --
    # Test if the class name is truncated
    escaped_method_name = self._escape_str_in_rizin_manner(method_name)
    if escaped_method_name.endswith("_"):
        escaped_method_name = escaped_method_name[:-1]

    flag_name = json_obj["flagname"]

    # sym.imp.clone doesn't belong to a class
    if flag_name == "sym.imp.clone":
        return MethodObject(
            class_name="",
            name="clone",
            descriptor="()Ljava/lang/Object;",
            cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
        )

    if escaped_method_name not in flag_name:
        logging.warning(
            f"The class name may be truncated: {json_obj['flagname']}"
        )

    # Drop the method name: cut the flag at the last "_<letters>" run.
    name_parts = re.findall(r"_+[A-Za-z]+", flag_name)
    if not name_parts:
        logging.warning(f"Skip the damaged flag: {json_obj['flagname']}")
        return None
    flag_name = flag_name[: flag_name.rfind(name_parts[-1])]

    # Drop the prefixes sym. and imp. (both are four characters long)
    while flag_name.startswith(("sym.", "imp.")):
        flag_name = flag_name[4:]

    class_name = self._convert_type_to_type_signature(flag_name)

    return MethodObject(
        class_name=class_name,
        name=method_name,
        descriptor=descriptor,
        cache=RizinCache(json_obj["vaddr"], dexindex, is_imported),
    )
213215
214- class_name = self ._convert_type_to_type_signature (flag_name )
216+ @functools .lru_cache
217+ def _get_methods_classified (self , dexindex ):
218+ rz = self ._get_rz (dexindex )
215219
216- # Append the method
217- method = MethodObject (
218- class_name = class_name ,
219- name = method_name ,
220- descriptor = descriptor ,
221- cache = RizinCache (json_obj ["vaddr" ], dexindex , is_imported ),
222- )
223- method_dict [class_name ].append (method )
220+ method_json_list = rz .cmdj ("isj" )
221+ method_dict = defaultdict (list )
222+ for json_obj in method_json_list :
223+ method = self ._parse_method_from_isj_obj (json_obj , dexindex )
224+
225+ if method :
226+ method_dict [method .class_name ].append (method )
224227
225228 # Remove duplicates
226229 for class_name , method_list in method_dict .items ():
@@ -347,19 +350,19 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
347350 if xref ["type" ] != "CALL" :
348351 continue
349352
350- if "fcn_addr " in xref :
351- matched_method = self ._get_method_by_address (xref ["fcn_addr " ])
353+ if "from " in xref :
354+ matched_method = self ._get_method_by_address (xref ["from " ])
352355 if not matched_method :
353356 logging .debug (
354- f"Cannot identify function at { xref ['fcn_addr ' ]} ."
357+ f"Cannot identify function at { xref ['from ' ]} ."
355358 )
356359 continue
357360
358361 upperfunc_set .add (matched_method )
359362 else :
360363 logging .debug (
361- f"Key from was not found at searching "
362- f" upper methods for { method_object } ."
364+ f"Key from was not found when trying to search "
365+ f" upper methods of { method_object } ."
363366 )
364367
365368 return upperfunc_set
@@ -368,41 +371,32 @@ def upperfunc(self, method_object: MethodObject) -> Set[MethodObject]:
def lowerfunc(self, method_object: MethodObject) -> Set[MethodObject]:
    """Find the methods called from the body of the given method.

    NOTE(review): the return annotation is stale. A list of tuples is
    returned, not a set of MethodObject -- update the annotation once
    the required typing names are confirmed to be imported.

    :param method_object: the caller; its ``cache`` supplies the DEX
        index and the address to disassemble
    :return: a list of ``(callee, offset)`` tuples in the order the
        instructions appear in the ``pdfj`` output, where ``offset`` is
        the calling instruction's offset minus the caller's address.
        The list is empty when no disassembly is available.
    """
    cache = method_object.cache

    rz = self._get_rz(cache.dexindex)

    # Guard against a missing result: subscripting None (or a dict
    # without "ops") would otherwise abort the whole analysis.
    disassembly = rz.cmdj(f"pdfj @ {cache.address}")
    if not disassembly:
        return []

    lowerfunc_list = []
    for ins in disassembly.get("ops", []):
        for call_xref in ins.get("xrefs_from", ()):
            if call_xref["type"] != "CALL":
                continue

            callee = self._get_method_by_address(call_xref["addr"])
            if not callee:
                logging.debug(
                    f"Cannot identify function at {call_xref['addr']}."
                )
                continue

            offset = ins["offset"] - cache.address

            lowerfunc_list.append((callee, offset))

    return lowerfunc_list
406400
407401 def get_method_bytecode (
408402 self , method_object : MethodObject
@@ -533,12 +527,15 @@ def subclass_relationships(self) -> Dict[str, Set[str]]:
533527 return hierarchy_dict
534528
535529 def _get_method_by_address (self , address : int ) -> MethodObject :
536- if address < 0 :
537- return None
530+ dexindex = 0
538531
539- for method in self .all_methods :
540- if method .cache .address == address :
541- return method
532+ rz = self ._get_rz (dexindex )
533+ json_array = rz .cmdj (f"is.j @ { address } " )
534+
535+ if json_array :
536+ return self ._parse_method_from_isj_obj (json_array [0 ], dexindex )
537+ else :
538+ return None
542539
543540 @staticmethod
544541 def _parse_parameter (mnemonic : str , parameter : str ) -> Any :
0 commit comments