Skip to content

Commit 9315b54

Browse files
committed
APK main feature into DB
For repackage detection.
1 parent b040980 commit 9315b54

File tree

6 files changed

+208
-94
lines changed

6 files changed

+208
-94
lines changed

LibRadar/_settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
DB_HOST = 'localhost'
4141
DB_PORT = 6379
4242
DB_ID = 2
43+
DB_ID_REP = 1
4344
# if you don't have Password, delete DB_PSWD
4445
DB_PSWD = ''
4546

LibRadar/dex_tree.py

Lines changed: 61 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020
# This script is used to implement the tree node and tree structure.
2121

2222

23-
24-
2523
from _settings import *
24+
from collections import Counter
2625
import hashlib
2726
import csv
2827
import redis
28+
import zlib
29+
import rputil
2930

3031

3132
# tag_rules
@@ -62,23 +63,27 @@ def __init__(self, n_weight=-1, n_pn="", n_parent=None):
6263
self.match = list()
6364
self.permissions = set()
6465
self.db = redis.StrictRedis(host=DB_HOST, port=DB_PORT, db=DB_ID, password=DB_PSWD)
66+
self.api_id_list = []
6567

66-
def insert(self, package_name, weight, sha256, permission_list):
68+
def insert(self, package_name, weight, sha256, permission_list, api_id_list):
6769
# no matter how deep the package is, add permissions here.
6870
for permission in permission_list:
6971
self.permissions.add(permission)
72+
# no matter how deep the package is, add api_id_list
73+
# self.api_id_list = self.api_id_list + api_id_list
7074
current_depth = 0 if self.pn == "" else self.pn.count('/') + 1
7175
target_depth = package_name.count('/') + 1
7276
if current_depth == target_depth:
7377
self.sha256 = sha256
78+
self.api_id_list = api_id_list
7479
return "F: %s" % package_name
7580
target_package_name = '/'.join(package_name.split('/')[:current_depth + 1])
7681
if target_package_name in self.children:
7782
self.children[target_package_name].weight += weight
78-
return self.children[target_package_name].insert(package_name, weight, sha256, permission_list)
83+
return self.children[target_package_name].insert(package_name, weight, sha256, permission_list, api_id_list)
7984
else:
8085
self.children[target_package_name] = TreeNode(n_weight=weight, n_pn=target_package_name, n_parent=self)
81-
return self.children[target_package_name].insert(package_name, weight, sha256, permission_list)
86+
return self.children[target_package_name].insert(package_name, weight, sha256, permission_list, api_id_list)
8287

8388
def brand(self, package_name, standard_package):
8489
current_depth = 0 if self.pn == "" else self.pn.count('/') + 1
@@ -108,22 +113,15 @@ class Tree(object):
108113
"""
109114
Tree
110115
"""
111-
def __init__(self, lite=True):
112-
self.lite = lite
116+
def __init__(self):
113117
self.root = TreeNode()
114118
self.db = None
115119
self.feature = None
116-
if not self.lite:
117-
self.db = redis.StrictRedis(host=DB_HOST, port=DB_PORT, db=DB_ID, password=DB_PSWD)
118-
else:
119-
self.feature = dict()
120-
with open(LITE_DATASET_10, 'r') as file_rules:
121-
csv_rules_reader = csv.reader(file_rules, delimiter=',', quotechar='|')
122-
for row in csv_rules_reader:
123-
self.feature[row[0]] = row[1:5]
120+
self.db = redis.StrictRedis(host=DB_HOST, port=DB_PORT, db=DB_ID, password=DB_PSWD)
121+
self.db_rep = redis.StrictRedis(host=DB_HOST, port=DB_PORT, db=DB_ID_REP, password=DB_PSWD)
124122

125-
def insert(self, package_name, weight, sha256, permission_list):
126-
self.root.insert(package_name, weight, sha256, permission_list)
123+
def insert(self, package_name, weight, sha256, permission_list, api_id_list):
124+
self.root.insert(package_name, weight, sha256, permission_list, api_id_list)
127125

128126
def brand(self, package_name, standard_package):
129127
return self.root.brand(package_name, standard_package)
@@ -139,6 +137,17 @@ def _pre_order_res(self, node, visit, res):
139137
for child_pn in node.children:
140138
self._pre_order_res(node.children[child_pn], visit, res)
141139

140+
def pre_order_res_ret(self, visit, res, ret):
    """
    Run a prunable pre-order traversal over the whole tree.

    Delegates to ``_pre_order_res_ret`` starting from ``self.root``.

    :param visit: callable invoked as ``visit(node, res, ret)`` for each node
    :param res: passed through unchanged to every ``visit`` call
    :param ret: passed through unchanged to every ``visit`` call
    """
    start_node = self.root
    self._pre_order_res_ret(node=start_node, visit=visit, res=res, ret=ret)
142+
143+
def _pre_order_res_ret(self, node, visit, res, ret):
    """
    Visit ``node`` first, then recurse into its children unless pruned.

    :param node: current node; must expose a ``children`` mapping
    :param visit: callable invoked as ``visit(node, res, ret)``; a negative
                  return value means "do not descend below this node"
    :param res: passed through unchanged to ``visit``
    :param ret: passed through unchanged to ``visit``
    """
    status = visit(node, res, ret)
    if status < 0:
        # Negative status prunes the whole subtree rooted at this node.
        return
    kids = node.children
    for child_key in kids:
        self._pre_order_res_ret(kids[child_key], visit, res, ret)
150+
142151
def pre_order(self, visit):
143152
self._pre_order(self.root, visit)
144153

@@ -183,17 +192,12 @@ def cal_sha256(self):
183192

184193
def _match(self, node):
185194
a, c, u = None, None, None
186-
if not self.lite:
187-
pipe = self.db.pipeline()
188-
pipe.hget(name=DB_UN_OB_PN, key=node.sha256)
189-
pipe.hget(name=DB_FEATURE_CNT, key=node.sha256)
190-
pipe.hget(name=DB_UN_OB_CNT, key=node.sha256)
191-
pipe_res = pipe.execute()
192-
a, c, u = pipe_res
193-
else:
194-
if node.sha256 in self.feature:
195-
acu_cur = self.feature[node.sha256]
196-
a, c, u = acu_cur[3], acu_cur[0], acu_cur[2]
195+
pipe = self.db.pipeline()
196+
pipe.hget(name=DB_UN_OB_PN, key=node.sha256)
197+
pipe.hget(name=DB_FEATURE_CNT, key=node.sha256)
198+
pipe.hget(name=DB_UN_OB_CNT, key=node.sha256)
199+
pipe_res = pipe.execute()
200+
a, c, u = pipe_res
197201

198202
# if could not find this package in database, search its children.
199203
if a is None:
@@ -308,17 +312,13 @@ def _find_untagged(self, node, res):
308312
a, c, u = None, None, None
309313
if len(node.match) != 0:
310314
return -1
311-
if not self.lite:
312-
pipe = self.db.pipeline()
313-
pipe.hget(name=DB_UN_OB_PN, key=node.sha256)
314-
pipe.hget(name=DB_FEATURE_CNT, key=node.sha256)
315-
pipe.hget(name=DB_UN_OB_CNT, key=node.sha256)
316-
pipe_res = pipe.execute()
317-
a, c, u = pipe_res
318-
else:
319-
if node.sha256 in self.feature:
320-
acu_cur = self.feature[node.sha256]
321-
a, c, u = acu_cur[3], acu_cur[0], acu_cur[2]
315+
pipe = self.db.pipeline()
316+
pipe.hget(name=DB_UN_OB_PN, key=node.sha256)
317+
pipe.hget(name=DB_FEATURE_CNT, key=node.sha256)
318+
pipe.hget(name=DB_UN_OB_CNT, key=node.sha256)
319+
pipe_res = pipe.execute()
320+
a, c, u = pipe_res
321+
322322

323323
if a is None:
324324
return 1
@@ -387,3 +387,25 @@ def _get_lib(node, res):
387387
def get_lib(self, res):
388388
self.pre_order_res(visit=self._get_lib, res=res)
389389

390+
@staticmethod
def _get_repackage_main(node, res, ret):
    """
    Visitor that collects the API ids of code not attributed to a library.

    Intended for use with ``pre_order_res_ret``.

    :param node: tree node exposing ``pn``, ``children`` and ``api_id_list``
    :param res: collection of package names already identified as libraries
    :param ret: list that receives the API ids (extended in place)
    :return: -1 if ``node.pn`` is in ``res`` (the caller skips the subtree),
             otherwise 0
    """
    if node.pn in res:
        # The package is a detected library: exclude it and everything below.
        return -1
    if not node.children:
        # Only leaves contribute API ids. The previous code appended the same
        # list twice (``extend`` followed by ``+=``), which doubled every API
        # count and the resulting weight; append exactly once.
        ret.extend(node.api_id_list)
    return 0
398+
399+
def get_repackage_main(self, res, hex_sha256):
    """
    Build the API-usage feature of the non-library code and store it in the
    repackage database (``self.db_rep``).

    The tree is traversed with ``_get_repackage_main`` as the visitor, which
    skips every package listed in ``res``. The collected API ids are counted,
    serialised with ``rputil.Util.dict2str``, zlib-compressed and written to
    the ``apk_feature`` hash; the number of collected ids is written to the
    ``apk_weight`` sorted set.

    :param res: list of detected libraries; each item must have a "Package" key
    :param hex_sha256: used as the hash field and the sorted-set member
                       (presumably the APK's SHA-256 -- confirm with caller)
    """
    # A set keeps the per-node ``node.pn in res`` test cheap during traversal.
    library_pns = set(item["Package"] for item in res)
    api_ids = list()
    self.pre_order_res_ret(visit=self._get_repackage_main, res=library_pns, ret=api_ids)
    api_count = len(api_ids)
    kvd = dict(Counter(api_ids))
    # Named ``feature_str`` rather than ``str`` to avoid shadowing the builtin.
    feature_str = rputil.Util.dict2str(kvd)
    zstr = zlib.compress(feature_str, 1)
    self.db_rep.hset(name="apk_feature", key=hex_sha256, value=zstr)
    # NOTE(review): positional (name, score, member) order is kept as in the
    # original call; confirm it matches the installed redis-py version.
    self.db_rep.zadd("apk_weight", api_count, hex_sha256)

LibRadar/libradar.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,21 @@
2727
import hashlib
2828
import zipfile
2929
import json
30+
from collections import Counter
3031

3132
class LibRadar(object):
3233
"""
3334
LibRadar
3435
"""
35-
def __init__(self, apk_path, lite=True):
36+
def __init__(self, apk_path):
3637
"""
3738
Init LibRadar instance with apk_path as a basestring.
3839
Create a Tree for every LibRadar instance. The tree describe the architecture of the apk. Every package is a
3940
node.
4041
:param apk_path: basestring
4142
"""
4243
self.apk_path = apk_path
43-
self.tree = dex_tree.Tree(lite=lite)
44+
self.tree = dex_tree.Tree()
4445
self.dex_name = ""
4546
# Instance of Dex Object in dex_parser.
4647
self.dex = None
@@ -50,6 +51,7 @@ def __init__(self, apk_path, lite=True):
5051
"""
5152
self.k_api_v_permission = dict()
5253
with open(SCRIPT_PATH + "/Data/IntermediateData/strict_api.csv", 'r') as api_and_permission:
54+
api_id = 0
5355
for line in api_and_permission:
5456
api, permission_with_colon = line.split(",")
5557
permissions = permission_with_colon[:-2].split(":")
@@ -58,7 +60,8 @@ def __init__(self, apk_path, lite=True):
5860
for permission in permissions:
5961
if permission != "":
6062
permission_list.append(permission)
61-
self.k_api_v_permission[api] = permission_list
63+
self.k_api_v_permission[api] = (permission_list, api_id)
64+
api_id += 1
6265
"""
6366
invoke_file = open(SCRIPT_PATH +"/Data/IntermediateData/invokeFormat.txt", 'r')
6467
self.invokes = set()
@@ -125,7 +128,7 @@ def get_api_list(self, dex_method, api_list, permission_list):
125128
if 0x6e <= op_code <= 0x72:
126129
if decoded_instruction.getApi in self.k_api_v_permission:
127130
api_list.append(decoded_instruction.getApi)
128-
for permission in self.k_api_v_permission[decoded_instruction.getApi]:
131+
for permission in self.k_api_v_permission[decoded_instruction.getApi][0]:
129132
permission_list.add(permission)
130133
return
131134

@@ -154,7 +157,12 @@ def extract_class(self, dex_class_def_obj):
154157
class_sha256.update(api)
155158
if not IGNORE_ZERO_API_FILES or len(api_list) != 0:
156159
pass
157-
return len(api_list), class_sha256.hexdigest(), class_sha256.hexdigest(), sorted(list(permission_list))
160+
# api_id_list
161+
api_id_list = []
162+
for api in api_list:
163+
api_id_list.append(self.k_api_v_permission[api][1])
164+
return len(api_list), class_sha256.hexdigest(), class_sha256.hexdigest(), sorted(list(permission_list)),\
165+
api_id_list
158166

159167
def extract_dex(self):
160168
# Log Start
@@ -166,7 +174,8 @@ def extract_dex(self):
166174
# Create a Dex object
167175
self.dex = dex_parser.DexFile(self.dex_name)
168176
for dex_class_def_obj in self.dex.dexClassDefList:
169-
weight, raw_sha256, hex_sha256, permission_list = self.extract_class(dex_class_def_obj=dex_class_def_obj)
177+
weight, raw_sha256, hex_sha256, permission_list, api_id_list = \
178+
self.extract_class(dex_class_def_obj=dex_class_def_obj)
170179
class_name = self.dex.getDexTypeId(dex_class_def_obj.classIdx)
171180
"""
172181
I got many \x01 here before the class name.
@@ -180,7 +189,8 @@ def extract_dex(self):
180189
class_name = class_name[l_index:]
181190
if IGNORE_ZERO_API_FILES and weight == 0:
182191
continue
183-
self.tree.insert(package_name=class_name, weight=weight, sha256=raw_sha256, permission_list=permission_list)
192+
self.tree.insert(package_name=class_name, weight=weight, sha256=raw_sha256,
193+
permission_list=permission_list, api_id_list=api_id_list)
184194
return 0
185195

186196
def analyse(self):
@@ -206,17 +216,18 @@ def compare(self):
206216
self.tree.get_lib(res)
207217
# Step 6: traverse the tree, find potential libraries that has not been tagged.
208218
self.tree.find_untagged(res)
219+
# Step 7: repackage feature store.
220+
self.tree.get_repackage_main(res, self.hex_sha256)
209221
return res
210222

211223

212-
213224
if __name__ == '__main__':
214225
if len(sys.argv) != 2:
215226
print("LibRadar only takes 1 arguments.")
216227
print("Usage:")
217228
print(" $ python libradar.py example.apk")
218229
exit(1)
219230
apk_path = sys.argv[1]
220-
lrd = LibRadar(apk_path, lite=True)
231+
lrd = LibRadar(apk_path)
221232
res = lrd.compare()
222233
print(json.dumps(res, indent=4, sort_keys=True))

LibRadar/util.py renamed to LibRadar/rputil.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,21 @@
1919
from _settings import *
2020

2121
class Util(object):
22+
"""
23+
Format of String Storage:
24+
25+
{1:2}
26+
|--------|--------|--------|--------|
27+
|00000000|00000001|00000000|00000010|
28+
|--------|--------|--------|--------|
29+
30+
31+
{2:3,4:128}
32+
|--------|--------|--------|--------|--------|--------|--------|--------|
33+
|00000000|00000010|00000000|00000011|00000000|00000100|00000000|10000000|
34+
|--------|--------|--------|--------|--------|--------|--------|--------|
35+
36+
"""
2237
@staticmethod
2338
def dict2str(kvd):
2439
"""
@@ -70,3 +85,49 @@ def str2dict(feature_str):
7085
value_int = i3 * 256 + i4
7186
kvd[key_int] = value_int
7287
return kvd
88+
89+
@staticmethod
def get_key(feature_str, offset):
    """
    Read the key of the record that starts at ``offset``.

    The key is built from the two characters at ``offset`` and ``offset + 1``,
    most significant first.

    :param feature_str: feature string to read from
    :param offset: index of the first character of the record
    :return: ``ord(first) * 256 + ord(second)``
    """
    high = ord(feature_str[offset])
    low = ord(feature_str[offset + 1])
    return high * 256 + low
98+
99+
@staticmethod
def get_value(feature_str, offset):
    """
    Read the value of the record that starts at ``offset``.

    The value is built from the two characters at ``offset + 2`` and
    ``offset + 3``, most significant first.

    :param feature_str: feature string to read from
    :param offset: index of the first character of the record
    :return: ``ord(third) * 256 + ord(fourth)``
    """
    high = ord(feature_str[offset + 2])
    low = ord(feature_str[offset + 3])
    return high * 256 + low
102+
103+
@staticmethod
def comp_str(str1, str2):
    """
    Return the normalised difference between two feature strings.

    Each feature string is a sequence of 4-character records: a 2-character
    key followed by a 2-character value, both most-significant first. The two
    strings are merged by key; a key present on only one side contributes its
    whole value to the difference, a key present on both sides contributes
    ``abs(v1 - v2)``.

    NOTE(review): the merge assumes records are sorted by ascending key in
    both strings -- confirm that the producer (``dict2str``) guarantees this.

    :param str1: first feature string (length must be a multiple of 4)
    :param str2: second feature string (length must be a multiple of 4)
    :return: ``float(diff) / (sum1 + sum2)``, between 0.0 (identical) and 1.0
             (no common key); 0.0 when both sides sum to zero (e.g. both empty)
    :raises AssertionError: if either length is not a multiple of 4
    """
    assert len(str1) % 4 == 0 and len(str2) % 4 == 0, "Feature_str length is not 4X"

    def decode(feature_str):
        # Decode every 4-character record into a (key, value) pair once.
        return [
            (ord(feature_str[pos]) * 256 + ord(feature_str[pos + 1]),
             ord(feature_str[pos + 2]) * 256 + ord(feature_str[pos + 3]))
            for pos in range(0, len(feature_str), 4)
        ]

    pairs1, pairs2 = decode(str1), decode(str2)
    count1, count2 = len(pairs1), len(pairs2)
    idx1, idx2 = 0, 0
    diff = 0
    sum1, sum2 = 0, 0
    while idx1 < count1 or idx2 < count2:
        # The exhaustion checks come first so the other side is never indexed
        # past its end (the original raised IndexError when str2 ran out
        # before str1).
        if idx2 == count2 or (idx1 < count1 and pairs1[idx1][0] < pairs2[idx2][0]):
            # Key only present in str1.
            v1 = pairs1[idx1][1]
            sum1 += v1
            diff += v1
            idx1 += 1
        elif idx1 == count1 or pairs2[idx2][0] < pairs1[idx1][0]:
            # Key only present in str2.
            v2 = pairs2[idx2][1]
            sum2 += v2
            diff += v2
            idx2 += 1
        else:
            # Same key on both sides.
            v1 = pairs1[idx1][1]
            v2 = pairs2[idx2][1]
            diff += abs(v1 - v2)
            sum1 += v1
            sum2 += v2
            idx1 += 1
            idx2 += 1
    total = sum1 + sum2
    if total == 0:
        # Nothing to compare on either side: treat as identical instead of
        # dividing by zero.
        return 0.0
    return float(diff) / total

0 commit comments

Comments
 (0)