Skip to content

Commit 4c1feef

Browse files
esythanroot
authored and committed
on off diff
1 parent f5ae2b9 commit 4c1feef

File tree

5 files changed

+448
-0
lines changed

5 files changed

+448
-0
lines changed

tools/onoff_diff/get_xbox_model.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python
2+
# coding=utf-8
3+
4+
import os
5+
import sys
6+
import re
7+
import json
8+
import numpy
9+
emb_dim = 9
10+
11+
# def get_xbox_model(infile, outfile):
12+
# fout = open(outfile, 'w')
13+
# with open(infile, 'r') as fin:
14+
# for line in fin:
15+
# out_list = []
16+
# feasign, emb = re.split('\t', line.strip('\n'))
17+
# emb = re.split(' ', emb)
18+
# out_list.append(feasign)
19+
# out_list.extend(['0'] * 5)
20+
# out_list.append(emb[0])
21+
# out_list.append('0')
22+
# out_list.extend(emb[1:])
23+
# out_list.extend(['0'] * len(emb[1:]))
24+
# fout.write('{}\n'.format(' '.join(out_list)))
25+
26+
# fout.close()
27+
28+
def get_xbox_model(infile, outfile, dim=None):
    """Convert a cube result dump into the xbox embedding text format.

    Each non-empty input line must look like ``<feasign>\\t<v1>,<v2>,...``
    (a trailing comma after the values is tolerated). The output starts with
    a header row for key ``0`` whose value field is all zeros, followed by
    one row per input line::

        <feasign>\\t1\\t0\\t1\\t<values>,<2 * dim + 3 zero fields>

    where ``<values>`` is the input embedding right-padded with
    ``0.000000`` up to ``dim`` entries. Embeddings longer than ``dim`` are
    written unchanged (they are not truncated).

    Args:
        infile: Path of the cube result file to read.
        outfile: Path of the xbox model file to write (overwritten).
        dim: Embedding width used for padding and for the zero fields.
            Defaults to the module-level ``emb_dim``.

    Raises:
        ValueError: If a non-empty line does not contain exactly one tab.
    """
    if dim is None:
        dim = emb_dim
    zero = '0.000000'
    # Zero-filled tail appended after the embedding values of every row.
    tail = [zero] * (2 * dim + 3)

    # Both handles are context-managed so the output file is flushed and
    # closed even when a malformed line raises part-way through.
    with open(infile, 'r') as fin, open(outfile, 'w') as fout:
        header_keys = '\t'.join(['0', '1', '0', '1'])
        header_vals = ','.join([zero] * (3 * dim + 3))
        fout.write('{}\t{}\n'.format(header_keys, header_vals))

        for line in fin:
            line = line.strip('\n')
            if not line:
                # A blank (e.g. trailing) line carries no feasign; skip it
                # instead of failing on the tuple unpack below.
                continue
            feasign, emb = line.split('\t')
            emb = emb.strip(',').split(',')
            if len(emb) < dim:
                emb.extend([zero] * (dim - len(emb)))
            key_part = '\t'.join([feasign, '1', '0', '1'])
            val_part = ','.join(emb + tail)
            fout.write('{}\t{}\n'.format(key_part, val_part))
53+
54+
if __name__ == '__main__':
    import argparse

    # Command-line entry point: -i is the cube result to read, -o is the
    # xbox model file to write. Both options are mandatory.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-i', dest='infile', required=True, help='输入文件,cube结果')
    arg_parser.add_argument(
        '-o', dest='outfile', required=True, help='输出文件,xbox模型')
    cli_args = arg_parser.parse_args()
    get_xbox_model(cli_args.infile, cli_args.outfile)

tools/onoff_diff/onoff_diff.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#!/usr/bin/env python
2+
# coding=utf-8
3+
4+
import os
5+
import sys
6+
import re
7+
import json
8+
import numpy
9+
10+
FLAG = True
11+
12+
#FLAG=False
13+
14+
15+
def get_all_vars(var_file):
    """Return the non-empty lines of ``var_file`` in file order.

    Only the newline characters are stripped from each line; any other
    whitespace is kept as part of the returned name.
    """
    with open(var_file, 'r') as handle:
        stripped = (raw.strip('\n') for raw in handle)
        return [name for name in stripped if name]
25+
26+
27+
def get_data_from_log(online_log_file, origin_file):
    """Parse per-instance variable dumps out of the online prediction log.

    ``origin_file`` supplies the instance ids: the first tab-separated
    column of its i-th line (0-based) is the id used for ``batchIdx`` i.

    Every log line is scanned for bracketed ``[key:value]`` fields. The
    recognised keys are ``batchIdx``, ``name``, ``shape`` (comma-separated
    dimensions, any rank) and ``data`` (comma-separated floats). Bracketed
    fields without a colon and unknown keys are ignored. Lines that do not
    carry both a ``batchIdx`` and a ``name`` are skipped, so unrelated log
    output does not pollute the result.

    Args:
        online_log_file: Path of the online prediction log.
        origin_file: Path of the offline input file that carries the ids.

    Returns:
        dict: ``{insid: {var_name: (length, [float, ...])}}`` where
        ``length`` is the product of the logged shape dimensions.

    Raises:
        IndexError: If a ``batchIdx`` is outside the ids in ``origin_file``.
        ValueError: If a number cannot be parsed, or the number of data
            values does not match the logged shape.
    """
    with open(origin_file, 'r') as fin:
        insid_list = [line.strip('\n').split('\t')[0] for line in fin]

    # Compiled once: the same pattern is applied to every log line.
    bracket_re = re.compile(r'\[([^\]]*)\]')
    res = {}
    with open(online_log_file, 'r') as fin:
        for line in fin:
            insid = None
            var_name = None
            length = 0
            data = []
            for ele in bracket_re.findall(line.strip('\n')):
                # Split on the first colon only, so values that contain
                # colons themselves (e.g. timestamps) do not raise.
                key, sep, value = ele.partition(':')
                if not sep:
                    continue
                if key == 'batchIdx':
                    insid = insid_list[int(value)]
                elif key == 'name':
                    var_name = value
                elif key == 'shape':
                    length = 1
                    for dim in value.split(','):
                        if dim.strip():
                            length *= int(dim)
                elif key == 'data':
                    data = [float(x) for x in value.split(',') if x.strip()]

            if insid is None or var_name is None:
                # Not a variable dump line; nothing to compare offline.
                continue
            if length != len(data):
                raise ValueError(
                    'shape/data mismatch for var {} of ins {}: shape implies '
                    '{} values, got {}'.format(var_name, insid, length,
                                               len(data)))
            res.setdefault(insid, {})[var_name] = (length, data)

    return res
64+
65+
66+
def get_data_from_model(model_dump_file):
    """Load per-instance variable values from the offline model dump.

    Each line is tab-separated: column 0 is the instance id, column 1 is
    ignored, and every following column is ``name:length:v1:v2:...``.

    Returns:
        dict: ``{insid: {var_name: (length, [float, ...])}}``. An id that
        appears on several lines keeps only its last line.
    """
    parsed = {}

    with open(model_dump_file, 'r') as dump:
        for raw in dump:
            columns = raw.strip('\n').split('\t')
            per_var = {}
            parsed[columns[0]] = per_var
            for field in columns[2:]:
                # Only the first two colons are structural; the remainder
                # is the colon-separated value list.
                var_name, declared, payload = field.split(':', 2)
                declared = int(declared)
                values = list(map(float, payload.split(':')))
                assert declared == len(values)
                per_var[var_name] = (declared, values)
    return parsed
82+
83+
84+
def onoff_var_diff(log_data, model_data, var_name):
    """Compare one variable between online log data and offline dump data.

    For every instance id present in both inputs, the values of
    ``var_name`` are compared element by element. Each compared pair is
    printed, and a summary of how many element differences exceed
    1, 0.1, ..., 1e-6 is printed afterwards. The buckets are cumulative:
    a difference of 0.5 is counted in every bucket from 0.1 downwards.

    Instances that lack ``var_name`` on either side are counted in the
    "same ins total" figure but are otherwise skipped.

    Args:
        log_data: ``{insid: {var_name: (length, values)}}`` from the
            online log.
        model_data: Same structure, from the offline model dump.
        var_name: Name of the variable to compare.

    Returns:
        list: One ``(insid, online_values, offline_values)`` tuple per
        instance that has at least one element differing by more than
        0.01.

    Raises:
        ValueError: If the two sides report different lengths for the
            variable of the same instance.
    """
    diff_ins_list = []
    same_insid_counts = 0
    all_diff = []

    for insid in log_data:
        if insid not in model_data:
            continue
        same_insid_counts += 1
        # Check both sides before indexing: a variable missing from the
        # online log must not abort the whole comparison with a KeyError.
        if var_name not in log_data[insid] or var_name not in model_data[insid]:
            continue
        log_val = log_data[insid][var_name]
        model_val = model_data[insid][var_name]
        if log_val[0] != model_val[0]:
            raise ValueError(
                'length of {} differs for ins {}: online is {}, offline is {}'
                .format(var_name, insid, log_val[0], model_val[0]))

        has_diff = False
        for i in range(log_val[0]):
            diff = abs(log_val[1][i] - model_val[1][i])
            all_diff.append(diff)
            print(insid, log_val[1][i], model_val[1][i])
            if diff > 0.01:
                has_diff = True
        if has_diff:
            # Record the instance once, however many elements differ.
            diff_ins_list.append((insid, log_val[1], model_val[1]))

    print("diff_ins_list:", diff_ins_list)
    all_diff = numpy.asarray(all_diff)
    # NOTE(review): the ratios below divide element counts by the number
    # of shared instances; they are only true fractions when the variable
    # holds one value per instance -- confirm this is intended.
    total = same_insid_counts
    print("same ins total: {}".format(same_insid_counts))

    no_diff = numpy.sum(all_diff < 0.000001)
    if no_diff:
        print("无diff: {}, {}".format(no_diff, float(no_diff) / total))

    labels = [
        '个位diff', '十分位diff', '百分位diff', '千分位diff', '万分位diff',
        '十万分位diff', '百万分位diff'
    ]
    multis = [1, 10, 100, 1000, 10000, 100000, 1000000]
    for label, multi in zip(labels, multis):
        # diff * multi > 1 is the same as diff > 1 / multi.
        diff_counts = numpy.sum(all_diff * multi > 1)
        if diff_counts:
            print("{}: {}, {}".format(label, diff_counts,
                                      float(diff_counts) / total))
    return diff_ins_list
126+
127+
128+
def onoff_max_diff(log_data, model_data, ins_id):
    """Print, for one instance, each variable whose online and offline
    values differ by more than 2e-5 at some element.

    Iterates over the variables of the offline side. A variable missing
    from the online side, or whose length differs between the two sides,
    is reported on stderr and skipped.
    """
    online = log_data[ins_id]
    offline = model_data[ins_id]
    for var_name in offline:
        largest = 0.0
        if var_name not in online:
            print(
                'var {} is not in online log'.format(var_name),
                file=sys.stderr)
            continue

        online_entry = online[var_name]
        offline_entry = offline[var_name]
        if online_entry[0] != offline_entry[0]:
            print(
                'The length of {} is wrong, online is {}, offline is {}'.
                format(var_name, online_entry[0], offline_entry[0]),
                file=sys.stderr)
            continue

        # Track the largest absolute element-wise gap for this variable.
        for idx in range(online_entry[0]):
            gap = abs(online_entry[1][idx] - offline_entry[1][idx])
            largest = max(largest, gap)

        if largest > 2e-5:
            print("ins_id:{}, var_name:{}: {}".format(ins_id, var_name,
                                                      largest))
150+
151+
152+
if __name__ == '__main__':
    import argparse

    # Command-line entry point: compare the last variable listed in the
    # var file between online and offline predictions, then print the
    # per-variable maximum difference for every instance that differed.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-l', help='线上日志文件', dest='online_log_file', required=True)
    parser.add_argument(
        '-m', help='模型dump文件', dest='model_dump_file', required=True)
    parser.add_argument('-v', help='所有var文件', dest='var_file', required=True)
    parser.add_argument(
        '-o', help='offline原始数据文件', dest='origin_file', required=True)
    args = parser.parse_args()

    log_data = get_data_from_log(args.online_log_file, args.origin_file)
    model_data = get_data_from_model(args.model_dump_file)
    var_list = get_all_vars(args.var_file)
    if not var_list:
        # Exit with a usage error instead of an IndexError on var_list[-1].
        parser.error('no variable names found in {}'.format(args.var_file))

    diff_ins_list = onoff_var_diff(log_data, model_data, var_list[-1])

    # onoff_var_diff returns (insid, online_values, offline_values) tuples;
    # onoff_max_diff needs only the instance id. Passing the whole tuple
    # would fail on the dict lookup because it contains lists. Ids are
    # de-duplicated so each instance is reported once.
    reported = set()
    for entry in diff_ins_list:
        ins_id = entry[0]
        if ins_id in reported:
            continue
        reported.add(ins_id)
        onoff_max_diff(log_data, model_data, ins_id)
168+
# print('<pie-charts>{}</pie-charts>'.format(res))

tools/onoff_diff/run.sh

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Online/offline prediction diff pipeline:
#   1. build the offline input by prefixing each online input line with an id
#   2. convert the cube result into the xbox embedding format
#   3. run offline prediction and collect its dump
#   4. compare the online log against the offline dump
day=20190720
pass_id=6
ROOT_PATH=/xxx/PaddleRec
TOOL_PATH=$ROOT_PATH/tools/onoff_diff
MODEL_PATH=$ROOT_PATH/models/rank/slot_dnn
# INFERENCE_PATH=$ROOT_PATH/tools/inference_c++2.0
# Input data for online prediction
ONLINE_FILE=$MODEL_PATH/infer_data/online/demo_10
# Input data for offline prediction
OFFLINE_FILE=$MODEL_PATH/infer_data/offline/demo_10
# Prefix each online input line with an insid (its line number) to build
# the offline prediction input
echo "get offline data..."
awk -F'\t' 'BEGIN{OFS="\t"}{print NR,$0}' "$ONLINE_FILE" > "$OFFLINE_FILE"

# Online prediction
# Online prediction data lives in $MODEL_PATH/infer_data/online, without insid
# cd $INFERENCE_PATH
# ./bin/main --flagfile ./user.flags
# cp std.log $TOOL_PATH/data/log.online

# Offline prediction
CUBE_FILE=./data/cube.result
XBOX_FILE=./data/xbox_model_result
SAVE_MODEL_PATH=$MODEL_PATH/output_model/$day/inference_model_$pass_id/
# cp online.cube.result to data/cube.result
# Convert the cube output file into the large-model format and place it in
# the model path loaded by offline prediction
echo "get xbox model..."
# Abort if the directory is missing so later steps never run elsewhere.
cd "$TOOL_PATH" || exit 1
python3 get_xbox_model.py -i "$CUBE_FILE" \
    -o "$XBOX_FILE"
sort -u "$XBOX_FILE" > "$SAVE_MODEL_PATH/embedding.shard/embedding.block0.txt"

# Copy the small model

# Offline prediction data lives in $MODEL_PATH/infer_data/offline, with insid
echo "offline predict..."
cd "$MODEL_PATH" || exit 1
fleetrun --server_num=1 --worker_num=1 ../../../tools/static_ps_offline_infer.py -m config_offline_infer.yaml
OFFLINE_DUMP_PATH=$MODEL_PATH/dump_offline_infer
cat "$OFFLINE_DUMP_PATH"/part* > "$TOOL_PATH/data/log.offline"

echo "online offline diff..."
cd "$TOOL_PATH" || exit 1
python3 onoff_diff.py -l ./data/log.online \
    -m ./data/log.offline \
    -v "$MODEL_PATH/all_vars.txt" \
    -o "$OFFLINE_FILE"

0 commit comments

Comments
 (0)