Skip to content

Commit 293fdf7

Browse files
committed
fix bug in ps
1 parent e6e00ac commit 293fdf7

File tree

3 files changed

+14
-7
lines changed

3 files changed

+14
-7
lines changed

models/rank/slot_dnn/config_online.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ table_parameters:
5858
table_class: "MemorySparseTable"
5959
shard_num: 10
6060
accessor:
61-
accessor_class: "CtrCommonAccessor"
61+
accessor_class: "SparseAccessor"
6262
fea_dim: 11
6363
embedx_dim: 8
6464
embedx_threshold: 10

tools/static_ps_online_trainer.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ def run_online_worker(self):
254254
logger.info("training a new day {}, end_day = {}".format(
255255
day, self.end_day))
256256
if last_day != -1 and int(day) < last_day:
257-
day = int(get_next_day(day))
257+
day = get_next_day(day)
258258
continue
259259
# base_model_saved = False
260260
for pass_id in range(1, 1 + len(self.online_intervals)):
@@ -409,18 +409,24 @@ def run_online_worker(self):
409409
monitor_data=metric_str)
410410
fleet.barrier_worker()
411411

412+
logger.info("shrink table")
413+
begin = time.time()
414+
fleet.shrink()
415+
end = time.time()
416+
logger.info("shrink table done, cost %s min" % (
417+
(end - begin) / 60.0))
418+
412419
if fleet.is_first_worker():
413420
last_base_day, last_base_path, last_base_key = get_last_save_xbox_base(
414421
self.save_model_path, self.hadoop_client)
415422
logger.info(
416423
"one epoch finishes, get_last_save_xbox, last_base_day = {}, last_base_path = {}, last_base_key = {}".
417424
format(last_base_day, last_base_path, last_base_key))
418-
next_day = int(get_next_day(day))
419-
if next_day <= last_base_day:
425+
next_day = get_next_day(day)
426+
if int(next_day) <= last_base_day:
420427
logger.info("batch model/base xbox model exists")
421428
else:
422429
xbox_base_key = int(time.time())
423-
fleet.shrink()
424430
save_xbox_model(self.save_model_path, next_day, -1,
425431
self.exe, self.inference_feed_vars,
426432
self.inference_target_var,

tools/utils/static_ps/flow_helper.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ def load_model(model_path, mode, client):
118118
if os.path.exists(local_path):
119119
shutil.rmtree(local_path)
120120
os.mkdir(local_path)
121-
client.download(model_path + "/dnn_plugin", local_path)
121+
client.download(model_path + "/dnn_plugin/", local_path)
122122
fleet.load_model(model_path, mode)
123123

124124

@@ -354,7 +354,8 @@ def get_last_save_xbox(output_path, client):
354354
logger.info("get_last_save_xbox donefile_path {} is file".format(
355355
donefile_path))
356356
pre_content = client.cat(donefile_path)
357-
logger.info("get_last_save_xbox get a pre_content = ", pre_content)
357+
logger.info("get_last_save_xbox get a pre_content = {}".format(
358+
pre_content))
358359
last_dict = json.loads(pre_content.split("\n")[-1])
359360
last_day = int(last_dict["input"].split("/")[-3])
360361
last_pass = int(last_dict["input"].split("/")[-2].split("-")[-1])

0 commit comments

Comments
 (0)