Skip to content

Commit 3b50f5e

Browse files
committed
change to bigdata_set
1 parent e2c6829 commit 3b50f5e

File tree

2 files changed

+17
-11
lines changed

2 files changed

+17
-11
lines changed

models/rank/naml/NAMLDataReader.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -64,12 +64,18 @@ def init(self):
6464
for file in self.article_file_list:
6565
with open(file, "r") as rf:
6666
for l in rf:
67-
line = l.strip().split('\t')
68-
id = line[0]
67+
line_x = [x.strip() for x in l.split('\t')]
68+
id = line_x[0]
6969
# Layout of `line`: 0 = category, 1 = sub-category, 2 = title tokens, 3 = content tokens.
70-
line = [[int(line[1])], [int(line[2])],
71-
[int(t) for t in line[3].split(" ")],
72-
[int(t) for t in line[4].split(" ")]]
70+
line = [[int(line_x[1])], [int(line_x[2])]]
71+
if len(line_x[3]) == 0:
72+
line.append([])
73+
else:
74+
line.append([int(t) for t in line_x[3].split(" ")])
75+
if len(line_x[4]) == 0:
76+
line.append([])
77+
else:
78+
line.append([int(t) for t in line_x[4].split(" ")])
7379
line[2] += [self.word_dict_size] * (
7480
self.article_title_size - len(line[2]))
7581
line[3] += [self.word_dict_size] * (

models/rank/naml/config_bigdata.yaml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,16 @@
1515
runner:
1616
train_data_dir: "../../../datasets/MIND/data/train"
1717
train_reader_path: "NAMLDataReader" # importlib format
18-
use_gpu: True
18+
use_gpu: False
1919
train_batch_size: 50
2020
epochs: 2
21-
print_interval: 2
21+
print_interval: 10
2222
#model_init_path: "output_model/0" # init model
23-
model_save_path: "output_model"
23+
model_save_path: "output_model_all"
2424
infer_batch_size: 10
2525
infer_reader_path: "NAMLDataReader" # importlib format
2626
test_data_dir: "../../../datasets/MIND/data/test"
27-
infer_load_path: "output_model"
27+
infer_load_path: "output_model_all"
2828
infer_start_epoch: 0
2929
infer_end_epoch: 2
3030

@@ -42,6 +42,6 @@ hyper_parameters:
4242
neg_condidate_sample_size: 4
4343
word_dimension: 60
4444
category_size: 18
45-
sub_category_size: 260
45+
sub_category_size: 285
4646
category_dimension: 32
47-
word_dict_size: 19200
47+
word_dict_size: 27756

0 commit comments

Comments
 (0)