
Commit 75edf33

Fix datas to data (#2410)
* fix datas to data
* update code
* change all_data to texts in triton deploy
1 parent 0e76981 commit 75edf33

20 files changed (+89, -90 lines)

applications/neural_search/recall/domain_adaptive_pretraining/data_tools/dataset_utils.py

File mode changed: 100644 → 100755
Lines changed: 4 additions & 4 deletions
@@ -52,10 +52,10 @@ def __init__(self, path, skip_warmup=False):
 
         self._token_ids = np.load(
             path + "_ids.npy", mmap_mode="r", allow_pickle=True)
-        process_datas = np.load(path + "_idx.npz")
-        self._sizes = process_datas["lens"]
-        self._pointers = process_datas["sents"]
-        self._doc_idx = process_datas["docs"]
+        process_data = np.load(path + "_idx.npz")
+        self._sizes = process_data["lens"]
+        self._pointers = process_data["sents"]
+        self._doc_idx = process_data["docs"]
 
     def __getstate__(self):
         return self._path
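For context on the pattern this hunk renames: np.load on an .npz archive returns a dict-like NpzFile whose arrays are retrieved by the names they were saved under, which is why process_data is indexed with "lens", "sents", and "docs". A minimal sketch of that behaviour (file name and array values here are illustrative, not from the repo):

import numpy as np

# Save three named arrays into one archive (illustrative names and values).
np.savez("sample_idx.npz",
         lens=np.array([3, 5]),        # tokens per sentence
         sents=np.array([0, 3, 8]),    # sentence start offsets
         docs=np.array([0, 2]))        # document boundaries

# np.load on an .npz returns a lazy, dict-like NpzFile.
process_data = np.load("sample_idx.npz")
sizes = process_data["lens"]      # -> array([3, 5])
pointers = process_data["sents"]  # -> array([0, 3, 8])
doc_idx = process_data["docs"]    # -> array([0, 2])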

examples/information_extraction/DuEE/duee_1_postprocess.py

File mode changed: 100644 → 100755
Lines changed: 6 additions & 6 deletions
@@ -24,16 +24,16 @@
 def predict_data_process(trigger_file, role_file, schema_file, save_path):
     """predict_data_process"""
     pred_ret = []
-    trigger_datas = read_by_lines(trigger_file)
+    trigger_data = read_by_lines(trigger_file)
     role_data = read_by_lines(role_file)
-    schema_datas = read_by_lines(schema_file)
+    schema_data = read_by_lines(schema_file)
     print("trigger predict {} load from {}".format(
-        len(trigger_datas), trigger_file))
+        len(trigger_data), trigger_file))
     print("role predict {} load from {}".format(len(role_data), role_file))
-    print("schema {} load from {}".format(len(schema_datas), schema_file))
+    print("schema {} load from {}".format(len(schema_data), schema_file))
 
     schema = {}
-    for s in schema_datas:
+    for s in schema_data:
         d_json = json.loads(s)
         schema[d_json["event_type"]] = [r["role"] for r in d_json["role_list"]]
 
@@ -50,7 +50,7 @@ def predict_data_process(trigger_file, role_file, schema_file, save_path):
             role_ret[role_type].append("".join(r["text"]))
         sent_role_mapping[d_json["id"]] = role_ret
 
-    for d in trigger_datas:
+    for d in trigger_data:
         d_json = json.loads(d)
         t_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
         pred_event_types = list(set([t["type"] for t in t_ret]))
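For orientation, read_by_lines is a helper defined elsewhere in this repo; the hunk only relies on it returning a list of text lines (one JSON record per line), so len() and iteration work as shown. A hedged sketch of that assumed behaviour, not the repo's actual implementation:

from typing import List

def read_by_lines_sketch(path: str) -> List[str]:
    """Return the file's non-empty lines with trailing newlines stripped (illustrative stand-in)."""
    with open(path, "r", encoding="utf-8") as fp:
        return [line.rstrip("\n") for line in fp if line.strip()]

# Usage mirroring the hunk above (file name is hypothetical):
# trigger_data = read_by_lines_sketch("trigger_predictions.json")
# print("trigger predict {} load from {}".format(len(trigger_data), "trigger_predictions.json"))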

examples/language_model/chinesebert/utils.py

File mode changed: 100644 → 100755
Lines changed: 2 additions & 2 deletions
@@ -210,7 +210,7 @@ def load_ds(datafiles):
         MapDataset
     '''
 
-    datas = []
+    data = []
 
     def read(ds_file):
         with open(ds_file, 'r', encoding='utf-8') as fp:
@@ -229,7 +229,7 @@ def read(ds_file):
 
 
 def load_ds_xnli(datafiles):
-    datas = []
+    data = []
 
     def read(ds_file):
         with open(ds_file, 'r', encoding='utf-8') as fp:

examples/language_model/elmo/elmo.py

File mode changed: 100644 → 100755
Lines changed: 3 additions & 3 deletions
@@ -338,10 +338,10 @@ def encode(self, sentences: List[List[str]]):
         Each sentence is a list of tokens without <s> or </s>, e.g.
         [['The', 'first', 'sentence', '.'], ['Second', '.']]
         """
-        batch_datas = create_batches(sentences, self._batch_size, self._vocab,
-                                     self._max_seq_len)
+        batch_data = create_batches(sentences, self._batch_size, self._vocab,
+                                    self._max_seq_len)
         embeddings = []
-        for data in batch_datas:
+        for data in batch_data:
             ids, ids_reverse, seq_lens = data
             # [batch_size, num_lstm_layers + 1, max_seq_len, projection_dim * 2]
             outputs = self._elmo([ids, ids_reverse])
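The loop after create_batches unpacks each batch into (ids, ids_reverse, seq_lens). A hypothetical batching helper with that output shape, purely to illustrate the unpacking pattern (not the repo's create_batches):

from typing import Iterator, List, Tuple
import numpy as np

def make_batches(sentences: List[List[int]],
                 batch_size: int) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Yield (ids, ids_reverse, seq_lens) tuples padded to the longest sentence in each batch."""
    for start in range(0, len(sentences), batch_size):
        chunk = sentences[start:start + batch_size]
        seq_lens = np.array([len(s) for s in chunk], dtype="int64")
        max_len = int(seq_lens.max())
        ids = np.zeros((len(chunk), max_len), dtype="int64")
        ids_reverse = np.zeros_like(ids)
        for row, sent in enumerate(chunk):
            ids[row, :len(sent)] = sent
            ids_reverse[row, :len(sent)] = sent[::-1]  # tokens reversed; padding stays at the end
        yield ids, ids_reverse, seq_lens

# Each yielded batch unpacks exactly like the loop in the hunk:
# for ids, ids_reverse, seq_lens in make_batches(token_id_sentences, batch_size=32):
#     ...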

examples/language_model/gpt-3/dygraph/dataset.py

File mode changed: 100644 → 100755
Lines changed: 5 additions & 5 deletions
@@ -301,10 +301,10 @@ def create_pretrained_dataset(args,
         logger.warning(
             "You are using compatible dataset, please make new dataset as the readme!"
         )
-        process_datas = np.load(
+        process_data = np.load(
             input_prefix + "_ids.npz", mmap_mode="r+", allow_pickle=True)
-        sample_ids = process_datas["ids"]
-        sample_lens = process_datas["lens"].astype("int32")
+        sample_ids = process_data["ids"]
+        sample_lens = process_data["lens"].astype("int32")
     else:
         for suffix in ["_ids.npy", "_idx.npz"]:
             if not os.path.isfile(input_prefix + suffix):
@@ -314,10 +314,10 @@ def create_pretrained_dataset(args,
             input_prefix + "_ids.npy", mmap_mode="r", allow_pickle=True)
         # All documment ids, extend as 1-D array.
 
-        process_datas = np.load(input_prefix + "_idx.npz")
+        process_data = np.load(input_prefix + "_idx.npz")
         # The len(sample_lens) num of docs
         # The sum(sample_lens) should equal len(sample_ids)
-        sample_lens = process_datas["lens"]
+        sample_lens = process_data["lens"]
 
     splits = get_train_valid_test_split_(args.split, len(sample_lens))
     assert len(sample_lens) >= splits[

examples/language_model/gpt-3/static/dataset.py

File mode changed: 100644 → 100755
Lines changed: 5 additions & 5 deletions
@@ -302,10 +302,10 @@ def create_pretrained_dataset(args,
         logger.warning(
             "You are using compatible dataset, please make new dataset as the readme!"
         )
-        process_datas = np.load(
+        process_data = np.load(
            input_prefix + "_ids.npz", mmap_mode="r+", allow_pickle=True)
-        sample_ids = process_datas["ids"]
-        sample_lens = process_datas["lens"].astype("int32")
+        sample_ids = process_data["ids"]
+        sample_lens = process_data["lens"].astype("int32")
     else:
         for suffix in ["_ids.npy", "_idx.npz"]:
             if not os.path.isfile(input_prefix + suffix):
@@ -315,10 +315,10 @@ def create_pretrained_dataset(args,
             input_prefix + "_ids.npy", mmap_mode="r", allow_pickle=True)
         # All documment ids, extend as 1-D array.
 
-        process_datas = np.load(input_prefix + "_idx.npz")
+        process_data = np.load(input_prefix + "_idx.npz")
         # The len(sample_lens) num of docs
         # The sum(sample_lens) should equal len(sample_ids)
-        sample_lens = process_datas["lens"]
+        sample_lens = process_data["lens"]
 
     splits = get_train_valid_test_split_(args.split, len(sample_lens))
     assert len(sample_lens) >= splits[

examples/language_model/moe/dygraph/dataset.py

File mode changed: 100644 → 100755
Lines changed: 3 additions & 3 deletions
@@ -248,12 +248,12 @@ def create_pretrained_dataset(args,
         "The distributed run, total device num:{}, distinct dataflow num:{}.".
         format(device_world_size, data_world_size))
 
-    process_datas = np.load(input_path, mmap_mode="r+", allow_pickle=True)
+    process_data = np.load(input_path, mmap_mode="r+", allow_pickle=True)
     # All documment ids, extend as 1-D array.
-    sample_ids = process_datas["ids"]
+    sample_ids = process_data["ids"]
     # The len(sample_lens) num of docs
     # The sum(sample_lens) should equal len(sample_ids)
-    sample_lens = process_datas["lens"]
+    sample_lens = process_data["lens"]
 
     splits = get_train_valid_test_split_(args.split, len(sample_lens))
     assert len(sample_lens) >= splits[

examples/text_graph/erniesage/data/graph_reader.py

File mode changed: 100644 → 100755
Lines changed: 3 additions & 3 deletions
@@ -54,7 +54,7 @@ def construct(tensors):
         """
         graph_num = 1
         start_len = 0
-        datas = []
+        data = []
         graph_list = []
         for graph in range(graph_num):
             graph_list.append(
@@ -64,7 +64,7 @@ def construct(tensors):
             start_len += 2
 
         for i in range(start_len, len(tensors)):
-            datas.append(tensors[i])
-        return graph_list, datas
+            data.append(tensors[i])
+        return graph_list, data
 
     return construct

examples/text_graph/erniesage/models/model.py

File mode changed: 100644 → 100755
Lines changed: 3 additions & 3 deletions
@@ -46,17 +46,17 @@ def __init__(self, ernie, config):
         self.encoder = Encoder.factory(self.config_file, self.ernie)
         self.loss_func = LossFactory(self.config_file)
 
-    def forward(self, graphs, datas):
+    def forward(self, graphs, data):
         """Forward function of link prediction task.
 
         Args:
             graphs (Graph List): the Graph list.
-            datas (Tensor List): other input of the model.
+            data (Tensor List): other input of the model.
 
         Returns:
             Tensor: loss and output tensors.
         """
-        term_ids, user_index, pos_item_index, neg_item_index, user_real_index, pos_item_real_index = datas
+        term_ids, user_index, pos_item_index, neg_item_index, user_real_index, pos_item_real_index = data
         # encoder model
         outputs = self.encoder(graphs, term_ids,
                                [user_index, pos_item_index, neg_item_index])

model_zoo/electra/deploy/python/predict.py

File mode changed: 100644 → 100755
Lines changed: 4 additions & 4 deletions
@@ -157,7 +157,7 @@ def predict(args, sentences=[], paths=[]):
     predictor = create_paddle_predictor(config)
 
     start_time = time.time()
-    output_datas = []
+    output_data = []
     count = 0
     for i, sen in enumerate(predicted_input):
         sen = np.array(sen).astype("int64")
@@ -176,9 +176,9 @@ def predict(args, sentences=[], paths=[]):
         output_names = predictor.get_output_names()
         # get output pointer and copy data(nd.array)
         output_tensor = predictor.get_output_tensor(output_names[0])
-        output_data = output_tensor.copy_to_cpu()
-        output_res = np.argmax(output_data, axis=1).tolist()
-        output_datas.append(output_res)
+        predict_data = output_tensor.copy_to_cpu()
+        output_res = np.argmax(predict_data, axis=1).tolist()
+        output_data.append(output_res)
 
         print("===== batch {} =====".format(i))
         for j in range(len(predicted_sens[i])):
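This hunk does slightly more than a rename: the per-batch array copied off the predictor is now predict_data, freeing output_data to serve as the accumulator list. A minimal sketch of the same fetch-and-argmax pattern, with a stand-in array replacing what output_tensor.copy_to_cpu() would return (shapes and values are illustrative):

import numpy as np

# Stand-in for per-batch logits of shape [batch_size, num_classes];
# in the real script these come from output_tensor.copy_to_cpu().
fake_batches = [np.array([[0.1, 0.9], [0.8, 0.2]]),
                np.array([[0.3, 0.7]])]

output_data = []                                           # accumulates per-batch predictions
for predict_data in fake_batches:
    output_res = np.argmax(predict_data, axis=1).tolist()  # predicted class index per example
    output_data.append(output_res)

print(output_data)                                         # [[1, 0], [1]]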
