Skip to content

Commit 3ce34a1

Browse files
authored
fix dtype bug in windows (#481)
1 parent 4ede05a commit 3ce34a1

File tree

4 files changed

+16
-16
lines changed

4 files changed

+16
-16
lines changed

examples/information_extraction/DuEE/classifier.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ def do_train():
200200
convert_example, tokenizer=tokenizer, label_map=label_map, max_seq_len=args.max_seq_len)
201201

202202
batchify_fn = lambda samples, fn=Tuple(
203-
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token]),
204-
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token]),
203+
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token], dtype='int32'),
204+
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token], dtype='int32'),
205205
Stack(dtype="int64") # label
206206
): fn(list(map(trans_func, samples)))
207207

examples/information_extraction/DuEE/sequence_labeling.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,10 @@ def do_train():
161161
ignore_label=ignore_label,
162162
is_test=False)
163163
batchify_fn = lambda samples, fn=Tuple(
164-
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token]), # input ids
165-
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token]), # token type ids
164+
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token], dtype='int32'), # input ids
165+
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token], dtype='int32'), # token type ids
166166
Stack(dtype='int64'), # sequence lens
167-
Pad(axis=0, pad_val=ignore_label) # labels
167+
Pad(axis=0, pad_val=ignore_label, dtype='int64') # labels
168168
): fn(list(map(trans_func, samples)))
169169

170170
batch_sampler = paddle.io.DistributedBatchSampler(train_ds, batch_size=args.batch_size, shuffle=True)
@@ -257,9 +257,9 @@ def do_predict():
257257
encoded_inputs_list.append((input_ids, token_type_ids, seq_len))
258258

259259
batchify_fn = lambda samples, fn=Tuple(
260-
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token]), # input_ids
261-
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token]), # token_type_ids
262-
Stack() # sequence lens
260+
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token], dtype='int32'), # input_ids
261+
Pad(axis=0, pad_val=tokenizer.vocab[tokenizer.pad_token], dtype='int32'), # token_type_ids
262+
Stack(dtype='int64') # sequence lens
263263
): fn(samples)
264264
# Separates data into some batches.
265265
batch_encoded_inputs = [encoded_inputs_list[i: i + args.batch_size]

examples/information_extraction/msra_ner/eval.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,10 @@ def do_eval(args):
7979
max_seq_len=args.max_seq_length)
8080
ignore_label = -100
8181
batchify_fn = lambda samples, fn=Dict({
82-
'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id), # input
83-
'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # segment
84-
'seq_len': Stack(),
85-
'labels': Pad(axis=0, pad_val=ignore_label) # label
82+
'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int32'), # input
83+
'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int32'), # segment
84+
'seq_len': Stack(dtype='int64'),
85+
'labels': Pad(axis=0, pad_val=ignore_label, dtype='int64') # label
8686
}): fn(samples)
8787
eval_ds = eval_ds.map(trans_func)
8888
eval_data_loader = DataLoader(

examples/information_extraction/msra_ner/train.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,10 @@ def do_train(args):
116116
ignore_label = -100
117117

118118
batchify_fn = lambda samples, fn=Dict({
119-
'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id), # input
120-
'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # segment
121-
'seq_len': Stack(), # seq_len
122-
'labels': Pad(axis=0, pad_val=ignore_label) # label
119+
'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int32'), # input
120+
'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int32'), # segment
121+
'seq_len': Stack(dtype='int64'), # seq_len
122+
'labels': Pad(axis=0, pad_val=ignore_label, dtype='int64') # label
123123
}): fn(samples)
124124

125125
train_batch_sampler = paddle.io.DistributedBatchSampler(

0 commit comments

Comments (0)