 import paddlenlp as ppnlp

 from paddlenlp.data import Pad, Stack, Tuple, Dict
-from paddlenlp.transformers import BertForQuestionAnswering, BertTokenizer, ErnieForQuestionAnswering, ErnieTokenizer
+from paddlenlp.transformers import BertForQuestionAnswering, BertTokenizer, ErnieForQuestionAnswering, ErnieTokenizer, FunnelForQuestionAnswering, FunnelTokenizer
 from paddlenlp.transformers import LinearDecayWithWarmup
 from paddlenlp.metrics.squad import squad_evaluate, compute_prediction
 from paddlenlp.datasets import load_dataset

 MODEL_CLASSES = {
     "bert": (BertForQuestionAnswering, BertTokenizer),
-    "ernie": (ErnieForQuestionAnswering, ErnieTokenizer)
+    "ernie": (ErnieForQuestionAnswering, ErnieTokenizer),
+    "funnel": (FunnelForQuestionAnswering, FunnelTokenizer)
 }

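For context, the MODEL_CLASSES registry above is resolved from the command-line model type elsewhere in run_squad.py. A minimal sketch of that lookup follows; the argument names (args.model_type, args.model_name_or_path) are assumed from the upstream example and are not part of this diff.

# Sketch only: how the registry is typically consumed (arg names assumed).
model_class, tokenizer_class = MODEL_CLASSES[args.model_type.lower()]
tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
model = model_class.from_pretrained(args.model_name_or_path)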
@@ -162,9 +163,9 @@ def evaluate(model, data_loader, args):
     tic_eval = time.time()

     for batch in data_loader:
-        input_ids, token_type_ids = batch
+        input_ids, token_type_ids, attention_mask = batch
         start_logits_tensor, end_logits_tensor = model(input_ids,
-                                                       token_type_ids)
+                                                       token_type_ids=token_type_ids, attention_mask=attention_mask)

         for idx in range(start_logits_tensor.shape[0]):
             if len(all_start_logits) % 1000 == 0 and len(all_start_logits):
@@ -251,6 +252,7 @@ def run(args):
     train_batchify_fn = lambda samples, fn=Dict({
         "input_ids": Pad(axis=0, pad_val=tokenizer.pad_token_id),
         "token_type_ids": Pad(axis=0, pad_val=tokenizer.pad_token_type_id),
+        "attention_mask": Pad(axis=0, pad_val=tokenizer.pad_token_type_id),
         "start_positions": Stack(dtype="int64"),
         "end_positions": Stack(dtype="int64")
     }): fn(samples)
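paddlenlp.data.Dict applies one collate function per field and returns the fields in key order, so the new "attention_mask" entry is what makes the wider tuple unpacking in the training and evaluation hunks line up. The mask is padded with tokenizer.pad_token_type_id, which works because that value is 0 for the tokenizers used here, though pad_val=0 would state the intent (mask out padding positions) more directly. A toy sketch of what the collate produces:

# Sketch only: what train_batchify_fn yields for a toy two-sample batch.
samples = [
    {"input_ids": [101, 7, 8, 102], "token_type_ids": [0, 0, 0, 0],
     "attention_mask": [1, 1, 1, 1], "start_positions": 1, "end_positions": 2},
    {"input_ids": [101, 9, 102], "token_type_ids": [0, 0, 0],
     "attention_mask": [1, 1, 1], "start_positions": 1, "end_positions": 1},
]
input_ids, token_type_ids, attention_mask, starts, ends = train_batchify_fn(samples)
# The three sequence fields are padded to the longest sample (length 4);
# starts and ends are stacked into int64 arrays of shape (2,).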
@@ -288,10 +290,10 @@ def run(args):
     for epoch in range(num_train_epochs):
         for step, batch in enumerate(train_data_loader):
             global_step += 1
-            input_ids, token_type_ids, start_positions, end_positions = batch
+            input_ids, token_type_ids, attention_mask, start_positions, end_positions = batch
             logits = model(
-                input_ids=input_ids, token_type_ids=token_type_ids)
+                input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
             loss = criterion(logits, (start_positions, end_positions))

             if global_step % args.logging_steps == 0:
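The criterion used a few lines above is defined in an unchanged part of run_squad.py. For readers of this hunk in isolation, here is a sketch of that loss following the upstream example (it averages the start- and end-position cross-entropy and is not modified by this commit):

import paddle

class CrossEntropyLossForSQuAD(paddle.nn.Layer):
    # Sketch of the loss behind `criterion` above (upstream example, not part
    # of this diff): average cross-entropy over start and end positions.
    def forward(self, y, label):
        start_logits, end_logits = y
        start_position, end_position = label
        start_position = paddle.unsqueeze(start_position, axis=-1)
        end_position = paddle.unsqueeze(end_position, axis=-1)
        start_loss = paddle.nn.functional.cross_entropy(
            input=start_logits, label=start_position)
        end_loss = paddle.nn.functional.cross_entropy(
            input=end_logits, label=end_position)
        return (start_loss + end_loss) / 2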
@@ -329,7 +331,8 @@ def run(args):

     dev_batchify_fn = lambda samples, fn=Dict({
         "input_ids": Pad(axis=0, pad_val=tokenizer.pad_token_id),
-        "token_type_ids": Pad(axis=0, pad_val=tokenizer.pad_token_type_id)
+        "token_type_ids": Pad(axis=0, pad_val=tokenizer.pad_token_type_id),
+        "attention_mask": Pad(axis=0, pad_val=tokenizer.pad_token_type_id)
     }): fn(samples)

     dev_data_loader = DataLoader(
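The dev collate is then plugged into the DataLoader call above, which is truncated by the hunk boundary. A hedged sketch of how that wiring typically looks in the upstream script, where DataLoader is paddle.io.DataLoader and the dataset/sampler names (dev_ds, dev_batch_sampler) are assumptions, not shown in this diff:

# Sketch only: typical dev-loader wiring (names assumed from the upstream example).
dev_data_loader = DataLoader(
    dataset=dev_ds,
    batch_sampler=dev_batch_sampler,
    collate_fn=dev_batchify_fn,
    return_list=True)
evaluate(model=model, data_loader=dev_data_loader, args=args)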