Commit 5ef0ca7

add PET to few shot
1 parent 181a1e0 commit 5ef0ca7

File tree: 5 files changed, 12 additions & 104 deletions

examples/few_shot/pet/README.md

Lines changed: 5 additions & 23 deletions
@@ -27,21 +27,12 @@ train_ds, dev_ds, public_test_ds = load_dataset("fewclue", name="tnews", splits=
 ### Model Training & Evaluation
 Run the following command to train & evaluate on the `tnews` dataset of FewCLUE, using GPU card 0:
 ```
-#task_name="iflytek"
-task_name="tnews"
-#task_name="eprstmt"
-#task_name="bustm"
-#task_name="ocnli"
-#task_name="csl"
-#task_name="csldcp"
-#task_name="cluewsc"
-#task_name="chid"
 python -u -m paddle.distributed.launch --gpus "0" \
     pet.py \
-    --task_name ${task_name} \
+    --task_name "tnews" \
     --device gpu \
     --pattern_id 0 \
-    --save_dir ./${task_name} \
+    --save_dir ./tnews \
     --index 0 \
     --batch_size 16 \
     --learning_rate 1E-4 \
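PET reformulates the classification task as masked language modelling: each example is wrapped in a prompt pattern (selected here by `--pattern_id`) whose `[MASK]` slots are filled by a fixed-length label word. The sketch below illustrates that idea for `tnews`; the pattern text and label words are illustrative assumptions, not the exact ones used by `pet.py`.

```python
# Illustrative only: the concrete pattern and verbalizer used by pet.py may differ.
def apply_pattern(title, mask_token="[MASK]", label_length=2):
    """Wrap a tnews title in a prompt with `label_length` [MASK] slots."""
    return "这是一条" + mask_token * label_length + "新闻:" + title


# Verbalizer: original tnews label -> fixed-length label word (assumed values).
verbalizer = {"news_sports": "体育", "news_story": "故事"}

print(apply_pattern("小米发布新款手机"))
# 这是一条[MASK][MASK]新闻:小米发布新款手机
```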
@@ -61,20 +52,11 @@ python -u -m paddle.distributed.launch --gpus "0" \
 ### Model Prediction
 Run the following command to predict on the `iflytek` dataset of `FewCLUE`, using GPU card 0:
 ```
-#task_name="iflytek"
-task_name="tnews"
-#task_name="eprstmt"
-#task_name="bustm"
-#task_name="ocnli"
-#task_name="csl"
-#task_name="csldcp"
-#task_name="cluewsc"
-#task_name="chid"
 python -u -m paddle.distributed.launch --gpus "0" predict.py \
-    --task_name ${task_name} \
+    --task_name "tnews" \
     --device gpu \
-    --init_from_ckpt "./${task_name}/model_120/model_state.pdparams" \
-    --output_dir "./${task_name}/output" \
+    --init_from_ckpt "./tnews/model_120/model_state.pdparams" \
+    --output_dir "./tnews/output" \
     --batch_size 32 \
     --max_seq_length 512
 ```
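Prediction writes one file per task under `--output_dir`; each line is a small JSON object of the form `{"id": ..., "label": ...}` (see the `write_*` helpers in `predict.py` further down this commit). A minimal sketch for inspecting such a file, assuming its path is passed on the command line:

```python
# Print id/label pairs from a prediction file produced by predict.py.
import json
import sys

output_path = sys.argv[1]  # e.g. one of the files under ./tnews/output
with open(output_path, encoding="utf-8") as f:
    for line in f:
        example = json.loads(line)
        print(example["id"], example["label"])
```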

examples/few_shot/pet/evaluate.py

Lines changed: 1 addition & 7 deletions
@@ -145,12 +145,6 @@ def do_evaluate_chid(model, tokenizer, data_loader, label_normalize_dict):
     for batch in data_loader:
         src_ids, token_type_ids, masked_positions, masked_lm_labels, candidate_label_ids = batch
 
-        # [bs * label_length, vocab_size]
-        # prediction_probs = model.predict(
-        #     input_ids=src_ids,
-        #     token_type_ids=token_type_ids,
-        #     masked_positions=masked_positions)
-
         max_len = src_ids.shape[1]
         new_masked_positions = []
 
@@ -215,4 +209,4 @@ def do_evaluate_chid(model, tokenizer, data_loader, label_normalize_dict):
         total_num += len(y_true_index)
         correct_num += (y_true_index == y_pred_index).sum()
 
-    return 100 * correct_num / total_num, total_num
+    return 100 * correct_num / total_num, total_num
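The kept lines (`max_len = src_ids.shape[1]`, `new_masked_positions = []`) set up the flattening of per-example `[MASK]` offsets into indices over the flattened `batch_size * max_len` token sequence, the same computation that appears in the removed `do_predict_cluewsc` further down. A small sketch with toy values:

```python
# Toy example of the position flattening performed right after the kept lines above.
max_len = 8                          # src_ids.shape[1]
masked_positions = [[2, 3], [5, 6]]  # per-example [MASK] offsets
new_masked_positions = [
    bs_index * max_len + pos
    for bs_index, mask_pos in enumerate(masked_positions)
    for pos in mask_pos
]
print(new_masked_positions)  # [2, 3, 13, 14]
```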

examples/few_shot/pet/model.py

Lines changed: 0 additions & 1 deletion
@@ -16,7 +16,6 @@
 import paddle
 import paddle.nn as nn
 from paddlenlp.transformers.ernie.modeling import ErniePretrainedModel, ErniePretrainingHeads, ErnieLMPredictionHead
-from paddlenlp.transformers.albert.modeling import AlbertPretrainedModel, AlbertMLMHead, AlbertForMaskedLM
 
 
 class ErnieForPretraining(ErniePretrainedModel):

examples/few_shot/pet/pet.py

Lines changed: 2 additions & 2 deletions
@@ -19,8 +19,8 @@
 
 from data import create_dataloader, transform_fn_dict
 from data import convert_example, convert_chid_example
-from evaluate import do_evaluate, do_evaluate_chid, do_evaluate_cluewsc
-from predict import do_predict, do_predict_chid, predict_file, write_fn, do_predict_cluewsc
+from evaluate import do_evaluate, do_evaluate_chid
+from predict import do_predict, do_predict_chid, predict_file, write_fn
 
 
 def set_seed(seed):
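After this change, `pet.py` keeps only the generic and the `chid`-specific evaluate/predict routines; the `cluewsc` variants are dropped along with their definitions in `predict.py`. A rough sketch of how such imports are typically dispatched per task (the stubs and helper name below are assumptions, not code from `pet.py`):

```python
# Rough sketch (not verbatim from pet.py): chid needs dedicated routines because
# its labels are multi-token idiom candidates. Stubs stand in for the imports above.
def do_evaluate(*args, **kwargs): ...
def do_evaluate_chid(*args, **kwargs): ...


def pick_evaluate_fn(task_name):
    # Assumed dispatch: chid gets its own evaluation path, everything else shares one.
    return do_evaluate_chid if task_name == "chid" else do_evaluate


print(pick_evaluate_fn("tnews").__name__)  # do_evaluate
print(pick_evaluate_fn("chid").__name__)   # do_evaluate_chid
```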

examples/few_shot/pet/predict.py

Lines changed: 4 additions & 71 deletions
@@ -105,73 +105,6 @@ def do_predict(model, tokenizer, data_loader, label_normalize_dict):
             y_pred_labels.append(origin_labels[index])
 
     return y_pred_labels
-    # return paddle.concat(y_pred_logits, axis=0).unsqueeze(1)
-
-@paddle.no_grad()
-def do_predict_cluewsc(model, tokenizer, data_loader, label_normalize_dict):
-    model.eval()
-
-    normed_labels = [
-        normalized_lable
-        for origin_lable, normalized_lable in label_normalize_dict.items()
-    ]
-
-    origin_labels = [
-        origin_lable
-        for origin_lable, normalized_lable in label_normalize_dict.items()
-    ]
-
-    label_length = len(normed_labels[0])
-
-    y_pred_labels = []
-
-    for batch in data_loader:
-        src_ids, token_type_ids, masked_positions, judge = batch
-
-        new_masked_positions = []
-
-        for bs_index, mask_pos in enumerate(masked_positions.numpy()):
-            for pos in mask_pos:
-                new_masked_positions.append(bs_index * max_len + pos)
-        new_masked_positions = paddle.to_tensor(np.array(new_masked_positions).astype('int32'))
-        prediction_scores = model(
-            input_ids=src_ids,
-            token_type_ids=token_type_ids,
-            masked_positions=new_masked_positions)
-
-        softmax_fn = paddle.nn.Softmax()
-        prediction_probs = softmax_fn(prediction_scores)
-
-        batch_size = len(src_ids)
-        vocab_size = prediction_probs.shape[1]
-
-        # prediction_probs: [batch_size, label_lenght, vocab_size]
-        prediction_probs = paddle.reshape(
-            prediction_probs, shape=[batch_size, -1, vocab_size]).numpy()
-
-        # [label_num, label_length]
-        label_ids = np.array(
-            [tokenizer(label)["input_ids"][1:-1] for label in normed_labels])
-
-        y_pred = np.ones(shape=[batch_size, len(label_ids)])
-
-        # Calculate joint distribution of candidate labels
-        for index in range(label_length):
-            y_pred *= prediction_probs[:, index, label_ids[:, index]]
-
-        # Get max probs label's index
-        y_pred_index = np.argmax(y_pred, axis=-1)
-
-        for index in range(len(y_pred_index)):
-            if judge.numpy()[index] == 1:
-                y_pred_labels.append(origin_labels[1])
-                continue
-            y_pred_labels.append(origin_labels[y_pred_index[index]])
-
-
-    return y_pred_labels
-    # return paddle.concat(y_pred_logits, axis=0).unsqueeze(1)
-
 
 @paddle.no_grad()
 def do_predict_chid(model, tokenizer, data_loader, label_normalize_dict):
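For reference, the core of the removed `do_predict_cluewsc` (shared with `do_predict`) scores each candidate label by multiplying the `[MASK]`-position probabilities of its tokens and taking the argmax. A self-contained NumPy sketch with toy shapes and values:

```python
# NumPy sketch of the joint label-probability step in the removed function above.
import numpy as np

batch_size, label_length, vocab_size, label_num = 2, 2, 10, 3
rng = np.random.default_rng(0)

# Per-[MASK] token probabilities and each candidate label's token ids (toy values).
prediction_probs = rng.random((batch_size, label_length, vocab_size))
label_ids = rng.integers(0, vocab_size, size=(label_num, label_length))

y_pred = np.ones((batch_size, label_num))
for index in range(label_length):
    # Probability of each candidate label's index-th token at the index-th [MASK].
    y_pred *= prediction_probs[:, index, label_ids[:, index]]

y_pred_index = np.argmax(y_pred, axis=-1)  # most probable label per example
print(y_pred_index)
```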
@@ -238,11 +171,11 @@ def do_predict_chid(model, tokenizer, data_loader, label_normalize_dict):
 
             y_pred[:, label_idx] *= np.array(batch_single_token_prob)
 
-        # # Get max probs label's index
+        # Get max probs label's index
         y_pred_index = np.argmax(y_pred, axis=-1)
         y_pred_all.extend(y_pred_index)
     return y_pred_all
-    # return y_pred
+
 
 
 predict_file = {
@@ -302,7 +235,7 @@ def write_csldcp(task_name, output_file, pred_labels):
         for idx, example in enumerate(test_ds):
             test_example["id"] = example["id"]
             test_example["label"] = pred_labels[idx]
-            # {"id": 0, "label": "力学"}
+
             str_test_example = "\"{}\": {}, \"{}\": \"{}\"".format(
                 "id", test_example['id'], "label", test_example["label"])
             f.write("{" + str_test_example + "}\n")
@@ -339,7 +272,7 @@ def write_cluewsc(task_name, output_file, pred_labels):
         for idx, example in enumerate(test_ds):
             test_example["id"] = example["id"]
             test_example["label"] = pred_labels[idx]
-            # {"id": 0, "label": "力学"}
+
            str_test_example = "\"{}\": {}, \"{}\": \"{}\"".format(
                 "id", test_example['id'], "label", test_example["label"])
             f.write("{" + str_test_example + "}\n")
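Both `write_csldcp` and `write_cluewsc` build each output line by hand; the deleted comments showed the target format, e.g. `{"id": 0, "label": "力学"}`. For comparison, `json.dumps` produces the same line (a sketch only, not a change proposed by this commit):

```python
# Produces the same line format as the write_* helpers above.
import json

test_example = {"id": 0, "label": "力学"}
print(json.dumps(test_example, ensure_ascii=False))  # {"id": 0, "label": "力学"}
```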
