@@ -105,73 +105,6 @@ def do_predict(model, tokenizer, data_loader, label_normalize_dict):
             y_pred_labels.append(origin_labels[index])
 
     return y_pred_labels
-    # return paddle.concat(y_pred_logits, axis=0).unsqueeze(1)
-
-@paddle.no_grad()
-def do_predict_cluewsc(model, tokenizer, data_loader, label_normalize_dict):
-    model.eval()
-
-    normed_labels = [
-        normalized_lable
-        for origin_lable, normalized_lable in label_normalize_dict.items()
-    ]
-
-    origin_labels = [
-        origin_lable
-        for origin_lable, normalized_lable in label_normalize_dict.items()
-    ]
-
-    label_length = len(normed_labels[0])
-
-    y_pred_labels = []
-
-    for batch in data_loader:
-        src_ids, token_type_ids, masked_positions, judge = batch
-
-        new_masked_positions = []
-
-        for bs_index, mask_pos in enumerate(masked_positions.numpy()):
-            for pos in mask_pos:
-                new_masked_positions.append(bs_index * max_len + pos)
-        new_masked_positions = paddle.to_tensor(np.array(new_masked_positions).astype('int32'))
-        prediction_scores = model(
-            input_ids=src_ids,
-            token_type_ids=token_type_ids,
-            masked_positions=new_masked_positions)
-
-        softmax_fn = paddle.nn.Softmax()
-        prediction_probs = softmax_fn(prediction_scores)
-
-        batch_size = len(src_ids)
-        vocab_size = prediction_probs.shape[1]
-
-        # prediction_probs: [batch_size, label_lenght, vocab_size]
-        prediction_probs = paddle.reshape(
-            prediction_probs, shape=[batch_size, -1, vocab_size]).numpy()
-
-        # [label_num, label_length]
-        label_ids = np.array(
-            [tokenizer(label)["input_ids"][1:-1] for label in normed_labels])
-
-        y_pred = np.ones(shape=[batch_size, len(label_ids)])
-
-        # Calculate joint distribution of candidate labels
-        for index in range(label_length):
-            y_pred *= prediction_probs[:, index, label_ids[:, index]]
-
-        # Get max probs label's index
-        y_pred_index = np.argmax(y_pred, axis=-1)
-
-        for index in range(len(y_pred_index)):
-            if judge.numpy()[index] == 1:
-                y_pred_labels.append(origin_labels[1])
-                continue
-            y_pred_labels.append(origin_labels[y_pred_index[index]])
-
-
-    return y_pred_labels
-    # return paddle.concat(y_pred_logits, axis=0).unsqueeze(1)
-
 
 @paddle.no_grad()
 def do_predict_chid(model, tokenizer, data_loader, label_normalize_dict):
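For readers skimming this diff: the deleted `do_predict_cluewsc` scored each candidate label by multiplying, position by position, the model's softmax probability of that label's tokens at the `[MASK]` slots, then took the argmax over candidates (with a CLUEWSC-specific shortcut to `origin_labels[1]` whenever `judge == 1`). A minimal NumPy sketch of that joint-probability scoring, with illustrative shapes and random stand-in values (none of these numbers come from the PR):

```python
import numpy as np

# Illustrative setup: batch of 2 examples, candidate labels of 2 tokens each,
# toy vocabulary of 10 tokens. Real values would come from the model's softmax.
rng = np.random.default_rng(0)
prediction_probs = rng.random((2, 2, 10))  # [batch_size, label_length, vocab_size]
prediction_probs /= prediction_probs.sum(axis=-1, keepdims=True)

# Token ids of each normalized candidate label: [label_num, label_length].
label_ids = np.array([[1, 4], [2, 5], [3, 6]])

# Joint probability of each candidate: product of per-position token probabilities.
y_pred = np.ones((prediction_probs.shape[0], len(label_ids)))
for index in range(label_ids.shape[1]):
    y_pred *= prediction_probs[:, index, label_ids[:, index]]

# Highest-probability candidate per example.
print(np.argmax(y_pred, axis=-1))
```

The sketch omits the `judge` branch, which is specific to the CLUEWSC data format.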
@@ -238,11 +171,11 @@ def do_predict_chid(model, tokenizer, data_loader, label_normalize_dict):
 
             y_pred[:, label_idx] *= np.array(batch_single_token_prob)
 
-        # # Get max probs label's index
+        # Get max probs label's index
         y_pred_index = np.argmax(y_pred, axis=-1)
         y_pred_all.extend(y_pred_index)
     return y_pred_all
-    # return y_pred
+
 
 
 predict_file = {
@@ -302,7 +235,7 @@ def write_csldcp(task_name, output_file, pred_labels):
         for idx, example in enumerate(test_ds):
             test_example["id"] = example["id"]
             test_example["label"] = pred_labels[idx]
-            # {"id": 0, "label": "力学"}
+
             str_test_example = "\"{}\": {}, \"{}\": \"{}\"".format(
                 "id", test_example['id'], "label", test_example["label"])
             f.write("{" + str_test_example + "}\n")
@@ -339,7 +272,7 @@ def write_cluewsc(task_name, output_file, pred_labels):
         for idx, example in enumerate(test_ds):
             test_example["id"] = example["id"]
             test_example["label"] = pred_labels[idx]
-            # {"id": 0, "label": "力学"}
+
             str_test_example = "\"{}\": {}, \"{}\": \"{}\"".format(
                 "id", test_example['id'], "label", test_example["label"])
             f.write("{" + str_test_example + "}\n")
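Both `write_csldcp` and `write_cluewsc` build each `{"id": ..., "label": ...}` output line by hand with `str.format` and escaped quotes. A sketch of the same output produced with `json.dumps`, which handles quoting and escaping automatically (the file name and labels below are hypothetical, not from the PR):

```python
import json

pred_labels = ["力学", "教育学"]  # hypothetical predictions
with open("csldcp_predict.json", "w", encoding="utf-8") as f:
    for idx, label in enumerate(pred_labels):
        # ensure_ascii=False keeps the Chinese labels human-readable on disk.
        f.write(json.dumps({"id": idx, "label": label}, ensure_ascii=False) + "\n")
```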