Skip to content

Commit bac370b

Browse files
authored
Update chnsenticorp examples for qianyan dataset modification (#485)
1 parent 07d414a commit bac370b

File tree

4 files changed

+6
-40
lines changed

4 files changed

+6
-40
lines changed

examples/sentiment_analysis/skep/train_sentence.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,12 @@ def create_dataloader(dataset,
172172
set_seed(args.seed)
173173
if args.model_name == "skep_ernie_1.0_large_ch":
174174
dataset_name = "chnsenticorp"
175-
train_ds, dev_ds, test_ds = load_dataset(
176-
dataset_name, splits=["train", "dev", "test"])
175+
train_ds, dev_ds = load_dataset(dataset_name, splits=["train", "dev"])
177176

178177
else:
179178
dataset_name = "sst-2"
180-
train_ds, dev_ds, test_ds = load_dataset(
181-
"glue", dataset_name, splits=["train", "dev", "test"])
179+
train_ds, dev_ds = load_dataset(
180+
"glue", dataset_name, splits=["train", "dev"])
182181
label_map = {0: 'negative', 1: 'positive'}
183182

184183
model = SkepForSequenceClassification.from_pretrained(

examples/text_classification/pretrained_models/train.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,7 @@ def do_train():
152152

153153
set_seed(args.seed)
154154

155-
train_ds, dev_ds, test_ds = load_dataset(
156-
"chnsenticorp", splits=["train", "dev", "test"])
155+
train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])
157156

158157
# If you wanna use bert/roberta/electra pretrained model,
159158
# model = ppnlp.transformers.BertForSequenceClassification.from_pretrained('bert-base-chinese', num_class=2)
@@ -191,12 +190,6 @@ def do_train():
191190
batch_size=args.batch_size,
192191
batchify_fn=batchify_fn,
193192
trans_fn=trans_func)
194-
test_data_loader = create_dataloader(
195-
test_ds,
196-
mode='test',
197-
batch_size=args.batch_size,
198-
batchify_fn=batchify_fn,
199-
trans_fn=trans_func)
200193

201194
if args.init_from_ckpt and os.path.isfile(args.init_from_ckpt):
202195
state_dict = paddle.load(args.init_from_ckpt)
@@ -254,10 +247,6 @@ def do_train():
254247
model._layers.save_pretrained(save_dir)
255248
tokenizer.save_pretrained(save_dir)
256249

257-
if rank == 0:
258-
print('Evaluating on test data.')
259-
evaluate(model, criterion, metric, test_data_loader)
260-
261250

262251
if __name__ == "__main__":
263252
do_train()

examples/text_classification/rnn/train.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,7 @@ def create_dataloader(dataset,
9494
vocab = Vocab.load_vocabulary(
9595
args.vocab_path, unk_token='[UNK]', pad_token='[PAD]')
9696
# Loads dataset.
97-
train_ds, dev_ds, test_ds = load_dataset(
98-
"chnsenticorp", splits=["train", "dev", "test"])
97+
train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])
9998

10099
# Constructs the newtork.
101100
network = args.network.lower()
@@ -180,12 +179,6 @@ def create_dataloader(dataset,
180179
batch_size=args.batch_size,
181180
mode='validation',
182181
batchify_fn=batchify_fn)
183-
test_loader = create_dataloader(
184-
test_ds,
185-
trans_fn=trans_fn,
186-
batch_size=args.batch_size,
187-
mode='test',
188-
batchify_fn=batchify_fn)
189182

190183
optimizer = paddle.optimizer.Adam(
191184
parameters=model.parameters(), learning_rate=args.lr)
@@ -208,7 +201,3 @@ def create_dataloader(dataset,
208201
epochs=args.epochs,
209202
save_dir=args.save_dir,
210203
callbacks=callback)
211-
212-
# Finally tests model.
213-
results = model.evaluate(test_loader)
214-
print("Finally test acc: %.5f" % results['acc'])

examples/word_embedding/train.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,7 @@ def forward(self, text, seq_len=None):
151151
if '[PAD]' not in vocab:
152152
vocab['[PAD]'] = len(vocab)
153153
# Loads dataset.
154-
train_ds, dev_ds, test_ds = load_dataset(
155-
"chnsenticorp", splits=["train", "dev", "test"])
154+
train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])
156155

157156
# Constructs the newtork.
158157
model = BoWModel(
@@ -187,12 +186,6 @@ def forward(self, text, seq_len=None):
187186
batch_size=args.batch_size,
188187
mode='validation',
189188
pad_token_id=vocab['[PAD]'])
190-
test_loader = create_dataloader(
191-
test_ds,
192-
trans_fn=trans_fn,
193-
batch_size=args.batch_size,
194-
mode='test',
195-
pad_token_id=vocab['[PAD]'])
196189

197190
optimizer = paddle.optimizer.Adam(
198191
parameters=model.parameters(), learning_rate=args.lr)
@@ -219,7 +212,3 @@ def forward(self, text, seq_len=None):
219212
epochs=args.epochs,
220213
save_dir=args.save_dir,
221214
callbacks=callback)
222-
223-
# Finally tests model.
224-
results = model.evaluate(test_loader, callbacks=callback)
225-
print("Finally test acc: %.5f" % results['acc'])

0 commit comments

Comments (0)