Commit 837c8f4

Add cloud training support for BERT example (#226)
* Add cloud training support for BERT example
* Change how we support cloud training
* Delete the unused util
1 parent 84bed77 commit 837c8f4

File tree

4 files changed: +105 −364 lines changed


examples/bert/README.md

Lines changed: 7 additions & 4 deletions
@@ -36,7 +36,7 @@ python3 examples/bert/bert_preprocess.py \
     --output_file $OUTPUT_DIR/pretraining-data/pretraining.tfrecord
 # Run pretraining for 100 train steps only.
 python3 examples/bert/bert_train.py \
-    --input_files $OUTPUT_DIR/pretraining-data/ \
+    --input_directory $OUTPUT_DIR/pretraining-data/ \
     --vocab_file $OUTPUT_DIR/bert_vocab_uncased.txt \
     --saved_model_output $OUTPUT_DIR/model/ \
     --num_train_steps 100
@@ -197,12 +197,14 @@ python3 -c "from examples.utils.data_utils import preview_tfrecord; preview_tfre
 
 After preprocessing, we can run pretraining with the `bert_train.py`
 script. This will train a model and save it to the `--saved_model_output`
-directory.
+directory. If you want to train on data stored in a Google Cloud Storage (GCS) bucket, you can do so by setting the file path to
+the URL of the GCS bucket, for example `--input_directory=gs://your-bucket-name/your-data-path`. You can also save models directly to GCS in the same way.
 
 ```shell
 python3 examples/bert/bert_train.py \
-    --input_files path/to/data/ \
+    --input_directory path/to/data/ \
     --vocab_file path/to/bert_vocab_uncased.txt \
+    --model_size tiny \
     --saved_model_output path/to/model/
 ```

@@ -219,7 +221,8 @@ training for a few epochs to finetune the model.
 ```shell
 python3 examples/bert/bert_finetune_glue.py \
     --saved_model_input path/to/model/ \
-    --vocab_file path/to/bert_vocab_uncased.txt
+    --vocab_file path/to/bert_vocab_uncased.txt \
+    --task_name mrpc
 ```
 
 The script could be easily adapted to any other text classification finetuning
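Why the gs:// substitution in this README works: TensorFlow's file APIs resolve GCS URLs the same way as local paths, so the training script needs no special casing. Below is a minimal sketch of that mechanism, assuming the input is TFRecord files; the bucket name, file pattern, and variable names are placeholders, not code from this commit.

```python
import tensorflow as tf

# tf.io.gfile and tf.data accept gs:// URLs transparently, which is why
# --input_directory and --saved_model_output can point at a GCS bucket.
# The bucket name and file pattern below are placeholders.
input_directory = "gs://your-bucket-name/your-data-path/"
files = tf.io.gfile.glob(input_directory + "*.tfrecord")
dataset = tf.data.TFRecordDataset(files)
```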

examples/bert/bert_config.py

Lines changed: 6 additions & 12 deletions
@@ -16,59 +16,53 @@
     "tiny": {
         "num_layers": 2,
         "hidden_size": 128,
-        "hidden_dropout": 0.1,
+        "dropout": 0.1,
         "num_attention_heads": 2,
-        "attention_dropout": 0.1,
         "inner_size": 512,
         "inner_activation": "gelu",
         "initializer_range": 0.02,
     },
     "mini": {
         "num_layers": 4,
         "hidden_size": 256,
-        "hidden_dropout": 0.1,
+        "dropout": 0.1,
         "num_attention_heads": 4,
-        "attention_dropout": 0.1,
         "inner_size": 1024,
         "inner_activation": "gelu",
         "initializer_range": 0.02,
     },
     "small": {
         "num_layers": 4,
         "hidden_size": 512,
-        "hidden_dropout": 0.1,
+        "dropout": 0.1,
         "num_attention_heads": 8,
-        "attention_dropout": 0.1,
         "inner_size": 2048,
         "inner_activation": "gelu",
         "initializer_range": 0.02,
     },
     "medium": {
         "num_layers": 8,
         "hidden_size": 512,
-        "hidden_dropout": 0.1,
+        "dropout": 0.1,
         "num_attention_heads": 8,
-        "attention_dropout": 0.1,
         "inner_size": 2048,
         "inner_activation": "gelu",
         "initializer_range": 0.02,
     },
     "base": {
         "num_layers": 12,
         "hidden_size": 768,
-        "hidden_dropout": 0.1,
+        "dropout": 0.1,
         "num_attention_heads": 12,
-        "attention_dropout": 0.1,
         "inner_size": 3072,
         "inner_activation": "gelu",
         "initializer_range": 0.02,
     },
     "large": {
         "num_layers": 24,
         "hidden_size": 1024,
-        "hidden_dropout": 0.1,
+        "dropout": 0.1,
         "num_attention_heads": 16,
-        "attention_dropout": 0.1,
         "inner_size": 4096,
         "inner_activation": "gelu",
         "initializer_range": 0.02,

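The config change above collapses the separate `hidden_dropout` and `attention_dropout` keys into a single `dropout` rate per model size. A minimal sketch of how a training script might look up one of these presets; `MODEL_CONFIGS` as the dict name and the helper itself are assumptions, since the diff shows only the dict entries.

```python
# Hypothetical lookup sketch: MODEL_CONFIGS is assumed to be the dict that
# holds the size presets shown in the diff; only the keys come from it.
from examples.bert.bert_config import MODEL_CONFIGS

def get_config(model_size):
    """Return the preset selected by --model_size, e.g. 'tiny' or 'base'."""
    if model_size not in MODEL_CONFIGS:
        raise ValueError(f"Unknown model size: {model_size!r}")
    config = MODEL_CONFIGS[model_size]
    # After this commit a single rate covers both the hidden-layer and
    # attention dropout that were previously configured separately.
    return config

print(get_config("tiny")["dropout"])  # 0.1
```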