1 file changed: +4 -8 lines changed
TensorFlow/LanguageModeling/BERT/data: 1 file changed, +4 -8 lines changed
Columns: original file line number | diff line number | diff line change
@@ -28,26 +28,22 @@ python3 /workspace/bert/data/bertPrep.py --action download --dataset squad
28 28   python3 /workspace/bert/data/bertPrep.py --action download --dataset mrpc
29 29   python3 /workspace/bert/data/bertPrep.py --action download --dataset sst-2
30 30
   31 + DATASET=" wikicorpus_en"
31 32   # Properly format the text files
32 33   if [ " $to_download " = " wiki_books" ] ; then
33 34    python3 /workspace/bert/data/bertPrep.py --action text_formatting --dataset bookscorpus
34    - fi
35    - python3 /workspace/bert/data/bertPrep.py --action text_formatting --dataset wikicorpus_en
36    -
37    - if [ " $to_download " = " wiki_books" ] ; then
38 35    DATASET=" books_wiki_en_corpus"
39    - else
40    - DATASET=" wikicorpus_en"
41 36   fi
   37 + python3 /workspace/bert/data/bertPrep.py --action text_formatting --dataset wikicorpus_en
42 38
43 39   # Shard the text files
44 40   python3 /workspace/bert/data/bertPrep.py --action sharding --dataset $DATASET
45 41
46 42   # Create TFRecord files Phase 1
47    - python3 ${BERT_PREP_WORKING_DIR} /bertPrep.py --action create_tfrecord_files --dataset books_wiki_en_corpus --max_seq_length 128 \
   43 + python3 ${BERT_PREP_WORKING_DIR} /bertPrep.py --action create_tfrecord_files --dataset ${DATASET} --max_seq_length 128 \
48 44    --max_predictions_per_seq 20 --vocab_file ${BERT_PREP_WORKING_DIR} /download/google_pretrained_weights/uncased_L-24_H-1024_A-16/vocab.txt
49 45
50 46
51 47   # Create TFRecord files Phase 2
52    - python3 ${BERT_PREP_WORKING_DIR} /bertPrep.py --action create_tfrecord_files --dataset books_wiki_en_corpus --max_seq_length 512 \
   48 + python3 ${BERT_PREP_WORKING_DIR} /bertPrep.py --action create_tfrecord_files --dataset ${DATASET} --max_seq_length 512 \
53 49    --max_predictions_per_seq 80 --vocab_file ${BERT_PREP_WORKING_DIR} /download/google_pretrained_weights/uncased_L-24_H-1024_A-16/vocab.txt
You can’t perform that action at this time.
0 commit comments