Commit 36a6985

[BERT/TF] TRT int8 and Triton
1 parent e159774 commit 36a6985

61 files changed: +510 -3079 lines

TensorFlow/LanguageModeling/BERT/.gitignore

Lines changed: 6 additions & 0 deletions
@@ -4,6 +4,12 @@
 __pycache__/
 *.py[cod]
 *$py.class
+*.png
+.idea/
+*swp
+data/
+checkpoints/
+data_dl/
 
 # C extensions
 *.so

TensorFlow/LanguageModeling/BERT/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ RUN git clone https://github.com/titipata/pubmed_parser
 RUN pip3 install /workspace/pubmed_parser
 
 #Copy the perf_client over
-ARG TRTIS_CLIENTS_URL=https://github.com/NVIDIA/triton-inference-server/releases/download/v2.0.0/v2.0.0_ubuntu1804.clients.tar.gz
+ARG TRTIS_CLIENTS_URL=https://github.com/NVIDIA/triton-inference-server/releases/download/v2.2.0/v2.2.0_ubuntu1804.clients.tar.gz
 RUN mkdir -p /workspace/install \
     && curl -L ${TRTIS_CLIENTS_URL} | tar xvz -C /workspace/install
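Note on the change above: the URL bump pulls the v2.2.0 Triton client release, which the Dockerfile unpacks into /workspace/install so perf_client is available in the image. A minimal sketch for fetching and inspecting the same archive outside the image build; the layout of the extracted archive, and hence the location of perf_client, is an assumption here:

# Sketch: mirror the Dockerfile's curl | tar step locally and look for perf_client.
# The extracted directory layout is assumed, so search for the binary instead of
# hard-coding a path.
TRTIS_CLIENTS_URL=https://github.com/NVIDIA/triton-inference-server/releases/download/v2.2.0/v2.2.0_ubuntu1804.clients.tar.gz
mkdir -p /tmp/triton_clients
curl -L ${TRTIS_CLIENTS_URL} | tar xvz -C /tmp/triton_clients
find /tmp/triton_clients -name perf_client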

TensorFlow/LanguageModeling/BERT/README.md

Lines changed: 1 addition & 1 deletion
@@ -273,7 +273,7 @@ Note: Not using BookCorpus can potentially change final accuracy on a few downst
 
 4. Download the pretrained models from NGC.
 
-We have uploaded checkpoints that have been [fine tuned](https://ngc.nvidia.com/catalog/models/nvidia:bert_tf_v1_1_large_fp32_384) and [pre-trained](https://ngc.nvidia.com/catalog/models/nvidia:bert_tf_pretraining_lamb_16n) for various configurations on the NGC Model Registry. You can browse and download the relevant checkpoints directly from the [NGC model catalog](https://ngc.nvidia.com/catalog/models). Download them to the `results/models/` to easily access them in your scripts.
+We have uploaded checkpoints that have been [fine tuned](https://ngc.nvidia.com/catalog/models/nvidia:bert_tf_v1_1_large_fp16_384) and [pre-trained](https://ngc.nvidia.com/catalog/models/nvidia:bert_tf_pretraining_lamb_16n) for various configurations on the NGC Model Registry. Our data download scripts, by default download some of them but you can browse and download the relevant checkpoints directly from the [NGC model catalog](https://ngc.nvidia.com/catalog/models). Download them to the `data/download/nvidia_pretrained/` to easily access them in your scripts.
 
 5. Start an interactive session in the NGC container to run training/inference.
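For reference, the `data/download/nvidia_pretrained/` folder mentioned above is populated by data/create_datasets_from_start.sh later in this commit; a minimal sketch of pulling one checkpoint by hand, using the same NGC URL that script uses (running from the BERT repository root is an assumption here):

# Sketch: download and unpack the fine-tuned SQuAD Large checkpoint into the
# directory the updated README points at. URL and folder name are copied from
# data/create_datasets_from_start.sh in this commit.
mkdir -p data/download/nvidia_pretrained
cd data/download/nvidia_pretrained
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/bert_tf_ckpt_large_qa_squad11_amp_384/versions/19.03.1/zip -O bert_tf_ckpt_large_qa_squad11_amp_384_19.03.1.zip
unzip bert_tf_ckpt_large_qa_squad11_amp_384_19.03.1.zip -d bert_tf_squad11_large_384
rm bert_tf_ckpt_large_qa_squad11_amp_384_19.03.1.zip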

TensorFlow/LanguageModeling/BERT/biobert/conlleval.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
 class FormatError(Exception):
     pass
 
-Metrics = namedtuple('Metrics', 'tp fp fn precision recall f1')
+Metrics = namedtuple('Metrics', 'tp fp fn prec rec fscore')
 
 
 class EvalCounts(object):

TensorFlow/LanguageModeling/BERT/biobert/scripts/ner_bc5cdr-chem.sh

Lines changed: 1 addition & 2 deletions
@@ -2,7 +2,7 @@
 
 echo "Container nvidia build = " $NVIDIA_BUILD_ID
 
-init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt-4340"}
+init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt"}
 train_batch_size=${2:-8}
 learning_rate=${3:-3.125e-6}
 cased=${4:-false}
@@ -30,7 +30,6 @@ else
 export BERT_DIR=/workspace/bert/data/download/google_pretrained_weights/${CASING_DIR_PREFIX}_L-12_H-768_A-12
 fi
 
-
 export GBS=$(expr $train_batch_size \* $num_gpu)
 printf -v TAG "tf_bert_biobert_ner_bc5cdr_chem_%s_%s_gbs%d" "$bert_model" "$precision" $GBS
 DATESTAMP=`date +'%y%m%d%H%M%S'`

TensorFlow/LanguageModeling/BERT/biobert/scripts/ner_bc5cdr-disease.sh

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 echo "Container nvidia build = " $NVIDIA_BUILD_ID
 
-init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt-4340"}
+init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt"}
 train_batch_size=${2:-8}
 learning_rate=${3:-3.125e-6}
 cased=${4:-false}

TensorFlow/LanguageModeling/BERT/biobert/scripts/rel_chemprot.sh

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 echo "Container nvidia build = " $NVIDIA_BUILD_ID
 
-init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt-4340"}
+init_checkpoint=${1:-"/results/biobert_tf_uncased_base/model.ckpt"}
 train_batch_size=${2:-8}
 learning_rate=${3:-1.5e-6}
 cased=${4:-false}

TensorFlow/LanguageModeling/BERT/biobert/scripts/run_biobert_finetuning_inference.sh

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 task=${1:-"ner_bc5cdr-chem"}
-init_checkpoint=${2:-"/results/biobert_tf_uncased_base/model.ckpt-4340"}
+init_checkpoint=${2:-"/results/biobert_tf_uncased_base/model.ckpt"}
 bert_model=${3:-"base"}
 cased=${4:-"false"}
 precision=${5:-"fp16"}
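Since the scripts above now default to model.ckpt rather than the step-suffixed model.ckpt-4340, a caller that needs a specific checkpoint can still pass it positionally; a minimal sketch using the argument order visible in these diffs (paths and values are illustrative, and running from the BERT working directory is an assumption):

# Sketch: override the new init_checkpoint default explicitly.
# ner_bc5cdr-chem.sh: $1=init_checkpoint $2=train_batch_size $3=learning_rate $4=cased
bash biobert/scripts/ner_bc5cdr-chem.sh /results/biobert_tf_uncased_base/model.ckpt-4340 8 3.125e-6 false
# run_biobert_finetuning_inference.sh: $1=task $2=init_checkpoint $3=bert_model $4=cased $5=precision
bash biobert/scripts/run_biobert_finetuning_inference.sh ner_bc5cdr-chem /results/biobert_tf_uncased_base/model.ckpt-4340 base false fp16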

TensorFlow/LanguageModeling/BERT/data/create_datasets_from_start.sh

Lines changed: 21 additions & 1 deletion
@@ -23,10 +23,30 @@ if [ "$to_download" = "wiki_books" ] ; then
 fi
 
 python3 /workspace/bert/data/bertPrep.py --action download --dataset wikicorpus_en
-python3 /workspace/bert/data/bertPrep.py --action download --dataset google_pretrained_weights # Includes vocab
 python3 /workspace/bert/data/bertPrep.py --action download --dataset squad
 python3 /workspace/bert/data/bertPrep.py --action download --dataset mrpc
 python3 /workspace/bert/data/bertPrep.py --action download --dataset sst-2
+python3 ${BERT_PREP_WORKING_DIR}/bertPrep.py --action download --dataset google_pretrained_weights
+
+mkdir -p /workspace/bert/data/download/nvidia_pretrained
+#SQuAD Large Checkpoint
+echo "Downloading SQuAD Large Checkpoint"
+cd /workspace/bert/data/download/nvidia_pretrained && \
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/bert_tf_ckpt_large_qa_squad11_amp_384/versions/19.03.1/zip -O bert_tf_ckpt_large_qa_squad11_amp_384_19.03.1.zip \
+&& unzip bert_tf_ckpt_large_qa_squad11_amp_384_19.03.1.zip -d bert_tf_squad11_large_384 && rm bert_tf_ckpt_large_qa_squad11_amp_384_19.03.1.zip
+
+#SQuAD Base Checkpoint
+cd /workspace/bert/data/download/nvidia_pretrained && \
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/bert_tf_ckpt_base_qa_squad11_amp_128/versions/19.03.1/zip -O bert_tf_ckpt_base_qa_squad11_amp_128_19.03.1.zip \
+&& unzip bert_tf_ckpt_base_qa_squad11_amp_128_19.03.1.zip -d bert_tf_squad11_base_128 && rm bert_tf_ckpt_base_qa_squad11_amp_128_19.03.1.zip
+
+#Pretraining Large checkpoint
+cd /workspace/bert/data/download/nvidia_pretrained && \
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/bert_tf_ckpt_large_pretraining_amp_lamb/versions/19.03.1/zip -O bert_tf_ckpt_large_pretraining_amp_lamb_19.03.1.zip \
+&& unzip bert_tf_ckpt_large_pretraining_amp_lamb_19.03.1.zip -d bert_tf_pretraining_large_lamb && rm bert_tf_ckpt_large_pretraining_amp_lamb_19.03.1.zip
+
+python3 /workspace/bert/data/bertPrep.py --action download --dataset google_pretrained_weights # Redundant, to verify and remove
+
 
 DATASET="wikicorpus_en"
 # Properly format the text files