
Commit 26e47a1

Author: Swetha Mandava
Message: update triton for amp
Parent: 1182059

9 files changed: 19 additions, 58 deletions

TensorFlow/LanguageModeling/BERT/Dockerfile

Lines changed: 3 additions & 3 deletions

@@ -2,7 +2,7 @@ ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.06-tf1-py3
 
 FROM ${FROM_IMAGE_NAME}
 
-RUN apt-get update && apt-get install -y pbzip2 pv bzip2 libcurl4 curl
+RUN apt-get update && apt-get install -y pbzip2 pv bzip2 libcurl4 curl libb64-dev
 RUN pip install --upgrade pip
 RUN pip install toposort networkx pytest nltk tqdm html2text progressbar
 RUN pip --no-cache-dir --no-cache install git+https://github.com/NVIDIA/dllogger
@@ -17,12 +17,12 @@ RUN git clone https://github.com/titipata/pubmed_parser
 RUN pip3 install /workspace/pubmed_parser
 
 #Copy the perf_client over
-ARG TRTIS_CLIENTS_URL=https://github.com/NVIDIA/triton-inference-server/releases/download/v1.12.0/v1.12.0_ubuntu1804.clients.tar.gz
+ARG TRTIS_CLIENTS_URL=https://github.com/NVIDIA/triton-inference-server/releases/download/v1.14.0/v1.14.0_ubuntu1804.clients.tar.gz
 RUN mkdir -p /workspace/install \
     && curl -L ${TRTIS_CLIENTS_URL} | tar xvz -C /workspace/install
 
 #Install the python wheel with pip
-RUN pip install /workspace/install/python/tensorrtserver-1.12.0-py3-none-linux_x86_64.whl
+RUN pip install /workspace/install/python/tensorrtserver-1.14.0-py3-none-linux_x86_64.whl
 
 WORKDIR /workspace/bert
 COPY . .
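
The client tarball and the Python wheel track the Triton server release, so both references move from v1.12.0 to v1.14.0 together; libb64-dev is presumably a new dependency of the v1.14.0 client binaries. As a quick smoke test of the upgraded wheel, a health probe against a running server could look like the sketch below. This is a minimal sketch only: it assumes the v1 tensorrtserver Python API shipped in the wheel and a server reachable on the default HTTP port 8000.

# Hedged sketch: verify the tensorrtserver-1.14.0 wheel can reach a server.
# ProtocolType and ServerHealthContext are the v1 client API; the address
# localhost:8000 is an assumption, not taken from this commit.
from tensorrtserver.api import ProtocolType, ServerHealthContext

ctx = ServerHealthContext("localhost:8000", ProtocolType.HTTP, verbose=False)
print("live: ", ctx.is_live())   # True once the server process is up
print("ready:", ctx.is_ready())  # True once all models are loaded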

TensorFlow/LanguageModeling/BERT/run_classifier.py

Lines changed: 2 additions & 1 deletion

@@ -379,8 +379,9 @@ def metric_fn(per_example_loss, label_ids, logits):
       dummy_op = tf.no_op()
       # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
       if FLAGS.amp:
+        loss_scaler = tf.train.experimental.FixedLossScale(1)
         dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
-            optimization.LAMBOptimizer(learning_rate=0.0))
+            optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler)
       eval_metric_ops = metric_fn(per_example_loss, label_ids, logits)
       output_spec = tf.estimator.EstimatorSpec(
           mode=mode,
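
The same three-line pattern lands in run_ner.py, run_re.py, and run_squad.py below. In eval mode the rewrite is invoked with a throwaway optimizer purely to turn on the automatic fp16 graph rewrite, and passing FixedLossScale(1) pins the loss scale instead of the default dynamic schedule, presumably so the rewrite does not add loss-scale variables and update ops that have no use when no gradients are computed. A minimal standalone sketch of the pattern (TF 1.x API as in the 20.06-tf1 container; GradientDescentOptimizer stands in for the repo's optimization.LAMBOptimizer):

import tensorflow as tf

# Throwaway optimizer: nothing is minimized at eval time; the call exists
# only to enable the automatic mixed precision graph rewrite.
opt = tf.train.GradientDescentOptimizer(learning_rate=0.0)

# FixedLossScale(1) replaces the default dynamic loss scale, so the rewrite
# casts ops to fp16 without adding loss-scale variables or update ops.
loss_scaler = tf.train.experimental.FixedLossScale(1)
opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(opt, loss_scaler)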

TensorFlow/LanguageModeling/BERT/run_ner.py

Lines changed: 2 additions & 1 deletion

@@ -545,8 +545,9 @@ def model_fn(features, labels, mode, params):
       dummy_op = tf.no_op()
       # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
       if amp:
+        loss_scaler = tf.train.experimental.FixedLossScale(1)
         dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
-            optimization.LAMBOptimizer(learning_rate=0.0))
+            optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler)
 
       def metric_fn(per_example_loss, label_ids, logits):
       # def metric_fn(label_ids, logits):

TensorFlow/LanguageModeling/BERT/run_re.py

Lines changed: 2 additions & 1 deletion

@@ -626,8 +626,9 @@ def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
       dummy_op = tf.no_op()
       # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
       if amp:
+        loss_scaler = tf.train.experimental.FixedLossScale(1)
         dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
-            optimization.LAMBOptimizer(learning_rate=0.0))
+            optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler)
 
       def metric_fn(per_example_loss, label_ids, logits, is_real_example):
         predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

TensorFlow/LanguageModeling/BERT/run_squad.py

Lines changed: 4 additions & 5 deletions

@@ -325,9 +325,9 @@ def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
(whitespace-only hunk: the two -/+ pairs below strip trailing spaces from blank lines)
     if init_checkpoint and (hvd is None or hvd.rank() == 0):
       (assignment_map, initialized_variable_names
       ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
-
+
       tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
-
+
     if FLAGS.verbose_logging:
       tf.compat.v1.logging.info("**** Trainable Variables ****")
       for var in tvars:
@@ -370,8 +370,9 @@ def compute_loss(logits, positions):
       dummy_op = tf.no_op()
       # Need to call mixed precision graph rewrite if fp16 to enable graph rewrite
       if amp:
+        loss_scaler = tf.train.experimental.FixedLossScale(1)
         dummy_op = tf.train.experimental.enable_mixed_precision_graph_rewrite(
-            optimization.LAMBOptimizer(learning_rate=0.0))
+            optimization.LAMBOptimizer(learning_rate=0.0), loss_scaler)
 
       predictions = {
           "unique_ids": unique_ids,
@@ -807,8 +808,6 @@ def validate_flags_or_throw(bert_config):
 
 def export_model(estimator, export_dir, init_checkpoint):
   """Exports a checkpoint in SavedModel format in a directory structure compatible with Triton."""
-
-
   def serving_input_fn():
     label_ids = tf.placeholder(tf.int32, [None,], name='unique_ids')
     input_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_ids')
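
The last hunk also shows the start of export_model's serving_input_fn, which declares the placeholders Triton will feed. For orientation, here is a hedged sketch of how such a function is typically completed for an Estimator export; only unique_ids and input_ids appear in the visible context, so the input_mask and segment_ids placeholders are assumptions based on standard BERT inputs.

import tensorflow as tf

max_seq_length = 384  # stands in for FLAGS.max_seq_length

def serving_input_fn():
    # Placeholder names become the input tensor names in the SavedModel,
    # and hence the input names in the Triton model config.
    label_ids = tf.placeholder(tf.int32, [None], name='unique_ids')
    input_ids = tf.placeholder(tf.int32, [None, max_seq_length], name='input_ids')
    input_mask = tf.placeholder(tf.int32, [None, max_seq_length], name='input_mask')  # assumed
    segment_ids = tf.placeholder(tf.int32, [None, max_seq_length], name='segment_ids')  # assumed
    return tf.estimator.export.build_raw_serving_input_receiver_fn({
        'unique_ids': label_ids,
        'input_ids': input_ids,
        'input_mask': input_mask,
        'segment_ids': segment_ids,
    })()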

TensorFlow/LanguageModeling/BERT/triton/scripts/generate_figures.sh

Lines changed: 2 additions & 33 deletions

@@ -37,15 +37,6 @@ use_xla=true
 EXPORT_MODEL_ARGS="${precision} ${use_xla} ${seq_length} ${doc_stride} ${BERT_DIR} 1 ${MODEL_NAME}"
 PERF_CLIENT_ARGS="1000 10 20 localhost"
 
-# Start Server
-bash triton/scripts/launch_server.sh $precision
-
-# Restart Server
-restart_server() {
-    docker kill triton_server_cont
-    bash triton/scripts/launch_server.sh $precision
-}
-
 ############## Dynamic Batching Comparison ##############
 SERVER_BATCH_SIZE=8
 CLIENT_BATCH_SIZE=1
@@ -54,30 +45,22 @@ TRITON_ENGINE_COUNT=1
 # Dynamic batching 10 ms
 TRITON_DYN_BATCHING_DELAY=10
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
 
 # Dynamic batching 5 ms
 TRITON_DYN_BATCHING_DELAY=5
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
 
 # Dynamic batching 2 ms
 TRITON_DYN_BATCHING_DELAY=2
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
 
 
 # Static Batching (i.e. Dynamic batching 0 ms)
 TRITON_DYN_BATCHING_DELAY=0
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
 
 
@@ -89,58 +72,44 @@ TRITON_DYN_BATCHING_DELAY=0
 # Engine Count = 4
 TRITON_ENGINE_COUNT=4
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
 
 # Engine Count = 2
 TRITON_ENGINE_COUNT=2
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
 
 # Engine Count = 1
 TRITON_ENGINE_COUNT=1
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
 
 
 ############## Batch Size Comparison ##############
 # BATCH=1 Generate model and perf
 SERVER_BATCH_SIZE=1
 CLIENT_BATCH_SIZE=1
-TRITON_ENGINE_COUNT=1
-TRITON_DYN_BATCHING_DELAY=0
+TRITON_ENGINE_COUNT=1
+TRITON_DYN_BATCHING_DELAY=0
(whitespace-only: trailing spaces removed from the two lines above)
 
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 64 localhost
 
 # BATCH=2 Generate model and perf
 SERVER_BATCH_SIZE=2
 CLIENT_BATCH_SIZE=2
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 32 localhost
 
 # BATCH=4 Generate model and perf
 SERVER_BATCH_SIZE=4
 CLIENT_BATCH_SIZE=4
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 16 localhost
 
 # BATCH=8 Generate model and perf
 SERVER_BATCH_SIZE=8
 CLIENT_BATCH_SIZE=8
 bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
-restart_server
-sleep 15
 bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 8 localhost
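
With the per-configuration server restarts gone, each export_model.sh call regenerates the model repository entry and the running server presumably picks the change up from the model store. The three knobs swept above are the server-side max batch size, the dynamic batching queue delay, and the engine (instance) count. Below is a hedged sketch of how these map onto Triton model-config fields; the field names follow Triton's model-configuration schema, but the exact config.pbtxt that export_model.sh writes may differ.

def triton_config(max_batch_size, delay_ms, engine_count):
    """Render the config.pbtxt fields the three script knobs control (sketch)."""
    lines = [
        'platform: "tensorflow_savedmodel"',
        f'max_batch_size: {max_batch_size}',
        f'instance_group {{ count: {engine_count} kind: KIND_GPU }}',
    ]
    if delay_ms > 0:
        # The Triton schema expresses the queue delay in microseconds.
        lines.append('dynamic_batching { '
                     f'preferred_batch_size: [ {max_batch_size} ] '
                     f'max_queue_delay_microseconds: {delay_ms * 1000} }}')
    return '\n'.join(lines)

print(triton_config(8, 10, 1))  # SERVER_BATCH_SIZE=8, 10 ms delay, 1 engine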

TensorFlow/LanguageModeling/BERT/triton/scripts/launch_server.sh

Lines changed: 2 additions & 12 deletions

@@ -1,16 +1,7 @@
-precision=${1:-"fp16"}
 NV_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-"all"}
 
-if [ "$precision" = "fp16" ] ; then
-    echo "fp16 activated!"
-    export TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE=1
-else
-    echo "fp32 activated!"
-    export TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE=0
-fi
-
 # Start TRITON server in detached state
-docker run --gpus all -d --rm \
+docker run --gpus $NV_VISIBLE_DEVICES --rm -d \
   --shm-size=1g \
   --ulimit memlock=-1 \
   --ulimit stack=67108864 \
@@ -19,6 +10,5 @@ docker run --gpus all -d --rm \
   -p8002:8002 \
   --name triton_server_cont \
   -e NVIDIA_VISIBLE_DEVICES=$NV_VISIBLE_DEVICES \
-  -e TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE \
   -v $PWD/results/triton_models:/models \
-  nvcr.io/nvidia/tritonserver:20.03-py3 trtserver --model-store=/models --strict-model-config=false
+  nvcr.io/nvidia/tritonserver:20.06-v1-py3 tritonserver --model-store=/models --strict-model-config=false

TensorFlow/LanguageModeling/BERT/triton/scripts/run_perf_client.sh

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@
 if [ ! "$(docker inspect -f "{{.State.Running}}" triton_server_cont)" = "true" ] ; then
 
     echo "Launching TRITON server"
-    bash triton/scripts/launch_server.sh $precision
+    bash triton/scripts/launch_server.sh
     SERVER_LAUNCHED=true
 
     function cleanup_server {

TensorFlow/LanguageModeling/BERT/triton/scripts/run_triton.sh

Lines changed: 1 addition & 1 deletion

@@ -88,7 +88,7 @@ if [ "$triton_export_model" = "true" ] ; then
 fi
 
 # Start TRTIS server in detached state
-bash triton/scripts/launch_server.sh $precision
+bash triton/scripts/launch_server.sh
 
 # Wait until server is up. curl on the health of the server and sleep until its ready
 bash triton/scripts/wait_for_triton_server.sh localhost
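
wait_for_triton_server.sh polls the server's health endpoint and sleeps until it reports ready. An equivalent hedged sketch in Python, assuming the v1 REST route /api/health/ready on the default HTTP port 8000 (both assumptions; this commit does not show that script's contents):

import time
import urllib.request

def wait_for_triton(host='localhost', port=8000, timeout_s=300):
    # A v1 Triton server answers 200 on /api/health/ready once all
    # models in the store are loaded and servable.
    url = f'http://{host}:{port}/api/health/ready'
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except OSError:
            pass  # not accepting connections yet; keep polling
        time.sleep(2)
    return False

if not wait_for_triton():
    raise SystemExit('Triton server did not become ready in time')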
