@@ -37,15 +37,6 @@ use_xla=true
3737EXPORT_MODEL_ARGS=" ${precision} ${use_xla} ${seq_length} ${doc_stride} ${BERT_DIR} 1 ${MODEL_NAME} "
3838PERF_CLIENT_ARGS=" 1000 10 20 localhost"
3939
40- # Start Server
41- bash triton/scripts/launch_server.sh $precision
42-
43- # Restart Server
44- restart_server () {
45- docker kill triton_server_cont
46- bash triton/scripts/launch_server.sh $precision
47- }
48-
4940# ############# Dynamic Batching Comparison ##############
5041SERVER_BATCH_SIZE=8
5142CLIENT_BATCH_SIZE=1
@@ -54,30 +45,22 @@ TRITON_ENGINE_COUNT=1
5445# Dynamic batching 10 ms
5546TRITON_DYN_BATCHING_DELAY=10
5647bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
57- restart_server
58- sleep 15
5948bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
6049
6150# Dynamic batching 5 ms
6251TRITON_DYN_BATCHING_DELAY=5
6352bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
64- restart_server
65- sleep 15
6653bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
6754
6855# Dynamic batching 2 ms
6956TRITON_DYN_BATCHING_DELAY=2
7057bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
71- restart_server
72- sleep 15
7358bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
7459
7560
7661# Static Batching (i.e. Dynamic batching 0 ms)
7762TRITON_DYN_BATCHING_DELAY=0
7863bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
79- restart_server
80- sleep 15
8164bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
8265
8366
@@ -89,58 +72,44 @@ TRITON_DYN_BATCHING_DELAY=0
8972# Engine Count = 4
9073TRITON_ENGINE_COUNT=4
9174bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
92- restart_server
93- sleep 15
9475bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
9576
9677# Engine Count = 2
9778TRITON_ENGINE_COUNT=2
9879bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
99- restart_server
100- sleep 15
10180bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
10281
10382# Engine Count = 1
10483TRITON_ENGINE_COUNT=1
10584bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
106- restart_server
107- sleep 15
10885bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} ${PERF_CLIENT_ARGS}
10986
11087
11188# ############# Batch Size Comparison ##############
11289# BATCH=1 Generate model and perf
11390SERVER_BATCH_SIZE=1
11491CLIENT_BATCH_SIZE=1
115- TRITON_ENGINE_COUNT=1
116- TRITON_DYN_BATCHING_DELAY=0
92+ TRITON_ENGINE_COUNT=1
93+ TRITON_DYN_BATCHING_DELAY=0
11794
11895bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
119- restart_server
120- sleep 15
12196bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 64 localhost
12297
12398# BATCH=2 Generate model and perf
12499SERVER_BATCH_SIZE=2
125100CLIENT_BATCH_SIZE=2
126101bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
127- restart_server
128- sleep 15
129102bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 32 localhost
130103
131104# BATCH=4 Generate model and perf
132105SERVER_BATCH_SIZE=4
133106CLIENT_BATCH_SIZE=4
134107bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
135- restart_server
136- sleep 15
137108bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 16 localhost
138109
139110# BATCH=8 Generate model and perf
140111SERVER_BATCH_SIZE=8
141112CLIENT_BATCH_SIZE=8
142113bash triton/scripts/export_model.sh ${init_checkpoint} ${SERVER_BATCH_SIZE} ${EXPORT_MODEL_ARGS} ${TRITON_DYN_BATCHING_DELAY} ${TRITON_ENGINE_COUNT} ${TRITON_MODEL_OVERWRITE}
143- restart_server
144- sleep 15
145114bash triton/scripts/run_perf_client.sh ${MODEL_NAME} 1 ${precision} ${CLIENT_BATCH_SIZE} 1000 10 8 localhost
146115
0 commit comments