@@ -30,10 +30,10 @@ for i in $(env | grep ^PMI_ | cut -d"=" -f 1); do unset -v $i; done
30
30
for i in $( env | grep ^PMIX_ | cut -d" =" -f 1) ; do unset -v $i ; done
31
31
32
32
case $MODEL_TYPE in
33
- llava|phi|vila|mllama)
33
+ llava|phi|vila|mllama|qwen )
34
34
;;
35
35
* )
36
- echo " Unsupported type argument: Expected one of: [llava, phi, vila, mllama]" >&2
36
+ echo " Unsupported type argument: Expected one of: [llava, phi, vila, mllama, qwen ]" >&2
37
37
exit 1
38
38
esac
39
39
@@ -58,10 +58,10 @@ case $SPARSITY_FMT in
58
58
esac
59
59
60
60
case $QFORMAT in
61
- fp8|int8_sq|int4_awq|w4a8_awq|fp16|bf16)
61
+ fp8|nvfp4| int8_sq|int4_awq|w4a8_awq|fp16|bf16)
62
62
;;
63
63
* )
64
- echo " Unknown quant argument: Expected one of: [fp8, int8_sq, int4_awq, w4a8_awq, fp16, bf16]" >&2
64
+ echo " Unknown quant argument: Expected one of: [fp8, nvfp4, int8_sq, int4_awq, w4a8_awq, fp16, bf16]" >&2
65
65
exit 1
66
66
esac
67
67
91
91
92
92
BUILD_MAX_OUTPUT_LEN=512
93
93
94
- if [ " $MODEL_TYPE " = " llava" ] || [ " $MODEL_TYPE " = " vila" ]; then
94
+ if [ " $MODEL_TYPE " = " llava" ] || [ " $MODEL_TYPE " = " vila" ] || [ " $MODEL_TYPE " = " qwen " ] ; then
95
95
BUILD_MAX_BATCH_SIZE=20
96
96
else
97
97
BUILD_MAX_BATCH_SIZE=4
@@ -149,6 +149,9 @@ case "${MODEL_TYPE}" in
149
149
PTQ_ARGS+=" --kv_cache_qformat none "
150
150
VLM_ARGS=" --max_encoder_input_len=6404 --skip_run"
151
151
;;
152
+ " qwen" )
153
+ PTQ_ARGS+=" --kv_cache_qformat none "
154
+ ;;
152
155
esac
153
156
154
157
if [ " ${MODEL_TYPE} " = " vila" ]; then
@@ -177,6 +180,7 @@ if [[ $TASKS =~ "build" ]] || [[ ! -d "$ENGINE_DIR" ]] || [[ ! $(ls -A $ENGINE_D
177
180
--inference_tensor_parallel=$TP \
178
181
--inference_pipeline_parallel=$PP \
179
182
--export_fmt=$EXPORT_FORMAT \
183
+ --no-verbose \
180
184
$PTQ_ARGS
181
185
else
182
186
echo " Quantized model config $MODEL_CONFIG exists, skipping the quantization stage"
@@ -213,6 +217,10 @@ case "${MODEL_TYPE}" in
213
217
" phi" )
214
218
VISUAL_MODEL_TYPE=" phi-3-vision"
215
219
;;
220
+ " qwen" )
221
+ # Map generic type to TRT-LLM multimodal model type
222
+ VISUAL_MODEL_TYPE=" qwen2_vl"
223
+ ;;
216
224
esac
217
225
218
226
0 commit comments