diff --git a/engines/python/setup/djl_python/tests/test_properties_manager.py b/engines/python/setup/djl_python/tests/test_properties_manager.py index f3391edec..012114824 100644 --- a/engines/python/setup/djl_python/tests/test_properties_manager.py +++ b/engines/python/setup/djl_python/tests/test_properties_manager.py @@ -618,7 +618,7 @@ def test_all_vllm_engine_args(): "tokenizer_pool_size": "12", "tokenizer_pool_type": "mytype", "tokenizer_pool_extra_config": '{"a": "b"}', - "limit_mm_per_prompt": "image=2", + "limit_mm_per_prompt": '{"image":2}', "enable_lora": "true", "max_loras": "5", "max_lora_rank": "123", diff --git a/serving/docs/lmi/user_guides/vision_language_models.md b/serving/docs/lmi/user_guides/vision_language_models.md index 699602615..759d1e754 100644 --- a/serving/docs/lmi/user_guides/vision_language_models.md +++ b/serving/docs/lmi/user_guides/vision_language_models.md @@ -22,7 +22,7 @@ You can read more about the supported format in the [chat completions doc](chat_ Deploying Vision Language Models with LMI is very similar to deploying Text Generation Models. There are some additional, optional configs that are exposed: -* `option.limit_mm_per_prompt`: For each multimodal plugin, limit how many input instances to allow for each prompt. Expects a comma-separated list of items, e.g.: `image=16,video=2` allows a maximum of 16 images and 2 videos per prompt. Defaults to 1 for each modality. +* `option.limit_mm_per_prompt`: For each multimodal plugin, limit how many input instances to allow for each prompt. Expects a JSON dictionary mapping each modality to its limit, e.g.: `{"image": 16, "video": 2}` allows a maximum of 16 images and 2 videos per prompt. Defaults to 1 for each modality. Example SageMaker deployment code: @@ -32,7 +32,7 @@ from sagemaker.djl_inference import DJLModel model = DJLModel( model_id="llava-hf/llava-v1.6-mistral-7b-hf", env={ - "OPTION_LIMIT_MM_PER_PROMPT": "image=2", + "OPTION_LIMIT_MM_PER_PROMPT": '{"image":2}', } )