
Commit a6d8c3d

merge with midstream/v0.8.4.0_downstream (f400ce2c2)
2 parents: 940bbb6 + 8fcb848

File tree

3 files changed: 20 additions & 17 deletions


requirements/common.txt

Lines changed: 4 additions & 0 deletions
@@ -43,3 +43,7 @@ watchfiles # required for http server to monitor the updates of TLS files
 python-json-logger # Used by logging as per examples/other/logging_configuration.md
 scipy # Required for phi-4-multimodal-instruct
 ninja # Required for xgrammar, rocm, tpu, xpu
+opentelemetry-sdk>=1.26.0 # vllm.tracing
+opentelemetry-api>=1.26.0 # vllm.tracing
+opentelemetry-exporter-otlp>=1.26.0 # vllm.tracing
+opentelemetry-semantic-conventions-ai>=0.4.1 # vllm.tracing
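The four OpenTelemetry packages added above back vllm.tracing. A minimal sketch of how they fit together, assuming an OTLP collector on the default gRPC port; the endpoint, span name, and attribute are illustrative, not vLLM's actual tracing code:

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

# Wire the SDK: spans are batched and exported over OTLP/gRPC.
provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="localhost:4317", insecure=True)))
trace.set_tracer_provider(provider)

# Emit one span; the attribute key follows the GenAI-style semantic
# conventions that opentelemetry-semantic-conventions-ai standardizes.
tracer = trace.get_tracer("vllm.tracing.example")
with tracer.start_as_current_span("llm_request") as span:
    span.set_attribute("gen_ai.request.model", "example-model")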

requirements/cpu.txt

Lines changed: 1 addition & 1 deletion
@@ -17,4 +17,4 @@ torchvision==0.21.0; platform_machine == "ppc64le"
 datasets # for benchmark scripts
 
 # cpu cannot use triton 3.3.0
-triton==3.2.0; platform_machine == "x86_64"
+triton==3.2.0; platform_machine != "ppc64le"
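The widened environment marker above applies the triton pin on every architecture except ppc64le instead of only on x86_64. A small sketch, using the third-party packaging library, of how such a PEP 508 marker evaluates; the machine names are illustrative:

from packaging.markers import Marker

old_marker = Marker('platform_machine == "x86_64"')
new_marker = Marker('platform_machine != "ppc64le"')

# Evaluate both markers against a few hypothetical platforms.
for machine in ("x86_64", "aarch64", "ppc64le"):
    env = {"platform_machine": machine}
    print(machine, old_marker.evaluate(env), new_marker.evaluate(env))

# x86_64:  True,  True   -> pin applies under both markers
# aarch64: False, True   -> pin now also applies here
# ppc64le: False, False  -> still excluded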

tests/models/decoder_only/vision_language/test_models.py

Lines changed: 15 additions & 16 deletions
@@ -318,6 +318,21 @@
         use_tokenizer_eos=True,
         patch_hf_runner=model_utils.internvl_patch_hf_runner,
     ),
+    "llama4": VLMTestInfo(
+        models=["meta-llama/Llama-4-Scout-17B-16E-Instruct"],
+        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|header_start|>user<|header_end|>\n\n{img_prompt}<|eot|><|header_start|>assistant<|header_end|>\n\n",  # noqa: E501
+        img_idx_to_prompt=lambda _: "<|image|>",
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
+        distributed_executor_backend="mp",
+        image_size_factors=[(.25, 0.5, 1.0)],
+        hf_model_kwargs={"device_map": "auto"},
+        max_model_len=8192,
+        max_num_seqs=4,
+        dtype="bfloat16",
+        auto_cls=AutoModelForImageTextToText,
+        tensor_parallel_size=4,
+        marks=multi_gpu_marks(num_gpus=4),
+    ),
     "llava_next": VLMTestInfo(
         models=["llava-hf/llava-v1.6-mistral-7b-hf"],
         test_type=(VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
@@ -558,22 +573,6 @@
             limit_mm_per_prompt={"image": 1},
         )],
     ),
-    "llama4": VLMTestInfo(
-        models=["meta-llama/Llama-4-Scout-17B-16E-Instruct"],
-        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|header_start|>user<|header_end|>\n\n{img_prompt}<|eot|><|header_start|>assistant<|header_end|>\n\n",  # noqa: E501
-        img_idx_to_prompt=lambda _: "<|image|>",
-        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        distributed_executor_backend="mp",
-        image_size_factors=[(.25, 0.5, 1.0)],
-        hf_model_kwargs={"device_map": "auto"},
-        max_model_len=8192,
-        max_num_seqs=4,
-        dtype="bfloat16",
-        auto_cls=AutoModelForImageTextToText,
-        tensor_parallel_size=8,
-        vllm_runner_kwargs={"gpu_memory_utilization": 0.8},
-        marks=[large_gpu_mark(min_gb=80), multi_gpu_marks(num_gpus=8)],
-    ),
 }
 # yapf: enable
 
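The relocated llama4 entry now runs with tensor_parallel_size=4 and is gated by multi_gpu_marks(num_gpus=4). A hypothetical sketch of what such a GPU-count gate can look like; require_gpus is an illustrative stand-in, not vLLM's actual multi_gpu_marks helper:

import pytest
import torch

def require_gpus(num_gpus: int):
    """Skip the test unless at least `num_gpus` CUDA devices are visible."""
    available = torch.cuda.device_count() if torch.cuda.is_available() else 0
    return pytest.mark.skipif(
        available < num_gpus,
        reason=f"needs {num_gpus} GPUs, found {available}",
    )

# Illustrative usage: the 4-GPU llama4 configuration above would only run
# on machines that actually expose four devices.
@require_gpus(4)
def test_llama4_multi_image():
    ...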
