@@ -33,14 +33,13 @@ steps:
 
 - label: Documentation Build # 2min
   mirror_hardwares: [amdexperimental]
-  working_dir: "/vllm-workspace/test_docs/docs"
+  working_dir: "/vllm-workspace/test_docs"
   fast_check: true
   no_gpu: True
   commands:
-  - pip install -r ../../requirements/docs.txt
-  - SPHINXOPTS=\"-W\" make html
-  # Check API reference (if it fails, you may have missing mock imports)
-  - grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html
+  - pip install -r ../requirements/docs.txt
+  # TODO: add `--strict` once warnings in docstrings are fixed
+  - mkdocs build
 
 - label: Async Engine, Inputs, Utils, Worker Test # 24min
   mirror_hardwares: [amdexperimental]
@@ -59,6 +58,7 @@ steps:
   - pytest -v -s async_engine # AsyncLLMEngine
   - NUM_SCHEDULER_STEPS=4 pytest -v -s async_engine/test_async_llm_engine.py
   - pytest -v -s test_inputs.py
+  - pytest -v -s test_outputs.py
   - pytest -v -s multimodal
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
@@ -128,7 +128,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/
   - pytest -v -s entrypoints/test_chat_utils.py
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
@@ -226,6 +226,7 @@ steps:
   - pytest -v -s v1/test_serial_utils.py
   - pytest -v -s v1/test_utils.py
   - pytest -v -s v1/test_oracle.py
+  - pytest -v -s v1/test_metrics_reader.py
   # TODO: accuracy does not match, whether setting
   # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
   - pytest -v -s v1/e2e
@@ -250,7 +251,7 @@ steps:
   - python3 offline_inference/vision_language.py --seed 0
   - python3 offline_inference/vision_language_embedding.py --seed 0
   - python3 offline_inference/vision_language_multi_image.py --seed 0
-  - VLLM_USE_V1=0 python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+  - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
   - python3 offline_inference/encoder_decoder.py
   - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
   - python3 offline_inference/basic/classify.py
@@ -322,6 +323,7 @@ steps:
   - pytest -v -s compile/test_fusion.py
   - pytest -v -s compile/test_silu_mul_quant_fusion.py
   - pytest -v -s compile/test_sequence_parallelism.py
+  - pytest -v -s compile/test_async_tp.py
 
 - label: PyTorch Fullgraph Smoke Test # 9min
   mirror_hardwares: [amdexperimental, amdproduction]
@@ -399,10 +401,12 @@ steps:
   source_file_dependencies:
   - vllm/model_executor/model_loader
   - tests/tensorizer_loader
+  - tests/entrypoints/openai/test_tensorizer_entrypoint.py
   commands:
   - apt-get update && apt-get install -y curl libsodium23
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -v -s tensorizer_loader
+  - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
 
 - label: Benchmarks # 9min
   mirror_hardwares: [amdexperimental, amdproduction]
@@ -481,10 +485,7 @@ steps:
   - pytest -v -s models/test_registry.py
   - pytest -v -s models/test_utils.py
   - pytest -v -s models/test_vision.py
-  # V1 Test: https://github.com/vllm-project/vllm/issues/14531
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'
+  - pytest -v -s models/test_initialization.py
 
 - label: Language Models Test (Standard)
   mirror_hardwares: [amdexperimental]
@@ -498,16 +499,25 @@ steps:
   - pip freeze | grep -E 'torch'
   - pytest -v -s models/language -m core_model
 
-- label: Language Models Test (Extended)
+- label: Language Models Test (Extended Generation) # 1hr20min
   mirror_hardwares: [amdexperimental]
   optional: true
   source_file_dependencies:
   - vllm/
-  - tests/models/language
+  - tests/models/language/generation
   commands:
   # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
   - pip install 'git+https://github.com/Dao-AILab/[email protected]'
-  - pytest -v -s models/language -m 'not core_model'
+  - pytest -v -s models/language/generation -m 'not core_model'
+
+- label: Language Models Test (Extended Pooling) # 36min
+  mirror_hardwares: [amdexperimental]
+  optional: true
+  source_file_dependencies:
+  - vllm/
+  - tests/models/language/pooling
+  commands:
+  - pytest -v -s models/language/pooling -m 'not core_model'
 
 - label: Multi-Modal Models Test (Standard)
   mirror_hardwares: [amdexperimental]