Use official Docker Hub image and update example test

junpuf · junpuf · commit 0b4cbd219b99 · 2025-11-18T18:36:54.000-08:00
diff --git a/.github/workflows/pr-vllm.yml b/.github/workflows/pr-vllm.yml
@@ -301,21 +301,21 @@ jobs:
             # Examples Test # 30min
             cd /workdir/examples
             pip install tensorizer # for tensorizer test
-            python3 offline_inference/basic/generate.py --model facebook/opt-125m
-            # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
+            # python3 offline_inference/basic/generate.py --model facebook/opt-125m
+            python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
             python3 offline_inference/basic/chat.py
             python3 offline_inference/prefix_caching.py
             python3 offline_inference/llm_engine_example.py
             python3 offline_inference/audio_language.py --seed 0
             python3 offline_inference/vision_language.py --seed 0
             python3 offline_inference/vision_language_pooling.py --seed 0
             python3 offline_inference/vision_language_multi_image.py --seed 0
-            VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-            python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
-            python3 offline_inference/basic/classify.py
-            python3 offline_inference/basic/embed.py
-            python3 offline_inference/basic/score.py
-            python3 offline_inference/simple_profiling.py
+            # python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+            # python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+            # python3 offline_inference/basic/classify.py
+            # python3 offline_inference/basic/embed.py
+            # python3 offline_inference/basic/score.py
+            # python3 offline_inference/simple_profiling.py
           '
 
       - name: Cleanup container and images
diff --git a/docker/vllm/Dockerfile b/docker/vllm/Dockerfile
@@ -1,4 +1,4 @@
-FROM public.ecr.aws/q9t5s3a7/vllm-release-repo:439368496db48d8f992ba8c606a0c0b1eebbfa69 as base
+FROM docker.io/vllm/vllm-openai:v0.11.1 as base
 ARG PYTHON="python3"
 LABEL maintainer="Amazon AI"
 ARG EFA_VERSION="1.43.3"

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-FROM public.ecr.aws/q9t5s3a7/vllm-release-repo:439368496db48d8f992ba8c606a0c0b1eebbfa69 as base`
	`1`	`+FROM docker.io/vllm/vllm-openai:v0.11.1 as base`
`2`	`2`	`ARG PYTHON="python3"`
`3`	`3`	`LABEL maintainer="Amazon AI"`
`4`	`4`	`ARG EFA_VERSION="1.43.3"`