diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt
index 167edddd32d1..d10b9bb2dfcb 100644
--- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt
+++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt
@@ -31,5 +31,6 @@
 --device=CPU
 --input_file=gs://apache-beam-ml/testing/inputs/sentences_50k.txt
 --runner=DataflowRunner
+--dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver
 --model_path=distilbert-base-uncased-finetuned-sst-2-english
---model_state_dict_path=gs://apache-beam-ml/models/huggingface.sentiment.distilbert-base-uncased.pth
\ No newline at end of file
+--model_state_dict_path=gs://apache-beam-ml/models/huggingface.sentiment.distilbert-base-uncased.pth
diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
index 6101fe5da457..23af8197d8d4 100644
--- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
+++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
@@ -20,7 +20,7 @@
 --input=gs://apache-beam-ml/testing/inputs/sentences_50k.txt
 --machine_type=n1-standard-8
 --worker_zone=us-central1-b
---disk_size_gb=50
+--disk_size_gb=200
 --input_options={}
 --num_workers=8
 --max_num_workers=25
@@ -33,4 +33,4 @@
 --influx_measurement=gemma_vllm_batch
 --model_gcs_path=gs://apache-beam-ml/models/gemma-2b-it
 --dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver
---experiments=use_runner_v2
\ No newline at end of file
+--experiments=use_runner_v2
diff --git a/.github/workflows/refresh_looker_metrics.yml b/.github/workflows/refresh_looker_metrics.yml
index 1fe4406b8779..880c783394c7 100644
--- a/.github/workflows/refresh_looker_metrics.yml
+++ b/.github/workflows/refresh_looker_metrics.yml
@@ -19,11 +19,6 @@ name: Refresh Looker Performance Metrics
 
 on:
   workflow_dispatch:
-    inputs:
-      READ_ONLY:
-        description: 'Run in read-only mode'
-        required: false
-        default: 'true'
 
 env:
   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -31,7 +26,6 @@ env:
   LOOKERSDK_CLIENT_ID: ${{ secrets.LOOKERSDK_CLIENT_ID }}
   LOOKERSDK_CLIENT_SECRET: ${{ secrets.LOOKERSDK_CLIENT_SECRET }}
   GCS_BUCKET: 'public_looker_explores_us_a3853f40'
-  READ_ONLY: ${{ inputs.READ_ONLY }}
 
 jobs:
   refresh_looker_metrics:
diff --git a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
index 5727437809c4..f4022ae90160 100644
--- a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
+++ b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile
@@ -46,7 +46,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \
     python3 -m pip install --upgrade pip setuptools wheel
 
 # 4) Copy the Beam SDK harness (for Dataflow workers)
-COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:2.68.0.dev \
+COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:latest \
      /opt/apache/beam /opt/apache/beam
 
 # 5) Make sure the harness is discovered first
@@ -54,15 +54,9 @@ ENV PYTHONPATH=/opt/apache/beam:$PYTHONPATH
 
 # 6) Install the Beam dev SDK from the local source package.
 # This .tar.gz file will be created by GitHub Actions workflow
-# and copied into the build context.
+# and copied into the build context. The [vllm] extra also installs the vLLM dependencies.
 COPY ./sdks/python/build/apache-beam.tar.gz /tmp/beam.tar.gz
-RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp]"
-
-# 7) Install vLLM, and other dependencies
-RUN python3 -m pip install --no-cache-dir \
-      openai>=1.52.2 \
-      vllm>=0.6.3 \
-      triton>=3.1.0
+RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp,vllm]"
 
 # 8) Use the Beam boot script as entrypoint
-ENTRYPOINT ["/opt/apache/beam/boot"]
\ No newline at end of file
+ENTRYPOINT ["/opt/apache/beam/boot"]
diff --git a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt
index 939f0526d808..0f8c6a6a673d 100644
--- a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt
+++ b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt
@@ -19,4 +19,4 @@ torchvision>=0.8.2
 pillow>=8.0.0
 transformers>=4.18.0
 google-cloud-monitoring>=2.27.0
-openai>=1.52.2
\ No newline at end of file
+openai>=1.52.2
diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py b/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py
index 903d67b91969..b0727ffa71b8 100644
--- a/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py
+++ b/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py
@@ -26,7 +26,7 @@ def __init__(self):
     self.metrics_namespace = "BeamML_vLLM"
     super().__init__(
         metrics_namespace=self.metrics_namespace,
-        pcollection="WriteBQ.out0",
+        pcollection="FormatForBQ.out0",
     )
 
   def test(self):
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 719d188ed266..356c6174e656 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -610,7 +610,8 @@ def get_portability_package_data():
           ],
           'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'],
           'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'],
-          'milvus': milvus_dependency
+          'milvus': milvus_dependency,
+          'vllm': ['openai==1.107.1', 'vllm==0.10.1.1', 'triton==3.3.1']
       },
       zip_safe=False,
       # PyPI package information.
diff --git a/website/www/site/data/performance.yaml b/website/www/site/data/performance.yaml
index 3dd7e68a9226..17a6612160c6 100644
--- a/website/www/site/data/performance.yaml
+++ b/website/www/site/data/performance.yaml
@@ -238,15 +238,15 @@ looks:
     write:
       folder: 86
       cost:
-        - id: tJWFWW3cnF2CWpmK2zZdXGvWmtNnJgrC
+        - id: J5TtpRykjwPs4W6S88FnJ28Tr8sSHpqN
           title: RunTime and EstimatedCost
       date:
-        - id: J5TtpRykjwPs4W6S88FnJ28Tr8sSHpqN
+        - id: tJWFWW3cnF2CWpmK2zZdXGvWmtNnJgrC
           title: AvgThroughputBytesPerSec by Date
         - id: Jf6qGqN25Zf787DpkNDX5CBpGRvCGMXp
           title: AvgThroughputElementsPerSec by Date
       version:
-        - id: dKyJy5ZKhkBdSTXRY3wZR6fXzptSs2qm
-          title: AvgThroughputBytesPerSec by Version
         - id: Qwxm27qY4fqT4CxXsFfKm2g3734TFJNN
-          title: AvgThroughputElementsPerSec by Version
\ No newline at end of file
+          title: AvgThroughputBytesPerSec by Version
+        - id: dKyJy5ZKhkBdSTXRY3wZR6fXzptSs2qm
+          title: AvgThroughputElementsPerSec by Version