diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt index 167edddd32d1..d10b9bb2dfcb 100644 --- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt +++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt @@ -31,5 +31,6 @@ --device=CPU --input_file=gs://apache-beam-ml/testing/inputs/sentences_50k.txt --runner=DataflowRunner +--dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver --model_path=distilbert-base-uncased-finetuned-sst-2-english ---model_state_dict_path=gs://apache-beam-ml/models/huggingface.sentiment.distilbert-base-uncased.pth \ No newline at end of file +--model_state_dict_path=gs://apache-beam-ml/models/huggingface.sentiment.distilbert-base-uncased.pth diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt index 6101fe5da457..23af8197d8d4 100644 --- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt +++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt @@ -20,7 +20,7 @@ --input=gs://apache-beam-ml/testing/inputs/sentences_50k.txt --machine_type=n1-standard-8 --worker_zone=us-central1-b ---disk_size_gb=50 +--disk_size_gb=200 --input_options={} --num_workers=8 --max_num_workers=25 @@ -33,4 +33,4 @@ --influx_measurement=gemma_vllm_batch --model_gcs_path=gs://apache-beam-ml/models/gemma-2b-it --dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver ---experiments=use_runner_v2 \ No newline at end of file +--experiments=use_runner_v2 diff --git a/.github/workflows/refresh_looker_metrics.yml b/.github/workflows/refresh_looker_metrics.yml index 1fe4406b8779..880c783394c7 100644 --- a/.github/workflows/refresh_looker_metrics.yml +++ b/.github/workflows/refresh_looker_metrics.yml @@ -19,11 +19,6 @@ name: Refresh Looker Performance Metrics on: workflow_dispatch: - inputs: - READ_ONLY: - description: 'Run in read-only mode' - required: false - default: 'true' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -31,7 +26,6 @@ env: LOOKERSDK_CLIENT_ID: ${{ secrets.LOOKERSDK_CLIENT_ID }} LOOKERSDK_CLIENT_SECRET: ${{ secrets.LOOKERSDK_CLIENT_SECRET }} GCS_BUCKET: 'public_looker_explores_us_a3853f40' - READ_ONLY: ${{ inputs.READ_ONLY }} jobs: refresh_looker_metrics: diff --git a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile index 5727437809c4..f4022ae90160 100644 --- a/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile +++ b/sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile @@ -46,7 +46,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \ python3 -m pip install --upgrade pip setuptools wheel # 4) Copy the Beam SDK harness (for Dataflow workers) -COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:2.68.0.dev \ +COPY --from=gcr.io/apache-beam-testing/beam-sdk/beam_python3.10_sdk:latest \ /opt/apache/beam /opt/apache/beam # 5) Make sure the harness is discovered first @@ -54,15 +54,9 @@ ENV PYTHONPATH=/opt/apache/beam:$PYTHONPATH # 6) Install the Beam dev SDK from the local source package. # This .tar.gz file will be created by GitHub Actions workflow -# and copied into the build context. +# and copied into the build context. This will include vLLM dependencies COPY ./sdks/python/build/apache-beam.tar.gz /tmp/beam.tar.gz -RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp]" - -# 7) Install vLLM, and other dependencies -RUN python3 -m pip install --no-cache-dir \ - openai>=1.52.2 \ - vllm>=0.6.3 \ - triton>=3.1.0 +RUN python3 -m pip install --no-cache-dir "/tmp/beam.tar.gz[gcp,vllm]" # 8) Use the Beam boot script as entrypoint -ENTRYPOINT ["/opt/apache/beam/boot"] \ No newline at end of file +ENTRYPOINT ["/opt/apache/beam/boot"] diff --git a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt index 939f0526d808..0f8c6a6a673d 100644 --- a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt @@ -19,4 +19,4 @@ torchvision>=0.8.2 pillow>=8.0.0 transformers>=4.18.0 google-cloud-monitoring>=2.27.0 -openai>=1.52.2 \ No newline at end of file +openai>=1.52.2 diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py b/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py index 903d67b91969..b0727ffa71b8 100644 --- a/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py +++ b/sdks/python/apache_beam/testing/benchmarks/inference/vllm_gemma_benchmarks.py @@ -26,7 +26,7 @@ def __init__(self): self.metrics_namespace = "BeamML_vLLM" super().__init__( metrics_namespace=self.metrics_namespace, - pcollection="WriteBQ.out0", + pcollection="FormatForBQ.out0", ) def test(self): diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 719d188ed266..356c6174e656 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -610,7 +610,8 @@ def get_portability_package_data(): ], 'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'], 'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'], - 'milvus': milvus_dependency + 'milvus': milvus_dependency, + 'vllm': ['openai==1.107.1', 'vllm==0.10.1.1', 'triton==3.3.1'] }, zip_safe=False, # PyPI package information. diff --git a/website/www/site/data/performance.yaml b/website/www/site/data/performance.yaml index 3dd7e68a9226..17a6612160c6 100644 --- a/website/www/site/data/performance.yaml +++ b/website/www/site/data/performance.yaml @@ -238,15 +238,15 @@ looks: write: folder: 86 cost: - - id: tJWFWW3cnF2CWpmK2zZdXGvWmtNnJgrC + - id: J5TtpRykjwPs4W6S88FnJ28Tr8sSHpqN title: RunTime and EstimatedCost date: - - id: J5TtpRykjwPs4W6S88FnJ28Tr8sSHpqN + - id: tJWFWW3cnF2CWpmK2zZdXGvWmtNnJgrC title: AvgThroughputBytesPerSec by Date - id: Jf6qGqN25Zf787DpkNDX5CBpGRvCGMXp title: AvgThroughputElementsPerSec by Date version: - - id: dKyJy5ZKhkBdSTXRY3wZR6fXzptSs2qm - title: AvgThroughputBytesPerSec by Version - id: Qwxm27qY4fqT4CxXsFfKm2g3734TFJNN - title: AvgThroughputElementsPerSec by Version \ No newline at end of file + title: AvgThroughputBytesPerSec by Version + - id: dKyJy5ZKhkBdSTXRY3wZR6fXzptSs2qm + title: AvgThroughputElementsPerSec by Version