Skip to content

Commit 6896ceb

Browse files
Merge remote-tracking branch 'upstream/master' into revertUbuntuLatestPR35734
2 parents bd56d2c + b60ce5b commit 6896ceb

File tree

134 files changed

+14794
-870
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

134 files changed

+14794
-870
lines changed

.asf.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ github:
5151

5252
protected_branches:
5353
master: {}
54+
release-2.67.0-postrelease: {}
5455
release-2.67: {}
5556
release-2.66.0-postrelease: {}
5657
release-2.66: {}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
# Composite action that builds a Docker image and pushes it to a container
# registry, exposing the pushed image reference as the `image_url` output.
#
# NOTE(review): the first step calls `gcloud`, so this assumes the runner
# already has the Google Cloud SDK installed and authenticated — confirm
# against the calling workflows.
name: 'Docker Build and Push'
description: 'Builds and pushes a Docker image to a container registry.'

inputs:
  dockerfile_path:
    description: 'Path to the Dockerfile'
    required: true
  image_name:
    description: 'Base name for the Docker image (e.g., gcr.io/my-project/my-app)'
    required: true
  image_tag:
    description: 'Tag for the Docker image (e.g., latest, or a git sha)'
    required: true
  build_context:
    description: 'The build context for the Docker build command'
    required: false
    default: '.'

outputs:
  image_url:
    description: "The full URL of the pushed image, including the tag"
    # Forwarded from the `build-push` step, which writes it to $GITHUB_OUTPUT.
    value: ${{ steps.build-push.outputs.image_url }}

runs:
  using: "composite"
  steps:
    - name: Configure Docker to use Google Cloud credentials
      shell: bash
      run: gcloud auth configure-docker --quiet

    - name: Build and Push Docker Image
      id: build-push  # referenced by outputs.image_url above
      shell: bash
      # Inputs are handed to the script as environment variables instead of
      # being spliced into the script text with ${{ }}. Expression expansion
      # happens before bash parses the script, so a value containing quotes,
      # spaces or `$(...)` would otherwise be executed as shell code.
      env:
        IMAGE_NAME: ${{ inputs.image_name }}
        IMAGE_TAG: ${{ inputs.image_tag }}
        DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
        BUILD_CONTEXT: ${{ inputs.build_context }}
      run: |
        # Construct the full image URL from the inputs
        FULL_IMAGE_URL="${IMAGE_NAME}:${IMAGE_TAG}"
        echo "Building image: $FULL_IMAGE_URL"

        # Build the image. Every expansion is quoted so paths containing
        # spaces or glob characters reach docker as a single argument.
        docker build -t "$FULL_IMAGE_URL" -f "$DOCKERFILE_PATH" "$BUILD_CONTEXT"
        # Push the image
        docker push "$FULL_IMAGE_URL"
        # Set the output value for this action
        echo "image_url=$FULL_IMAGE_URL" >> "$GITHUB_OUTPUT"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"comment": "Modify this file in a trivial way to cause this test suite to run.",
3-
"modification": 101
3+
"modification": 35
44
}
55

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
22
"comment": "Modify this file in a trivial way to cause this test suite to run",
3-
"revision": 5
3+
"revision": 6
44
}

.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ jobs:
5555
(github.event_name == 'schedule' && github.repository == 'apache/beam') ||
5656
github.event.comment.body == 'Run Inference Benchmarks'
5757
runs-on: [self-hosted, ubuntu-20.04, main]
58-
timeout-minutes: 900
58+
timeout-minutes: 1000
5959
name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
6060
strategy:
6161
matrix:
@@ -72,7 +72,12 @@ jobs:
7272
- name: Setup Python environment
7373
uses: ./.github/actions/setup-environment-action
7474
with:
75+
java-version: default
7576
python-version: '3.10'
77+
- name: Package Python SDK using Gradle
78+
run: ./gradlew :sdks:python:sdist -PpythonVersion=3.10
79+
- name: Configure Docker for Artifact Registry
80+
run: gcloud auth configure-docker us-docker.pkg.dev
7681
- name: Prepare test arguments
7782
uses: ./.github/actions/test-arguments-action
7883
with:
@@ -86,9 +91,28 @@ jobs:
8691
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Imagenet_Classification_Resnet_152_Tesla_T4_GPU.txt
8792
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Streaming_DistilBert_Base_Uncased.txt
8893
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_Pytorch_Sentiment_Batch_DistilBert_Base_Uncased.txt
94+
${{ github.workspace }}/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
8995
# The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
9096
- name: get current time
9197
run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV
98+
- name: Build VLLM Development Image
99+
id: build_vllm_image
100+
uses: ./.github/actions/build-push-docker-action
101+
with:
102+
dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile'
103+
image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base'
104+
image_tag: ${{ github.sha }}
105+
- name: Run VLLM Gemma Batch Test
106+
uses: ./.github/actions/gradle-command-self-hosted-action
107+
timeout-minutes: 180
108+
with:
109+
gradle-command: :sdks:python:apache_beam:testing:load_tests:run
110+
arguments: |
111+
-PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \
112+
-Prunner=DataflowRunner \
113+
-PsdkLocationOverride=false \
114+
-PpythonVersion=3.10 \
115+
-PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}'
92116
- name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model
93117
uses: ./.github/actions/gradle-command-self-hosted-action
94118
timeout-minutes: 180

.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ jobs:
100100
arguments: |
101101
-PdisableSpotlessCheck=true \
102102
-PdisableCheckStyle=true \
103-
--no-parallel \
103+
max-workers: 4
104104
- name: Archive JUnit Test Results
105105
uses: actions/upload-artifact@v4
106106
if: ${{ !success() }}

.github/workflows/build_release_candidate.yml

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,17 @@ on:
4040
beam_site_pr: create the documentation update PR against apache/beam-site.
4141
--
4242
prism: build and upload the artifacts to the release for this tag
43+
--
44+
managed_io_docs_pr: create the managed-io.md update PR against apache/beam.
4345
required: true
4446
default: |
4547
{java_artifacts: "no",
4648
java_source: "no",
4749
docker_artifacts: "no",
4850
python_artifacts: "no",
4951
beam_site_pr: "no",
50-
prism: "no"}
52+
prism: "no",
53+
managed_io_docs_pr: "no"}
5154
5255
env:
5356
DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
@@ -545,3 +548,69 @@ jobs:
545548
svn add --force --parents prism
546549
svn status
547550
svn commit -m "Staging Prism artifacts for Apache Beam ${RELEASE} RC${RC_NUM}" --non-interactive --username "${{ github.event.inputs.APACHE_ID }}" --password "${{ github.event.inputs.APACHE_PASSWORD }}"
551+
# Job: regenerate managed-io.md from the release-candidate tag and open a PR
# with the result against apache/beam master. Runs only when the STAGE input
# sets managed_io_docs_pr to "yes".
managed_io_docs_pr:
  if: ${{ fromJson(github.event.inputs.STAGE).managed_io_docs_pr == 'yes'}}
  runs-on: ubuntu-22.04
  env:
    BRANCH_NAME: updates_managed_io_docs_${{ github.event.inputs.RELEASE }}_rc${{ github.event.inputs.RC }}
    BEAM_ROOT_DIR: ${{ github.workspace }}/beam
    MANAGED_IO_DOCS_PATH: website/www/site/content/en/documentation/io/managed-io.md
  steps:
    - name: Checkout Beam Repo
      uses: actions/checkout@v4
      with:
        ref: "v${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
        repository: apache/beam
        path: beam
        token: ${{ github.event.inputs.REPO_TOKEN }}
        # NOTE(review): credentials are not persisted in the local git
        # config, yet the "Create commit on beam branch" step pushes to
        # origin — confirm that push can authenticate.
        persist-credentials: false
    - name: Install Python 3.9
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
    - name: Install Java 11
      uses: actions/setup-java@v4
      with:
        distribution: 'temurin'
        java-version: '11'
    - name: Remove default github maven configuration
      # This step is a workaround to avoid a decryption issue of Beam's
      # net.linguica.gradle.maven.settings plugin and github's provided maven
      # settings.xml file
      run: rm ~/.m2/settings.xml || true
    - name: Install SDK
      working-directory: beam/sdks/python
      run: |
        pip install -e .
    - name: Build Expansion Service Jar
      working-directory: beam
      run: |
        ./gradlew sdks:java:io:expansion-service:shadowJar
    - name: Build GCP Expansion Service Jar
      working-directory: beam
      run: |
        ./gradlew sdks:java:io:google-cloud-platform:expansion-service:shadowJar
    - name: Generate Managed IO Docs
      working-directory: beam/sdks/python
      # RUNNER_TEMP is the default environment variable that mirrors
      # ${{ runner.temp }}; reading it in the shell (quoted) avoids splicing
      # an expression into the script text.
      run: |
        python gen_managed_doc.py --output_location "$RUNNER_TEMP/managed-io.md"
    - name: Create commit on beam branch
      working-directory: beam
      env:
        # User-supplied dispatch inputs reach the script as data, never as
        # script text, so they cannot inject shell commands.
        RC_LABEL: ${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}
      run: |
        git fetch origin master
        git checkout -b "$BRANCH_NAME" origin/master
        mv "$RUNNER_TEMP/managed-io.md" "$MANAGED_IO_DOCS_PATH"
        git config user.name "$GITHUB_ACTOR"
        git config user.email actions@"$RUNNER_NAME".local
        git add "$MANAGED_IO_DOCS_PATH"
        # --allow-empty keeps the job going when the generated doc is
        # identical to what master already has.
        git commit --allow-empty -m "Update managed-io.md for release ${RC_LABEL}."
        git push -f --set-upstream origin "$BRANCH_NAME"
    - name: Create beam PR
      working-directory: beam
      env:
        GH_TOKEN: ${{ github.event.inputs.REPO_TOKEN }}
        PR_TITLE: "Update managed-io.md for release ${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
        PR_BODY: "Content generated from release ${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}."
      run: |
        gh pr create -t "$PR_TITLE" -b "$PR_BODY" --base master --repo apache/beam

.github/workflows/finalize_release.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,10 @@ jobs:
9393
echo "::add-mask::$PYPI_PASSWORD"
9494
- name: Validate PyPi id/password
9595
run: |
96-
echo "::add-mask::${{ github.event.inputs.PYPI_API_TOKEN }}"
97-
if [ "${{ github.event.inputs.PYPI_API_TOKEN }}" == "" ]
96+
# Workaround for Actions bug - https://github.com/actions/runner/issues/643
97+
PYPI_API_TOKEN=$(jq -r '.inputs.PYPI_API_TOKEN' $GITHUB_EVENT_PATH)
98+
echo "::add-mask::$PYPI_API_TOKEN"
99+
if [ "$PYPI_API_TOKEN" == "" ]
98100
then
99101
echo "Must provide a PyPi password to publish artifacts to PyPi"
100102
exit 1
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
--runner=DataflowRunner
17+
--region=us-central1
18+
--temp_location=gs://temp-storage-for-perf-tests/loadtests
19+
--staging_location=gs://temp-storage-for-perf-tests/loadtests
20+
--input=gs://apache-beam-ml/testing/inputs/sentences_50k.txt
21+
--machine_type=n1-standard-8
22+
--worker_zone=us-central1-b
23+
--disk_size_gb=50
24+
--input_options={}
25+
--num_workers=8
26+
--max_num_workers=25
27+
--autoscaling_algorithm=THROUGHPUT_BASED
28+
--publish_to_big_query=true
29+
--sdk_location=container
30+
--output_table=apache-beam-testing.beam_run_inference.result_gemma_vllm_batch
31+
--metrics_dataset=beam_run_inference
32+
--metrics_table=gemma_vllm_batch
33+
--influx_measurement=gemma_vllm_batch
34+
--model_gcs_path=gs://apache-beam-ml/models/gemma-2b-it
35+
--dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver
36+
--experiments=use_runner_v2

.github/workflows/republish_released_docker_containers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ on:
3232
- cron: "0 6 * * 1"
3333
env:
3434
docker_registry: gcr.io
35-
release: "${{ github.event.inputs.RELEASE || '2.66.0' }}"
35+
release: "${{ github.event.inputs.RELEASE || '2.67.0' }}"
3636
rc: "${{ github.event.inputs.RC || '2' }}"
3737

3838
jobs:

0 commit comments

Comments
 (0)