From 2f309d1aa9297a71006857a6eaf8b0825a5e1815 Mon Sep 17 00:00:00 2001 From: Vitaly Terentyev Date: Fri, 12 Sep 2025 18:40:00 +0400 Subject: [PATCH 1/5] Update Beam website to release 2.68.0 --- CHANGES.md | 12 +-- website/www/site/config.toml | 2 +- .../www/site/content/en/blog/beam-2.68.0.md | 83 +++++++++++++++++++ .../site/content/en/get-started/downloads.md | 12 ++- 4 files changed, 95 insertions(+), 14 deletions(-) create mode 100644 website/www/site/content/en/blog/beam-2.68.0.md diff --git a/CHANGES.md b/CHANGES.md index 1d138ad43eac..c8d473232dcb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -99,22 +99,18 @@ * ([#X](https://github.com/apache/beam/issues/X)). -# [2.68.0] - Unreleased +# [2.68.0] - 2025-09-?? ## Highlights -* New highly anticipated feature X added to Python SDK ([#X](https://github.com/apache/beam/issues/X)). -* New highly anticipated feature Y added to Java SDK ([#Y](https://github.com/apache/beam/issues/Y)). * [Python] Prism runner now enabled by default for most Python pipelines using the direct runner ([#34612](https://github.com/apache/beam/pull/34612)). This may break some tests, see https://github.com/apache/beam/pull/34612 for details on how to handle issues. ## I/Os -* Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * Upgraded Iceberg dependency to 1.9.2 ([#35981](https://github.com/apache/beam/pull/35981)) ## New Features / Improvements -* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * BigtableRead Connector for BeamYaml added with new Config Param ([#35696](https://github.com/apache/beam/pull/35696)) * MongoDB Java driver upgraded from 3.12.11 to 5.5.0 with API refactoring and GridFS implementation updates (Java) ([#35946](https://github.com/apache/beam/pull/35946)). * Introduced a dedicated module for JUnit-based testing support: `sdks/java/testing/junit`, which provides `TestPipelineExtension` for JUnit 5 while maintaining backward compatibility with existing JUnit 4 `TestRule`-based tests (Java) ([#18733](https://github.com/apache/beam/issues/18733), [#35688](https://github.com/apache/beam/pull/35688)). @@ -131,7 +127,6 @@ ## Breaking Changes -* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). * Previously deprecated Beam ZetaSQL component has been removed ([#34423](https://github.com/apache/beam/issues/34423)). ZetaSQL users could migrate to Calcite SQL with BigQuery dialect enabled. * Upgraded Beam vendored Calcite to 1.40.0 for Beam SQL ([#35483](https://github.com/apache/beam/issues/35483)), which @@ -143,7 +138,6 @@ ## Deprecations -* X behavior is deprecated and will be removed in X versions ([#X](https://github.com/apache/beam/issues/X)). * Python SDK native SpannerIO (apache_beam/io/gcp/experimental/spannerio) is deprecated. Use cross-language wrapper (apache_beam/io/gcp/spanner) instead (Python) ([#35860](https://github.com/apache/beam/issues/35860)). * Samza runner is deprecated and scheduled for removal in Beam 3.0 ([#35448](https://github.com/apache/beam/issues/35448)). @@ -156,10 +150,6 @@ * (Go) Fix duplicates due to reads after blind writes to Bag State ([#35869](https://github.com/apache/beam/issues/35869)). * Earlier Go SDK versions can avoid the issue by not reading in the same call after a blind write. -## Known Issues - -* ([#X](https://github.com/apache/beam/issues/X)). - # [2.67.0] - 2025-08-12 ## Highlights diff --git a/website/www/site/config.toml b/website/www/site/config.toml index 7561817fb438..652994ed6d7b 100644 --- a/website/www/site/config.toml +++ b/website/www/site/config.toml @@ -104,7 +104,7 @@ github_project_repo = "https://github.com/apache/beam" [params] description = "Apache Beam is an open source, unified model and set of language-specific SDKs for defining and executing data processing workflows, and also data ingestion and integration flows, supporting Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). Dataflow pipelines simplify the mechanics of large-scale batch and streaming data processing and can run on a number of runtimes like Apache Flink, Apache Spark, and Google Cloud Dataflow (a cloud service). Beam also brings DSL in different languages, allowing users to easily implement their data integration processes." -release_latest = "2.67.0" +release_latest = "2.68.0" # The repository and branch where the files live in Github or Colab. This is used # to serve and stage from your local branch, but publish to the master branch. # e.g. https://github.com/{{< param branch_repo >}}/path/to/notebook.ipynb diff --git a/website/www/site/content/en/blog/beam-2.68.0.md b/website/www/site/content/en/blog/beam-2.68.0.md new file mode 100644 index 000000000000..2d8a73e85604 --- /dev/null +++ b/website/www/site/content/en/blog/beam-2.68.0.md @@ -0,0 +1,83 @@ +--- +title: "Apache Beam 2.68.0" +date: 2025-09-?? 15:00:00 -0500 +categories: + - blog + - release +authors: + - vterentev +--- + + +We are happy to present the new 2.68.0 release of Beam. +This release includes both improvements and new functionality. +See the [download page](/get-started/downloads/#2680-2025-09-??) for this release. + + + +For more information on changes in 2.68.0, check out the [detailed release notes](https://github.com/apache/beam/milestone/36?closed=1). + +## Highlights + +* [Python] Prism runner now enabled by default for most Python pipelines using the direct runner ([#34612](https://github.com/apache/beam/pull/34612)). This may break some tests, see https://github.com/apache/beam/pull/34612 for details on how to handle issues. + +### I/Os + +* Upgraded Iceberg dependency to 1.9.2 ([#35981](https://github.com/apache/beam/pull/35981)) + +### New Features / Improvements + +* BigtableRead Connector for BeamYaml added with new Config Param ([#35696](https://github.com/apache/beam/pull/35696)) +* MongoDB Java driver upgraded from 3.12.11 to 5.5.0 with API refactoring and GridFS implementation updates (Java) ([#35946](https://github.com/apache/beam/pull/35946)). +* Introduced a dedicated module for JUnit-based testing support: `sdks/java/testing/junit`, which provides `TestPipelineExtension` for JUnit 5 while maintaining backward compatibility with existing JUnit 4 `TestRule`-based tests (Java) ([#18733](https://github.com/apache/beam/issues/18733), [#35688](https://github.com/apache/beam/pull/35688)). + - To use JUnit 5 with Beam tests, add a test-scoped dependency on `org.apache.beam:beam-sdks-java-testing-junit`. +* Google CloudSQL enrichment handler added (Python) ([#34398](https://github.com/apache/beam/pull/34398)). + Beam now supports data enrichment capabilities using SQL databases, with built-in support for: + - Managed PostgreSQL, MySQL, and Microsoft SQL Server instances on CloudSQL + - Unmanaged SQL database instances not hosted on CloudSQL (e.g., self-hosted or on-premises databases) +* [Python] Added the `ReactiveThrottler` and `ThrottlingSignaler` classes to streamline throttling behavior in DoFns, expose throttling mechanisms for users ([#35984](https://github.com/apache/beam/pull/35984)) +* Added a pipeline option to specify the processing timeout for a single element by any PTransform (Java/Python/Go) ([#35174](https://github.com/apache/beam/issues/35174)). + - When specified, the SDK harness automatically restarts if an element takes too long to process. Beam runner may then retry processing of the same work item. + - Use the `--element_processing_timeout_minutes` option to reduce the chance of having stalled pipelines due to unexpected cases of slow processing, where slowness might not happen again if processing of the same element is retried. +* (Python) Adding GCP Spanner Change Stream support for Python (apache_beam.io.gcp.spanner) ([#24103](https://github.com/apache/beam/issues/24103)). + +### Breaking Changes + +* Previously deprecated Beam ZetaSQL component has been removed ([#34423](https://github.com/apache/beam/issues/34423)). + ZetaSQL users could migrate to Calcite SQL with BigQuery dialect enabled. +* Upgraded Beam vendored Calcite to 1.40.0 for Beam SQL ([#35483](https://github.com/apache/beam/issues/35483)), which + improves support for BigQuery and other SQL dialects. Note: Minor behavior changes are observed such as output + significant digits related to casting. +* (Python) The deterministic fallback coder for complex types like NamedTuple, Enum, and dataclasses now uses cloudpickle instead of dill. If your pipeline is affected, you may see a warning like: "Using fallback deterministic coder for type X...". You can revert to the previous behavior by using the pipeline option `--update_compatibility_version=2.67.0` ([35725](https://github.com/apache/beam/pull/35725)). Report any pickling related issues to [#34903](https://github.com/apache/beam/issues/34903) +* (Python) Prism runner now enabled by default for most Python pipelines using the direct runner ([#34612](https://github.com/apache/beam/pull/34612)). This may break some tests, see https://github.com/apache/beam/pull/34612 for details on how to handle issues. +* Dropped Java 8 support for [IO expansion-service](https://central.sonatype.com/artifact/org.apache.beam/beam-sdks-java-io-expansion-service). Cross-language pipelines using this expansion service will need a Java11+ runtime ([#35981](https://github.com/apache/beam/pull/35981). + +### Deprecations + +* Python SDK native SpannerIO (apache_beam/io/gcp/experimental/spannerio) is deprecated. Use cross-language wrapper + (apache_beam/io/gcp/spanner) instead (Python) ([#35860](https://github.com/apache/beam/issues/35860)). +* Samza runner is deprecated and scheduled for removal in Beam 3.0 ([#35448](https://github.com/apache/beam/issues/35448)). +* Twister2 runner is deprecated and scheduled for removal in Beam 3.0 ([#35905](https://github.com/apache/beam/issues/35905))). + +### Bugfixes + +* (Python) Fixed Java YAML provider fails on Windows ([#35617](https://github.com/apache/beam/issues/35617)). +* Fixed BigQueryIO creating temporary datasets in wrong project when temp_dataset is specified with a different project than the pipeline project. For some jobs, temporary datasets will now be created in the correct project (Python) ([#35813](https://github.com/apache/beam/issues/35813)). +* (Go) Fix duplicates due to reads after blind writes to Bag State ([#35869](https://github.com/apache/beam/issues/35869)). + * Earlier Go SDK versions can avoid the issue by not reading in the same call after a blind write. + +## List of Contributors + +According to git shortlog, the following people contributed to the 2.68.0 release. Thank you to all contributors! + +Ahmed Abualsaud, Andrew Crites, Ashok Devireddy, Chamikara Jayalath, Charles Nguyen, Danny McCormick, Davda James, Derrick Williams, Diego Hernandez, Dip Patel, Dustin Rhodes, Enrique Calderon, Hai Joey Tran, Jack McCluskey, Kenneth Knowles, Keshav, Khorbaladze A., LEEKYE, Lanny Boarts, Mattie Fu, Minbo Bae, Mohamed Awnallah, Naireen Hussain, Nathaniel Young, Radosław Stankiewicz, Razvan Culea, Robert Bradshaw, Robert Burke, Sam Whittle, Shehab, Shingo Furuyama, Shunping Huang, Steven van Rossum, Suvrat Acharya, Svetak Sundhar, Tarun Annapareddy, Tom Stepp, Valentyn Tymofieiev, Vitaly Terentyev, XQ Hu, Yi Hu, apanich, arnavarora2004, claudevdm, flpablo, kristynsmith, shreyakhajanchi diff --git a/website/www/site/content/en/get-started/downloads.md b/website/www/site/content/en/get-started/downloads.md index f63852850557..e7d06849418d 100644 --- a/website/www/site/content/en/get-started/downloads.md +++ b/website/www/site/content/en/get-started/downloads.md @@ -95,6 +95,16 @@ versions denoted `0.x.y`. ### Current release +#### 2.68.0 (2025-09-23) + +Official [source code download](https://www.apache.org/dyn/closer.lua/beam/2.68.0/apache-beam-2.68.0-source-release.zip). +[SHA-512](https://downloads.apache.org/beam/2.68.0/apache-beam-2.68.0-source-release.zip.sha512). +[signature](https://downloads.apache.org/beam/2.68.0/apache-beam-2.68.0-source-release.zip.asc). + +[Release notes](https://github.com/apache/beam/releases/tag/v2.68.0) + +### Archived releases + #### 2.67.0 (2025-08-12) Official [source code download](https://www.apache.org/dyn/closer.lua/beam/2.67.0/apache-beam-2.67.0-source-release.zip). @@ -103,8 +113,6 @@ Official [source code download](https://www.apache.org/dyn/closer.lua/beam/2.67. [Release notes](https://github.com/apache/beam/releases/tag/v2.67.0) -### Archived releases - #### 2.66.0 (2025-07-01) Official [source code download](https://archive.apache.org/dist/beam/2.66.0/apache-beam-2.66.0-source-release.zip). From 1bc499164b2e73ea420ca7a6e2da41e2c041cd93 Mon Sep 17 00:00:00 2001 From: Vitaly Terentyev Date: Mon, 22 Sep 2025 22:36:17 +0400 Subject: [PATCH 2/5] Update dates --- CHANGES.md | 2 +- website/www/site/content/en/blog/beam-2.68.0.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index c8d473232dcb..302c3b64ecc1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -99,7 +99,7 @@ * ([#X](https://github.com/apache/beam/issues/X)). -# [2.68.0] - 2025-09-?? +# [2.68.0] - 2025-09-23 ## Highlights diff --git a/website/www/site/content/en/blog/beam-2.68.0.md b/website/www/site/content/en/blog/beam-2.68.0.md index 2d8a73e85604..73da3e190e39 100644 --- a/website/www/site/content/en/blog/beam-2.68.0.md +++ b/website/www/site/content/en/blog/beam-2.68.0.md @@ -1,6 +1,6 @@ --- title: "Apache Beam 2.68.0" -date: 2025-09-?? 15:00:00 -0500 +date: 2025-09-23 15:00:00 -0500 categories: - blog - release From 3819d7185ab5b9f7c0c895166ab22b0278701bd7 Mon Sep 17 00:00:00 2001 From: Vitaly Terentyev Date: Mon, 22 Sep 2025 22:45:53 +0400 Subject: [PATCH 3/5] Update links --- CHANGES.md | 2 +- website/www/site/content/en/blog/beam-2.68.0.md | 2 +- website/www/site/content/en/get-started/downloads.md | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 302c3b64ecc1..663d206f707a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -99,7 +99,7 @@ * ([#X](https://github.com/apache/beam/issues/X)). -# [2.68.0] - 2025-09-23 +# [2.68.0] - 2025-09-22 ## Highlights diff --git a/website/www/site/content/en/blog/beam-2.68.0.md b/website/www/site/content/en/blog/beam-2.68.0.md index 73da3e190e39..a634f9d0213a 100644 --- a/website/www/site/content/en/blog/beam-2.68.0.md +++ b/website/www/site/content/en/blog/beam-2.68.0.md @@ -1,6 +1,6 @@ --- title: "Apache Beam 2.68.0" -date: 2025-09-23 15:00:00 -0500 +date: 2025-09-22 15:00:00 -0500 categories: - blog - release diff --git a/website/www/site/content/en/get-started/downloads.md b/website/www/site/content/en/get-started/downloads.md index e7d06849418d..fc8e820cd1bd 100644 --- a/website/www/site/content/en/get-started/downloads.md +++ b/website/www/site/content/en/get-started/downloads.md @@ -95,7 +95,7 @@ versions denoted `0.x.y`. ### Current release -#### 2.68.0 (2025-09-23) +#### 2.68.0 (2025-09-22) Official [source code download](https://www.apache.org/dyn/closer.lua/beam/2.68.0/apache-beam-2.68.0-source-release.zip). [SHA-512](https://downloads.apache.org/beam/2.68.0/apache-beam-2.68.0-source-release.zip.sha512). @@ -107,9 +107,9 @@ Official [source code download](https://www.apache.org/dyn/closer.lua/beam/2.68. #### 2.67.0 (2025-08-12) -Official [source code download](https://www.apache.org/dyn/closer.lua/beam/2.67.0/apache-beam-2.67.0-source-release.zip). -[SHA-512](https://downloads.apache.org/beam/2.67.0/apache-beam-2.67.0-source-release.zip.sha512). -[signature](https://downloads.apache.org/beam/2.67.0/apache-beam-2.67.0-source-release.zip.asc). +Official [source code download](https://archive.apache.org/dist/beam/2.67.0/apache-beam-2.67.0-source-release.zip). +[SHA-512](https://archive.apache.org/dist/beam/2.67.0/apache-beam-2.67.0-source-release.zip.sha512). +[signature](https://archive.apache.org/dist/beam/2.67.0/apache-beam-2.67.0-source-release.zip.asc). [Release notes](https://github.com/apache/beam/releases/tag/v2.67.0) From cf3c12b9c2aafaa23745fe29b668a4a11512fa9c Mon Sep 17 00:00:00 2001 From: Andrew Crites Date: Fri, 7 Nov 2025 17:23:18 +0000 Subject: [PATCH 4/5] Updates multimap entries iterable to make a deep copy of pending adds and deletes. ImmutableMap.copyOf does not. This was causing testMultimapEntries validates runner test to fail. --- .../beam/fn/harness/state/MultimapUserState.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java index 8e3d76f5fc8f..344f5ed89956 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java @@ -273,7 +273,12 @@ public PrefetchableIterable> entries() { keysStateRequest.getStateKey()); // Make a deep copy of pendingAdds so this iterator represents a snapshot of state at the time // it was created. - Map>> pendingAddsNow = ImmutableMap.copyOf(pendingAdds); + Map>> pendingAddsNow = new HashMap<>(); + for (Map.Entry>> entry : pendingAdds.entrySet()) { + pendingAddsNow.put( + entry.getKey(), + KV.of(entry.getValue().getKey(), new ArrayList<>(entry.getValue().getValue()))); + } if (isCleared) { return PrefetchableIterables.maybePrefetchable( Iterables.concat( @@ -285,7 +290,12 @@ public PrefetchableIterable> entries() { value -> Maps.immutableEntry(entry.getValue().getKey(), value))))); } - Set pendingRemovesNow = ImmutableSet.copyOf(pendingRemoves.keySet()); + // Make a deep copy of pendingRemoves so this iterator represents a snapshot of state at the + // time it was created. + Set pendingRemovesNow = new HashSet<>(); + for (Object key : pendingRemoves.keySet()) { + pendingRemovesNow.add(key); + } return new PrefetchableIterables.Default>() { @Override public PrefetchableIterator> createIterator() { From 5b7c52fc7825e4b3445c3b256fbe18e795ac7c2d Mon Sep 17 00:00:00 2001 From: Andrew Crites Date: Fri, 7 Nov 2025 17:43:22 +0000 Subject: [PATCH 5/5] Run spotlessapply. --- .../org/apache/beam/fn/harness/state/MultimapUserState.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java index 344f5ed89956..83d78ff836c7 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java @@ -47,8 +47,6 @@ import org.apache.beam.sdk.util.ByteStringOutputStream; import org.apache.beam.sdk.values.KV; import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets; @@ -276,8 +274,8 @@ public PrefetchableIterable> entries() { Map>> pendingAddsNow = new HashMap<>(); for (Map.Entry>> entry : pendingAdds.entrySet()) { pendingAddsNow.put( - entry.getKey(), - KV.of(entry.getValue().getKey(), new ArrayList<>(entry.getValue().getValue()))); + entry.getKey(), + KV.of(entry.getValue().getKey(), new ArrayList<>(entry.getValue().getValue()))); } if (isCleared) { return PrefetchableIterables.maybePrefetchable(