learning/tour-of-beam/learning-content/introduction/introduction-concepts/pipeline-concepts/overview-pipeline/description.md
To use Beam, you first need to create a driver program using the classes in one of the Beam SDKs. Your driver program defines your pipeline, including all of the inputs, transforms, and outputs. It also sets execution options for your pipeline (typically passed by using command-line options). These include the Pipeline Runner, which, in turn, determines what back-end your pipeline will run on.
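For instance, with the Java SDK a driver program typically parses those execution options from the command-line arguments and hands them to the pipeline. The sketch below is only illustrative (the class name and the `--runner` value mentioned in the comment are not part of this lesson's code):

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class MyDriver {
  public static void main(String[] args) {
    // Execution options are usually passed on the command line,
    // e.g. --runner=DirectRunner; the chosen runner determines the back-end.
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();

    // The Pipeline is created with those options; transforms are applied to it below.
    Pipeline pipeline = Pipeline.create(options);
    // ... apply transforms here ...
    pipeline.run();
  }
}
```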
The Beam SDKs provide several abstractions that simplify the mechanics of large-scale distributed data processing. The same Beam abstractions work with both batch and streaming data sources. When you create your Beam pipeline, you can think about your data processing task in terms of these abstractions. They include the following (a short code sketch after the list shows how they fit together):
* `Pipeline`: A Pipeline encapsulates your entire data processing task, from start to finish. This includes reading input data, transforming that data, and writing output data. All Beam driver programs must create a Pipeline. When you create the Pipeline, you must also specify the execution options that tell the Pipeline where and how to run.
* `PCollection`: A PCollection represents a distributed data set that your Beam pipeline operates on. The data set can be bounded, meaning it comes from a fixed source like a file, or unbounded, meaning it comes from a continuously updating source via a subscription or other mechanism. Your pipeline typically creates an initial PCollection by reading data from an external data source, but you can also create a PCollection from in-memory data within your driver program. From there, PCollections are the inputs and outputs for each step in your pipeline.
* `PTransform`: A PTransform represents a data processing operation, or a step, in your pipeline. Every PTransform takes zero or more PCollection objects as the input, performs a processing function that you provide on the elements of that PCollection, and then produces zero or more output PCollection objects.
{{if (eq .Sdk "go")}}
* `Scope`: The Go SDK has an explicit scope variable used to build a `Pipeline`. A Pipeline can return its root scope with the `Root()` method. The scope variable is then passed to `PTransform` functions that place them in the `Pipeline` that owns the `Scope`.
{{end}}
* `I/O transforms`: Beam comes with a number of “IOs” - library PTransforms that read or write data to various external storage systems.
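To see how these abstractions relate, here is a minimal Java sketch (the class name, data, and transform are made up for illustration): a `Pipeline` owns a `PCollection` built from in-memory data, and a `PTransform` consumes it and produces a new output `PCollection`.

```java
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

public class AbstractionsSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // A bounded PCollection built from in-memory data.
    PCollection<String> words = pipeline.apply(Create.of(Arrays.asList("hello", "beam")));

    // A PTransform step: it reads one PCollection and produces a new one.
    PCollection<Integer> lengths =
        words.apply(MapElements.into(TypeDescriptors.integers()).via((String word) -> word.length()));

    pipeline.run();
  }
}
```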
A typical Beam driver program works as follows (a minimal sketch of these steps follows the list):
* Create a Pipeline object and set the pipeline execution options, including the Pipeline Runner.
* Create an initial `PCollection` for pipeline data, either using the IOs to read data from an external storage system, or using a Create transform to build a `PCollection` from in-memory data.
* Apply `PTransforms` to each `PCollection`. Transforms can change, filter, group, analyze, or otherwise process the elements in a PCollection. A transform creates a new output PCollection without modifying the input collection. A typical pipeline applies subsequent transforms to each new output PCollection in turn until the processing is complete. However, note that a pipeline does not have to be a single straight line of transforms applied one after another: think of PCollections as variables and PTransforms as functions applied to these variables, so the shape of the pipeline can be an arbitrarily complex processing graph.
* Use IOs to write the final, transformed PCollection(s) to an external sink.
* Run the pipeline using the designated Pipeline Runner.
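Put together, a typical driver program in the Java SDK might look like the following sketch; the class name, file paths, and the upper-casing transform are only illustrative.

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

public class TypicalDriver {
  public static void main(String[] args) {
    // 1. Create a Pipeline object and set the execution options, including the runner.
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();
    Pipeline pipeline = Pipeline.create(options);

    // 2. Create an initial PCollection by reading from an external storage system.
    PCollection<String> lines = pipeline.apply(TextIO.read().from("input.txt"));

    // 3. Apply PTransforms; each one produces a new output PCollection.
    PCollection<String> shouted =
        lines.apply(MapElements.into(TypeDescriptors.strings()).via((String line) -> line.toUpperCase()));

    // 4. Use an IO to write the final PCollection to an external sink.
    shouted.apply(TextIO.write().to("output"));

    // 5. Run the pipeline using the designated runner.
    pipeline.run().waitUntilFinish();
  }
}
```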
When you run your Beam driver program, the Pipeline Runner that you designate constructs a workflow graph of your pipeline based on the PCollection objects you’ve created and the transforms that you’ve applied. That graph is then executed using the appropriate distributed processing back-end, becoming an asynchronous “job” (or equivalent) on that back-end.
learning/tour-of-beam/learning-content/introduction/introduction-concepts/pipeline-concepts/setting-pipeline/java-example/Task.java
* with Docker (preferred): docker run --net=host apache/beam_spark_job_server:latest
* or from Beam source code: ./gradlew :runners:spark:3:job-server:runShadow
2. Submit the pipeline to the above endpoint by using the PortableRunner, job_endpoint set to localhost:8099 (this is the default address of the JobService), and environment_type set to LOOPBACK. For example:
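The snippet below is a hedged sketch of such a submission from a Java pipeline; it assumes the Java SDK's `PortablePipelineOptions` flag spellings (`--jobEndpoint`, `--defaultEnvironmentType`) as the counterparts of the `job_endpoint` and `environment_type` settings named above, and the class name is hypothetical.

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class SubmitToSparkJobServer {
  public static void main(String[] args) {
    // Point the PortableRunner at the JobService started above.
    PipelineOptions options = PipelineOptionsFactory.fromArgs(
            "--runner=PortableRunner",
            "--jobEndpoint=localhost:8099",      // default JobService address
            "--defaultEnvironmentType=LOOPBACK") // run user code in the submitting process
        .create();

    Pipeline pipeline = Pipeline.create(options);
    // ... build the pipeline here ...
    pipeline.run().waitUntilFinish();
  }
}
```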
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowExecutionStateRegistry.java
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/KeyTokenInvalidException.java
*/
package org.apache.beam.runners.dataflow.worker;

import javax.annotation.Nullable;

/** Indicates that the key token was invalid when data was attempted to be fetched. */
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/logging/DataflowWorkerLoggingMDC.java