Skip to content

Commit 5b0bd27

Browse files
Update Dataflow BigQuery samples to use Managed I/O (#10029)
1 parent 3c9133e commit 5b0bd27

File tree

3 files changed

+43
-38
lines changed

3 files changed

+43
-38
lines changed

dataflow/snippets/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
<maven.compiler.source>11</maven.compiler.source>
3838
<maven.compiler.target>11</maven.compiler.target>
3939
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
40-
<apache_beam.version>2.58.0</apache_beam.version>
40+
<apache_beam.version>2.63.0</apache_beam.version>
4141
<slf4j.version>2.0.12</slf4j.version>
4242
<parquet.version>1.14.0</parquet.version>
4343
<iceberg.version>1.4.2</iceberg.version>

dataflow/snippets/src/main/java/com/example/dataflow/BigQueryReadFromQuery.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
package com.example.dataflow;
1818

1919
// [START dataflow_bigquery_read_query]
20-
import com.google.api.services.bigquery.model.TableRow;
20+
import com.google.common.collect.ImmutableMap;
2121
import org.apache.beam.sdk.Pipeline;
22-
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
23-
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead;
22+
import org.apache.beam.sdk.managed.Managed;
2423
import org.apache.beam.sdk.options.PipelineOptions;
2524
import org.apache.beam.sdk.options.PipelineOptionsFactory;
2625
import org.apache.beam.sdk.transforms.MapElements;
27-
import org.apache.beam.sdk.values.TypeDescriptor;
26+
import org.apache.beam.sdk.values.Row;
27+
import org.apache.beam.sdk.values.TypeDescriptors;
2828

2929
public class BigQueryReadFromQuery {
3030
public static void main(String[] args) {
@@ -39,20 +39,23 @@ public static void main(String[] args) {
3939
PipelineOptions options = PipelineOptionsFactory.fromArgs(args)
4040
.withValidation().create();
4141

42+
ImmutableMap<String, Object> config = ImmutableMap.<String, Object>builder()
43+
.put("query", queryString)
44+
.build();
45+
4246
// Create a pipeline and apply transforms.
4347
Pipeline pipeline = Pipeline.create(options);
4448
pipeline
45-
// Read the query results into TableRow objects.
46-
.apply(BigQueryIO.readTableRows()
47-
.fromQuery(queryString)
48-
.usingStandardSql()
49-
.withMethod(TypedRead.Method.DIRECT_READ))
50-
// The output from the previous step is a PCollection<TableRow>.
49+
.apply(Managed.read(Managed.BIGQUERY).withConfig(config)).getSinglePCollection()
5150
.apply(MapElements
52-
.into(TypeDescriptor.of(TableRow.class))
53-
.via((TableRow row) -> {
54-
System.out.printf("Repo: %s, commits: %s%n", row.get("repo"), row.get("count"));
55-
return row;
51+
.into(TypeDescriptors.strings())
52+
// Access individual fields in the row.
53+
.via((Row row) -> {
54+
String output = String.format("Repo: %s, commits: %d%n",
55+
row.getString("repo"),
56+
row.getInt64("count"));
57+
System.out.println(output);
58+
return output;
5659
}));
5760
pipeline.run().waitUntilFinish();
5861
}

dataflow/snippets/src/main/java/com/example/dataflow/BigQueryReadWithProjectionAndFiltering.java

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
package com.example.dataflow;
1818

1919
// [START dataflow_bigquery_read_projection_and_filtering]
20-
import com.google.api.services.bigquery.model.TableRow;
21-
import java.util.Arrays;
20+
import com.google.common.collect.ImmutableMap;
21+
import java.util.List;
2222
import org.apache.beam.sdk.Pipeline;
23-
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
24-
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead;
23+
import org.apache.beam.sdk.managed.Managed;
2524
import org.apache.beam.sdk.options.PipelineOptionsFactory;
2625
import org.apache.beam.sdk.transforms.MapElements;
27-
import org.apache.beam.sdk.values.TypeDescriptor;
26+
import org.apache.beam.sdk.values.Row;
27+
import org.apache.beam.sdk.values.TypeDescriptors;
2828

2929
public class BigQueryReadWithProjectionAndFiltering {
3030
public static void main(String[] args) {
@@ -36,28 +36,30 @@ public static void main(String[] args) {
3636
.withValidation()
3737
.as(ExamplePipelineOptions.class);
3838

39+
String tableSpec = String.format("%s:%s.%s",
40+
options.getProjectId(),
41+
options.getDatasetName(),
42+
options.getTableName());
43+
44+
ImmutableMap<String, Object> config = ImmutableMap.<String, Object>builder()
45+
.put("table", tableSpec)
46+
.put("row_restriction", "age > 18")
47+
.put("fields", List.of("user_name", "age"))
48+
.build();
49+
3950
// Create a pipeline and apply transforms.
4051
Pipeline pipeline = Pipeline.create(options);
4152
pipeline
42-
.apply(BigQueryIO.readTableRows()
43-
// Read rows from a specified table.
44-
.from(String.format("%s:%s.%s",
45-
options.getProjectId(),
46-
options.getDatasetName(),
47-
options.getTableName()))
48-
.withMethod(TypedRead.Method.DIRECT_READ)
49-
.withSelectedFields(Arrays.asList("user_name", "age"))
50-
.withRowRestriction("age > 18")
51-
)
52-
// The output from the previous step is a PCollection<TableRow>.
53+
.apply(Managed.read(Managed.BIGQUERY).withConfig(config)).getSinglePCollection()
5354
.apply(MapElements
54-
.into(TypeDescriptor.of(TableRow.class))
55-
// Use TableRow to access individual fields in the row.
56-
.via((TableRow row) -> {
57-
var name = (String) row.get("user_name");
58-
var age = row.get("age");
59-
System.out.printf("Name: %s, Age: %s%n", name, age);
60-
return row;
55+
.into(TypeDescriptors.strings())
56+
// Access individual fields in the row.
57+
.via((Row row) -> {
58+
String output = String.format("Name: %s, Age: %s%n",
59+
row.getString("user_name"),
60+
row.getInt64("age"));
61+
System.out.println(output);
62+
return output;
6163
}));
6264
pipeline.run().waitUntilFinish();
6365
}

0 commit comments

Comments (0)