
Commit 5d1163c

Update ChangeStreamDao to query a different TVF for PostgreSQL based on the change stream partition mode.
For a MUTABLE_KEY_RANGE change stream, use read_proto_bytes_; otherwise, use read_json_.
1 parent 01e1cf6 commit 5d1163c
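
The selection described in the commit message can be pictured with a short, purely illustrative Java sketch. The ChangeStreamDao change itself is in one of the other four files and is not shown in this excerpt, so the class and method names below are assumptions; only the two TVF prefixes come from the commit message, and the spanner. schema qualifier follows Cloud Spanner's PostgreSQL change stream TVF naming.

/** Hypothetical illustration; everything except the two TVF prefixes is an assumption. */
final class ChangeStreamTvfNames {
  /** Picks the PostgreSQL TVF to query for a given change stream. */
  static String tvfName(String changeStreamName, boolean isMutableChangeStream) {
    // Per the commit message: MUTABLE_KEY_RANGE streams are read through the
    // proto-bytes TVF, any other partition mode keeps using the JSON TVF.
    String prefix = isMutableChangeStream ? "read_proto_bytes_" : "read_json_";
    // PostgreSQL-dialect change stream TVFs are named after the stream and live
    // in the "spanner" schema.
    return "spanner." + prefix + changeStreamName;
  }
}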

File tree

5 files changed: +368 -114 lines changed

sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java

Lines changed: 127 additions & 49 deletions
@@ -46,6 +46,8 @@
 import com.google.cloud.spanner.Options;
 import com.google.cloud.spanner.Options.RpcPriority;
 import com.google.cloud.spanner.PartitionOptions;
+import com.google.cloud.spanner.ReadOnlyTransaction;
+import com.google.cloud.spanner.ResultSet;
 import com.google.cloud.spanner.Spanner;
 import com.google.cloud.spanner.SpannerException;
 import com.google.cloud.spanner.SpannerOptions;
@@ -177,10 +179,10 @@
  * .withQuery("SELECT id, name, email FROM users"));
  * }</pre>
  *
- * <p>Reads by default use the <a
- * href="https://cloud.google.com/spanner/docs/reads#read_data_in_parallel">PartitionQuery API</a>
- * which enforces some limitations on the type of queries that can be used so that the data can be
- * read in parallel. If the query is not supported by the PartitionQuery API, then you can specify a
+ * <p>Reads by default use the <a href=
+ * "https://cloud.google.com/spanner/docs/reads#read_data_in_parallel">PartitionQuery API</a> which
+ * enforces some limitations on the type of queries that can be used so that the data can be read in
+ * parallel. If the query is not supported by the PartitionQuery API, then you can specify a
  * non-partitioned read by setting {@link Read#withBatching(boolean) withBatching(false)}. If the
  * amount of data being read by a non-partitioned read is very large, it may be useful to add a
  * {@link Reshuffle#viaRandomKey()} transform on the output so that the downstream transforms can
@@ -191,33 +193,33 @@
  *
  * <pre>{@code
  * PCollection<Struct> rows = p.apply(
- * SpannerIO.read()
- * .withInstanceId(instanceId)
- * .withDatabaseId(dbId)
- * .withTable("users")
- * .withColumns("id", "name", "email"));
+ * SpannerIO.read()
+ * .withInstanceId(instanceId)
+ * .withDatabaseId(dbId)
+ * .withTable("users")
+ * .withColumns("id", "name", "email"));
  * }</pre>
  *
  * <p>To read using an <strong>Index</strong>, specify the index name using {@link
  * Read#withIndex(String)}.
  *
  * <pre>{@code
  * PCollection<Struct> rows = p.apply(
- * SpannerIO.read()
- * .withInstanceId(instanceId)
- * .withDatabaseId(dbId)
- * .withTable("users")
- * .withIndex("users_by_name")
- * .withColumns("id", "name", "email"));
+ * SpannerIO.read()
+ * .withInstanceId(instanceId)
+ * .withDatabaseId(dbId)
+ * .withTable("users")
+ * .withIndex("users_by_name")
+ * .withColumns("id", "name", "email"));
  * }</pre>
  *
  * <h4>Read consistency</h4>
  *
  * <p>The transform is guaranteed to be executed on a consistent snapshot of data, utilizing the
  * power of read only transactions. Staleness of data can be controlled using {@link
- * Read#withTimestampBound} or {@link Read#withTimestamp(Timestamp)} methods. <a
- * href="https://cloud.google.com/spanner/docs/transactions#read-only_transactions">Read more</a>
- * about transactions in Cloud Spanner.
+ * Read#withTimestampBound} or {@link Read#withTimestamp(Timestamp)} methods. <a href=
+ * "https://cloud.google.com/spanner/docs/transactions#read-only_transactions">Read more</a> about
+ * transactions in Cloud Spanner.
  *
  * <p>It is possible to read several {@link PCollection PCollections} within a single transaction.
  * Apply {@link SpannerIO#createTransaction()} transform, that lazily creates a transaction. The
@@ -303,8 +305,8 @@
  * withMaxNumMutations()} or {@link Write#withMaxNumRows(long) withMaxNumRows()}. Setting either to
  * a small value or zero disables batching.
  *
- * <p>Note that the <a
- * href="https://cloud.google.com/spanner/quotas#limits_for_creating_reading_updating_and_deleting_data">maximum
+ * <p>Note that the <a href=
+ * "https://cloud.google.com/spanner/quotas#limits_for_creating_reading_updating_and_deleting_data">maximum
  * size of a single transaction</a> is 20,000 mutated cells - including cells in indexes. If you
  * have a large number of indexes and are getting exceptions with message: <tt>INVALID_ARGUMENT: The
  * transaction contains too many mutations</tt> you will need to specify a smaller number of {@code
@@ -435,9 +437,8 @@
  * <h3>Updates to the I/O connector code</h3>
  *
  * For any significant significant updates to this I/O connector, please consider involving
- * corresponding code reviewers mentioned <a
- * href="https://github.com/apache/beam/blob/master/sdks/java/io/google-cloud-platform/OWNERS">
- * here</a>.
+ * corresponding code reviewers mentioned <a href=
+ * "https://github.com/apache/beam/blob/master/sdks/java/io/google-cloud-platform/OWNERS"> here</a>.
  */
 @SuppressWarnings({
   "nullness" // TODO(https://github.com/apache/beam/issues/20497)
@@ -455,8 +456,10 @@ public class SpannerIO {
 private static final int DEFAULT_GROUPING_FACTOR = 1000;

 // Size of caches for read/write ServiceCallMetric objects .
-// This is a reasonable limit, as for reads, each worker will process very few different table
-// read requests, and for writes, batching will ensure that write operations for the same
+// This is a reasonable limit, as for reads, each worker will process very few
+// different table
+// read requests, and for writes, batching will ensure that write operations for
+// the same
 // table occur at the same time (within a bundle).
 static final int METRICS_CACHE_SIZE = 100;

@@ -673,10 +676,10 @@ public ReadAll withTimestampBound(TimestampBound timestampBound) {
 }

 /**
- * By default the <a
- * href="https://cloud.google.com/spanner/docs/reads#read_data_in_parallel">PartitionQuery
- * API</a> is used to read data from Cloud Spanner. It is useful to disable batching when the
- * underlying query is not root-partitionable.
+ * By default the <a href=
+ * "https://cloud.google.com/spanner/docs/reads#read_data_in_parallel">PartitionQuery API</a> is
+ * used to read data from Cloud Spanner. It is useful to disable batching when the underlying
+ * query is not root-partitionable.
  */
 public ReadAll withBatching(boolean batching) {
 return toBuilder().setBatching(batching).build();
@@ -968,8 +971,8 @@ public Read withIndex(String index) {
 }

 /**
- * Note that {@link PartitionOptions} are currently ignored. See <a
- * href="https://cloud.google.com/spanner/docs/reference/rpc/google.spanner.v1#google.spanner.v1.PartitionOptions">
+ * Note that {@link PartitionOptions} are currently ignored. See <a href=
+ * "https://cloud.google.com/spanner/docs/reference/rpc/google.spanner.v1#google.spanner.v1.PartitionOptions">
  * PartitionOptions in RPC documents</a>
  */
 public Read withPartitionOptions(PartitionOptions partitionOptions) {
@@ -1420,8 +1423,8 @@ public Write withCommitDeadline(Duration commitDeadline) {
 /**
  * Specifies max commit delay for the Commit API call for throughput optimized writes. If not
  * set, Spanner might set a small delay if it thinks that will amortize the cost of the writes.
- * For more information about the feature, <a
- * href="https://cloud.google.com/spanner/docs/throughput-optimized-writes#default-behavior">see
+ * For more information about the feature, <a href=
+ * "https://cloud.google.com/spanner/docs/throughput-optimized-writes#default-behavior">see
  * documentation</a>
  */
 public Write withMaxCommitDelay(long millis) {
@@ -1540,7 +1543,8 @@ private void populateDisplayDataWithParamaters(DisplayData.Builder builder) {
 builder.add(
 DisplayData.item("maxNumRows", getMaxNumRows())
 .withLabel("Max number of rows in each batch"));
-// Grouping factor default value depends on whether it is a batch or streaming pipeline.
+// Grouping factor default value depends on whether it is a batch or streaming
+// pipeline.
 // This function is not aware of that state, so use 'DEFAULT' if unset.
 builder.add(
 DisplayData.item(
@@ -1989,7 +1993,8 @@ && getInclusiveStartAt().toSqlTimestamp().after(getInclusiveEndAt().toSqlTimesta
 .orElse(PartitionMetadataTableNames.generateRandom(partitionMetadataDatabaseId));
 final String changeStreamName = getChangeStreamName();
 final Timestamp startTimestamp = getInclusiveStartAt();
-// Uses (Timestamp.MAX - 1ns) at max for end timestamp to indicate this connector is expected
+// Uses (Timestamp.MAX - 1ns) at max for end timestamp to indicate this
+// connector is expected
 // to run forever.
 final Timestamp endTimestamp =
 getInclusiveEndAt().compareTo(MAX_INCLUSIVE_END_AT) > 0
@@ -1998,6 +2003,11 @@ && getInclusiveStartAt().toSqlTimestamp().after(getInclusiveEndAt().toSqlTimesta
 final MapperFactory mapperFactory = new MapperFactory(changeStreamDatabaseDialect);
 final ChangeStreamMetrics metrics = new ChangeStreamMetrics();
 final RpcPriority rpcPriority = MoreObjects.firstNonNull(getRpcPriority(), RpcPriority.HIGH);
+final SpannerAccessor spannerAccessor =
+    SpannerAccessor.getOrCreate(changeStreamSpannerConfig);
+final boolean isMutableChangeStream =
+    isMutableChangeStream(
+        spannerAccessor.getDatabaseClient(), changeStreamDatabaseDialect, changeStreamName);
 final DaoFactory daoFactory =
 new DaoFactory(
 changeStreamSpannerConfig,
@@ -2007,7 +2017,8 @@ && getInclusiveStartAt().toSqlTimestamp().after(getInclusiveEndAt().toSqlTimesta
 rpcPriority,
 input.getPipeline().getOptions().getJobName(),
 changeStreamDatabaseDialect,
-metadataDatabaseDialect);
+metadataDatabaseDialect,
+isMutableChangeStream);
 final ActionFactory actionFactory = new ActionFactory();

 final Duration watermarkRefreshRate =
@@ -2097,7 +2108,8 @@ static SpannerConfig buildSpannerConfigWithCredential(
 return spannerConfig;
 }

-private static Dialect getDialect(SpannerConfig spannerConfig, PipelineOptions pipelineOptions) {
+protected static Dialect getDialect(
+    SpannerConfig spannerConfig, PipelineOptions pipelineOptions) {
 // Allow passing the credential from pipeline options to the getDialect() call.
 SpannerConfig spannerConfigWithCredential =
 buildSpannerConfigWithCredential(spannerConfig, pipelineOptions);
@@ -2198,7 +2210,8 @@ private synchronized void sortAndOutputBatches(OutputReceiver<Iterable<MutationG
 }

 if (maxSortableNumMutations == maxBatchNumMutations) {
-// no grouping is occurring, no need to sort and make batches, just output what we have.
+// no grouping is occurring, no need to sort and make batches, just output what
+// we have.
 outputBatch(out, 0, mutationsToSort.size());
 return;
 }
@@ -2279,7 +2292,8 @@ public synchronized void processElement(
 }
 }

-// Container class to store a MutationGroup, its sortable encoded key and its statistics.
+// Container class to store a MutationGroup, its sortable encoded key and its
+// statistics.
 private static final class MutationGroupContainer
 implements Comparable<MutationGroupContainer> {

@@ -2308,7 +2322,8 @@ public int compareTo(MutationGroupContainer o) {
 }
 }

-// TODO(https://github.com/apache/beam/issues/18203): Remove this when FinishBundle has added
+// TODO(https://github.com/apache/beam/issues/18203): Remove this when
+// FinishBundle has added
 // support for an {@link OutputReceiver}
 private static class OutputReceiverForFinishBundle
 implements OutputReceiver<Iterable<MutationGroup>> {
@@ -2409,9 +2424,11 @@ static class WriteToSpannerFn extends DoFn<Iterable<MutationGroup>, Void> {
 private final SpannerConfig spannerConfig;
 private final FailureMode failureMode;

-// SpannerAccessor can not be serialized so must be initialized at runtime in setup().
+// SpannerAccessor can not be serialized so must be initialized at runtime in
+// setup().
 private transient SpannerAccessor spannerAccessor;
-// resolved at runtime for metrics report purpose. SpannerConfig may not have projectId set.
+// resolved at runtime for metrics report purpose. SpannerConfig may not have
+// projectId set.
 private transient String projectId;
 /* Number of times an aborted write to spanner could be retried */
 private static final int ABORTED_RETRY_ATTEMPTS = 5;
@@ -2420,7 +2437,10 @@ static class WriteToSpannerFn extends DoFn<Iterable<MutationGroup>, Void> {
 "Transaction aborted. "
 + "Database schema probably changed during transaction, retry may succeed.";

-/* Error string in Aborted exception for concurrent transaction in Spanner Emulator */
+/*
+ * Error string in Aborted exception for concurrent transaction in Spanner
+ * Emulator
+ */
 private final String emulatorErrorString =
 "The emulator only supports one transaction at a time.";

@@ -2472,8 +2492,10 @@ public void setup() {
 .withMaxCumulativeBackoff(spannerConfig.getMaxCumulativeBackoff().get())
 .withInitialBackoff(spannerConfig.getMaxCumulativeBackoff().get().dividedBy(60));

-// Use a LoadingCache for metrics as there can be different tables being written to which
-// result in different service call metrics labels. ServiceCallMetric items are created
+// Use a LoadingCache for metrics as there can be different tables being written
+// to which
+// result in different service call metrics labels. ServiceCallMetric items are
+// created
 // on-demand and added to the cache.
 writeMetricsByTableName =
 CacheBuilder.newBuilder()
@@ -2535,9 +2557,10 @@ public void processElement(ProcessContext c) throws Exception {
 }

 /*
-Spanner aborts all inflight transactions during a schema change. Client is expected
-to retry silently. These must not be counted against retry backoff.
-*/
+ * Spanner aborts all inflight transactions during a schema change. Client is
+ * expected
+ * to retry silently. These must not be counted against retry backoff.
+ */
 private void spannerWriteWithRetryIfSchemaChange(List<Mutation> batch) throws SpannerException {
 Set<String> tableNames = batch.stream().map(Mutation::getTable).collect(Collectors.toSet());
 for (int retry = 1; ; retry++) {
@@ -2623,7 +2646,8 @@ private void writeMutations(Iterable<Mutation> mutationIterable)

 while (true) {
 Stopwatch timer = Stopwatch.createStarted();
-// loop is broken on success, timeout backoff/retry attempts exceeded, or other failure.
+// loop is broken on success, timeout backoff/retry attempts exceeded, or other
+// failure.
 try {
 spannerWriteWithRetryIfSchemaChange(mutations);
 spannerWriteSuccess.inc();
@@ -2688,4 +2712,58 @@ static String resolveSpannerProjectId(SpannerConfig config) {
 ? SpannerOptions.getDefaultProjectId()
 : config.getProjectId().get();
 }
+
+  @VisibleForTesting
+  static boolean isMutableChangeStream(
+      DatabaseClient databaseClient, Dialect dialect, String changeStreamName) {
+    String fetchedPartitionMode = fetchPartitionMode(databaseClient, dialect, changeStreamName);
+    if (fetchedPartitionMode.isEmpty()
+        || fetchedPartitionMode.equalsIgnoreCase("IMMUTABLE_KEY_RANGE")) {
+      return false;
+    }
+    return true;
+  }
+
+  private static String fetchPartitionMode(
+      DatabaseClient databaseClient, Dialect dialect, String changeStreamName) {
+    try (ReadOnlyTransaction tx = databaseClient.readOnlyTransaction()) {
+      Statement statement;
+      if (dialect == Dialect.POSTGRESQL) {
+        statement =
+            Statement.newBuilder(
+                    "select option_value\n"
+                        + "from information_schema.change_stream_options\n"
+                        + "where change_stream_name = $1 and option_name = 'partition_mode'")
+                .bind("p1")
+                .to(changeStreamName)
+                .build();
+      } else {
+        statement =
+            Statement.newBuilder(
+                    "select option_value\n"
+                        + "from information_schema.change_stream_options\n"
+                        + "where change_stream_name = @changeStreamName and option_name = 'partition_mode'")
+                .bind("changeStreamName")
+                .to(changeStreamName)
+                .build();
+      }
+      ResultSet resultSet = tx.executeQuery(statement);
+      while (resultSet.next()) {
+        String value = resultSet.getString(0);
+        if (value != null) {
+          return value;
+        }
+      }
+      return "";
+    } catch (RuntimeException e) {
+      // Log the failure (with stack trace) but rethrow so the caller still observes
+      // the error.
+      LOG.warn(
+          "Failed to fetch partition_mode for change stream '{}', dialect={} - will propagate exception",
+          changeStreamName,
+          dialect,
+          e);
+      throw e;
+    }
+  }
 }
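
The new fetchPartitionMode() helper added at the end of this diff drives the TVF choice: it reads the change stream's partition_mode option from information_schema, and isMutableChangeStream() treats anything other than an empty value or IMMUTABLE_KEY_RANGE as mutable. For reference, a minimal standalone sketch of the same lookup, assuming a GoogleSQL-dialect database, an existing DatabaseClient, and a placeholder stream name:

import com.google.cloud.spanner.DatabaseClient;
import com.google.cloud.spanner.ResultSet;
import com.google.cloud.spanner.Statement;

final class PartitionModeLookup {
  /** Returns the stream's partition_mode option value, or "" if it is not set. */
  static String partitionMode(DatabaseClient client, String changeStreamName) {
    Statement statement =
        Statement.newBuilder(
                "SELECT option_value "
                    + "FROM information_schema.change_stream_options "
                    + "WHERE change_stream_name = @name AND option_name = 'partition_mode'")
            .bind("name")
            .to(changeStreamName)
            .build();
    // A single-use read is enough for a one-off check; the connector itself uses a
    // read-only transaction.
    try (ResultSet rs = client.singleUse().executeQuery(statement)) {
      while (rs.next()) {
        if (!rs.isNull(0)) {
          return rs.getString(0);
        }
      }
    }
    return "";
  }
}

Under that convention, an empty result or IMMUTABLE_KEY_RANGE keeps the connector on the read_json_ TVF, while MUTABLE_KEY_RANGE switches it to read_proto_bytes_.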

0 commit comments
