
Commit 1b20d23

docs: update generated documentation (#3264)
Co-authored-by: liferoad <7833268+liferoad@users.noreply.github.com>
1 parent 9a0bbda commit 1b20d23

5 files changed: +47 additions, −12 deletions

v2/bigquery-to-bigtable/README_BigQuery_to_Bigtable.md

Lines changed: 7 additions & 2 deletions
@@ -40,6 +40,7 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
 * **bigtableBulkWriteLatencyTargetMs**: The latency target of Bigtable in milliseconds for latency-based throttling.
 * **bigtableBulkWriteMaxRowKeyCount**: The maximum number of row keys in a Bigtable batch write operation.
 * **bigtableBulkWriteMaxRequestSizeBytes**: The maximum bytes to include per Bigtable batch write operation.
+* **bigtableBulkWriteFlowControl**: When set to true, enables bulk write flow control, which uses the server's signal to throttle the writes. Defaults to: false.

@@ -155,6 +156,7 @@ export BIGTABLE_WRITE_PROJECT_ID=<bigtableWriteProjectId>
 export BIGTABLE_BULK_WRITE_LATENCY_TARGET_MS=<bigtableBulkWriteLatencyTargetMs>
 export BIGTABLE_BULK_WRITE_MAX_ROW_KEY_COUNT=<bigtableBulkWriteMaxRowKeyCount>
 export BIGTABLE_BULK_WRITE_MAX_REQUEST_SIZE_BYTES=<bigtableBulkWriteMaxRequestSizeBytes>
+export BIGTABLE_BULK_WRITE_FLOW_CONTROL=false

 gcloud dataflow flex-template run "bigquery-to-bigtable-job" \
 --project "$PROJECT" \
@@ -180,7 +182,8 @@ gcloud dataflow flex-template run "bigquery-to-bigtable-job" \
 --parameters "bigtableWriteProjectId=$BIGTABLE_WRITE_PROJECT_ID" \
 --parameters "bigtableBulkWriteLatencyTargetMs=$BIGTABLE_BULK_WRITE_LATENCY_TARGET_MS" \
 --parameters "bigtableBulkWriteMaxRowKeyCount=$BIGTABLE_BULK_WRITE_MAX_ROW_KEY_COUNT" \
---parameters "bigtableBulkWriteMaxRequestSizeBytes=$BIGTABLE_BULK_WRITE_MAX_REQUEST_SIZE_BYTES"
+--parameters "bigtableBulkWriteMaxRequestSizeBytes=$BIGTABLE_BULK_WRITE_MAX_REQUEST_SIZE_BYTES" \
+--parameters "bigtableBulkWriteFlowControl=$BIGTABLE_BULK_WRITE_FLOW_CONTROL"
 ```

 For more information about the command, please check:
@@ -222,6 +225,7 @@ export BIGTABLE_WRITE_PROJECT_ID=<bigtableWriteProjectId>
 export BIGTABLE_BULK_WRITE_LATENCY_TARGET_MS=<bigtableBulkWriteLatencyTargetMs>
 export BIGTABLE_BULK_WRITE_MAX_ROW_KEY_COUNT=<bigtableBulkWriteMaxRowKeyCount>
 export BIGTABLE_BULK_WRITE_MAX_REQUEST_SIZE_BYTES=<bigtableBulkWriteMaxRequestSizeBytes>
+export BIGTABLE_BULK_WRITE_FLOW_CONTROL=false

 mvn clean package -PtemplatesRun \
 -DskipTests \
@@ -230,7 +234,7 @@ mvn clean package -PtemplatesRun \
 -Dregion="$REGION" \
 -DjobName="bigquery-to-bigtable-job" \
 -DtemplateName="BigQuery_to_Bigtable" \
--Dparameters="readIdColumn=$READ_ID_COLUMN,timestampColumn=$TIMESTAMP_COLUMN,skipNullValues=$SKIP_NULL_VALUES,inputTableSpec=$INPUT_TABLE_SPEC,outputDeadletterTable=$OUTPUT_DEADLETTER_TABLE,query=$QUERY,useLegacySql=$USE_LEGACY_SQL,queryLocation=$QUERY_LOCATION,queryTempDataset=$QUERY_TEMP_DATASET,KMSEncryptionKey=$KMSENCRYPTION_KEY,bigtableRpcAttemptTimeoutMs=$BIGTABLE_RPC_ATTEMPT_TIMEOUT_MS,bigtableRpcTimeoutMs=$BIGTABLE_RPC_TIMEOUT_MS,bigtableAdditionalRetryCodes=$BIGTABLE_ADDITIONAL_RETRY_CODES,bigtableWriteInstanceId=$BIGTABLE_WRITE_INSTANCE_ID,bigtableWriteTableId=$BIGTABLE_WRITE_TABLE_ID,bigtableWriteColumnFamily=$BIGTABLE_WRITE_COLUMN_FAMILY,bigtableWriteAppProfile=$BIGTABLE_WRITE_APP_PROFILE,bigtableWriteProjectId=$BIGTABLE_WRITE_PROJECT_ID,bigtableBulkWriteLatencyTargetMs=$BIGTABLE_BULK_WRITE_LATENCY_TARGET_MS,bigtableBulkWriteMaxRowKeyCount=$BIGTABLE_BULK_WRITE_MAX_ROW_KEY_COUNT,bigtableBulkWriteMaxRequestSizeBytes=$BIGTABLE_BULK_WRITE_MAX_REQUEST_SIZE_BYTES" \
+-Dparameters="readIdColumn=$READ_ID_COLUMN,timestampColumn=$TIMESTAMP_COLUMN,skipNullValues=$SKIP_NULL_VALUES,inputTableSpec=$INPUT_TABLE_SPEC,outputDeadletterTable=$OUTPUT_DEADLETTER_TABLE,query=$QUERY,useLegacySql=$USE_LEGACY_SQL,queryLocation=$QUERY_LOCATION,queryTempDataset=$QUERY_TEMP_DATASET,KMSEncryptionKey=$KMSENCRYPTION_KEY,bigtableRpcAttemptTimeoutMs=$BIGTABLE_RPC_ATTEMPT_TIMEOUT_MS,bigtableRpcTimeoutMs=$BIGTABLE_RPC_TIMEOUT_MS,bigtableAdditionalRetryCodes=$BIGTABLE_ADDITIONAL_RETRY_CODES,bigtableWriteInstanceId=$BIGTABLE_WRITE_INSTANCE_ID,bigtableWriteTableId=$BIGTABLE_WRITE_TABLE_ID,bigtableWriteColumnFamily=$BIGTABLE_WRITE_COLUMN_FAMILY,bigtableWriteAppProfile=$BIGTABLE_WRITE_APP_PROFILE,bigtableWriteProjectId=$BIGTABLE_WRITE_PROJECT_ID,bigtableBulkWriteLatencyTargetMs=$BIGTABLE_BULK_WRITE_LATENCY_TARGET_MS,bigtableBulkWriteMaxRowKeyCount=$BIGTABLE_BULK_WRITE_MAX_ROW_KEY_COUNT,bigtableBulkWriteMaxRequestSizeBytes=$BIGTABLE_BULK_WRITE_MAX_REQUEST_SIZE_BYTES,bigtableBulkWriteFlowControl=$BIGTABLE_BULK_WRITE_FLOW_CONTROL" \
 -f v2/bigquery-to-bigtable
 ```

@@ -296,6 +300,7 @@ resource "google_dataflow_flex_template_job" "bigquery_to_bigtable" {
 # bigtableBulkWriteLatencyTargetMs = "<bigtableBulkWriteLatencyTargetMs>"
 # bigtableBulkWriteMaxRowKeyCount = "<bigtableBulkWriteMaxRowKeyCount>"
 # bigtableBulkWriteMaxRequestSizeBytes = "<bigtableBulkWriteMaxRequestSizeBytes>"
+# bigtableBulkWriteFlowControl = "false"
 }
 }
 ```
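
As a quick reference for the new flag, a minimal sketch of how it could be passed when launching the template follows. It condenses the full gcloud invocation shown in the diff above; the project, region, and template location values are illustrative placeholders, not values taken from this commit.

```bash
#!/usr/bin/env bash
# Minimal sketch: enable server-signal flow control for Bigtable bulk writes.
# PROJECT, REGION, and TEMPLATE_PATH are placeholders (not from this commit);
# the other required BigQuery/Bigtable parameters from the README still apply.
export PROJECT="my-project-id"
export REGION="us-central1"
export TEMPLATE_PATH="gs://my-bucket/templates/flex/BigQuery_to_Bigtable"
export BIGTABLE_BULK_WRITE_FLOW_CONTROL=true

gcloud dataflow flex-template run "bigquery-to-bigtable-job" \
  --project "$PROJECT" \
  --region "$REGION" \
  --template-file-gcs-location "$TEMPLATE_PATH" \
  --parameters "bigtableBulkWriteFlowControl=$BIGTABLE_BULK_WRITE_FLOW_CONTROL"
```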

v2/datastream-to-sql/README_Cloud_Datastream_to_SQL.md

Lines changed: 32 additions & 7 deletions
@@ -54,12 +54,17 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat
 * **databaseName**: The name of the SQL database to connect to. The default value is `postgres`.
 * **defaultCasing**: A toggle for table casing behavior. For example: LOWERCASE = mytable -> mytable, UPPERCASE = mytable -> MYTABLE, CAMEL = my_table -> myTable, SNAKE = myTable -> my_table. Defaults to: LOWERCASE.
 * **columnCasing**: A toggle for target column name casing. LOWERCASE (default): my_column -> my_column. UPPERCASE: my_column -> MY_COLUMN. CAMEL: my_column -> myColumn. SNAKE: myColumn -> my_column.
-* **schemaMap**: A map of key/values used to dictate schema name changes (ie. old_name:new_name,CaseError:case_error). Defaults to empty.
+* **schemaMap**: A map of key/values used to dictate schema and table name changes. Examples: Schema to schema (SCHEMA1:SCHEMA2), Table to table (SCHEMA1.table1:SCHEMA2.TABLE1), or multiple mappings using the pipe '|' delimiter (e.g. schema1.source:schema2.target|schema3.source:schema4.target). Defaults to empty.
 * **customConnectionString**: Optional connection string which will be used instead of the default database string.
 * **numThreads**: Determines key parallelism of Format to DML step, specifically, the value is passed into Reshuffle.withNumBuckets. Defaults to: 100.
 * **databaseLoginTimeout**: The timeout in seconds for database login attempts. This helps prevent connection hangs when multiple workers try to connect simultaneously.
-* **datastreamSourceType**: Override the source type detection for Datastream CDC data. When specified, this value will be used instead of deriving the source type from the read_method field. Valid values include 'mysql', 'postgresql', 'oracle', etc. This parameter is useful when the read_method field contains 'cdc' and the actual source type cannot be determined automatically.
 * **orderByIncludesIsDeleted**: Order by configurations for data should include prioritizing data which is not deleted. Defaults to: false.
+* **datastreamSourceType**: Override the source type detection for Datastream CDC data. When specified, this value will be used instead of deriving the source type from the read_method field. Valid values include 'mysql', 'postgresql', 'oracle', etc. This parameter is useful when the read_method field contains 'cdc' and the actual source type cannot be determined automatically.
+* **deadLetterQueueDirectory**: The path that Dataflow uses to write the dead-letter queue output. This path must not be in the same path as the Datastream file output. Defaults to `empty`.
+* **dlqRetryMinutes**: The number of minutes between DLQ Retries. Defaults to `10`.
+* **dlqMaxRetries**: The maximum number of times to retry a failed record from the DLQ before marking it as a permanent failure. Defaults to 5.
+* **schemaCacheRefreshMinutes**: The number of minutes to cache table schemas. Defaults to 1440 (24 hours).
+* **runMode**: This is the run mode type, whether regular or with retryDLQ. Defaults to: regular.

@@ -172,8 +177,13 @@ export SCHEMA_MAP=""
 export CUSTOM_CONNECTION_STRING=""
 export NUM_THREADS=100
 export DATABASE_LOGIN_TIMEOUT=<databaseLoginTimeout>
-export DATASTREAM_SOURCE_TYPE=<datastreamSourceType>
 export ORDER_BY_INCLUDES_IS_DELETED=false
+export DATASTREAM_SOURCE_TYPE=<datastreamSourceType>
+export DEAD_LETTER_QUEUE_DIRECTORY=""
+export DLQ_RETRY_MINUTES=10
+export DLQ_MAX_RETRIES=5
+export SCHEMA_CACHE_REFRESH_MINUTES=1440
+export RUN_MODE=regular

 gcloud dataflow flex-template run "cloud-datastream-to-sql-job" \
 --project "$PROJECT" \
@@ -197,8 +207,13 @@ gcloud dataflow flex-template run "cloud-datastream-to-sql-job" \
 --parameters "customConnectionString=$CUSTOM_CONNECTION_STRING" \
 --parameters "numThreads=$NUM_THREADS" \
 --parameters "databaseLoginTimeout=$DATABASE_LOGIN_TIMEOUT" \
+--parameters "orderByIncludesIsDeleted=$ORDER_BY_INCLUDES_IS_DELETED" \
 --parameters "datastreamSourceType=$DATASTREAM_SOURCE_TYPE" \
---parameters "orderByIncludesIsDeleted=$ORDER_BY_INCLUDES_IS_DELETED"
+--parameters "deadLetterQueueDirectory=$DEAD_LETTER_QUEUE_DIRECTORY" \
+--parameters "dlqRetryMinutes=$DLQ_RETRY_MINUTES" \
+--parameters "dlqMaxRetries=$DLQ_MAX_RETRIES" \
+--parameters "schemaCacheRefreshMinutes=$SCHEMA_CACHE_REFRESH_MINUTES" \
+--parameters "runMode=$RUN_MODE"
 ```

 For more information about the command, please check:
@@ -237,8 +252,13 @@ export SCHEMA_MAP=""
 export CUSTOM_CONNECTION_STRING=""
 export NUM_THREADS=100
 export DATABASE_LOGIN_TIMEOUT=<databaseLoginTimeout>
-export DATASTREAM_SOURCE_TYPE=<datastreamSourceType>
 export ORDER_BY_INCLUDES_IS_DELETED=false
+export DATASTREAM_SOURCE_TYPE=<datastreamSourceType>
+export DEAD_LETTER_QUEUE_DIRECTORY=""
+export DLQ_RETRY_MINUTES=10
+export DLQ_MAX_RETRIES=5
+export SCHEMA_CACHE_REFRESH_MINUTES=1440
+export RUN_MODE=regular

 mvn clean package -PtemplatesRun \
 -DskipTests \
@@ -247,7 +267,7 @@ mvn clean package -PtemplatesRun \
 -Dregion="$REGION" \
 -DjobName="cloud-datastream-to-sql-job" \
 -DtemplateName="Cloud_Datastream_to_SQL" \
--Dparameters="inputFilePattern=$INPUT_FILE_PATTERN,gcsPubSubSubscription=$GCS_PUB_SUB_SUBSCRIPTION,inputFileFormat=$INPUT_FILE_FORMAT,streamName=$STREAM_NAME,rfcStartDateTime=$RFC_START_DATE_TIME,dataStreamRootUrl=$DATA_STREAM_ROOT_URL,databaseType=$DATABASE_TYPE,databaseHost=$DATABASE_HOST,databasePort=$DATABASE_PORT,databaseUser=$DATABASE_USER,databasePassword=$DATABASE_PASSWORD,databaseName=$DATABASE_NAME,defaultCasing=$DEFAULT_CASING,columnCasing=$COLUMN_CASING,schemaMap=$SCHEMA_MAP,customConnectionString=$CUSTOM_CONNECTION_STRING,numThreads=$NUM_THREADS,databaseLoginTimeout=$DATABASE_LOGIN_TIMEOUT,datastreamSourceType=$DATASTREAM_SOURCE_TYPE,orderByIncludesIsDeleted=$ORDER_BY_INCLUDES_IS_DELETED" \
+-Dparameters="inputFilePattern=$INPUT_FILE_PATTERN,gcsPubSubSubscription=$GCS_PUB_SUB_SUBSCRIPTION,inputFileFormat=$INPUT_FILE_FORMAT,streamName=$STREAM_NAME,rfcStartDateTime=$RFC_START_DATE_TIME,dataStreamRootUrl=$DATA_STREAM_ROOT_URL,databaseType=$DATABASE_TYPE,databaseHost=$DATABASE_HOST,databasePort=$DATABASE_PORT,databaseUser=$DATABASE_USER,databasePassword=$DATABASE_PASSWORD,databaseName=$DATABASE_NAME,defaultCasing=$DEFAULT_CASING,columnCasing=$COLUMN_CASING,schemaMap=$SCHEMA_MAP,customConnectionString=$CUSTOM_CONNECTION_STRING,numThreads=$NUM_THREADS,databaseLoginTimeout=$DATABASE_LOGIN_TIMEOUT,orderByIncludesIsDeleted=$ORDER_BY_INCLUDES_IS_DELETED,datastreamSourceType=$DATASTREAM_SOURCE_TYPE,deadLetterQueueDirectory=$DEAD_LETTER_QUEUE_DIRECTORY,dlqRetryMinutes=$DLQ_RETRY_MINUTES,dlqMaxRetries=$DLQ_MAX_RETRIES,schemaCacheRefreshMinutes=$SCHEMA_CACHE_REFRESH_MINUTES,runMode=$RUN_MODE" \
 -f v2/datastream-to-sql
 ```

@@ -310,8 +330,13 @@ resource "google_dataflow_flex_template_job" "cloud_datastream_to_sql" {
 # customConnectionString = ""
 # numThreads = "100"
 # databaseLoginTimeout = "<databaseLoginTimeout>"
-# datastreamSourceType = "<datastreamSourceType>"
 # orderByIncludesIsDeleted = "false"
+# datastreamSourceType = "<datastreamSourceType>"
+# deadLetterQueueDirectory = ""
+# dlqRetryMinutes = "10"
+# dlqMaxRetries = "5"
+# schemaCacheRefreshMinutes = "1440"
+# runMode = "regular"
 }
 }
 ```
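
For the new DLQ, schema-cache, and run-mode options, a small sketch of one consistent set of values may be useful. The bucket path and schema names are illustrative placeholders, not values from this commit; these exports simply feed the corresponding --parameters flags added in the gcloud command above.

```bash
#!/usr/bin/env bash
# Hypothetical settings for the newly documented Datastream-to-SQL options.
# The DLQ bucket and schema names are placeholders, not values from this commit.
export SCHEMA_MAP="schema1.source:schema2.target|schema3.source:schema4.target"
export DEAD_LETTER_QUEUE_DIRECTORY="gs://my-bucket/datastream-dlq/"  # must not overlap the Datastream output path
export DLQ_RETRY_MINUTES=10               # minutes between DLQ retries
export DLQ_MAX_RETRIES=5                  # retries before a record is marked a permanent failure
export SCHEMA_CACHE_REFRESH_MINUTES=1440  # cache table schemas for 24 hours
export RUN_MODE=regular                   # the parameter description also mentions a retryDLQ mode

# Each export maps onto a --parameters flag from the gcloud command above, e.g.:
#   --parameters "deadLetterQueueDirectory=$DEAD_LETTER_QUEUE_DIRECTORY" \
#   --parameters "runMode=$RUN_MODE"
```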

v2/googlecloud-to-googlecloud/README_Stream_GCS_Text_to_BigQuery_Flex.md

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

 * **outputDeadletterTable**: Table for messages that failed to reach the output table. If a table doesn't exist, it is created during pipeline execution. If not specified, `<outputTableSpec>_error_records` is used. For example, `<PROJECT_ID>:<DATASET_NAME>.<TABLE_NAME>`.
 * **useStorageWriteApiAtLeastOnce**: This parameter takes effect only if `Use BigQuery Storage Write API` is enabled. If enabled the at-least-once semantics will be used for Storage Write API, otherwise exactly-once semantics will be used. Defaults to: false.
-* **useStorageWriteApi**: If `true`, the pipeline uses the BigQuery Storage Write API (https://cloud.google.com/bigquery/docs/write-api). The default value is `false`. For more information, see Using the Storage Write API (https://beam.apache.org/documentation/io/built-in/google-bigquery/#storage-write-api).
+* **useStorageWriteApi**: If true, the pipeline uses the BigQuery Storage Write API (https://cloud.google.com/bigquery/docs/write-api). The default value is `false`. For more information, see Using the Storage Write API (https://beam.apache.org/documentation/io/built-in/google-bigquery/#storage-write-api).
 * **numStorageWriteApiStreams**: When using the Storage Write API, specifies the number of write streams. If `useStorageWriteApi` is `true` and `useStorageWriteApiAtLeastOnce` is `false`, then you must set this parameter. Defaults to: 0.
 * **storageWriteApiTriggeringFrequencySec**: When using the Storage Write API, specifies the triggering frequency, in seconds. If `useStorageWriteApi` is `true` and `useStorageWriteApiAtLeastOnce` is `false`, then you must set this parameter.
 * **pythonExternalTextTransformGcsPath**: The Cloud Storage path pattern for the Python code containing your user-defined functions. For example, `gs://your-bucket/your-function.py`.

v2/googlecloud-to-googlecloud/README_Stream_GCS_Text_to_BigQuery_Xlang.md

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

 * **outputDeadletterTable**: Table for messages that failed to reach the output table. If a table doesn't exist, it is created during pipeline execution. If not specified, `<outputTableSpec>_error_records` is used. For example, `<PROJECT_ID>:<DATASET_NAME>.<TABLE_NAME>`.
 * **useStorageWriteApiAtLeastOnce**: This parameter takes effect only if `Use BigQuery Storage Write API` is enabled. If enabled the at-least-once semantics will be used for Storage Write API, otherwise exactly-once semantics will be used. Defaults to: false.
-* **useStorageWriteApi**: If `true`, the pipeline uses the BigQuery Storage Write API (https://cloud.google.com/bigquery/docs/write-api). The default value is `false`. For more information, see Using the Storage Write API (https://beam.apache.org/documentation/io/built-in/google-bigquery/#storage-write-api).
+* **useStorageWriteApi**: If true, the pipeline uses the BigQuery Storage Write API (https://cloud.google.com/bigquery/docs/write-api). The default value is `false`. For more information, see Using the Storage Write API (https://beam.apache.org/documentation/io/built-in/google-bigquery/#storage-write-api).
 * **numStorageWriteApiStreams**: When using the Storage Write API, specifies the number of write streams. If `useStorageWriteApi` is `true` and `useStorageWriteApiAtLeastOnce` is `false`, then you must set this parameter. Defaults to: 0.
 * **storageWriteApiTriggeringFrequencySec**: When using the Storage Write API, specifies the triggering frequency, in seconds. If `useStorageWriteApi` is `true` and `useStorageWriteApiAtLeastOnce` is `false`, then you must set this parameter.
 * **pythonExternalTextTransformGcsPath**: The Cloud Storage path pattern for the Python code containing your user-defined functions. For example, `gs://your-bucket/your-function.py`.
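
Since the Storage Write API parameters described in both READMEs above interact (the stream count and triggering frequency are only mandatory for exactly-once mode), the following sketch shows one consistent combination. The shell variable names and numeric values are illustrative assumptions, not defaults from this commit.

```bash
#!/usr/bin/env bash
# Illustrative combination of the Storage Write API parameters described above.
# Variable names, the stream count (3), and the triggering frequency (5s) are assumptions.
export USE_STORAGE_WRITE_API=true
export USE_STORAGE_WRITE_API_AT_LEAST_ONCE=false
# With useStorageWriteApi=true and useStorageWriteApiAtLeastOnce=false,
# both of the following must also be set:
export NUM_STORAGE_WRITE_API_STREAMS=3
export STORAGE_WRITE_API_TRIGGERING_FREQUENCY_SEC=5
```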
