
Commit 93546c2

craig[bot], Yevgeniy Miretskiy, and AlexTalks committed
77230: changefeedccl: Make some of the changefeed settings public. r=miretskiy a=miretskiy

Mark some of the settings we expect users to control as public.

Fixes cockroachdb#71785

Release note: None

107140: acceptance: deflake test_demo_node_cmds r=AlexTalks a=AlexTalks

Previously, the acceptance test `test_demo_node_cmds`, which attempts to shut down and decommission some nodes using the `cockroach demo` CLI, was sometimes flaky due to delays in propagating information via gossip. This change fixes these flakes by querying the virtual table `crdb_internal.kv_node_liveness` rather than the gossip-based `gossip_liveness` virtual table.

Fixes: cockroachdb#76391

Release note: None

Co-authored-by: Yevgeniy Miretskiy <[email protected]>
Co-authored-by: Alex Sarkesian <[email protected]>
3 parents: 69bc4c6 + 4445c59 + cc1dd90

File tree

4 files changed: +41 −37 lines

docs/generated/settings/settings-for-tenants.txt

Lines changed: 5 additions & 0 deletions

@@ -10,14 +10,19 @@ bulkio.backup.file_size byte size 128 MiB target size for individual data files
 bulkio.backup.read_timeout duration 5m0s amount of time after which a read attempt is considered timed out, which causes the backup to fail tenant-rw
 bulkio.backup.read_with_priority_after duration 1m0s amount of time since the read-as-of time above which a BACKUP should use priority when retrying reads tenant-rw
 bulkio.stream_ingestion.minimum_flush_interval duration 5s the minimum timestamp between flushes; flushes may still occur if internal buffers fill up tenant-rw
+changefeed.backfill.concurrent_scan_requests integer 0 number of concurrent scan requests per node issued during a backfill tenant-rw
 changefeed.backfill.scan_request_size integer 524288 the maximum number of bytes returned by each scan request tenant-rw
 changefeed.balance_range_distribution.enable boolean false if enabled, the ranges are balanced equally among all nodes tenant-rw
 changefeed.batch_reduction_retry_enabled boolean false if true, kafka changefeeds upon erroring on an oversized batch will attempt to resend the messages with progressively lower batch sizes tenant-rw
 changefeed.event_consumer_worker_queue_size integer 16 if changefeed.event_consumer_workers is enabled, this setting sets the maxmimum number of events which a worker can buffer tenant-rw
 changefeed.event_consumer_workers integer 0 the number of workers to use when processing events: <0 disables, 0 assigns a reasonable default, >0 assigns the setting value. for experimental/core changefeeds and changefeeds using parquet format, this is disabled tenant-rw
 changefeed.fast_gzip.enabled boolean true use fast gzip implementation tenant-rw
+changefeed.frontier_highwater_lag_checkpoint_threshold duration 10m0s controls the maximum the high-water mark is allowed to lag behind the leading spans of the frontier before per-span checkpointing is enabled; if 0, checkpointing due to high-water lag is disabled tenant-rw
+changefeed.memory.per_changefeed_limit byte size 512 MiB controls amount of data that can be buffered per changefeed tenant-rw
+changefeed.min_highwater_advance duration 0s minimum amount of time the changefeed high water mark must advance for it to be eligible for checkpointing; Default of 0 will checkpoint every time frontier advances, as long as the rate of checkpointing keeps up with the rate of frontier changes tenant-rw
 changefeed.node_throttle_config string specifies node level throttling configuration for all changefeeeds tenant-rw
 changefeed.protect_timestamp.max_age duration 96h0m0s fail the changefeed if the protected timestamp age exceeds this threshold; 0 disables expiration tenant-rw
+changefeed.protect_timestamp_interval duration 10m0s controls how often the changefeed forwards its protected timestamp to the resolved timestamp tenant-rw
 changefeed.schema_feed.read_with_priority_after duration 1m0s retry with high priority if we were not able to read descriptors for too long; 0 disables tenant-rw
 changefeed.sink_io_workers integer 0 the number of workers used by changefeeds when sending requests to the sink (currently webhook only): <0 disables, 0 assigns a reasonable default, >0 assigns the setting value tenant-rw
 cloudstorage.azure.concurrent_upload_buffers integer 1 controls the number of concurrent buffers that will be used by the Azure client when uploading chunks.Each buffer can buffer up to cloudstorage.write_chunk.size of memory during an upload tenant-rw

docs/generated/settings/settings.html

Lines changed: 5 additions & 0 deletions

@@ -16,14 +16,19 @@
 <tr><td><div id="setting-bulkio-backup-read-timeout" class="anchored"><code>bulkio.backup.read_timeout</code></div></td><td>duration</td><td><code>5m0s</code></td><td>amount of time after which a read attempt is considered timed out, which causes the backup to fail</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-bulkio-backup-read-with-priority-after" class="anchored"><code>bulkio.backup.read_with_priority_after</code></div></td><td>duration</td><td><code>1m0s</code></td><td>amount of time since the read-as-of time above which a BACKUP should use priority when retrying reads</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-bulkio-stream-ingestion-minimum-flush-interval" class="anchored"><code>bulkio.stream_ingestion.minimum_flush_interval</code></div></td><td>duration</td><td><code>5s</code></td><td>the minimum timestamp between flushes; flushes may still occur if internal buffers fill up</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
+<tr><td><div id="setting-changefeed-backfill-concurrent-scan-requests" class="anchored"><code>changefeed.backfill.concurrent_scan_requests</code></div></td><td>integer</td><td><code>0</code></td><td>number of concurrent scan requests per node issued during a backfill</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-backfill-scan-request-size" class="anchored"><code>changefeed.backfill.scan_request_size</code></div></td><td>integer</td><td><code>524288</code></td><td>the maximum number of bytes returned by each scan request</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-balance-range-distribution-enable" class="anchored"><code>changefeed.balance_range_distribution.enable</code></div></td><td>boolean</td><td><code>false</code></td><td>if enabled, the ranges are balanced equally among all nodes</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-batch-reduction-retry-enabled" class="anchored"><code>changefeed.batch_reduction_retry_enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>if true, kafka changefeeds upon erroring on an oversized batch will attempt to resend the messages with progressively lower batch sizes</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-event-consumer-worker-queue-size" class="anchored"><code>changefeed.event_consumer_worker_queue_size</code></div></td><td>integer</td><td><code>16</code></td><td>if changefeed.event_consumer_workers is enabled, this setting sets the maxmimum number of events which a worker can buffer</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-event-consumer-workers" class="anchored"><code>changefeed.event_consumer_workers</code></div></td><td>integer</td><td><code>0</code></td><td>the number of workers to use when processing events: &lt;0 disables, 0 assigns a reasonable default, &gt;0 assigns the setting value. for experimental/core changefeeds and changefeeds using parquet format, this is disabled</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-fast-gzip-enabled" class="anchored"><code>changefeed.fast_gzip.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>use fast gzip implementation</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
+<tr><td><div id="setting-changefeed-frontier-highwater-lag-checkpoint-threshold" class="anchored"><code>changefeed.frontier_highwater_lag_checkpoint_threshold</code></div></td><td>duration</td><td><code>10m0s</code></td><td>controls the maximum the high-water mark is allowed to lag behind the leading spans of the frontier before per-span checkpointing is enabled; if 0, checkpointing due to high-water lag is disabled</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
+<tr><td><div id="setting-changefeed-memory-per-changefeed-limit" class="anchored"><code>changefeed.memory.per_changefeed_limit</code></div></td><td>byte size</td><td><code>512 MiB</code></td><td>controls amount of data that can be buffered per changefeed</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
+<tr><td><div id="setting-changefeed-min-highwater-advance" class="anchored"><code>changefeed.min_highwater_advance</code></div></td><td>duration</td><td><code>0s</code></td><td>minimum amount of time the changefeed high water mark must advance for it to be eligible for checkpointing; Default of 0 will checkpoint every time frontier advances, as long as the rate of checkpointing keeps up with the rate of frontier changes</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-node-throttle-config" class="anchored"><code>changefeed.node_throttle_config</code></div></td><td>string</td><td><code></code></td><td>specifies node level throttling configuration for all changefeeeds</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-protect-timestamp-max-age" class="anchored"><code>changefeed.protect_timestamp.max_age</code></div></td><td>duration</td><td><code>96h0m0s</code></td><td>fail the changefeed if the protected timestamp age exceeds this threshold; 0 disables expiration</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
+<tr><td><div id="setting-changefeed-protect-timestamp-interval" class="anchored"><code>changefeed.protect_timestamp_interval</code></div></td><td>duration</td><td><code>10m0s</code></td><td>controls how often the changefeed forwards its protected timestamp to the resolved timestamp</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-schema-feed-read-with-priority-after" class="anchored"><code>changefeed.schema_feed.read_with_priority_after</code></div></td><td>duration</td><td><code>1m0s</code></td><td>retry with high priority if we were not able to read descriptors for too long; 0 disables</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-changefeed-sink-io-workers" class="anchored"><code>changefeed.sink_io_workers</code></div></td><td>integer</td><td><code>0</code></td><td>the number of workers used by changefeeds when sending requests to the sink (currently webhook only): &lt;0 disables, 0 assigns a reasonable default, &gt;0 assigns the setting value</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
 <tr><td><div id="setting-cloudstorage-azure-concurrent-upload-buffers" class="anchored"><code>cloudstorage.azure.concurrent_upload_buffers</code></div></td><td>integer</td><td><code>1</code></td><td>controls the number of concurrent buffers that will be used by the Azure client when uploading chunks.Each buffer can buffer up to cloudstorage.write_chunk.size of memory during an upload</td><td>Serverless/Dedicated/Self-Hosted</td></tr>

pkg/ccl/changefeedccl/changefeedbase/settings.go

Lines changed: 5 additions & 5 deletions

@@ -49,7 +49,7 @@ var PerChangefeedMemLimit = settings.RegisterByteSizeSetting(
 	"changefeed.memory.per_changefeed_limit",
 	"controls amount of data that can be buffered per changefeed",
 	1<<29, // 512MiB
-)
+).WithPublic()
 
 // SlowSpanLogThreshold controls when we will log slow spans.
 var SlowSpanLogThreshold = settings.RegisterDurationSetting(
@@ -88,7 +88,7 @@ var FrontierHighwaterLagCheckpointThreshold = settings.RegisterDurationSetting(
 	"controls the maximum the high-water mark is allowed to lag behind the leading spans of the frontier before per-span checkpointing is enabled; if 0, checkpointing due to high-water lag is disabled",
 	10*time.Minute,
 	settings.NonNegativeDuration,
-)
+).WithPublic()
 
 // FrontierCheckpointMaxBytes controls the maximum number of key bytes that will be added
 // to the checkpoint record.
@@ -119,7 +119,7 @@ var ScanRequestLimit = settings.RegisterIntSetting(
 	"changefeed.backfill.concurrent_scan_requests",
 	"number of concurrent scan requests per node issued during a backfill",
 	0,
-)
+).WithPublic()
 
 // ScanRequestSize is the target size of the scan request response.
 //
@@ -181,7 +181,7 @@ var MinHighWaterMarkCheckpointAdvance = settings.RegisterDurationSetting(
 	"advances, as long as the rate of checkpointing keeps up with the rate of frontier changes",
 	0,
 	settings.NonNegativeDuration,
-)
+).WithPublic()
 
 // EventMemoryMultiplier is the multiplier for the amount of memory needed to process an event.
 //
@@ -209,7 +209,7 @@ var ProtectTimestampInterval = settings.RegisterDurationSetting(
 	"controls how often the changefeed forwards its protected timestamp to the resolved timestamp",
 	10*time.Minute,
 	settings.PositiveDuration,
-)
+).WithPublic()
 
 // MaxProtectedTimestampAge controls the frequency of protected timestamp record updates
 var MaxProtectedTimestampAge = settings.RegisterDurationSetting(
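The diff above relies on CockroachDB's builder-style settings API: each `Register*` call returns the setting object, so `.WithPublic()` can be chained onto the registration expression to mark the setting public rather than reserved. A minimal, self-contained sketch of that mechanic is below; the `Setting` type and `registerDurationSetting` helper are hypothetical stand-ins, not the real `pkg/settings` API.

```go
package main

import "fmt"

// Setting is a hypothetical, minimal stand-in for a cluster setting.
// Only the chained WithPublic call mirrors the pattern used in this commit.
type Setting struct {
	key         string
	description string
	public      bool // settings default to non-public (reserved)
}

// WithPublic marks the setting public and returns it, so the call can be
// chained directly onto the registration expression, just as the diff
// appends ").WithPublic()" to each Register* call.
func (s *Setting) WithPublic() *Setting {
	s.public = true
	return s
}

// registerDurationSetting is a hypothetical registration helper.
func registerDurationSetting(key, desc string) *Setting {
	return &Setting{key: key, description: desc}
}

func main() {
	// Without WithPublic, the setting stays reserved (hidden from users).
	internal := registerDurationSetting("changefeed.slow_span_log_threshold", "...")
	// Chaining WithPublic flips visibility at registration time.
	public := registerDurationSetting(
		"changefeed.protect_timestamp_interval",
		"controls how often the changefeed forwards its protected timestamp",
	).WithPublic()

	fmt.Println(internal.public, public.public)
}
```

The public flag is what gates a setting's appearance in the generated docs, which is consistent with the five new rows added to each of the two generated docs files in this commit.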

pkg/cli/interactive_tests/test_demo_node_cmds.tcl

Lines changed: 26 additions & 32 deletions

@@ -67,31 +67,26 @@ send "\\demo restart 3\r"
 eexpect "node 3 has been restarted"
 eexpect "defaultdb>"
 
-# NB: this is flaky, sometimes n3 is still marked as draining due to
-# gossip propagation delays. See:
-# https://github.com/cockroachdb/cockroach/issues/76391
-# send "select node_id, draining, decommissioning, membership from crdb_internal.gossip_liveness ORDER BY node_id;\r"
-# eexpect "1 | false | false | active"
-# eexpect "2 | false | false | active"
-# eexpect "3 | false | false | active"
-# eexpect "4 | false | false | active"
-# eexpect "5 | false | false | active"
-# eexpect "defaultdb>"
-
-# Try commissioning commands
+send "select node_id, draining, decommissioning, membership from crdb_internal.gossip_liveness ORDER BY node_id;\r"
+eexpect "1 | f | f | active"
+eexpect "2 | f | f | active"
+eexpect "3 | f | f | active"
+eexpect "4 | f | f | active"
+eexpect "5 | f | f | active"
+eexpect "defaultdb>"
+
+# Try decommissioning commands
 send "\\demo decommission 4\r"
 eexpect "node 4 has been decommissioned"
 eexpect "defaultdb>"
 
-# NB: skipping this out of an abundance of caution, see:
-# https://github.com/cockroachdb/cockroach/issues/76391
-# send "select node_id, draining, decommissioning, membership from crdb_internal.gossip_liveness ORDER BY node_id;\r"
-# eexpect "1 | false | false | active"
-# eexpect "2 | false | false | active"
-# eexpect "3 | false | false | active"
-# eexpect "4 | false | true | decommissioned"
-# eexpect "5 | false | false | active"
-# eexpect "defaultdb>"
+send "select node_id, draining, membership from crdb_internal.kv_node_liveness ORDER BY node_id;\r"
+eexpect "1 | f | active"
+eexpect "2 | f | active"
+eexpect "3 | f | active"
+eexpect "4 | f | decommissioned"
+eexpect "5 | f | active"
+eexpect "defaultdb>"
 
 send "\\demo recommission 4\r"
 eexpect "can only recommission a decommissioning node"
@@ -128,17 +123,16 @@ eexpect "node 6 has been shutdown"
 set timeout 30
 eexpect "defaultdb>"
 
-# By now the node should have stabilized in gossip which allows us to query the more detailed information there.
-# NB: skip this to avoid flakes, see:
-# https://github.com/cockroachdb/cockroach/issues/76391
-# send "select node_id, draining, decommissioning, membership from crdb_internal.gossip_liveness ORDER BY node_id;\r"
-# eexpect "1 | false | false | active"
-# eexpect "2 | false | false | active"
-# eexpect "3 | false | false | active"
-# eexpect "4 | false | true | decommissioned"
-# eexpect "5 | false | false | active"
-# eexpect "6 | true | false | active"
-# eexpect "defaultdb>"
+# NB: use kv_node_liveness to avoid flakes due to gossip delays.
+# See https://github.com/cockroachdb/cockroach/issues/76391
+send "select node_id, draining, membership from crdb_internal.kv_node_liveness ORDER BY node_id;\r"
+eexpect "1 | f | active"
+eexpect "2 | f | active"
+eexpect "3 | f | active"
+eexpect "4 | f | decommissioned"
+eexpect "5 | f | active"
+eexpect "6 | t | active"
+eexpect "defaultdb>"
 
 send "\\q\r"
 eexpect eof
