
Commit 4704abf

craig[bot] and kev-cao committed
Merge #153893
153893: pcr: add initial/catchup scan metrics r=jeffswenson a=kev-cao

This commit adds initial and catchup scan metrics to PCR, similar to the metrics supplied in steady-state LDR. The two metrics are exposed as `physical_replication.scanning_ranges` and `physical_replication.catchup_ranges`, respectively.

Fixes: #152272

Release note: Adds initial and catchup scan metrics to PCR under `physical_replication.scanning_ranges` and `physical_replication.catchup_ranges`.

Co-authored-by: Kevin Cao <[email protected]>
2 parents 37e0337 + 524c222 commit 4704abf
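For anyone wanting to eyeball the new gauges on a running cluster, they are exported under the names listed in metrics.yaml below (`physical_replication_scanning_ranges`, `physical_replication_catchup_ranges`) and show up on the node's Prometheus endpoint. A quick sketch, not part of this commit, assuming an insecure local node on the default HTTP port 8080:

```go
// Sketch only (not part of this commit): print the new PCR range-scan gauges
// from a node's Prometheus endpoint. Assumes an insecure local node with the
// default HTTP port 8080.
package main

import (
	"bufio"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// CockroachDB serves Prometheus-format metrics at /_status/vars.
	resp, err := http.Get("http://localhost:8080/_status/vars")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		line := sc.Text()
		// Exported names as listed in docs/generated/metrics/metrics.yaml.
		if strings.HasPrefix(line, "physical_replication_scanning_ranges") ||
			strings.HasPrefix(line, "physical_replication_catchup_ranges") {
			fmt.Println(line)
		}
	}
	if err := sc.Err(); err != nil {
		panic(err)
	}
}
```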

File tree

7 files changed, +125 -9 lines changed


docs/generated/metrics/metrics.yaml

Lines changed: 16 additions & 0 deletions
@@ -7118,6 +7118,14 @@ layers:
     unit: NANOSECONDS
     aggregation: AVG
     derivative: NONE
+  - name: physical_replication.catchup_ranges
+    exported_name: physical_replication_catchup_ranges
+    description: Source side ranges undergoing catch up scans
+    y_axis_label: Ranges
+    type: GAUGE
+    unit: COUNT
+    aggregation: AVG
+    derivative: NONE
   - name: physical_replication.commit_latency
     exported_name: physical_replication_commit_latency
     description: 'Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded'
@@ -7182,6 +7190,14 @@ layers:
     unit: COUNT
     aggregation: AVG
     derivative: NONE
+  - name: physical_replication.scanning_ranges
+    exported_name: physical_replication_scanning_ranges
+    description: Source side ranges undergoing an initial scan
+    y_axis_label: Ranges
+    type: GAUGE
+    unit: COUNT
+    aggregation: AVG
+    derivative: NONE
   - name: requests.slow.distsender
     exported_name: requests_slow_distsender
     description: |-

pkg/crosscluster/physical/BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -94,6 +94,7 @@ go_library(
         "@com_github_cockroachdb_errors//:errors",
         "@com_github_cockroachdb_logtags//:logtags",
         "@com_github_cockroachdb_redact//:redact",
+        "@com_github_gogo_protobuf//types",
     ],
 )
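The new `@com_github_gogo_protobuf//types` Bazel dependency corresponds to the `pbtypes "github.com/gogo/protobuf/types"` import added in the frontier processor below, where `pbtypes.UnmarshalAny` unpacks the range stats carried inside `BulkProcessorProgress.ProgressDetails` (a protobuf `Any`). As a rough, self-contained sketch of that pack/unpack round trip, using a stock wrapper message as the payload rather than the actual `StreamEvent_RangeStats` type:

```go
// Sketch only: demonstrates the gogo/protobuf Any round trip that the
// ingestion processor and frontier processor use to ship range stats.
// Uses types.StringValue as a stand-in payload, not StreamEvent_RangeStats.
package main

import (
	"fmt"

	pbtypes "github.com/gogo/protobuf/types"
)

func main() {
	// Producer side: pack a concrete message into an Any.
	payload := &pbtypes.StringValue{Value: "scanning=3 catchup=7"}
	anyMsg, err := pbtypes.MarshalAny(payload)
	if err != nil {
		panic(err)
	}

	// Consumer side: unpack the Any back into the expected concrete type.
	var out pbtypes.StringValue
	if err := pbtypes.UnmarshalAny(anyMsg, &out); err != nil {
		panic(err)
	}
	fmt.Println(out.Value)
}
```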

pkg/crosscluster/physical/metrics.go

Lines changed: 17 additions & 0 deletions
@@ -101,6 +101,19 @@ var (
 		Measurement: "Events",
 		Unit:        metric.Unit_COUNT,
 	}
+
+	metaScanningRanges = metric.Metadata{
+		Name:        "physical_replication.scanning_ranges",
+		Help:        "Source side ranges undergoing an initial scan",
+		Measurement: "Ranges",
+		Unit:        metric.Unit_COUNT,
+	}
+	metaCatchupRanges = metric.Metadata{
+		Name:        "physical_replication.catchup_ranges",
+		Help:        "Source side ranges undergoing catch up scans",
+		Measurement: "Ranges",
+		Unit:        metric.Unit_COUNT,
+	}
 )
 
 // Metrics are for production monitoring of stream ingestion jobs.
@@ -116,6 +129,8 @@ type Metrics struct {
 	RunningCount               *metric.Gauge
 	ReplicatedTimeSeconds      *metric.Gauge
 	ReplicationCutoverProgress *metric.Gauge
+	ScanningRanges             *metric.Gauge
+	CatchupRanges              *metric.Gauge
 }
 
 // MetricStruct implements the metric.Struct interface.
@@ -153,6 +168,8 @@ func MakeMetrics(histogramWindow time.Duration) metric.Struct {
 		RunningCount:               metric.NewGauge(metaStreamsRunning),
 		ReplicatedTimeSeconds:      metric.NewGauge(metaReplicatedTimeSeconds),
 		ReplicationCutoverProgress: metric.NewGauge(metaReplicationCutoverProgress),
+		ScanningRanges:             metric.NewGauge(metaScanningRanges),
+		CatchupRanges:              metric.NewGauge(metaCatchupRanges),
 	}
 	return m
 }
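The additions above follow the standard `pkg/util/metric` pattern: a `metric.Metadata` literal describes the metric (registered name, help text, measurement, unit), `metric.NewGauge` builds the gauge from it in `MakeMetrics`, and callers later set an absolute value with `Update` (a gauge rather than a counter, since the number of scanning/catch-up ranges moves both ways between checkpoints). A minimal standalone sketch of that pattern, with hypothetical names that are not part of the commit:

```go
// Sketch only: the Metadata -> NewGauge -> Update pattern used by the new
// PCR gauges. The metric name here is hypothetical.
package example

import "github.com/cockroachdb/cockroach/pkg/util/metric"

var metaExampleRanges = metric.Metadata{
	Name:        "physical_replication.example_ranges",
	Help:        "Example gauge illustrating the pattern",
	Measurement: "Ranges",
	Unit:        metric.Unit_COUNT,
}

// ExampleMetrics would normally be registered with a metric registry so the
// gauge is exported alongside the other job metrics.
type ExampleMetrics struct {
	ExampleRanges *metric.Gauge
}

func MakeExampleMetrics() *ExampleMetrics {
	return &ExampleMetrics{ExampleRanges: metric.NewGauge(metaExampleRanges)}
}

// Record sets the gauge to the latest absolute count (gauges are set, not
// incremented, because the value can decrease as scans finish).
func (m *ExampleMetrics) Record(count int64) {
	m.ExampleRanges.Update(count)
}
```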

pkg/crosscluster/physical/stream_ingestion_dist.go

Lines changed: 8 additions & 7 deletions
@@ -859,13 +859,14 @@ func constructStreamIngestionPlanSpecs(
 	// Create a spec for the StreamIngestionFrontier processor on the coordinator
 	// node.
 	streamIngestionFrontierSpec := &execinfrapb.StreamIngestionFrontierSpec{
-		ReplicatedTimeAtStart: previousReplicatedTimestamp,
-		TrackedSpans:          []roachpb.Span{tenantSpan},
-		JobID:                 int64(jobID),
-		StreamID:              uint64(streamID),
-		ConnectionUris:        topology.SerializedClusterUris(),
-		Checkpoint:            checkpoint,
-		PartitionSpecs:        repackagePartitionSpecs(streamIngestionSpecs),
+		ReplicatedTimeAtStart:  previousReplicatedTimestamp,
+		TrackedSpans:           []roachpb.Span{tenantSpan},
+		JobID:                  int64(jobID),
+		StreamID:               uint64(streamID),
+		ConnectionUris:         topology.SerializedClusterUris(),
+		Checkpoint:             checkpoint,
+		PartitionSpecs:         repackagePartitionSpecs(streamIngestionSpecs),
+		NumIngestionProcessors: int32(len(topology.Partitions)),
 	}
 
 	return streamIngestionSpecs, streamIngestionFrontierSpec, nil

(The removed and re-added field lines above differ only in whitespace alignment, not visible in this view: gofmt realigns the struct literal to fit the longer NumIngestionProcessors key.)

pkg/crosscluster/physical/stream_ingestion_frontier_processor.go

Lines changed: 59 additions & 0 deletions
@@ -31,6 +31,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
 	"github.com/cockroachdb/errors"
 	"github.com/cockroachdb/redact"
+	pbtypes "github.com/gogo/protobuf/types"
 )
 
 const (
@@ -83,6 +84,14 @@ type streamIngestionFrontier struct {
 	// replicatedTimeAtLastPositiveLagNodeCheck records the replicated time the
 	// last time the lagging node checker detected a lagging node.
 	replicatedTimeAtLastPositiveLagNodeCheck hlc.Timestamp
+
+	rangeStats replicationutils.AggregateRangeStatsCollector
+
+	// This stores the last aggregate stats we computed. Because stats are only
+	// updated on a checkpoint event, the stats will be stale until the next
+	// checkpoint and should not be used to update job statuses. Only on a fresh
+	// checkpoint should we update job statuses.
+	lastAggStats streampb.StreamEvent_RangeStats
 }
 
 var _ execinfra.Processor = &streamIngestionFrontier{}
@@ -138,6 +147,9 @@ func newStreamIngestionFrontierProcessor(
 			return crosscluster.StreamReplicationConsumerHeartbeatFrequency.Get(&flowCtx.Cfg.Settings.SV)
 		}),
 		persistedReplicatedTime: spec.ReplicatedTimeAtStart,
+		rangeStats: replicationutils.NewAggregateRangeStatsCollector(
+			int(spec.NumIngestionProcessors),
+		),
 	}
 	if err := sf.Init(
 		ctx,
@@ -184,6 +196,10 @@ func (sf *streamIngestionFrontier) Next() (
 		if meta.Err != nil {
 			sf.MoveToDrainingAndLogError(nil /* err */)
 		}
+		if err := sf.maybeCollectRangeStats(sf.Ctx(), meta); err != nil {
+			sf.MoveToDrainingAndLogError(err)
+			break
+		}
 		return nil, meta
 	}
 	if row == nil {
@@ -328,6 +344,9 @@ func (sf *streamIngestionFrontier) maybeUpdateProgress() error {
 	replicatedTime := f.Frontier()
 	sf.lastPartitionUpdate = timeutil.Now()
 	log.Dev.VInfof(ctx, 2, "persisting replicated time of %s", replicatedTime)
+
+	statusByStats := sf.aggregateAndUpdateRangeMetrics()
+
 	if err := registry.UpdateJobWithTxn(ctx, jobID, nil /* txn */, func(
 		txn isql.Txn, md jobs.JobMetadata, ju *jobs.JobUpdater,
 	) error {
@@ -342,6 +361,8 @@ func (sf *streamIngestionFrontier) maybeUpdateProgress() error {
 		if replicatedTime.IsSet() && streamProgress.ReplicationStatus == jobspb.InitialScan {
 			streamProgress.ReplicationStatus = jobspb.Replicating
 			md.Progress.StatusMessage = streamProgress.ReplicationStatus.String()
+		} else if statusByStats != "" {
+			md.Progress.StatusMessage = statusByStats
 		}
 
 		// Keep the recorded replicatedTime empty until some advancement has been made
@@ -408,6 +429,44 @@ func (sf *streamIngestionFrontier) maybeUpdateProgress() error {
 	return nil
 }
 
+func (sf *streamIngestionFrontier) maybeCollectRangeStats(
+	ctx context.Context, meta *execinfrapb.ProducerMetadata,
+) error {
+	if meta.BulkProcessorProgress == nil {
+		log.Dev.VInfof(ctx, 2, "received non-progress producer meta: %v", meta)
+		return nil
+	}
+
+	var stats streampb.StreamEvent_RangeStats
+	if err := pbtypes.UnmarshalAny(&meta.BulkProcessorProgress.ProgressDetails, &stats); err != nil {
+		return errors.Wrap(err, "unable to unmarshal progress details")
+	}
+
+	sf.rangeStats.Add(meta.BulkProcessorProgress.ProcessorID, &stats)
+	return nil
+}
+
+// aggregateAndUpdateRangeMetrics aggregates the range stats collected from each
+// of the ingestion processors and updates the corresponding metrics. If the
+// stats have changed since the last aggregation, it returns a status message
+// to update the job status with. We do this to avoid overwriting job statuses
+// with stale stats as the stats will be the same until the next checkpoint
+// event.
+func (sf *streamIngestionFrontier) aggregateAndUpdateRangeMetrics() string {
+	aggRangeStats, _, statusMsg := sf.rangeStats.RollupStats()
+	if aggRangeStats.RangeCount != 0 {
+		sf.metrics.ScanningRanges.Update(aggRangeStats.ScanningRangeCount)
+		sf.metrics.CatchupRanges.Update(aggRangeStats.LaggingRangeCount)
+	}
+	if sf.lastAggStats == aggRangeStats {
+		// This is the same stats as last time, so we don't need to update the job
+		// status.
+		return ""
+	}
+	sf.lastAggStats = aggRangeStats
+	return statusMsg
+}
+
 // maybePersistFrontierEntries periodically persists the current state of the
 // frontier to the `system.job_info` table. This information is used to hydrate
 // the execution details that can be requested for the C2C ingestion job. Note,
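The `lastAggStats` bookkeeping above exists because per-processor stats only change when a checkpoint event arrives; between checkpoints `RollupStats` keeps returning the same aggregate, and re-writing the job status with it could clobber a newer message (for example the switch to `Replicating`). Stripped of the PCR types, the guard is a plain report-only-on-change pattern; a minimal sketch, with illustrative types rather than the real `AggregateRangeStatsCollector` API:

```go
// Sketch only: report a status string only when the aggregated stats change,
// mirroring the lastAggStats check in aggregateAndUpdateRangeMetrics.
package example

import "fmt"

type rangeStats struct {
	ScanningRangeCount int64
	LaggingRangeCount  int64
}

type statusReporter struct {
	last rangeStats
}

// maybeStatus returns a fresh status message for cur, or "" when cur is
// identical to what was last reported (i.e. stale between checkpoints).
func (r *statusReporter) maybeStatus(cur rangeStats) string {
	if cur == r.last {
		return ""
	}
	r.last = cur
	return fmt.Sprintf("initial scan on %d ranges; catching up on %d ranges",
		cur.ScanningRangeCount, cur.LaggingRangeCount)
}
```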

pkg/crosscluster/physical/stream_ingestion_processor.go

Lines changed: 20 additions & 1 deletion
@@ -291,6 +291,9 @@ type streamIngestionProcessor struct {
 	// backupDataProcessors' trace recording.
 	agg      *tracing.TracingAggregator
 	aggTimer timeutil.Timer
+
+	// Pipelines to report range stats down to frontier processor.
+	rangeStatsCh chan *streampb.StreamEvent_RangeStats
 }
 
 // PartitionEvent augments a normal event with the partition it came from.
@@ -347,6 +350,7 @@ func newStreamIngestionDataProcessor(
 		flushCh:          make(chan flushableBuffer),
 		checkpointCh:     make(chan *jobspb.ResolvedSpans),
 		errCh:            make(chan error, 1),
+		rangeStatsCh:     make(chan *streampb.StreamEvent_RangeStats),
 		rekeyer:          rekeyer,
 		rewriteToDiffKey: spec.TenantRekey.NewID != spec.TenantRekey.OldID,
 		logBufferEvery:   log.Every(30 * time.Second),
@@ -534,6 +538,13 @@ func (sip *streamIngestionProcessor) Next() (rowenc.EncDatumRow, *execinfrapb.ProducerMetadata) {
 		sip.aggTimer.Reset(15 * time.Second)
 		return nil, bulkutil.ConstructTracingAggregatorProducerMeta(sip.Ctx(),
 			sip.FlowCtx.NodeID.SQLInstanceID(), sip.FlowCtx.ID, sip.agg)
+	case stats := <-sip.rangeStatsCh:
+		meta, err := replicationutils.StreamRangeStatsToProgressMeta(sip.FlowCtx, sip.ProcessorID, stats)
+		if err != nil {
+			sip.MoveToDrainingAndLogError(err)
+			return nil, sip.DrainHelper()
+		}
+		return nil, meta
 	case err := <-sip.errCh:
 		sip.MoveToDrainingAndLogError(err)
 		return nil, sip.DrainHelper()
@@ -927,7 +938,8 @@ func (sip *streamIngestionProcessor) bufferCheckpoint(event PartitionEvent) error {
 		}
 	}
 
-	resolvedSpans := event.GetCheckpoint().ResolvedSpans
+	checkpointEvent := event.GetCheckpoint()
+	resolvedSpans := checkpointEvent.ResolvedSpans
 	if resolvedSpans == nil {
 		return errors.New("checkpoint event expected to have resolved spans")
 	}
@@ -957,6 +969,13 @@ func (sip *streamIngestionProcessor) bufferCheckpoint(event PartitionEvent) error {
 		}
 	}
 	sip.metrics.ResolvedEvents.Inc(1)
+
+	if checkpointEvent.RangeStats != nil {
+		select {
+		case <-sip.stopCh:
+		case sip.rangeStatsCh <- checkpointEvent.RangeStats:
+		}
+	}
 	return nil
 }
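The `select` in `bufferCheckpoint` is the standard way to hand the stats from the ingestion goroutine to the processor's `Next` loop (which forwards them to the frontier as producer metadata) without wedging shutdown: if `stopCh` closes while nobody is reading the unbuffered `rangeStatsCh`, the send is simply abandoned. A generic sketch of that idiom, outside the processor types:

```go
// Sketch only: a blocking channel send that gives up when stop is closed,
// the same shape as the rangeStatsCh send in bufferCheckpoint.
package example

// sendOrStop returns true if v was delivered, false if stop fired first.
func sendOrStop[T any](ch chan<- T, v T, stop <-chan struct{}) bool {
	select {
	case <-stop:
		return false // shutting down; drop the value rather than block forever
	case ch <- v:
		return true
	}
}
```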

pkg/sql/execinfrapb/processors_bulk_io.proto

Lines changed: 4 additions & 1 deletion
@@ -282,6 +282,9 @@ message StreamIngestionFrontierSpec {
 
   // PartitionSpecs contains the topology of the physical replication stream.
   optional StreamIngestionPartitionSpecs partition_specs = 9 [(gogoproto.nullable) = false];
+
+  // NumIngestionProcessors is the number of ingestion processors in the job.
+  optional int32 num_ingestion_processors = 10 [(gogoproto.nullable) = false];
 }
 
 enum ElidePrefix {
@@ -568,7 +571,7 @@ message CompactBackupsSpec {
   // if it is only provided the assigned spans, so to be safe we send the entire
   // set of spans.
   repeated roachpb.Span spans = 8 [(gogoproto.nullable) = false];
-  // assigned_spans represents the spans assigned to this particular processor
+  // assigned_spans represents the spans assigned to this particular processor
   // to compact.
   repeated roachpb.Span assigned_spans = 9 [(gogoproto.nullable) = false];
   optional string user_proto = 10 [(gogoproto.nullable) = false, (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/security/username.SQLUsernameProto"];

(The removed and re-added assigned_spans comment line differ only in whitespace, which is not visible in this view.)
