Commit 0b875de

craig[bot], fqazi, dodeca12, and stevendanna committed
157131: catalog/lease: fix error handling for purgeOldVersions r=fqazi a=fqazi

Previously, when purging old versions we would acquire a lease on the latest version; this behavior was introduced when we added logic for acquiring leases on the previous version. The logic to acquire the previous version would clear the error, preventing errors from surfacing correctly and causing issues with dropped / offline descriptors. To address this, ensure the acquisition logic for the previous version is only executed if a cleanup will occur.

Informs: #156176

Release note: None

157792: storeliveness: update `storeliveness/doc.go` with heartbeat smearing info r=miraradeva a=dodeca12

`storeliveness/doc.go` is outdated with respect to the heartbeat smearing changes. In particular, the `Transport` section has no information about heartbeat smearing, and the `Configuration` section doesn't mention the heartbeat smearing cluster setting `kv.store_liveness.heartbeat_smearing.enabled`.

Added a dedicated bullet point in the `Transport` section (`5.2`) that explicitly describes heartbeat smearing as a feature to avoid goroutine spikes. Also updated the `Configuration` section (`5.3`) to note that `kv.store_liveness.heartbeat_smearing.enabled` is available and to describe the behaviour of heartbeat sends when the cluster setting is enabled or disabled.

Informs: #156830

Release note: None

157915: kvfollowerreadsccl: maybe deflake TestBoundedStalenessDataDriven r=stevendanna a=stevendanna

This test determines what events occur by parsing the trace. In some cases, the parsing it used to detect a "local read followed by remote leaseholder read" didn't account for changes in the potential trace messages encountered when leader leases are enabled. Here, I widen the scope of the trace parsing. Locally under stress this eliminated the previously encountered failure:

```
datadriven.go:357: ... SNIP ...
boundedstaleness/single_row:24: still running after 10.000889738s
... SNIP ...
boundedstaleness_test.go:405: condition failed to evaluate within 45s: from boundedstaleness_test.go:436: not yet a match, output:
1 events (1 found):
 * event 1: colbatchscan trace on node_idx 2: local read
datadriven.go:343:
```

Fixes #154710

Release note: None

Co-authored-by: Faizan Qazi <[email protected]>
Co-authored-by: Swapneeth Gorantla <[email protected]>
Co-authored-by: Steven Danna <[email protected]>
4 parents e837db7 + 6abab18 + 3bb81f3 + 9e41a32 commit 0b875de
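The error-handling fix in 157131 comes down to a common Go pitfall: reassigning a shared `err` for a secondary operation silently discards the error the caller needed to observe. Below is a minimal, standalone sketch of the failure mode and the fix, using made-up helper names rather than the actual lease.go code.

```go
package main

import "fmt"

var errDescriptorDropped = fmt.Errorf("descriptor is dropped")

// lookupLatest stands in for the lookup that can report a dropped or offline
// descriptor; acquirePrevious stands in for the optional secondary acquisition.
func lookupLatest() error    { return errDescriptorDropped }
func acquirePrevious() error { return nil }

// buggy reuses err for the secondary call, so the dropped-descriptor error
// from lookupLatest is clobbered and never surfaces to the caller.
func buggy() error {
	err := lookupLatest()
	err = acquirePrevious() // overwrites the earlier error
	return err
}

// fixed keeps the secondary error in its own variable and only attempts the
// acquisition on the path where the original lookup succeeded.
func fixed() error {
	err := lookupLatest()
	if err == nil {
		if acquireErr := acquirePrevious(); acquireErr != nil {
			fmt.Println("unable to acquire lease on previous version:", acquireErr)
		}
	}
	return err
}

func main() {
	fmt.Println("buggy:", buggy()) // <nil>: the real error is lost
	fmt.Println("fixed:", fixed()) // descriptor is dropped
}
```

In the real change, the acquisition is additionally gated so it only runs on the path where a cleanup will actually occur; see the lease.go hunks below.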

File tree

4 files changed (+84 / -23 lines)


pkg/ccl/kvccl/kvfollowerreadsccl/boundedstaleness_test.go

Lines changed: 38 additions & 14 deletions
```diff
@@ -45,6 +45,10 @@ var (
 	)
 )
 
+// fullTraceDebug is a flag that controls whether full traces are printed in the
+// case of some errors.
+const fullTraceDebug = false
+
 func TestBoundedStalenessEnterpriseLicense(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
@@ -148,8 +152,10 @@ type boundedStalenessEvents struct {
 	// A mutex is needed as the event handlers (onStmtTrace) can race.
 	mu struct {
 		syncutil.Mutex
-		stmt   string
-		events []boundedStalenessDataDrivenEvent
+		stmt string
+		// Only populated if fullTraceDebug constant is true.
+		traceForDebugging string
+		events            []boundedStalenessDataDrivenEvent
 	}
 }
 
@@ -167,6 +173,13 @@ func (bse *boundedStalenessEvents) clearEvents() {
 	bse.mu.events = nil
 }
 
+func (bse *boundedStalenessEvents) fullTrace() string {
+	bse.mu.Lock()
+	defer bse.mu.Unlock()
+
+	return bse.mu.traceForDebugging
+}
+
 func (bse *boundedStalenessEvents) setStmt(s string) {
 	bse.mu.Lock()
 	defer bse.mu.Unlock()
@@ -240,17 +253,23 @@ func (bse *boundedStalenessEvents) onStmtTrace(nodeIdx int, rec tracingpb.Record
 	defer bse.mu.Unlock()
 
 	if bse.mu.stmt != "" && bse.mu.stmt == stmt {
+		if fullTraceDebug {
+			bse.mu.traceForDebugging = rec.String()
+		}
 		spans := make(map[tracingpb.SpanID]tracingpb.RecordedSpan)
 		for _, sp := range rec {
 			spans[sp.SpanID] = sp
+			notLeaseHolderError := tracing.LogsContainMsg(sp, "[NotLeaseHolderError] lease held by different store;")
+			notLeaseHolderError = notLeaseHolderError || tracing.LogsContainMsg(sp, "[NotLeaseHolderError] leader lease is not held locally, cannot determine validity;")
 			if sp.Operation == "dist sender send" && spans[sp.ParentSpanID].Operation == "colbatchscan" {
 				bse.mu.events = append(bse.mu.events, &boundedStalenessTraceEvent{
-					operation:    spans[sp.ParentSpanID].Operation,
-					nodeIdx:      nodeIdx,
-					localRead:    tracing.LogsContainMsg(sp, kvbase.RoutingRequestLocallyMsg),
-					followerRead: kvtestutils.OnlyFollowerReads(rec),
-					remoteLeaseholderRead: tracing.LogsContainMsg(sp, "[NotLeaseHolderError] lease held by different store;") &&
-						tracing.LogsContainMsg(sp, "trying next peer"),
+					operation:             spans[sp.ParentSpanID].Operation,
+					nodeIdx:               nodeIdx,
+					localRead:             tracing.LogsContainMsg(sp, kvbase.RoutingRequestLocallyMsg),
+					followerRead:          kvtestutils.OnlyFollowerReads(rec),
+					remoteLeaseholderRead: notLeaseHolderError && tracing.LogsContainMsg(sp, "trying next peer"),
 				})
 			}
 		}
@@ -261,10 +280,7 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 
-	const msg = "1μs staleness reads may actually succeed due to the slow environment"
-	skip.UnderStress(t, msg)
-	skip.UnderRace(t, msg)
-	skip.UnderDeadlock(t, msg)
+	skip.UnderDuress(t, "1μs staleness reads may actually succeed due to the slow environment")
 	defer ccl.TestingEnableEnterprise()()
 
 	ctx := context.Background()
@@ -426,14 +442,22 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
 			}
 		}()
 		if !followerRead {
+			var trace string
+			if fullTraceDebug {
+				trace = fmt.Sprintf("\nfull_trace:\n%s", bse.fullTrace())
+			}
 			bse.clearEvents()
-			return errors.AssertionFailedf("not follower reads found:\n%s", bse.String())
+			return errors.AssertionFailedf("not follower reads found:\n%s%s", bse.String(), trace)
 		}
 	}
 	if waitUntilMatch {
 		if d.Expected != ret {
+			var trace string
+			if fullTraceDebug {
+				trace = fmt.Sprintf("\nfull_trace:\n%s", bse.fullTrace())
+			}
 			bse.clearEvents()
-			return errors.AssertionFailedf("not yet a match, output:\n%s\n", ret)
+			return errors.AssertionFailedf("not yet a match, output:\n%s%s", ret, trace)
 		}
 	}
 	return nil
```
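The deflake in 157915 amounts to accepting either of two `NotLeaseHolderError` phrasings when classifying a span. Below is a hypothetical, self-contained helper showing that "match any of several messages" shape; the real test uses `tracing.LogsContainMsg` on recorded spans, and the helper name here is invented.

```go
package main

import (
	"fmt"
	"strings"
)

// logsContainAnyMsg reports whether any log line contains at least one of the
// candidate substrings, mirroring how the test now accepts either
// NotLeaseHolderError phrasing instead of exactly one.
func logsContainAnyMsg(logs []string, msgs ...string) bool {
	for _, line := range logs {
		for _, msg := range msgs {
			if strings.Contains(line, msg) {
				return true
			}
		}
	}
	return false
}

func main() {
	logs := []string{
		"[NotLeaseHolderError] leader lease is not held locally, cannot determine validity;",
		"trying next peer",
	}
	// Matching either phrasing covers clusters running with leader leases enabled.
	fmt.Println(logsContainAnyMsg(logs,
		"[NotLeaseHolderError] lease held by different store;",
		"[NotLeaseHolderError] leader lease is not held locally, cannot determine validity;",
	)) // true
}
```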

pkg/kv/kvserver/storeliveness/doc.go

Lines changed: 16 additions & 0 deletions
```diff
@@ -205,6 +205,10 @@
 // process is also aided by synchronizing the sending of heartbeats across all
 // stores on a given node.
 //
+// - Heartbeat smearing (enabled by default). The sending of heartbeats across
+// all nodes is synchronized and paced to avoid goroutine spikes. See
+// Configuration section for more details.
+//
 // 5.3. Configuration
 //
 // Store liveness can be enabled/disabled using the `kv.store_liveness.enabled`
@@ -213,6 +217,18 @@
 // well as calls to `SupportFor` and `SupportFrom`. This is required for
 // correctness.
 //
+// The behaviour of heartbeat sends is governed by the
+// `kv.store_liveness.heartbeat_smearing.enabled` cluster setting (enabled by
+// default). When enabled, heartbeat sends are distributed over a certain
+// duration (configured via the
+// `kv.store_liveness.heartbeat_smearing.refresh` cluster setting), with
+// messages being sent at a certain interval (configured via the
+// `kv.store_liveness.heartbeat_smearing.smear` cluster setting) to avoid
+// spiking the number of runnable goroutines (the per-node send queue
+// processors, and the per-node gRPC connections). When disabled, heartbeat
+// sends are sent immediately upon enqueueing, bypassing the smearing mechanism.
+// Note: the smearing applies to both heartbeat responses and requests.
+//
 // Additionally, `config.go` defines tunable configuration parameters for the
 // various timeouts and intervals that store liveness uses. Other intervals
 // (like support duration and heartbeat interval) are defined in
```
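As a rough picture of the smearing behaviour the new doc text describes, the sender paces queued heartbeats over the refresh duration instead of firing them all at once. Below is a simplified, standalone sketch with invented names and hard-coded durations, not the storeliveness implementation itself.

```go
package main

import (
	"fmt"
	"time"
)

// smearSends paces heartbeat sends to the given targets over the refresh
// duration, sending one small batch per smear interval so the number of
// runnable goroutines (send-queue processors, gRPC streams) doesn't spike.
// When smearing is disabled, everything is sent immediately upon enqueueing.
func smearSends(targets []string, refresh, smear time.Duration, enabled bool) {
	if !enabled || len(targets) == 0 {
		for _, t := range targets {
			fmt.Println("send heartbeat to", t)
		}
		return
	}
	batches := int(refresh / smear)
	if batches < 1 {
		batches = 1
	}
	perBatch := (len(targets) + batches - 1) / batches
	ticker := time.NewTicker(smear)
	defer ticker.Stop()
	for i := 0; i < len(targets); i += perBatch {
		end := i + perBatch
		if end > len(targets) {
			end = len(targets)
		}
		for _, t := range targets[i:end] {
			fmt.Println("send heartbeat to", t)
		}
		if end < len(targets) {
			<-ticker.C // wait for the next smear tick before the next batch
		}
	}
}

func main() {
	stores := []string{"s1", "s2", "s3", "s4", "s5", "s6"}
	smearSends(stores, 100*time.Millisecond, 25*time.Millisecond, true)
}
```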

pkg/sql/catalog/lease/descriptor_state.go

Lines changed: 1 addition & 1 deletion
```diff
@@ -138,7 +138,7 @@ func (t *descriptorState) findForTimestampImpl(
 
 	// If we have the initial version of the descriptor, and it satisfies the read
 	// timestamp, then the object was just created. We can confirm it satisfies
-	// the request, by executing findForTimestampImpl with the readTimestamp instead.
+	// the request by executing findForTimestampImpl with the readTimestamp instead.
 	if oldest := t.mu.active.findOldest(); hasDifferentReadTimeStamp &&
 		oldest != nil &&
 		oldest.GetVersion() == 1 &&
```

pkg/sql/catalog/lease/lease.go

Lines changed: 29 additions & 8 deletions
```diff
@@ -1274,7 +1274,7 @@ func (m *Manager) purgeOldVersions(
 		retry.Options{
 			MaxDuration: time.Second * 30}); r.Next(); {
 		// Acquire a refcount on the descriptor on the latest version to maintain an
-		// active lease, so that it doesn't get released when removeInactives()
+		// active lease so that it doesn't get released when removeInactives()
 		// is called below. Release this lease after calling removeInactives().
 		desc, _, err = t.findForTimestamp(ctx, TimestampToReadTimestamp(m.storage.clock.Now()))
 		if err == nil || !errors.Is(err, errRenewLease) {
@@ -1290,7 +1290,7 @@ func (m *Manager) purgeOldVersions(
 		// Assert this should never happen due to a fixed expiration, since the range
 		// feed is responsible for purging old versions and acquiring new versions.
 		if newest.hasFixedExpiration() {
-			return errors.AssertionFailedf("the latest version of the descriptor has" +
+			return errors.AssertionFailedf("the latest version of the descriptor has " +
 				"a fixed expiration, this should never happen")
 		}
 		// Otherwise, we ran into some type of transient issue, where the sqllivness
@@ -1307,9 +1307,10 @@ func (m *Manager) purgeOldVersions(
 		err = nil
 	}
 
-	// Optionally, acquire the refcount on the previous version.
+	// Optionally, acquire the refcount on the previous version for the locked
+	// leasing mode.
 	acquireLeaseOnPrevious := func() error {
-		if dropped || !LockedLeaseTimestamp.Get(&m.storage.settings.SV) {
+		if !LockedLeaseTimestamp.Get(&m.storage.settings.SV) {
 			return nil
 		}
 		var handles []*closeTimeStampHandle
@@ -1376,11 +1377,12 @@ func (m *Manager) purgeOldVersions(
 		return gatheredErrors
 	}
 
-	if err = acquireLeaseOnPrevious(); err != nil {
-		log.Dev.Errorf(ctx, "unable to acquire lease on previous version of descriptor: %s", err)
-	}
-
 	if isInactive := catalog.HasInactiveDescriptorError(err); err == nil || isInactive {
+		// If previous versions are released, then acquire a lease on the previous
+		// version for the locked leasing mode.
+		if acquirePreviousErr := acquireLeaseOnPrevious(); acquirePreviousErr != nil {
+			log.Dev.Errorf(ctx, "unable to acquire lease on previous version of descriptor: %s", acquirePreviousErr)
+		}
 		removeInactives(isInactive)
 		if desc != nil {
 			t.release(ctx, desc)
@@ -2325,12 +2327,25 @@ func (m *Manager) StartRefreshLeasesTask(ctx context.Context, s *stop.Stopper, d
 	defer m.initComplete.Swap(true)
 	m.watchForUpdates(ctx)
 	_ = s.RunAsyncTask(ctx, "refresh-leases", func(ctx context.Context) {
+		defer func() {
+			if err := recover(); err != nil {
+				log.Dev.Warningf(ctx, "panic in refresh-leases: %v", err)
+				panic(err)
+			}
+		}()
+
 		for {
 			select {
 			case id := <-m.descDelCh:
 				// Descriptor is marked as deleted, so mark it for deletion or
 				// remove it if it's no longer in use.
 				_ = s.RunAsyncTask(ctx, "purgeOldVersionsOrAcquireInitialVersion deleted descriptor", func(ctx context.Context) {
+					defer func() {
+						if err := recover(); err != nil {
+							log.Dev.Warningf(ctx, "panic in purgeOldVersionsOrAcquireInitialVersion deleted descriptor: %v", err)
+							panic(err)
+						}
+					}()
 					// Once the descriptor is purged notify that some change has occurred.
 					defer m.leaseGeneration.Add(1)
 					state := m.findNewest(id)
@@ -2412,6 +2427,12 @@ func (m *Manager) StartRefreshLeasesTask(ctx context.Context, s *stop.Stopper, d
 			// of increased latency right as the descriptor has been committed.
 			if now := db.Clock().Now(); now.Less(desc.GetModificationTime()) {
 				_ = s.RunAsyncTask(ctx, "wait to purgeOldVersionsOrAcquireInitialVersion", func(ctx context.Context) {
+					defer func() {
+						if err := recover(); err != nil {
+							log.Dev.Warningf(ctx, "panic in wait to purgeOldVersionsOrAcquireInitialVersion: %v", err)
+							panic(err)
+						}
+					}()
 					toWait := time.Duration(desc.GetModificationTime().WallTime - now.WallTime)
 					select {
 					case <-time.After(toWait):
```
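The `StartRefreshLeasesTask` hunks above also wrap each async task body in a recover that logs before re-panicking, so a crash can be attributed to the task that raised it. Below is a generic, standalone sketch of that pattern; the real code uses `log.Dev.Warningf` inside the stopper's async tasks.

```go
package main

import "log"

// logPanics runs fn, and if it panics, logs the panic with the task name
// before re-raising it, so the crash can be attributed to a named task rather
// than an anonymous goroutine.
func logPanics(task string, fn func()) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("panic in %s: %v", task, r)
			panic(r) // preserve the original crash behaviour
		}
	}()
	fn()
}

func main() {
	defer func() {
		// Swallow the re-raised panic here only so the example exits cleanly.
		_ = recover()
	}()
	logPanics("refresh-leases", func() {
		panic("boom")
	})
}
```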
