Skip to content

Commit 0bbe99f

Browse files
authored
Merge pull request #151531 from rafiss/blathers/backport-release-24.3-151431
release-24.3: sql/lease: add progress logging to orphaned lease cleanup
2 parents 790147a + e18c6f6 commit 0bbe99f

File tree

1 file changed

+50
-8
lines changed

1 file changed

+50
-8
lines changed

pkg/sql/catalog/lease/lease.go

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2079,11 +2079,17 @@ func (m *Manager) deleteOrphanedLeasesFromStaleSession(
20792079
region = locality.Tiers[0].Value
20802080
}
20812081

2082+
log.Infof(ctx, "starting orphaned lease cleanup from stale sessions in region %s", region)
2083+
20822084
var distinctSessions []tree.Datums
20832085
aostTime := hlc.Timestamp{WallTime: initialTimestamp}
20842086
distinctSessionQuery := `SELECT DISTINCT(session_id) FROM system.lease AS OF SYSTEM TIME %s WHERE crdb_region=$1 AND NOT crdb_internal.sql_liveness_is_alive(session_id, true) LIMIT $2`
20852087
syntheticDescriptors := catalog.Descriptors{systemschema.LeaseTable()}
20862088
const limit = 50
2089+
2090+
totalSessionsProcessed := 0
2091+
totalLeasesDeleted := 0
2092+
20872093
for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
20882094
// Get a list of distinct, dead session IDs that exist in the system.lease
20892095
// table.
@@ -2105,20 +2111,35 @@ func (m *Manager) deleteOrphanedLeasesFromStaleSession(
21052111
}
21062112
}
21072113

2114+
if len(distinctSessions) > 0 {
2115+
log.Infof(ctx, "found %d dead sessions from which to clean up orphaned leases", len(distinctSessions))
2116+
}
2117+
21082118
// Delete rows in our lease table with orphaned sessions.
21092119
for _, sessionRow := range distinctSessions {
21102120
sessionID := sqlliveness.SessionID(tree.MustBeDBytes(sessionRow[0]))
2111-
if err = deleteLeaseWithSessionIDWithBatch(ctx, ex, retryOpts, syntheticDescriptors, sessionID, region, limit); err != nil {
2112-
log.Warningf(ctx, "unable to delete orphaned leases: %v", err)
2121+
sessionLeasesDeleted, err := deleteLeaseWithSessionIDWithBatch(ctx, ex, retryOpts, syntheticDescriptors, sessionID, region, limit)
2122+
if err != nil {
2123+
log.Warningf(ctx, "unable to delete orphaned leases for session %s: %v", sessionID, err)
21132124
break
21142125
}
2126+
totalLeasesDeleted += sessionLeasesDeleted
2127+
log.Infof(ctx, "deleted %d orphaned leases for dead session %s", sessionLeasesDeleted, sessionID)
21152128
}
21162129

2130+
totalSessionsProcessed += len(distinctSessions)
2131+
21172132
// No more dead sessions to clean up.
21182133
if len(distinctSessions) < limit {
2134+
log.Infof(ctx, "completed orphaned lease cleanup for region %s: %d sessions processed, %d leases deleted",
2135+
region, totalSessionsProcessed, totalLeasesDeleted)
21192136
return
21202137
}
21212138

2139+
// Log progress for large cleanup operations.
2140+
log.Infof(ctx, "orphaned lease cleanup progress for region %s: %d sessions processed, %d leases deleted so far",
2141+
region, totalSessionsProcessed, totalLeasesDeleted)
2142+
21222143
// Advance our aostTime timstamp so that our query to detect leases with
21232144
// dead sessions is aware of new deletes and does not keep selecting the
21242145
// same leases.
@@ -2127,7 +2148,7 @@ func (m *Manager) deleteOrphanedLeasesFromStaleSession(
21272148
}
21282149

21292150
// deleteLeaseWithSessionIDWithBatch uses batchSize to batch deletes for leases
2130-
// with the given sessionID in system.lease.
2151+
// with the given sessionID in system.lease. Returns the total number of leases deleted.
21312152
func deleteLeaseWithSessionIDWithBatch(
21322153
ctx context.Context,
21332154
ex isql.Executor,
@@ -2136,7 +2157,8 @@ func deleteLeaseWithSessionIDWithBatch(
21362157
sessionID sqlliveness.SessionID,
21372158
region string,
21382159
batchSize int,
2139-
) error {
2160+
) (int, error) {
2161+
totalDeleted := 0
21402162
for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
21412163
var rowsDeleted int
21422164
deleteOrphanedQuery := `DELETE FROM system.lease WHERE session_id=$1 AND crdb_region=$2 LIMIT $3`
@@ -2151,16 +2173,17 @@ func deleteLeaseWithSessionIDWithBatch(
21512173
return err
21522174
}); err != nil {
21532175
if !startup.IsRetryableReplicaError(err) {
2154-
return err
2176+
return totalDeleted, err
21552177
}
21562178
}
2179+
totalDeleted += rowsDeleted
21572180

21582181
// No more rows to clean up.
21592182
if rowsDeleted < batchSize {
21602183
break
21612184
}
21622185
}
2163-
return nil
2186+
return totalDeleted, nil
21642187
}
21652188

21662189
func (m *Manager) deleteOrphanedLeasesWithSameInstanceID(
@@ -2196,8 +2219,15 @@ func (m *Manager) deleteOrphanedLeasesWithSameInstanceID(
21962219
log.Warningf(ctx, "unable to read orphaned leases: %v", err)
21972220
return
21982221
}
2222+
2223+
totalLeases := len(rows)
2224+
log.Infof(ctx, "found %d orphaned leases to clean up for instance ID %d", totalLeases, instanceID)
2225+
if totalLeases == 0 {
2226+
return
2227+
}
2228+
21992229
var wg sync.WaitGroup
2200-
defer wg.Wait()
2230+
var releasedCount atomic.Int64
22012231
for i := range rows {
22022232
// Early exit?
22032233
row := rows[i]
@@ -2222,11 +2252,23 @@ func (m *Manager) deleteOrphanedLeasesWithSameInstanceID(
22222252
WaitForSem: true,
22232253
},
22242254
func(ctx context.Context) {
2255+
defer wg.Done()
22252256
m.storage.release(ctx, m.stopper, lease)
2257+
released := releasedCount.Add(1)
22262258
log.Infof(ctx, "released orphaned lease: %+v", lease)
2227-
wg.Done()
2259+
2260+
// Log progress every 100 leases for large cleanup operations.
2261+
if released%100 == 0 || released == int64(totalLeases) {
2262+
log.Infof(ctx, "orphaned lease cleanup progress for instance ID %d: %d/%d leases released",
2263+
instanceID, released, totalLeases)
2264+
}
22282265
}); err != nil {
2266+
log.Warningf(ctx, "could not start async task for releasing orphaned lease %+v: %v", lease, err)
22292267
wg.Done()
22302268
}
22312269
}
2270+
2271+
wg.Wait()
2272+
log.Infof(ctx, "completed orphaned lease cleanup for instance ID %d: %d/%d leases released",
2273+
instanceID, releasedCount.Load(), totalLeases)
22322274
}

0 commit comments

Comments
 (0)