@@ -1119,13 +1119,8 @@ func releaseLease(ctx context.Context, lease *storedLease, m *Manager) (released
1119
1119
// which will cause existing in-use leases to be eagerly released once
1120
1120
// they're not in use any more.
1121
1121
// If t has no active leases, nothing is done.
1122
- func purgeOldVersions (
1123
- ctx context.Context ,
1124
- db * kv.DB ,
1125
- id descpb.ID ,
1126
- dropped bool ,
1127
- minVersion descpb.DescriptorVersion ,
1128
- m * Manager ,
1122
+ func (m * Manager ) purgeOldVersions (
1123
+ ctx context.Context , db * kv.DB , id descpb.ID , dropped bool , minVersion descpb.DescriptorVersion ,
1129
1124
) error {
1130
1125
t := m .findDescriptorState (id , false /*create*/ )
1131
1126
if t == nil {
@@ -1193,10 +1188,40 @@ func purgeOldVersions(
1193
1188
return err
1194
1189
}
1195
1190
1196
- // Acquire a refcount on the descriptor on the latest version to maintain an
1197
- // active lease, so that it doesn't get released when removeInactives()
1198
- // is called below. Release this lease after calling removeInactives().
1199
- desc , _ , err := t .findForTimestamp (ctx , m .storage .clock .Now ())
1191
+ var err error
1192
+ var desc * descriptorVersionState
1193
+ for r := retry .StartWithCtx (ctx ,
1194
+ retry.Options {
1195
+ MaxDuration : time .Second * 30 }); r .Next (); {
1196
+ // Acquire a refcount on the descriptor on the latest version to maintain an
1197
+ // active lease, so that it doesn't get released when removeInactives()
1198
+ // is called below. Release this lease after calling removeInactives().
1199
+ desc , _ , err = t .findForTimestamp (ctx , m .storage .clock .Now ())
1200
+ if err == nil || ! errors .Is (err , errRenewLease ) {
1201
+ break
1202
+ }
1203
+ // We encountered an error telling us to renew the lease.
1204
+ newest := m .findNewest (id )
1205
+ // Assert this should never happen due to a fixed expiration, since the range
1206
+ // feed is responsible for purging old versions and acquiring new versions.
1207
+ if newest .hasFixedExpiration () {
1208
+ return errors .AssertionFailedf ("the latest version of the descriptor has" +
1209
+ "a fixed expiration, this should never happen" )
1210
+ }
1211
+ // Otherwise, we ran into some type of transient issue, where the sqlliveness
1212
+ // session was expired. This could happen if the sqlliveness range is slow
1213
+ // for some reason.
1214
+ log .Infof (ctx , "unable to acquire lease on latest descriptor " +
1215
+ "version of ID: %d, retrying..." , id )
1216
+ }
1217
+ // As a last resort, we will release all versions of the descriptor. This is
1218
+ // suboptimal, but the safest option.
1219
+ if errors .Is (err , errRenewLease ) {
1220
+ log .Warningf (ctx , "unable to acquire lease on latest descriptor " +
1221
+ "version of ID: %d, cleaning up all versions from storage." , id )
1222
+ err = nil
1223
+ }
1224
+
1200
1225
if isInactive := catalog .HasInactiveDescriptorError (err ); err == nil || isInactive {
1201
1226
removeInactives (isInactive )
1202
1227
if desc != nil {
@@ -1866,7 +1891,7 @@ func (m *Manager) StartRefreshLeasesTask(ctx context.Context, s *stop.Stopper, d
1866
1891
defer m .leaseGeneration .Add (1 )
1867
1892
state := m .findNewest (id )
1868
1893
if state != nil {
1869
- if err := purgeOldVersions (ctx , db , id , true /* dropped */ , state .GetVersion (), m ); err != nil {
1894
+ if err := m . purgeOldVersions (ctx , db , id , true /* dropped */ , state .GetVersion ()); err != nil {
1870
1895
log .Warningf (ctx , "error purging leases for deleted descriptor %d" ,
1871
1896
id )
1872
1897
}
@@ -1926,7 +1951,7 @@ func (m *Manager) StartRefreshLeasesTask(ctx context.Context, s *stop.Stopper, d
1926
1951
// descriptor versions, which could have been acquired concurrently.
1927
1952
// For example the range feed sees version 2 and a query concurrently
1928
1953
// acquires version 1.
1929
- if err := purgeOldVersions (ctx , db , desc .GetID (), dropped , desc .GetVersion (), m ); err != nil {
1954
+ if err := m . purgeOldVersions (ctx , db , desc .GetID (), dropped , desc .GetVersion ()); err != nil {
1930
1955
log .Warningf (ctx , "error purging leases for descriptor %d(%s): %s" ,
1931
1956
desc .GetID (), desc .GetName (), err )
1932
1957
}
@@ -2347,8 +2372,8 @@ func (m *Manager) refreshSomeLeases(ctx context.Context, refreshAndPurgeAllDescr
2347
2372
2348
2373
if errors .Is (err , catalog .ErrDescriptorNotFound ) || errors .Is (err , catalog .ErrDescriptorDropped ) {
2349
2374
// Lease renewal failed due to removed descriptor; Remove this descriptor from cache.
2350
- if err := purgeOldVersions (
2351
- ctx , m .storage .db .KV (), id , true /* dropped */ , 0 /* minVersion */ , m ,
2375
+ if err := m . purgeOldVersions (
2376
+ ctx , m .storage .db .KV (), id , true /* dropped */ , 0 , /* minVersion */
2352
2377
); err != nil {
2353
2378
log .Warningf (ctx , "error purging leases for descriptor %d: %v" ,
2354
2379
id , err )
@@ -2363,7 +2388,7 @@ func (m *Manager) refreshSomeLeases(ctx context.Context, refreshAndPurgeAllDescr
2363
2388
if refreshAndPurgeAllDescriptors {
2364
2389
// If we are refreshing all descriptors, then we want to purge older versions as
2365
2390
// we are doing this operation.
2366
- err := purgeOldVersions (ctx , m .storage .db .KV (), id , false /* dropped */ , 0 /* minVersion */ , m )
2391
+ err := m . purgeOldVersions (ctx , m .storage .db .KV (), id , false /* dropped */ , 0 /* minVersion */ )
2367
2392
if err != nil {
2368
2393
log .Warningf (ctx , "error purging leases for descriptor %d: %v" ,
2369
2394
id , err )
0 commit comments