Skip to content

Commit 6034c51

Browse files
committed
comments, add 'TryLater' variant in the no record for Nexus case
1 parent 2dba0e1 commit 6034c51

File tree

1 file changed

+35
-20
lines changed

1 file changed

+35
-20
lines changed

nexus/db-queries/src/db/datastore/db_metadata.rs

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,11 @@ fn skippable_version(
112112
/// Describes the state of the database access with respect this Nexus
113113
#[derive(Debug, Copy, Clone, PartialEq)]
114114
enum NexusAccess {
115-
/// Nexus does not yet have access to the database.
115+
/// Nexus does not yet have access to the database, but can take over when
116+
/// the current-generation Nexus instances quiesce.
116117
DoesNotHaveAccessYet { nexus_id: OmicronZoneUuid },
117118

118-
/// Nexus has been explicitly locked out of the database.
119+
/// Nexus has been permanently, explicitly locked out of the database.
119120
LockedOut,
120121

121122
/// Nexus should have normal access to the database
@@ -128,6 +129,15 @@ enum NexusAccess {
128129
/// We may or may not have a record of this Nexus, but it should have
129130
/// access.
130131
HasImplicitAccess,
132+
133+
/// Nexus does not yet have access to the database, but it might get
134+
/// access later. Unlike [Self::DoesNotHaveAccessYet], this variant
135+
/// is triggered because we don't have an explicit records.
136+
///
137+
/// Although some Nexuses have records, this one doesn't. This can
138+
/// mean that a Nexus zone has just been deployed, and booted before
139+
/// its record has been populated.
140+
NoRecordNoAccess,
131141
}
132142

133143
/// Describes the state of the schema with respect this Nexus
@@ -167,10 +177,17 @@ pub enum DatastoreSetupAction {
167177
/// are either "not_yet" or "quiesced".
168178
NeedsHandoff { nexus_id: OmicronZoneUuid },
169179

180+
/// Wait, then try to set up the datastore later.
181+
///
182+
/// This can be triggered by observing incomplete data, such as missing
183+
/// records in the "db_metadata_nexus" table, which may be populated by
184+
/// waiting for an existing system to finish execution.
185+
TryLater,
186+
170187
/// Start a schema update
171188
Update,
172189

173-
/// Refuse to use the database
190+
/// Permanently refuse to use the database
174191
Refuse,
175192
}
176193

@@ -208,6 +225,12 @@ impl DatastoreSetupAction {
208225
// The schema updated beyond what we want, do not use it.
209226
(_, NewerThanDesired) => Self::Refuse,
210227

228+
// If we aren't sure if we have access yet, try again later.
229+
(
230+
NoRecordNoAccess,
231+
UpToDate | OlderThanDesired | OlderThanDesiredSkipAccessCheck,
232+
) => Self::TryLater,
233+
211234
// If we don't have access yet, but could do something once handoff
212235
// occurs, then handoff is needed
213236
(
@@ -239,13 +262,11 @@ impl DataStore {
239262
// Check if any "db_metadata_nexus" rows exist.
240263
// If they don't exist, treat the database as having access.
241264
//
242-
// This handles the case for:
243-
// - Fresh deployments where RSS hasn't populated the table yet (we need
244-
// access to finish "rack_initialization").
245-
// - Systems that haven't been migrated to include nexus access control
246-
// (we need access to the database to backfill these records).
265+
// This handles the case for fresh deployments where RSS hasn't
266+
// populated the table yet (we need access to finish
267+
// "rack_initialization").
247268
//
248-
// After initialization/migration, this conditional should never trigger
269+
// After initialization, this conditional should never trigger
249270
// again.
250271
let any_records_exist = self.database_nexus_access_any_exist().await?;
251272
if !any_records_exist {
@@ -259,14 +280,14 @@ impl DataStore {
259280
return Ok(NexusAccess::HasImplicitAccess);
260281
}
261282

262-
// Records exist, so enforce the access control check
283+
// Records exist, so enforce the identity check
263284
let Some(state) =
264285
self.database_nexus_access(nexus_id).await?.map(|s| s.state())
265286
else {
266287
let msg = "Nexus does not have access to the database (no \
267288
db_metadata_nexus record)";
268289
warn!(&self.log, "{msg}"; "nexus_id" => ?nexus_id);
269-
return Ok(NexusAccess::DoesNotHaveAccessYet { nexus_id });
290+
return Ok(NexusAccess::NoRecordNoAccess);
270291
};
271292

272293
let status = match state {
@@ -1304,10 +1325,7 @@ mod test {
13041325
)
13051326
.await
13061327
.expect("Failed to check schema and access");
1307-
assert_eq!(
1308-
action.action(),
1309-
&DatastoreSetupAction::NeedsHandoff { nexus_id }
1310-
);
1328+
assert_eq!(action.action(), &DatastoreSetupAction::TryLater);
13111329

13121330
db.terminate().await;
13131331
logctx.cleanup_successful();
@@ -1344,7 +1362,7 @@ mod test {
13441362
assert_eq!(action.action(), &DatastoreSetupAction::Ready);
13451363

13461364
// Explicit CheckAndTakeover with a Nexus ID that doesn't exist should
1347-
// not get access
1365+
// not get access, and should be told to retry later.
13481366
let nexus_id = OmicronZoneUuid::new_v4();
13491367
let action = datastore
13501368
.check_schema_and_access(
@@ -1353,10 +1371,7 @@ mod test {
13531371
)
13541372
.await
13551373
.expect("Failed to check schema and access");
1356-
assert_eq!(
1357-
action.action(),
1358-
&DatastoreSetupAction::NeedsHandoff { nexus_id },
1359-
);
1374+
assert_eq!(action.action(), &DatastoreSetupAction::TryLater);
13601375

13611376
db.terminate().await;
13621377
logctx.cleanup_successful();

0 commit comments

Comments
 (0)