@@ -112,10 +112,11 @@ fn skippable_version(
112
112
/// Describes the state of the database access with respect to this Nexus
113
113
#[ derive( Debug , Copy , Clone , PartialEq ) ]
114
114
enum NexusAccess {
115
- /// Nexus does not yet have access to the database.
115
+ /// Nexus does not yet have access to the database, but can take over when
116
+ /// the current-generation Nexus instances quiesce.
116
117
DoesNotHaveAccessYet { nexus_id : OmicronZoneUuid } ,
117
118
118
- /// Nexus has been explicitly locked out of the database.
119
+ /// Nexus has been permanently, explicitly locked out of the database.
119
120
LockedOut ,
120
121
121
122
/// Nexus should have normal access to the database
@@ -128,6 +129,15 @@ enum NexusAccess {
128
129
/// We may or may not have a record of this Nexus, but it should have
129
130
/// access.
130
131
HasImplicitAccess ,
132
+
133
+ /// Nexus does not yet have access to the database, but it might get
134
+ /// access later. Unlike [Self::DoesNotHaveAccessYet], this variant
135
+ /// is triggered because we don't have an explicit record.
136
+ ///
137
+ /// Although some Nexuses have records, this one doesn't. This can
138
+ /// mean that a Nexus zone has just been deployed, and booted before
139
+ /// its record has been populated.
140
+ NoRecordNoAccess ,
131
141
}
132
142
133
143
/// Describes the state of the schema with respect to this Nexus
@@ -167,10 +177,17 @@ pub enum DatastoreSetupAction {
167
177
/// are either "not_yet" or "quiesced".
168
178
NeedsHandoff { nexus_id : OmicronZoneUuid } ,
169
179
180
+ /// Wait, then try to set up the datastore later.
181
+ ///
182
+ /// This can be triggered by observing incomplete data, such as missing
183
+ /// records in the "db_metadata_nexus" table, which may be populated by
184
+ /// waiting for an existing system to finish execution.
185
+ TryLater ,
186
+
170
187
/// Start a schema update
171
188
Update ,
172
189
173
- /// Refuse to use the database
190
+ /// Permanently refuse to use the database
174
191
Refuse ,
175
192
}
176
193
@@ -208,6 +225,12 @@ impl DatastoreSetupAction {
208
225
// The schema updated beyond what we want, do not use it.
209
226
( _, NewerThanDesired ) => Self :: Refuse ,
210
227
228
+ // If we aren't sure if we have access yet, try again later.
229
+ (
230
+ NoRecordNoAccess ,
231
+ UpToDate | OlderThanDesired | OlderThanDesiredSkipAccessCheck ,
232
+ ) => Self :: TryLater ,
233
+
211
234
// If we don't have access yet, but could do something once handoff
212
235
// occurs, then handoff is needed
213
236
(
@@ -239,13 +262,11 @@ impl DataStore {
239
262
// Check if any "db_metadata_nexus" rows exist.
240
263
// If they don't exist, treat the database as having access.
241
264
//
242
- // This handles the case for:
243
- // - Fresh deployments where RSS hasn't populated the table yet (we need
244
- // access to finish "rack_initialization").
245
- // - Systems that haven't been migrated to include nexus access control
246
- // (we need access to the database to backfill these records).
265
+ // This handles the case for fresh deployments where RSS hasn't
266
+ // populated the table yet (we need access to finish
267
+ // "rack_initialization").
247
268
//
248
- // After initialization/migration , this conditional should never trigger
269
+ // After initialization, this conditional should never trigger
249
270
// again.
250
271
let any_records_exist = self . database_nexus_access_any_exist ( ) . await ?;
251
272
if !any_records_exist {
@@ -259,14 +280,14 @@ impl DataStore {
259
280
return Ok ( NexusAccess :: HasImplicitAccess ) ;
260
281
}
261
282
262
- // Records exist, so enforce the access control check
283
+ // Records exist, so enforce the identity check
263
284
let Some ( state) =
264
285
self . database_nexus_access ( nexus_id) . await ?. map ( |s| s. state ( ) )
265
286
else {
266
287
let msg = "Nexus does not have access to the database (no \
267
288
db_metadata_nexus record)";
268
289
warn ! ( & self . log, "{msg}" ; "nexus_id" => ?nexus_id) ;
269
- return Ok ( NexusAccess :: DoesNotHaveAccessYet { nexus_id } ) ;
290
+ return Ok ( NexusAccess :: NoRecordNoAccess ) ;
270
291
} ;
271
292
272
293
let status = match state {
@@ -1304,10 +1325,7 @@ mod test {
1304
1325
)
1305
1326
. await
1306
1327
. expect ( "Failed to check schema and access" ) ;
1307
- assert_eq ! (
1308
- action. action( ) ,
1309
- & DatastoreSetupAction :: NeedsHandoff { nexus_id }
1310
- ) ;
1328
+ assert_eq ! ( action. action( ) , & DatastoreSetupAction :: TryLater ) ;
1311
1329
1312
1330
db. terminate ( ) . await ;
1313
1331
logctx. cleanup_successful ( ) ;
@@ -1344,7 +1362,7 @@ mod test {
1344
1362
assert_eq ! ( action. action( ) , & DatastoreSetupAction :: Ready ) ;
1345
1363
1346
1364
// Explicit CheckAndTakeover with a Nexus ID that doesn't exist should
1347
- // not get access
1365
+ // not get access, and should be told to retry later.
1348
1366
let nexus_id = OmicronZoneUuid :: new_v4 ( ) ;
1349
1367
let action = datastore
1350
1368
. check_schema_and_access (
@@ -1353,10 +1371,7 @@ mod test {
1353
1371
)
1354
1372
. await
1355
1373
. expect ( "Failed to check schema and access" ) ;
1356
- assert_eq ! (
1357
- action. action( ) ,
1358
- & DatastoreSetupAction :: NeedsHandoff { nexus_id } ,
1359
- ) ;
1374
+ assert_eq ! ( action. action( ) , & DatastoreSetupAction :: TryLater ) ;
1360
1375
1361
1376
db. terminate ( ) . await ;
1362
1377
logctx. cleanup_successful ( ) ;
0 commit comments