Skip to content

Commit f4e6ee3

Browse files
authored
Reorder blueprint read to the end of loading (#9603)
Partial fix of #9594
1 parent 8ae817f commit f4e6ee3

File tree

1 file changed

+251
-55
lines changed

nexus/db-queries/src/db/datastore/deployment.rs

Lines changed: 251 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -672,61 +672,6 @@ impl DataStore {
672672
let blueprint_id =
673673
BlueprintUuid::from_untyped_uuid(authz_blueprint.id());
674674

675-
// Read the metadata from the primary blueprint row, and ensure that it
676-
// exists.
677-
let (
678-
parent_blueprint_id,
679-
internal_dns_version,
680-
external_dns_version,
681-
target_release_minimum_generation,
682-
nexus_generation,
683-
cockroachdb_fingerprint,
684-
cockroachdb_setting_preserve_downgrade,
685-
time_created,
686-
creator,
687-
comment,
688-
source,
689-
) = {
690-
use nexus_db_schema::schema::blueprint::dsl;
691-
692-
let Some(blueprint) = dsl::blueprint
693-
.filter(dsl::id.eq(to_db_typed_uuid(blueprint_id)))
694-
.select(DbBlueprint::as_select())
695-
.get_result_async(&*conn)
696-
.await
697-
.optional()
698-
.map_err(|e| {
699-
public_error_from_diesel(e, ErrorHandler::Server)
700-
})?
701-
else {
702-
return Err(authz_blueprint.not_found());
703-
};
704-
705-
(
706-
blueprint.parent_blueprint_id.map(From::from),
707-
*blueprint.internal_dns_version,
708-
*blueprint.external_dns_version,
709-
*blueprint.target_release_minimum_generation,
710-
*blueprint.nexus_generation,
711-
blueprint.cockroachdb_fingerprint,
712-
blueprint.cockroachdb_setting_preserve_downgrade,
713-
blueprint.time_created,
714-
blueprint.creator,
715-
blueprint.comment,
716-
BlueprintSource::from(blueprint.source),
717-
)
718-
};
719-
let cockroachdb_setting_preserve_downgrade =
720-
CockroachDbPreserveDowngrade::from_optional_string(
721-
&cockroachdb_setting_preserve_downgrade,
722-
)
723-
.map_err(|_| {
724-
Error::internal_error(&format!(
725-
"unrecognized cluster version {:?}",
726-
cockroachdb_setting_preserve_downgrade
727-
))
728-
})?;
729-
730675
// Load the sled metadata for this blueprint. We use this to prime our
731676
// primary map of sled configs, but we leave the zones / disks /
732677
// datasets maps empty (to be filled in when we query those tables
@@ -1477,6 +1422,69 @@ impl DataStore {
14771422
)?;
14781423
}
14791424

1425+
// Read the metadata from the primary blueprint row last. We do this at
1426+
// the end (rather than the beginning) so that if a concurrent delete
1427+
// operation has started, we will observe that the top-level blueprint
1428+
// record is missing and return "not found". This prevents returning a
1429+
// partially-torn blueprint where child rows have been deleted but we
1430+
// still return an incomplete result.
1431+
//
1432+
// The blueprint insert and delete operations are transactional, so if
1433+
// this read succeeds, we know the blueprint exists and hasn't been
1434+
// deleted.
1435+
let (
1436+
parent_blueprint_id,
1437+
internal_dns_version,
1438+
external_dns_version,
1439+
target_release_minimum_generation,
1440+
nexus_generation,
1441+
cockroachdb_fingerprint,
1442+
cockroachdb_setting_preserve_downgrade,
1443+
time_created,
1444+
creator,
1445+
comment,
1446+
source,
1447+
) = {
1448+
use nexus_db_schema::schema::blueprint::dsl;
1449+
1450+
let Some(blueprint) = dsl::blueprint
1451+
.filter(dsl::id.eq(to_db_typed_uuid(blueprint_id)))
1452+
.select(DbBlueprint::as_select())
1453+
.get_result_async(&*conn)
1454+
.await
1455+
.optional()
1456+
.map_err(|e| {
1457+
public_error_from_diesel(e, ErrorHandler::Server)
1458+
})?
1459+
else {
1460+
return Err(authz_blueprint.not_found());
1461+
};
1462+
1463+
(
1464+
blueprint.parent_blueprint_id.map(From::from),
1465+
*blueprint.internal_dns_version,
1466+
*blueprint.external_dns_version,
1467+
*blueprint.target_release_minimum_generation,
1468+
*blueprint.nexus_generation,
1469+
blueprint.cockroachdb_fingerprint,
1470+
blueprint.cockroachdb_setting_preserve_downgrade,
1471+
blueprint.time_created,
1472+
blueprint.creator,
1473+
blueprint.comment,
1474+
BlueprintSource::from(blueprint.source),
1475+
)
1476+
};
1477+
let cockroachdb_setting_preserve_downgrade =
1478+
CockroachDbPreserveDowngrade::from_optional_string(
1479+
&cockroachdb_setting_preserve_downgrade,
1480+
)
1481+
.map_err(|_| {
1482+
Error::internal_error(&format!(
1483+
"unrecognized cluster version {:?}",
1484+
cockroachdb_setting_preserve_downgrade
1485+
))
1486+
})?;
1487+
14801488
Ok(Blueprint {
14811489
id: blueprint_id,
14821490
pending_mgs_updates,
@@ -4770,4 +4778,192 @@ mod tests {
47704778
db.terminate().await;
47714779
logctx.cleanup_successful();
47724780
}
4781+
4782+
// Test that concurrent read and delete operations on blueprints do not
4783+
// result in torn reads. With the fix for issue #9594, blueprint_read
4784+
// checks for the top-level blueprint record at the END of reading, so
4785+
// if a concurrent delete has started (which deletes the top-level record
4786+
// first), the read will fail with "not found" rather than returning
4787+
// partial data.
4788+
//
4789+
// This test spawns concurrent readers and a deleter to exercise the race
4790+
// condition. Readers should either get the complete original blueprint
4791+
// OR a "not found" error - never partial/torn data.
4792+
#[tokio::test]
4793+
async fn test_concurrent_blueprint_read_delete() {
4794+
const TEST_NAME: &str = "test_concurrent_blueprint_read_delete";
4795+
let logctx = dev::test_setup_log(TEST_NAME);
4796+
let db = TestDatabase::new_with_datastore(&logctx.log).await;
4797+
let (opctx, datastore) = (db.opctx(), db.datastore());
4798+
4799+
// Create two blueprints - one to be the target (so it can't be deleted)
4800+
// and one to be deleted while being read.
4801+
let (_, _, target_blueprint) = representative(&logctx.log, TEST_NAME);
4802+
let authz_target = authz_blueprint_from_id(target_blueprint.id);
4803+
4804+
// Insert target blueprint and make it the current target
4805+
datastore
4806+
.blueprint_insert(&opctx, &target_blueprint)
4807+
.await
4808+
.expect("failed to insert target blueprint");
4809+
let target = BlueprintTarget {
4810+
target_id: target_blueprint.id,
4811+
enabled: true,
4812+
time_made_target: now_db_precision(),
4813+
};
4814+
datastore
4815+
.blueprint_target_set_current(&opctx, target)
4816+
.await
4817+
.expect("failed to set target");
4818+
4819+
// Create a second blueprint that we'll delete while reading
4820+
let blueprint_to_delete = {
4821+
let mut builder = BlueprintBuilder::new_based_on(
4822+
&logctx.log,
4823+
&target_blueprint,
4824+
"test blueprint to delete",
4825+
PlannerRng::from_entropy(),
4826+
)
4827+
.expect("failed to create builder");
4828+
builder.comment("blueprint that will be deleted");
4829+
builder.build(BlueprintSource::Test)
4830+
};
4831+
let authz_blueprint = authz_blueprint_from_id(blueprint_to_delete.id);
4832+
4833+
datastore
4834+
.blueprint_insert(&opctx, &blueprint_to_delete)
4835+
.await
4836+
.expect("failed to insert blueprint to delete");
4837+
4838+
// Verify we can read it back correctly
4839+
let read_back = datastore
4840+
.blueprint_read(&opctx, &authz_blueprint)
4841+
.await
4842+
.expect("failed to read blueprint");
4843+
assert_eq!(blueprint_to_delete, read_back);
4844+
4845+
// Track results from concurrent readers
4846+
let successful_reads = Arc::new(std::sync::atomic::AtomicUsize::new(0));
4847+
let not_found_errors = Arc::new(std::sync::atomic::AtomicUsize::new(0));
4848+
let other_errors = Arc::new(std::sync::atomic::AtomicUsize::new(0));
4849+
let delete_completed = Arc::new(AtomicBool::new(false));
4850+
4851+
// Signal when at least one read has completed, so we know readers are
4852+
// running before we start deleting
4853+
let (first_read_tx, first_read_rx) =
4854+
tokio::sync::oneshot::channel::<()>();
4855+
let first_read_tx =
4856+
Arc::new(std::sync::Mutex::new(Some(first_read_tx)));
4857+
4858+
// Spawn reader tasks that loop until deletion completes
4859+
const NUM_READERS: usize = 10;
4860+
let mut reader_handles = Vec::new();
4861+
4862+
for _ in 0..NUM_READERS {
4863+
let datastore = datastore.clone();
4864+
let opctx = opctx.child(std::collections::BTreeMap::new());
4865+
let authz_blueprint = authz_blueprint.clone();
4866+
let blueprint_to_delete = blueprint_to_delete.clone();
4867+
let successful_reads = successful_reads.clone();
4868+
let not_found_errors = not_found_errors.clone();
4869+
let other_errors = other_errors.clone();
4870+
let delete_completed = delete_completed.clone();
4871+
let first_read_tx = first_read_tx.clone();
4872+
4873+
reader_handles.push(tokio::spawn(async move {
4874+
loop {
4875+
match datastore
4876+
.blueprint_read(&opctx, &authz_blueprint)
4877+
.await
4878+
{
4879+
Ok(blueprint) => {
4880+
// If we got a blueprint back, it MUST be complete
4881+
// and match the original. Any mismatch would
4882+
// indicate a torn read.
4883+
assert_eq!(
4884+
blueprint, blueprint_to_delete,
4885+
"Read returned a blueprint that doesn't match \
4886+
the original - this indicates a torn read!"
4887+
);
4888+
successful_reads.fetch_add(1, Ordering::Relaxed);
4889+
4890+
// Signal that at least one read completed (only
4891+
// the first sender to take the channel will send)
4892+
if let Some(tx) =
4893+
first_read_tx.lock().unwrap().take()
4894+
{
4895+
let _ = tx.send(());
4896+
}
4897+
}
4898+
Err(e) => {
4899+
let error_str = e.to_string();
4900+
if error_str.contains("not found") {
4901+
not_found_errors
4902+
.fetch_add(1, Ordering::Relaxed);
4903+
} else {
4904+
eprintln!("Unexpected error: {e}");
4905+
other_errors.fetch_add(1, Ordering::Relaxed);
4906+
}
4907+
}
4908+
}
4909+
4910+
// Stop reading after delete completes
4911+
if delete_completed.load(Ordering::Relaxed) {
4912+
break;
4913+
}
4914+
}
4915+
}));
4916+
}
4917+
4918+
// Wait for at least one successful read before deleting, so we know
4919+
// the reader tasks have started
4920+
first_read_rx.await.expect("no reader completed a read");
4921+
4922+
// Delete the blueprint while readers are running
4923+
datastore
4924+
.blueprint_delete(&opctx, &authz_blueprint)
4925+
.await
4926+
.expect("failed to delete blueprint");
4927+
delete_completed.store(true, Ordering::Relaxed);
4928+
4929+
// Wait for all readers to complete
4930+
for handle in reader_handles {
4931+
handle.await.expect("reader task panicked");
4932+
}
4933+
4934+
// Log results for debugging.
4935+
let successful = successful_reads.load(Ordering::Relaxed);
4936+
let not_found = not_found_errors.load(Ordering::Relaxed);
4937+
let other = other_errors.load(Ordering::Relaxed);
4938+
eprintln!(
4939+
"Results: {} successful reads, {} not-found errors, {} other errors",
4940+
successful, not_found, other
4941+
);
4942+
4943+
// Key invariants:
4944+
// - At least one successful read (we waited for this before deleting)
4945+
// - Successful reads are validated inside the reader loop (must match
4946+
// the original blueprint exactly, or the assert_eq! fails)
4947+
// - "Not found" errors are expected after deletion
4948+
// - No other errors should occur
4949+
assert!(
4950+
successful > 0,
4951+
"Expected at least one successful read (we wait for this)"
4952+
);
4953+
assert_eq!(other, 0, "No unexpected errors should occur");
4954+
4955+
// Verify the target blueprint is still intact
4956+
let target_read = datastore
4957+
.blueprint_read(&opctx, &authz_target)
4958+
.await
4959+
.expect("target blueprint should still be readable");
4960+
assert_eq!(target_blueprint, target_read);
4961+
4962+
// Verify the deleted blueprint is fully cleaned up
4963+
ensure_blueprint_fully_deleted(&datastore, blueprint_to_delete.id)
4964+
.await;
4965+
4966+
db.terminate().await;
4967+
logctx.cleanup_successful();
4968+
}
47734969
}

0 commit comments

Comments
 (0)