oxidecomputer · smklein · Mar 4, 2025 · Jan 30, 2025 · Jan 30, 2025 · Jan 30, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs
@@ -1291,7 +1291,9 @@ pub enum InstanceAutoRestartPolicy {
 /// Affinity policy used to describe "what to do when a request cannot be satisfied"
 ///
 /// Used for both Affinity and Anti-Affinity Groups
-#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
+#[derive(
+    Clone, Copy, Debug, Deserialize, Hash, Eq, Serialize, PartialEq, JsonSchema,
+)]
 #[serde(rename_all = "snake_case")]
 pub enum AffinityPolicy {
     /// If the affinity request cannot be satisfied, allow it anyway.

diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml
@@ -75,6 +75,7 @@ testing = ["omicron-test-utils"]
 [dev-dependencies]
 assert_matches.workspace = true
 camino-tempfile.workspace = true
+criterion.workspace = true
 expectorate.workspace = true
 hyper-rustls.workspace = true
 gateway-client.workspace = true
@@ -85,7 +86,7 @@ nexus-inventory.workspace = true
 nexus-reconfigurator-planning.workspace = true
 nexus-test-utils.workspace = true
 omicron-sled-agent.workspace = true
-omicron-test-utils.workspace = true
+omicron-test-utils = { workspace = true, features = ["seed-gen"] }
 openapiv3.workspace = true
 oso.workspace = true
 pem.workspace = true
@@ -97,3 +98,7 @@ regex.workspace = true
 rustls.workspace = true
 subprocess.workspace = true
 term.workspace = true
+
+[[bench]]
+name = "sled_reservation"
+harness = false
diff --git a/nexus/db-queries/benches/README.adoc b/nexus/db-queries/benches/README.adoc
@@ -0,0 +1,18 @@
+:showtitle:
+:toc: left
+:icons: font
+
+= Benchmarks
+
+This directory contains benchmarks for database queries.
+
+These benchmarks can be run with:
+
+[source,bash]
+----
+cargo bench -p nexus-db-queries
+----
+
+Additionally, the "SHOW_CONTENTION" environment variable can be set to display
+extra data from CockroachDB tables about contention statistics, if they
+are available.
diff --git a/nexus/db-queries/benches/harness/db_utils.rs b/nexus/db-queries/benches/harness/db_utils.rs
@@ -0,0 +1,108 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Database test helpers
+//!
+//! These are largely ripped out of "nexus/db-queries/src/db/datastore".
+//!
+//! Benchmarks are compiled as external binaries from library crates, so we
+//! can only access `pub` code.
+//!
+//! It may be worth refactoring some of these functions to a test utility
+//! crate to avoid the duplication.
+
+use anyhow::Context;
+use anyhow::Result;
+use nexus_db_model::Sled;
+use nexus_db_model::SledReservationConstraintBuilder;
+use nexus_db_model::SledUpdate;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use nexus_db_queries::db::pub_test_utils::helpers::SledUpdateBuilder;
+use nexus_db_queries::db::pub_test_utils::helpers::small_resource_request;
+use omicron_uuid_kinds::InstanceUuid;
+use omicron_uuid_kinds::PropolisUuid;
+use uuid::Uuid;
+
+/// Returns the rack UUID used by these helpers, parsed from
+/// `nexus_test_utils::RACK_UUID`.
+///
+/// # Panics
+///
+/// Panics if that constant cannot be parsed as a UUID.
+pub fn rack_id() -> Uuid {
+    let parsed = Uuid::parse_str(nexus_test_utils::RACK_UUID);
+    parsed.unwrap()
+}
+
+/// Number of usable hardware threads assigned to every sled built by
+/// `test_new_sled_update`.
+const USABLE_HARDWARE_THREADS: u32 = 32;
+
+/// Builds a `SledUpdate` whose rack ID is `rack_id()` and whose hardware
+/// reports `USABLE_HARDWARE_THREADS` usable hardware threads.
+///
+/// Every other field keeps whatever `SledUpdateBuilder::new()` provides.
+pub fn test_new_sled_update() -> SledUpdate {
+    let mut builder = SledUpdateBuilder::new();
+    builder.rack_id(rack_id());
+    builder.hardware().usable_hardware_threads(USABLE_HARDWARE_THREADS);
+    builder.build()
+}
+
+/// Upserts `count` sleds into `datastore`, one at a time, and returns the
+/// resulting `Sled` records in insertion order.
+///
+/// Each sled is described by a fresh `test_new_sled_update()`.
+///
+/// # Panics
+///
+/// Panics if any `sled_upsert` call returns an error.
+pub async fn create_sleds(datastore: &DataStore, count: usize) -> Vec<Sled> {
+    let mut created = Vec::new();
+    while created.len() < count {
+        let upserted = datastore.sled_upsert(test_new_sled_update()).await;
+        // Only the sled record is kept; the second tuple element is unused.
+        let (sled, _) = upserted.unwrap();
+        created.push(sled);
+    }
+    created
+}
+
+/// Returns how many calls to `create_reservation` are expected to succeed
+/// when `sled_count` sleds exist.
+///
+/// The result is the total number of usable hardware threads across all
+/// sleds (`USABLE_HARDWARE_THREADS` per sled) divided, with integer division,
+/// by the hardware threads requested by one `small_resource_request()`.
+///
+/// This can be used to validate parameters before running benchmarks.
+pub fn max_resource_request_count(sled_count: usize) -> usize {
+    let per_request =
+        usize::try_from(small_resource_request().hardware_threads.0).unwrap();
+    let per_sled = usize::try_from(USABLE_HARDWARE_THREADS).unwrap();
+
+    let total_threads = per_sled * sled_count;
+    total_threads / per_request
+}
+
+/// Creates a sled reservation for `instance_id` and returns the ID of the
+/// newly generated VMM that owns it.
+///
+/// The reservation asks for `small_resource_request()` resources and uses
+/// the constraints produced by an otherwise unconfigured
+/// `SledReservationConstraintBuilder`.
+///
+/// # Errors
+///
+/// Returns an error (with "Failed to create reservation" context) if
+/// `sled_reservation_create` fails for any reason other than a transaction
+/// restart. Errors whose text contains "restart transaction" are retried
+/// immediately and without bound, so this function does not return while
+/// such errors keep occurring.
+pub async fn create_reservation(
+    opctx: &OpContext,
+    db: &DataStore,
+    instance_id: InstanceUuid,
+) -> Result<PropolisUuid> {
+    let vmm_id = PropolisUuid::new_v4();
+
+    loop {
+        let attempt = db
+            .sled_reservation_create(
+                opctx,
+                instance_id,
+                vmm_id,
+                small_resource_request(),
+                SledReservationConstraintBuilder::new().build(),
+            )
+            .await;
+
+        match attempt {
+            Ok(_) => return Ok(vmm_id),
+            // This condition is bad - it would result in a user-visible
+            // error, in most cases - but it's also an indication of failure
+            // due to contention. We normally bubble this out to users,
+            // rather than stalling the request, but in this particular
+            // case, we choose to retry immediately.
+            Err(err) if err.to_string().contains("restart transaction") => {
+                continue;
+            }
+            Err(err) => {
+                return Err(err).context("Failed to create reservation");
+            }
+        }
+    }
+}
+
+/// Deletes the sled reservation held by `vmm_id`.
+///
+/// # Errors
+///
+/// Returns an error (with "Failed to delete reservation" context) if
+/// `sled_reservation_delete` fails.
+pub async fn delete_reservation(
+    opctx: &OpContext,
+    db: &DataStore,
+    vmm_id: PropolisUuid,
+) -> Result<()> {
+    db.sled_reservation_delete(opctx, vmm_id)
+        .await
+        .context("Failed to delete reservation")?;
+    Ok(())
+}