Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ tracing-appender = "0.2.4"
tracing-opentelemetry = "0.32.1"

# NV-Redfish
nv-redfish = { version = "0.8.1" }
nv-redfish = { version = "0.10.0" }

########
# MARK: - Pinned packages that we can't upgrade due to conflicts or just bugs
Expand Down
11 changes: 11 additions & 0 deletions crates/api-db/migrations/20260521231500_bmc_redfish_session.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Tracks the outstanding Redfish sessions.
-- At most one row exists per (spiffe_service_id, bmc_mac_address) pair;
-- the composite primary key below enforces that.
CREATE TABLE bmc_redfish_sessions (
-- Service half of the key the session is recorded under.
spiffe_service_id TEXT NOT NULL,
-- BMC half of the key: MAC address of the BMC the session is recorded for.
bmc_mac_address macaddr NOT NULL,
-- `@odata.id` of the Redfish session.
session_odata_id TEXT NOT NULL,
-- Defaults to the database clock at insert time.
issued_at TIMESTAMPTZ NOT NULL DEFAULT now(),
PRIMARY KEY (spiffe_service_id, bmc_mac_address)
);

-- The primary key leads with spiffe_service_id, so queries that filter on
-- bmc_mac_address alone get their own index.
CREATE INDEX bmc_redfish_sessions_by_mac
ON bmc_redfish_sessions (bmc_mac_address);
113 changes: 113 additions & 0 deletions crates/api-db/src/bmc_redfish_session.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//! Persistence layer for the `BmcSessionManager`.
//!
//! Each row records the outstanding Redfish session that the manager has
//! issued for a given `(spiffe_service_id, bmc_mac_address)` pair, so that
//! the next rotate (or a `flush_mac`) can `DELETE` the prior session on the
//! BMC. The `X-Auth-Token` itself is not persisted; it is returned to the
//! caller once and the only durable artifact is the session's `@odata.id`.

use mac_address::MacAddress;
use model::bmc_redfish_session::StoredSession;
use sqlx::PgConnection;

use crate::db_read::DbReader;
use crate::{DatabaseError, DatabaseResult};

/// Looks up the session row recorded for one `(spiffe_service_id, bmc_mac)`
/// pair.
///
/// Yields `Ok(None)` when no row matches the pair.
///
/// # Errors
///
/// Any sqlx failure is wrapped via `DatabaseError::query` together with the
/// SQL text that was executed.
pub async fn get(
    txn: impl DbReader<'_>,
    spiffe_service_id: &str,
    bmc_mac: MacAddress,
) -> DatabaseResult<Option<StoredSession>> {
    // Continuation lines belong to the literal, so they are kept exactly as written.
    const QUERY: &str = "SELECT spiffe_service_id, bmc_mac_address, session_odata_id, issued_at
FROM bmc_redfish_sessions
WHERE spiffe_service_id = $1 AND bmc_mac_address = $2";

    let lookup = sqlx::query_as::<_, StoredSession>(QUERY)
        .bind(spiffe_service_id)
        .bind(bmc_mac);

    match lookup.fetch_optional(txn).await {
        Ok(row) => Ok(row),
        Err(e) => Err(DatabaseError::query(QUERY, e)),
    }
}

/// Records `session_odata_id` as the outstanding session for
/// `(spiffe_service_id, bmc_mac)`, replacing whatever row was there before.
///
/// `issued_at` is filled in by the database (`now()`) rather than by the
/// caller, both on first insert and on overwrite.
///
/// # Errors
///
/// Any sqlx failure is wrapped via `DatabaseError::query` together with the
/// SQL text that was executed.
pub async fn upsert(
    txn: &mut PgConnection,
    spiffe_service_id: &str,
    bmc_mac: MacAddress,
    session_odata_id: &str,
) -> DatabaseResult<()> {
    // Continuation lines belong to the literal, so they are kept exactly as written.
    const QUERY: &str = "INSERT INTO bmc_redfish_sessions
(spiffe_service_id, bmc_mac_address, session_odata_id, issued_at)
VALUES ($1, $2, $3, now())
ON CONFLICT (spiffe_service_id, bmc_mac_address) DO UPDATE
SET session_odata_id = EXCLUDED.session_odata_id,
issued_at = EXCLUDED.issued_at";

    let statement = sqlx::query(QUERY)
        .bind(spiffe_service_id)
        .bind(bmc_mac)
        .bind(session_odata_id);

    // The query result itself carries nothing callers need.
    statement
        .execute(txn)
        .await
        .map_err(|e| DatabaseError::query(QUERY, e))?;

    Ok(())
}

/// Removes the session row stored for `(spiffe_service_id, bmc_mac)`.
///
/// Succeeds without doing anything when no such row exists.
///
/// # Errors
///
/// Any sqlx failure is wrapped via `DatabaseError::query` together with the
/// SQL text that was executed.
pub async fn delete(
    txn: &mut PgConnection,
    spiffe_service_id: &str,
    bmc_mac: MacAddress,
) -> DatabaseResult<()> {
    // Continuation lines belong to the literal, so they are kept exactly as written.
    let sql = "DELETE FROM bmc_redfish_sessions
WHERE spiffe_service_id = $1 AND bmc_mac_address = $2";

    let outcome = sqlx::query(sql)
        .bind(spiffe_service_id)
        .bind(bmc_mac)
        .execute(txn)
        .await;

    match outcome {
        // The affected-row count is deliberately ignored: zero rows is not an error.
        Ok(_) => Ok(()),
        Err(e) => Err(DatabaseError::query(sql, e)),
    }
}

/// Removes all session rows recorded against `bmc_mac`, whichever service
/// they belong to, and hands the removed rows back.
///
/// Callers can use the returned rows to best-effort `DELETE` the matching
/// sessions on the BMC. The vector is empty when nothing matched.
///
/// # Errors
///
/// Any sqlx failure is wrapped via `DatabaseError::query` together with the
/// SQL text that was executed.
pub async fn delete_by_mac(
    txn: &mut PgConnection,
    bmc_mac: MacAddress,
) -> DatabaseResult<Vec<StoredSession>> {
    // Continuation lines belong to the literal, so they are kept exactly as written.
    const QUERY: &str = "DELETE FROM bmc_redfish_sessions
WHERE bmc_mac_address = $1
RETURNING spiffe_service_id, bmc_mac_address, session_odata_id, issued_at";

    let removed: Vec<StoredSession> = sqlx::query_as(QUERY)
        .bind(bmc_mac)
        .fetch_all(txn)
        .await
        .map_err(|e| DatabaseError::query(QUERY, e))?;

    Ok(removed)
}
1 change: 1 addition & 0 deletions crates/api-db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

pub mod attestation;
pub mod bmc_metadata;
pub mod bmc_redfish_session;
pub mod carbide_version;
pub mod compute_allocation;
pub mod db_read;
Expand Down
28 changes: 28 additions & 0 deletions crates/api-model/src/bmc_redfish_session.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use mac_address::MacAddress;
use sqlx::types::chrono::{DateTime, Utc};

/// A row in the `bmc_redfish_sessions` table.
///
/// Decoded from query results through the derived `sqlx::FromRow`, so each
/// field corresponds to the table column of the same name.
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct StoredSession {
/// Service half of the `(spiffe_service_id, bmc_mac_address)` key.
pub spiffe_service_id: String,
/// BMC half of the key: the MAC address of the BMC.
pub bmc_mac_address: MacAddress,
/// `@odata.id` of the Redfish session.
pub session_odata_id: String,
/// Timestamp stored in the row's `issued_at` column.
pub issued_at: DateTime<Utc>,
}
1 change: 1 addition & 0 deletions crates/api-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub mod address_selection_strategy;
pub mod allocation_type;
pub mod attestation;
pub mod bmc_info;
pub mod bmc_redfish_session;
pub mod component_manager;
pub mod compute_allocation;
pub mod controller_outcome;
Expand Down
1 change: 1 addition & 0 deletions crates/api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ lazy_static = { workspace = true }
libredfish = { workspace = true }
librms = { workspace = true }
mac_address = { workspace = true }
nv-redfish = { workspace = true, features = ["session-service"] }
num_cpus = { workspace = true }
oauth2 = { default-features = false, workspace = true }
oid-registry = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions crates/api/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub struct Api {
pub(crate) credential_manager: Arc<dyn CredentialManager>,
pub(crate) certificate_provider: Arc<dyn CertificateProvider>,
pub(crate) redfish_pool: Arc<dyn RedfishClientPool>,
pub(crate) bmc_session_manager: Arc<crate::credentials::BmcSessionManager>,
pub(crate) eth_data: EthVirtData,
pub(crate) common_pools: Arc<CommonPools>,
pub(crate) ib_fabric_manager: Arc<dyn IBFabricManager>,
Expand Down
1 change: 1 addition & 0 deletions crates/api/src/cfg/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ applicable.
| `networks` | `Option<HashMap<String, NetworkDefinition>>` | — | Networks created at startup. Alternative: `CreateNetworkSegment` gRPC. |
| `dpu_ipmi_tool_impl` | `Option<String>` | — | IPMI tool implementation for DPU power control (`"prod"` or `"fake"`). |
| `dpu_ipmi_reboot_attempts` | `Option<u32>` | — | Retry count when IPMI errors during DPU reboot. |
| `bmc_session_lockout_threshold` | `u32` | `3` | Consecutive BMC HTTP 401/403 responses before session-token login attempts stop for that BMC. |
| `ib_fabrics` | `HashMap<String, IbFabricDefinition>` | `{}` | InfiniBand fabrics managed by the site. Currently only one fabric is supported. |
| `initial_domain_name` | `Option<String>` | — | Domain to create if none exist. Most sites use a single domain. |
| `initial_dpu_agent_upgrade_policy` | `Option<AgentUpgradePolicyChoice>` | — | Policy for nico-dpu-agent upgrades. Also settable via `nico-admin-cli`. |
Expand Down
18 changes: 18 additions & 0 deletions crates/api/src/cfg/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,13 @@ pub struct CarbideConfig {
/// DPU reboot.
pub dpu_ipmi_reboot_attempts: Option<u32>,

/// Number of consecutive HTTP 401/403 responses from a BMC before the
/// session-token path stops attempting to log in to that BMC, to avoid
/// exhausting the BMC root account's retry budget.
/// Default is 3.
#[serde(default = "default_bmc_session_lockout_threshold")]
pub bmc_session_lockout_threshold: u32,

/// Infiniband fabrics managed by the site
/// Note: At the moment, only a single fabric is supported
#[serde(default)]
Expand Down Expand Up @@ -1741,6 +1748,10 @@ fn default_max_database_connections() -> u32 {
1000
}

/// Value used for `bmc_session_lockout_threshold` when the configuration
/// does not set it.
///
/// Returns `3`.
pub const fn default_bmc_session_lockout_threshold() -> u32 {
    const DEFAULT_THRESHOLD: u32 = 3;
    DEFAULT_THRESHOLD
}

/// DpuConfig related internal configuration
#[derive(Clone, Debug, Serialize)]
pub struct DpuConfig {
Expand Down Expand Up @@ -3036,6 +3047,10 @@ mod tests {
assert!(config.pools.is_none());
assert!(config.ib_config.is_none());
assert!(config.ib_fabrics.is_empty());
assert_eq!(
config.bmc_session_lockout_threshold,
default_bmc_session_lockout_threshold()
);
assert!(config.vpc_peering_policy.is_none());
assert!(config.site_explorer.enabled.load(AtomicOrdering::Relaxed));
assert!(config.initial_objects_file.is_none());
Expand Down Expand Up @@ -3084,6 +3099,7 @@ mod tests {
assert_eq!(config.asn, 777);
assert_eq!(config.dhcp_servers, vec!["99.101.102.103".to_string()]);
assert!(config.route_servers.is_empty());
assert_eq!(config.bmc_session_lockout_threshold, 5);
assert_eq!(config.vpc_peering_policy, Some(VpcPeeringPolicy::Exclusive));
assert_eq!(config.vpc_peering_policy_on_existing, None);
assert_eq!(
Expand Down Expand Up @@ -3223,6 +3239,7 @@ mod tests {
assert_eq!(config.database_url, "postgres://a:b@postgresql".to_string());
assert_eq!(config.max_database_connections, 1222);
assert_eq!(config.asn, 123);
assert_eq!(config.bmc_session_lockout_threshold, 4);
assert_eq!(
config.dhcp_servers,
vec!["1.2.3.4".to_string(), "5.6.7.8".to_string()]
Expand Down Expand Up @@ -3537,6 +3554,7 @@ mod tests {
assert_eq!(config.database_url, "postgres://a:b@postgresql".to_string());
assert_eq!(config.max_database_connections, 1333);
assert_eq!(config.asn, 777);
assert_eq!(config.bmc_session_lockout_threshold, 5);
assert_eq!(config.dhcp_servers, vec!["99.101.102.103".to_string()]);
assert_eq!(config.route_servers, vec!["9.10.11.12".to_string()]);
assert_eq!(
Expand Down
1 change: 1 addition & 0 deletions crates/api/src/cfg/test_data/full_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ initial_dpu_agent_upgrade_policy = "off"
machine_update_run_interval = 60
dpu_ipmi_tool_impl = "fake"
dpu_ipmi_reboot_attempts = 1
bmc_session_lockout_threshold = 4
max_find_by_ids = 75
vpc_peering_policy = "exclusive"
vpc_peering_policy_on_existing = "mixed"
Expand Down
1 change: 1 addition & 0 deletions crates/api/src/cfg/test_data/site_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ rapid_iterations = true
max_database_connections = 1333
max_find_by_ids = 50
dpu_network_monitor_pinger_type = "OobNetBind"
bmc_session_lockout_threshold = 5
vpc_peering_policy = "exclusive"


Expand Down
Loading
Loading