Skip to content

Commit 63c0189

Browse files
authored
[inventory] Add health check information for each sled (#9434)
This is the first PR for #9412. This PR only implements checking SMF service health for each sled. There will be follow-up PRs that deal with adding the data types to the DB.

_NB: This PR is less than 1000 lines, not ~10,000. Most of the changes here are API JSON additions._

### Manual testing on a simulated omicron

System has two unhealthy enabled services:

```console
$ pfexec svcs -Za -H -o state,fmri,zone
<...>
maintenance svc:/site/fake-service2:default global
maintenance svc:/site/fake-service:default global
<...>
$ curl -H "api-version: 11.0.0" http://[::1]:32824/inventory | jq
<...>
"health_monitor": {
  "smf_services_in_maintenance": {
    "ok": {
      "services": [
        { "fmri": "svc:/site/fake-service2:default", "zone": "global" },
        { "fmri": "svc:/site/fake-service:default", "zone": "global" }
      ],
      "time_of_status": "2025-12-10T08:35:10.604488858Z"
    }
  }
}
}
```

Disabling one of the unhealthy services should only show a single entry:

```console
$ svcadm disable svc:/site/fake-service:default
$ pfexec svcs -Za -H -o state,fmri,zone
<...>
maintenance svc:/site/fake-service2:default global
disabled svc:/site/fake-service:default global
<...>
$ curl -H "api-version: 11.0.0" http://[::1]:32824/inventory | jq
<...>
"health_monitor": {
  "smf_services_in_maintenance": {
    "ok": {
      "services": [
        { "fmri": "svc:/site/fake-service2:default", "zone": "global" }
      ],
      "time_of_status": "2025-12-10T08:35:10.604488858Z"
    }
  }
}
}
```

Disabling the other service should not return any services:

```console
$ svcadm disable svc:/site/fake-service2:default
$ pfexec svcs -Za -H -o state,fmri,zone
<...>
disabled svc:/site/fake-service2:default global
disabled svc:/site/fake-service:default global
<...>
$ curl -H "api-version: 11.0.0" http://[::1]:32824/inventory | jq
<...>
"health_monitor": {
  "smf_services_in_maintenance": {
    "ok": {
      "services": [],
      "time_of_status": "2025-12-10T08:35:10.604488858Z"
    }
  }
}
}
```
1 parent 09b3715 commit 63c0189

File tree

35 files changed

+10096
-11
lines changed

35 files changed

+10096
-11
lines changed

Cargo.lock

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ members = [
133133
"sled-agent/api",
134134
"sled-agent/bootstrap-agent-api",
135135
"sled-agent/config-reconciler",
136+
"sled-agent/health-monitor",
136137
"sled-agent/repo-depot-api",
137138
"sled-agent/types",
138139
"sled-agent/types/versions",
@@ -300,6 +301,7 @@ default-members = [
300301
"sled-agent/api",
301302
"sled-agent/bootstrap-agent-api",
302303
"sled-agent/config-reconciler",
304+
"sled-agent/health-monitor",
303305
"sled-agent/repo-depot-api",
304306
"sled-agent/types",
305307
"sled-agent/types/versions",
@@ -729,6 +731,7 @@ sled = "=0.34.7"
729731
sled-agent-api = { path = "sled-agent/api" }
730732
sled-agent-client = { path = "clients/sled-agent-client" }
731733
sled-agent-config-reconciler = { path = "sled-agent/config-reconciler" }
734+
sled-agent-health-monitor = { path = "sled-agent/health-monitor" }
732735
sled-agent-types = { path = "sled-agent/types" }
733736
sled-agent-types-versions = { path = "sled-agent/types/versions" }
734737
sled-agent-zone-images = { path = "sled-agent/zone-images" }

illumos-utils/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ bhyve_api.workspace = true
1515
byteorder.workspace = true
1616
camino.workspace = true
1717
camino-tempfile.workspace = true
18+
chrono.workspace = true
1819
cfg-if.workspace = true
1920
crucible-smf.workspace = true
2021
debug-ignore.workspace = true
@@ -34,6 +35,8 @@ oxnet.workspace = true
3435
schemars.workspace = true
3536
serde.workspace = true
3637
slog.workspace = true
38+
slog-async.workspace = true
39+
slog-term.workspace = true
3740
slog-error-chain.workspace = true
3841
smf.workspace = true
3942
thiserror.workspace = true

illumos-utils/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
//! Wrappers around illumos-specific commands.
66
77
use dropshot::HttpError;
8-
use slog_error_chain::InlineErrorChain;
8+
use slog_error_chain::{InlineErrorChain, SlogInlineError};
99
#[allow(unused)]
1010
use std::sync::atomic::{AtomicBool, Ordering};
1111

@@ -28,6 +28,7 @@ pub mod scf;
2828
pub mod smf_helper;
2929
pub mod svc;
3030
pub mod svcadm;
31+
pub mod svcs;
3132
pub mod vmm_reservoir;
3233
pub mod zfs;
3334
pub mod zone;
@@ -58,7 +59,7 @@ impl std::fmt::Display for CommandFailureInfo {
5859
}
5960
}
6061

61-
#[derive(thiserror::Error, Debug)]
62+
#[derive(thiserror::Error, Debug, SlogInlineError)]
6263
pub enum ExecutionError {
6364
#[error("Failed to start execution of [{command}]: {err}")]
6465
ExecutionStart { command: String, err: std::io::Error },

0 commit comments

Comments
 (0)