Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
610 changes: 359 additions & 251 deletions Cargo.lock

Large diffs are not rendered by default.

20 changes: 13 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,19 @@ zerocopy = "0.7.34"
# It's common during development to use a local copy of various complex
# dependencies. If you want to use those, uncomment one of these blocks.
#
# [patch."https://github.com/oxidecomputer/omicron"]
# internal-dns = { path = "../omicron/internal-dns" }
# nexus-client = { path = "../omicron/clients/nexus-client" }
# omicron-common = { path = "../omicron/common" }
# oximeter-instruments = { path = "../omicron/oximeter/instruments" }
# oximeter-producer = { path = "../omicron/oximeter/producer" }
# oximeter = { path = "../omicron/oximeter/oximeter" }
[patch."https://github.com/oxidecomputer/omicron"]
#internal-dns = { path = "../omicron/internal-dns" }
nexus-client = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }
omicron-uuid-kinds = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }
nexus-types = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }
illumos-utils = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }
omicron-common = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }
oximeter-instruments = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }
oximeter-producer = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }
oximeter = { git = "https://github.com/oxidecomputer//omicron.git", rev = "3950a44ba51c1696b19392d20fd2cbb9457a000d" }

# i've just picked what Omicron happens to currently point to.
tufaceous-artifact = { git = "https://github.com/oxidecomputer//tufaceous.git", rev = "04681f26ba09e144e5467dd6bd22c4887692a670" }
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this all specifically is not intended to merge. to get a new Oximeter metric here, that's a change the the schema (and bump of oximeter). having Oximeter at one rev while Omicron is at other ones got me to a state where Propolis wouldn't build. bumping all of Omicron here left Propolis and Crucible using different Omicrons which again did not build because Cargo.toml had an old tufaceous-artifact.

this has somehow ended up with the three test failures in the last build, which all look like:

  phd-runner: [INSTANCE_ENSURE_INTERNAL - EVENT] permanent error from instance_spec_ensure 
    e = Error Response: status: 400 Bad Request; headers: {
      "content-type": "application/json", "x-request-id": "58e1ca30-797a-48bb-9191-a36b956e243b", 
      "content-length": "533", "date": "Wed, 30 Apr 2025 08:59:40 GMT"
    }; value: Error {
      error_code: None, 
      message: "unable to parse JSON body: init.value.spec.components.boot-disk.type: unknown variant `NvmeDisk`,
      expected one of `virtio_disk`, `nvme_disk`, `virtio_nic`, `serial_port`, `pci_pci_bridge`, `qemu_pvpanic`,
        `boot_settings`, `soft_npu_pci_port`, `soft_npu_port`, `soft_npu_p9`, `p9fs`, `migration_failure_injector`,
        `crucible_storage_backend`, `file_storage_backend`, `blob_storage_backend`, `virtio_network_backend`,
        `dlpi_network_backend` at line 1 column 180",
      request_id: "58e1ca30-797a-48bb-9191-a36b956e243b"
    } 

and frankly i'm at a loss about what changed the casing of variants here. it is presumably related to my wanton version bumping.

anyway, this PR is at "i think it's good, would folks +1 the new metric?" - if others agree i'll go PR the schema change in Omicron and figure out getting that through to Propolis.

# [patch."https://github.com/oxidecomputer/crucible"]
# crucible = { path = "../crucible/upstairs" }
# crucible-client-types = { path = "../crucible/crucible-client-types" }
2 changes: 2 additions & 0 deletions bin/propolis-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ internal-dns-types.workspace = true
itertools.workspace = true
kstat-rs.workspace = true
lazy_static.workspace = true
libc.workspace = true
nexus-client.workspace = true
omicron-common.workspace = true
oximeter-instruments.workspace = true
Expand Down Expand Up @@ -67,6 +68,7 @@ uuid.workspace = true
usdt.workspace = true
base64.workspace = true
schemars = { workspace = true, features = ["chrono", "uuid1"] }
zerocopy.workspace = true

[dev-dependencies]
hex.workspace = true
Expand Down
54 changes: 46 additions & 8 deletions bin/propolis-server/src/lib/stats/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::spec::Spec;
use crate::{server::MetricsEndpointConfig, vm::NetworkInterfaceIds};

mod network_interface;
mod process;
mod pvpanic;
mod virtual_disk;
mod virtual_machine;
Expand Down Expand Up @@ -78,17 +79,48 @@ struct ServerStatsInner {
/// data.
virtual_machine: VirtualMachine,

/// The total amount of virtual address space reserved in support of this
/// virtual machine. This is retained to correct the process-wide virtual
/// address space size down to the "non-VM" amount; mappings not derived
/// from a virtual machine explicitly asking for some fixed size of memory.
vm_va_size: usize,

/// The reset count for the relevant instance.
run_count: virtual_machine::Reset,

/// Mapped virtual memory for the Propolis managing this instance, excluding
/// VM memory.
vmm_vss: virtual_machine::VmmVss,

/// Process RSS is sampled in a standalone task. The `tokio::sync::watch`
/// here is to observe that sampling and update the `vmm_vss` tracked
/// here.
process_stats_rx: tokio::sync::watch::Receiver<process::ProcessStats>,
}

impl ServerStatsInner {
pub fn new(virtual_machine: VirtualMachine) -> Self {
pub fn new(virtual_machine: VirtualMachine, vm_va_size: usize) -> Self {
let rx = process::ProcessStats::new();

ServerStatsInner {
virtual_machine,
vm_va_size,
run_count: virtual_machine::Reset { datum: Default::default() },
vmm_vss: virtual_machine::VmmVss { datum: Default::default() },
process_stats_rx: rx,
}
}

fn refresh_vmm_vss(&mut self) {
let last_process_stats = self.process_stats_rx.borrow();
if last_process_stats.vss < self.vm_va_size {
eprintln!("process VSS is smaller than expected; is the VMM being torn down or already stopped?");
return;
}

self.vmm_vss.datum = (last_process_stats.vss - self.vm_va_size) as u64;
eprintln!("reporting vmm_vss of {}", self.vmm_vss.datum);
}
}

/// Type publishing metrics about the Propolis API server itself.
Expand All @@ -103,8 +135,8 @@ pub struct ServerStats {

impl ServerStats {
/// Create new server stats, representing the provided instance.
pub fn new(vm: VirtualMachine) -> Self {
Self { inner: Arc::new(Mutex::new(ServerStatsInner::new(vm))) }
pub fn new(vm: VirtualMachine, vm_va_size: usize) -> Self {
Self { inner: Arc::new(Mutex::new(ServerStatsInner::new(vm, vm_va_size))) }
}

/// Increments the number of times the managed instance was reset.
Expand All @@ -117,14 +149,20 @@ impl Producer for ServerStats {
fn produce(
&mut self,
) -> Result<Box<dyn Iterator<Item = Sample> + 'static>, MetricsError> {
let run_count = {
let inner = self.inner.lock().unwrap();
std::iter::once(Sample::new(
let samples = {
let mut inner = self.inner.lock().unwrap();
inner.refresh_vmm_vss();
let run_count = std::iter::once(Sample::new(
&inner.virtual_machine,
&inner.run_count,
)?)
)?);
let vss = std::iter::once(Sample::new(
&inner.virtual_machine,
&inner.vmm_vss,
)?);
run_count.chain(vss)
};
Ok(Box::new(run_count))
Ok(Box::new(samples))
}
}

Expand Down
131 changes: 131 additions & 0 deletions bin/propolis-server/src/lib/stats/process.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use std::io::Read;
use std::time::{Duration, Instant};

use anyhow::Context;
use tokio::sync::watch;
use zerocopy::{AsBytes, FromBytes, FromZeroes};

#[derive(Debug, Default)]
pub(crate) struct ProcessStats {
pub(crate) measurement_time: Duration,
pub(crate) rss: usize,
pub(crate) vss: usize,
}

impl ProcessStats {
pub fn new() -> watch::Receiver<ProcessStats> {
let (tx, rx) = watch::channel::<ProcessStats>(ProcessStats::default());

tokio::task::spawn(process_stats_task(tx));

rx
}
}

const PRFNSZ: usize = 16;
const PRARGSZ: usize = 80;

/// From `sys/types.h`
#[allow(non_camel_case_types)]
type taskid_t = i32;
/// From `sys/time_impl.h`
#[allow(non_camel_case_types)]
type timespec_t = [i64; 2];
/// From `sys/time_impl.h`
#[allow(non_camel_case_types)]
type timestruc_t = timespec_t;
/// From `sys/types.h`
#[allow(non_camel_case_types)]
type dev_t = u64;

/// `psinfo`'s definition depends on the data model reported by illumos. This is
/// in line with the 64-bit version of the struct, and ignores a few fields at
/// the end of the struct.
#[derive(Copy, Clone, Debug, FromZeroes, AsBytes, FromBytes)]
#[cfg(target_arch = "x86_64")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't the whole module have this cfg? none of the code that references psinfo, which is ... all of it ... will compile if this isn't there...

Copy link
Member Author

@iximeow iximeow May 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on the other hand, nothing else here is dependent on the target architecture - i was kind of thinking that given an aarch64 target or something we'd be able to fill in the right struct for psinfo on that arch. but, yeah, as-is this just breaks even compiling propolis-server on other architectures.

i'll cfg(target_arch = "x86_64") the psinfo read and have it return 0 with a warn!() on other architectures. seeing that would imply running propolis-server on non-x86 targets which would be super neat..

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If memory serves, there's other stuff that will just panic on any other architecture (certainly I believe all the CPUID stuff will), but that seems fine to me!

#[repr(C)]
struct psinfo {
pr_flag: i32,
pr_nlwp: i32,
pr_pid: u32,
pr_ppid: u32,
pr_pgid: u32,
pr_sid: u32,
pr_uid: u32,
pr_euid: u32,
pr_gid: u32,
pr_egid: u32,
pr_addr: usize,
pr_size: usize,
pr_rssize: usize,
// From `struct psinfo`. This seems to be present to ensure that `pr_ttydev`
// is 64-bit aligned even on 32-bit targets.
pr_pad1: usize,
pr_ttydev: dev_t,
pr_pctcpu: u16,
pr_pctmem: u16,
// This padding is not explicitly present in illumos' `struct procfs`, but
// is none the less there due to C struct layout rules.
_pad2: u32,
pr_start: timestruc_t,
pr_time: timestruc_t,
pr_ctime: timestruc_t,
pr_fname: [u8; PRFNSZ],
pr_psargs: [u8; PRARGSZ],
pr_wstat: u32,
pr_argc: u32,
pr_argv: usize,
pr_envp: usize,
pr_dmodel: u8,
// More padding from `struct psinfo`.
pr_pad2: [u8; 3],
pr_taskid: taskid_t,
}

pub fn process_stats() -> anyhow::Result<ProcessStats> {
let mut psinfo_file = std::fs::File::open("/proc/self/psinfo")?;

let mut stats = ProcessStats {
measurement_time: Duration::ZERO,
vss: 0,
rss: 0,
};

let mut info: psinfo = FromZeroes::new_zeroed();

let stats_read_start = Instant::now();

psinfo_file.read(info.as_bytes_mut())
.context("reading struct psinfo from file")?;

stats.measurement_time = stats_read_start.elapsed();

stats.vss = info.pr_size;
stats.rss = info.pr_rssize;

Ok(stats)
}

async fn process_stats_task(tx: watch::Sender<ProcessStats>) {
while !tx.is_closed() {
let new_process_stats = tokio::task::spawn_blocking(process_stats)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not use tokio::fs rather than spawn_blockinging? not a big deal as tokio::fs is essentially spawn_blocking anyway, but might be a bit nicer...

.await
.expect("collecting address space stats does not panic")
.expect("process_stats() does not error");

// [there isn't a nice way to plumb a slog Logger here, so this is an
// eprintln for demonstrative purposes but otherwise should be removed.]
eprintln!(
"Sending new stats at least.. {:?}, took {}ms",
new_process_stats,
new_process_stats.measurement_time.as_millis()
);
Comment on lines +118 to +124
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, is it really that painful to pass a Logger into ServerStats::new?

tx.send_replace(new_process_stats);

tokio::time::sleep(std::time::Duration::from_millis(15000)).await;
}
Comment on lines +112 to +128
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

turbo nit, not actually important: consider using tokio::time::interval rather than sleep, it should be a bit more efficient as it reuses the same entry in the tokio timer wheel every time:

Suggested change
while !tx.is_closed() {
let new_process_stats = tokio::task::spawn_blocking(process_stats)
.await
.expect("collecting address space stats does not panic")
.expect("process_stats() does not error");
// [there isn't a nice way to plumb a slog Logger here, so this is an
// eprintln for demonstrative purposes but otherwise should be removed.]
eprintln!(
"Sending new stats at least.. {:?}, took {}ms",
new_process_stats,
new_process_stats.measurement_time.as_millis()
);
tx.send_replace(new_process_stats);
tokio::time::sleep(std::time::Duration::from_millis(15000)).await;
}
let mut interval = tokio::time::interval(std::time::Duration::from_millis(15000);
while !tx.is_closed() {
interval.tick().await;
let new_process_stats = tokio::task::spawn_blocking(process_stats)
.await
.expect("collecting address space stats does not panic")
.expect("process_stats() does not error");
// [there isn't a nice way to plumb a slog Logger here, so this is an
// eprintln for demonstrative purposes but otherwise should be removed.]
eprintln!(
"Sending new stats at least.. {:?}, took {}ms",
new_process_stats,
new_process_stats.measurement_time.as_millis()
);
tx.send_replace(new_process_stats);
}

}
3 changes: 3 additions & 0 deletions bin/propolis-server/src/lib/stats/virtual_machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ use super::kstat_types::{KstatList, KstatTarget};
// `./omicron/oximeter/oximeter/schema/virtual-machine.toml`.
oximeter::use_timeseries!("virtual-machine.toml");
pub use self::virtual_machine::Reset;
// [this won't exist in CI until the oximeter schema is updated. hacked on this
// locally with Cargo.toml patching.]
pub use self::virtual_machine::VmmVss;
use self::virtual_machine::{
VcpuUsage, VirtualMachine as VirtualMachineTarget,
};
Expand Down
4 changes: 3 additions & 1 deletion bin/propolis-server/src/lib/vm/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ impl VmServices {
cfg,
registry,
vm_objects.instance_spec(),
vm_objects.machine().map_physmem.virtual_address_size(),
vm_properties,
)
.await
Expand Down Expand Up @@ -119,6 +120,7 @@ async fn register_oximeter_producer(
cfg: &MetricsEndpointConfig,
registry: &ProducerRegistry,
spec: &Spec,
vm_va_size: usize,
vm_properties: &InstanceProperties,
) -> OximeterState {
let mut oximeter_state = OximeterState::default();
Expand Down Expand Up @@ -152,7 +154,7 @@ async fn register_oximeter_producer(
// Assign our own metrics production for this VM instance to the
// registry, letting the server actually return them to oximeter when
// polled.
let stats = ServerStats::new(virtual_machine);
let stats = ServerStats::new(virtual_machine, vm_va_size);
if let Err(e) = registry.register_producer(stats.clone()) {
error!(
log,
Expand Down
4 changes: 2 additions & 2 deletions lib/propolis/src/util/aspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ fn safe_end(start: usize, len: usize) -> Option<usize> {
// Flatten the K/V nested tuple
fn kv_flatten<'a, T>(i: (&'a usize, &'a (usize, T))) -> SpaceItem<'a, T> {
let start = *i.0;
let end = (i.1).0;
let size = (i.1).0;
let item = &(i.1).1;
(start, end, item)
(start, size, item)
}

/// Iterator for all items in an [ASpace], constructed by [ASpace::iter].
Expand Down
13 changes: 13 additions & 0 deletions lib/propolis/src/vmm/mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,19 @@ impl PhysMap {
let mut guard = self.map.lock().unwrap();
guard.clear();
}

/// Sum the sizes of virtual address ranges supporting segments of memory
/// for this VM.
pub fn virtual_address_size(&self) -> usize {
let guard = self.map.lock().unwrap();
let mut va_size = 0usize;

for (_start, size, _item) in guard.iter() {
va_size += size;
}

va_size
}
}

#[cfg(test)]
Expand Down