Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libazureinit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ tokio-util = "0.7"
sysinfo = "=0.36"
anyhow = "1"
fstab = "0.4.0"
fs2 = "0.4"
toml = "0.9"
regex = "1"
lazy_static = "1.4"
Expand Down
8 changes: 8 additions & 0 deletions libazureinit/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ pub enum Error {
LoadSshdConfig { details: String },
#[error("unhandled error: {details}")]
UnhandledError { details: String },
#[error("VM ID is unavailable")]
VmIdUnavailable,
#[error("No provisioning state found. Run provisioning first.")]
NoProvisioningState,
#[error("Provisioning status has already been reported to Azure")]
AlreadyReported,
#[error("Invalid state file: file is empty or malformed")]
InvalidStateFile,
}

impl From<tokio::time::error::Elapsed> for Error {
Expand Down
142 changes: 140 additions & 2 deletions libazureinit/src/health.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,57 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Health reporting for Azure provisioning status.
//!
//! This module handles reporting provisioning status (success/failure) to Azure
//! through two parallel channels: the WireServer HTTP endpoint and the Hyper-V KVP
//! (Key-Value Pair) mechanism.
//!
//! # Reporting Channels
//!
//! ## WireServer HTTP Endpoint
//! Reports are sent as JSON payloads to the Azure WireServer health endpoint via HTTP POST.
//! The internal `_report()` function handles this communication with automatic retries for
//! transient failures (503, 500, 429).
//!
//! ## Hyper-V KVP (Key-Value Pair)
//! Reports are also written to the Hyper-V KVP guest pool file (`/var/lib/hyperv/.kvp_pool_1`).
//! This happens automatically via the tracing infrastructure when events include a
//! `health_report` field. The KVP layer monitors these events and writes them as
//! `PROVISIONING_REPORT` entries.
//!
//! # Usage Patterns
//!
//! ## Direct Reporting
//! - [`report_ready()`] - Report success with explicit parameters
//! - [`report_failure()`] - Report failure with explicit error string
//!
//! ## State-Based Reporting (CLI)
//! Use when reporting independently from provisioning.
//! - [`report_ready_from_state()`] - Checks the `.provisioned` file, then generates and reports success
//! - [`report_failure_from_state()`] - Reads `.failed` file and reports failure
//!
//! # State Files
//!
//! Provisioning state is tracked using files in `/var/lib/azure-init/`:
//! - `{vm_id}.provisioned` - Created on successful provisioning, contains success report
//! - `{vm_id}.failed` - Created on failed provisioning, contains encoded error details
//!
//! Each file can be marked as "REPORTED" by appending a marker after successfully
//! sending to Azure, preventing duplicate reports.
//!
//! # Report Format
//!
//! Reports are pipe-delimited key-value strings:
//! ```text
//! result=success|agent=Azure-Init/0.1.0|vm_id=...|timestamp=...
//! ```
//!
//! Success reports are generated internally by `encoded_success_report()`.
//! Error reports are generated by [`Error::as_encoded_report()`](crate::error::Error::as_encoded_report).
//! Both use [`encode_report()`] for consistent formatting.

use fs2::FileExt;
use reqwest::{
header::{HeaderMap, HeaderValue, USER_AGENT},
Client,
Expand All @@ -11,10 +62,15 @@ use tracing::instrument;
use chrono::Utc;
use reqwest::StatusCode;
use serde_json::json;
use std::fs::{self, OpenOptions};
use std::io::Write;

use crate::config::Config;
use crate::error::Error;
use crate::http;
use crate::status::{
get_provisioning_dir, get_vm_id, has_been_reported, mark_reported,
};

#[derive(Debug)]
enum ProvisioningState {
Expand Down Expand Up @@ -49,7 +105,7 @@ impl std::fmt::Display for ProvisioningSubStatus {
}

/// Constructs a KVP entry representing a successful provisioning event.
pub fn encoded_success_report(
pub(crate) fn encoded_success_report(
vm_id: &str,
optional_key_value: Option<(&str, &str)>,
) -> String {
Expand Down Expand Up @@ -126,7 +182,89 @@ pub async fn report_in_progress(
.await
}

/// Internal helper that handles all HTTP details for health reporting to the wireserver.
/// Helper function to load and validate a state file hasn't been reported yet.
///
/// Returns the config, vm_id, and file path if validation succeeds.
fn load_state_file(
extension: &str,
) -> Result<(Config, String, std::path::PathBuf), Error> {
let config = Config::load(None).unwrap_or_default();
let vm_id = get_vm_id().ok_or(Error::VmIdUnavailable)?;
let file_path = get_provisioning_dir(Some(&config))
.join(format!("{vm_id}.{extension}"));

if !file_path.exists() {
tracing::error!("No {} state file found at {:?}", extension, file_path);
return Err(Error::NoProvisioningState);
}

if has_been_reported(&file_path) {
tracing::info!("Provisioning {} has already been reported", extension);
return Err(Error::AlreadyReported);
}

Ok((config, vm_id, file_path))
}

/// Reports provisioning success based on the `.provisioned` state file.
///
/// Steps:
/// 1. `load_state_file("provisioned")` loads the config and VM ID and rejects
///    a missing or already-reported state file.
/// 2. A fresh success report is generated with `encoded_success_report()`
///    (the file's previous contents are not used as the report).
/// 3. The report is sent through `_report()` with `ProvisioningState::Ready`.
/// 4. The state file is rewritten as the report followed by a `REPORTED` line.
///
/// # Errors
///
/// Propagates any error from `load_state_file()`, from `_report()`, and from
/// opening, locking, truncating or writing the state file. Note that a
/// file error in step 4 is returned *after* the report has already been sent.
pub async fn report_ready_from_state() -> Result<(), Error> {
    let (config, vm_id, file_path) = load_state_file("provisioned")?;

    // Generate the success report
    let success_report = encoded_success_report(&vm_id, None);

    // Send it to Azure
    tracing::info!("Reporting provisioning success to Azure");
    _report(
        ProvisioningState::Ready,
        None,
        Some(success_report.clone()),
        &config,
    )
    .await?;

    // Open WITHOUT `truncate(true)`: truncation at open time would wipe the
    // file before the exclusive lock is held, so the lock would not protect
    // the existing contents from a concurrent reader/writer.
    let mut file = OpenOptions::new().write(true).open(&file_path)?;

    file.lock_exclusive()?;

    // With the lock held, discard the old contents. The write cursor of a
    // freshly opened file is at offset 0, so the report lands at the start.
    file.set_len(0)?;
    write!(file, "{success_report}\nREPORTED")?;

    // The lock is expected to be released when `file` is closed on drop.
    tracing::info!("Successfully reported provisioning success");
    Ok(())
}

/// Reports provisioning failure based on the `.failed` state file.
///
/// Steps:
/// 1. `load_state_file("failed")` loads the config and rejects a missing or
///    already-reported state file.
/// 2. The first line of the state file is taken as the encoded error report.
/// 3. The report is sent with `report_failure()`.
/// 4. The state file is marked via `mark_reported()`.
///
/// # Errors
///
/// - [`Error::InvalidStateFile`] if the file is empty or its first line is
///   blank, so that an empty report is never sent.
/// - Any error propagated from `load_state_file()`, from reading the file,
///   from `report_failure()`, or from `mark_reported()`.
pub async fn report_failure_from_state() -> Result<(), Error> {
    let (config, _, file_path) = load_state_file("failed")?;

    // Read the error report (first line of file). A blank first line carries
    // no report, so it is treated the same as an empty file.
    let file_content = fs::read_to_string(&file_path)?;
    let error_report = file_content
        .lines()
        .next()
        .filter(|line| !line.trim().is_empty())
        .ok_or(Error::InvalidStateFile)?
        .to_string();

    // Send it to Azure
    tracing::info!("Reporting provisioning failure to Azure");
    report_failure(error_report, &config).await?;

    // Mark as reported
    mark_reported(&file_path)?;

    tracing::info!("Successfully reported provisioning failure");
    Ok(())
}

/// Internal function that sends health reports to WireServer via HTTP POST.
///
/// Builds the JSON payload, sets required headers, and performs retries as needed.
#[instrument(
Expand Down
4 changes: 3 additions & 1 deletion libazureinit/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod config;
pub use config::{HostnameProvisioner, PasswordProvisioner, UserProvisioner};
pub mod error;
pub mod health;
pub use health::{report_failure_from_state, report_ready_from_state};
pub(crate) mod http;
pub mod imds;
mod kvp;
Expand All @@ -20,9 +21,10 @@ pub use provision::{
user::User,
Provision,
};
mod status;
pub mod status;
pub use status::{
get_vm_id, is_provisioning_complete, mark_provisioning_complete,
mark_provisioning_failure, mark_reported,
};

#[cfg(test)]
Expand Down
Loading