diff --git a/libazureinit/Cargo.toml b/libazureinit/Cargo.toml index 46764203..6fb0c2e0 100644 --- a/libazureinit/Cargo.toml +++ b/libazureinit/Cargo.toml @@ -29,6 +29,7 @@ tokio-util = "0.7" sysinfo = "=0.36" anyhow = "1" fstab = "0.4.0" +fs2 = "0.4" toml = "0.9" regex = "1" lazy_static = "1.4" diff --git a/libazureinit/src/error.rs b/libazureinit/src/error.rs index 823dbd08..6ed5fe12 100644 --- a/libazureinit/src/error.rs +++ b/libazureinit/src/error.rs @@ -79,6 +79,14 @@ pub enum Error { LoadSshdConfig { details: String }, #[error("unhandled error: {details}")] UnhandledError { details: String }, + #[error("VM ID is unavailable")] + VmIdUnavailable, + #[error("No provisioning state found. Run provisioning first.")] + NoProvisioningState, + #[error("Provisioning status has already been reported to Azure")] + AlreadyReported, + #[error("Invalid state file: file is empty or malformed")] + InvalidStateFile, } impl From for Error { diff --git a/libazureinit/src/health.rs b/libazureinit/src/health.rs index 3579db19..68b856a0 100644 --- a/libazureinit/src/health.rs +++ b/libazureinit/src/health.rs @@ -1,6 +1,57 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +//! Health reporting for Azure provisioning status. +//! +//! This module handles reporting provisioning status (success/failure) to Azure +//! through two parallel channels: the WireServer HTTP endpoint and the Hyper-V KVP +//! (Key-Value Pair) mechanism. +//! +//! # Reporting Channels +//! +//! ## WireServer HTTP Endpoint +//! Reports are sent as JSON payloads to the Azure WireServer health endpoint via HTTP POST. +//! The internal `_report()` function handles this communication with automatic retries for +//! transient failures (503, 500, 429). +//! +//! ## Hyper-V KVP (Key-Value Pair) +//! Reports are also written to the Hyper-V KVP guest pool file (`/var/lib/hyperv/.kvp_pool_1`). +//! This happens automatically via the tracing infrastructure when events include a +//! 
`health_report` field. The KVP layer monitors these events and writes them as +//! `PROVISIONING_REPORT` entries. +//! +//! # Usage Patterns +//! +//! ## Direct Reporting +//! - [`report_ready()`] - Report success with explicit parameters +//! - [`report_failure()`] - Report failure with explicit error string +//! +//! ## State-Based Reporting (CLI) +//! Use when reporting independently from provisioning. +//! - [`report_ready_from_state()`] - Reads `.provisioned` file and reports success +//! - [`report_failure_from_state()`] - Reads `.failed` file and reports failure +//! +//! # State Files +//! +//! Provisioning state is tracked using files in the azure-init data directory (`/var/lib/azure-init/` by default): +//! - `{vm_id}.provisioned` - Created on successful provisioning; the success report is written to it once reported +//! - `{vm_id}.failed` - Created on failed provisioning, contains encoded error details +//! +//! Each file can be marked as "REPORTED" by appending a marker after successfully +//! sending to Azure, preventing duplicate reports. +//! +//! # Report Format +//! +//! Reports are pipe-delimited key-value strings: +//! ```text +//! result=success|agent=Azure-Init/0.1.0|vm_id=...|timestamp=... +//! ``` +//! +//! Success reports are generated internally by `encoded_success_report()`. +//! Error reports are generated by [`Error::as_encoded_report()`](crate::error::Error::as_encoded_report). +//! Both use [`encode_report()`] for consistent formatting. 
+ +use fs2::FileExt; use reqwest::{ header::{HeaderMap, HeaderValue, USER_AGENT}, Client, @@ -11,10 +62,15 @@ use tracing::instrument; use chrono::Utc; use reqwest::StatusCode; use serde_json::json; +use std::fs::{self, OpenOptions}; +use std::io::Write; use crate::config::Config; use crate::error::Error; use crate::http; +use crate::status::{ + get_provisioning_dir, get_vm_id, has_been_reported, mark_reported, +}; #[derive(Debug)] enum ProvisioningState { @@ -49,7 +105,7 @@ impl std::fmt::Display for ProvisioningSubStatus { } /// Constructs a KVP entry representing a successful provisioning event. -pub fn encoded_success_report( +pub(crate) fn encoded_success_report( vm_id: &str, optional_key_value: Option<(&str, &str)>, ) -> String { @@ -126,7 +182,89 @@ pub async fn report_in_progress( .await } -/// Internal helper that handles all HTTP details for health reporting to the wireserver. +/// Helper function to load and validate a state file hasn't been reported yet. +/// +/// Returns the config, vm_id, and file path if validation succeeds. +fn load_state_file( + extension: &str, +) -> Result<(Config, String, std::path::PathBuf), Error> { + let config = Config::load(None).unwrap_or_default(); + let vm_id = get_vm_id().ok_or(Error::VmIdUnavailable)?; + let file_path = get_provisioning_dir(Some(&config)) + .join(format!("{vm_id}.{extension}")); + + if !file_path.exists() { + tracing::error!("No {} state file found at {:?}", extension, file_path); + return Err(Error::NoProvisioningState); + } + + if has_been_reported(&file_path) { + tracing::info!("Provisioning {} has already been reported", extension); + return Err(Error::AlreadyReported); + } + + Ok((config, vm_id, file_path)) +} + +/// Reports provisioning success by reading from the `.provisioned` state file. +/// +/// Loads config, reads the state file, sends the report to Azure, and marks as reported. 
+///
+/// # Errors
+/// Returns an error if the VM ID or an unreported `.provisioned` file is
+/// missing, or if sending the report or rewriting the state file fails.
+pub async fn report_ready_from_state() -> Result<(), Error> {
+    let (config, vm_id, file_path) = load_state_file("provisioned")?;
+    let success_report = encoded_success_report(&vm_id, None);
+
+    tracing::info!("Reporting provisioning success to Azure");
+    _report(
+        ProvisioningState::Ready,
+        None,
+        Some(success_report.clone()),
+        &config,
+    )
+    .await?;
+
+    // Persist the report plus the REPORTED marker. The file is opened
+    // without `truncate(true)` because truncating at open time would wipe
+    // it before the exclusive lock is held; lock first, then clear it.
+    let mut file = OpenOptions::new().write(true).open(&file_path)?;
+    file.lock_exclusive()?;
+    file.set_len(0)?;
+    // End with a newline so any later append starts on a fresh line.
+    writeln!(file, "{success_report}\nREPORTED")?;
+
+    tracing::info!("Successfully reported provisioning success");
+    Ok(())
+}
+
+/// Reports provisioning failure by reading from the `.failed` state file.
+///
+/// Loads config, reads the error report from the state file (first line), sends to Azure, and marks as reported.
+pub async fn report_failure_from_state() -> Result<(), Error> {
+    let (config, _vm_id, file_path) = load_state_file("failed")?;
+
+    // The report is the first line; a blank one is as invalid as none.
+    let file_content = fs::read_to_string(&file_path)?;
+    let error_report = file_content
+        .lines()
+        .next()
+        .map(str::trim)
+        .filter(|line| !line.is_empty())
+        .ok_or(Error::InvalidStateFile)?
+        .to_string();
+
+    tracing::info!("Reporting provisioning failure to Azure");
+    report_failure(error_report, &config).await?;
+
+    // Mark as reported only after the report was sent successfully.
+    mark_reported(&file_path)?;
+    tracing::info!("Successfully reported provisioning failure");
+    Ok(())
+}
+
+/// Internal function that sends health reports to WireServer via HTTP POST.
 ///
 /// Builds the JSON payload, sets required headers, and performs retries as needed.
#[instrument( diff --git a/libazureinit/src/lib.rs b/libazureinit/src/lib.rs index 3b3f85c0..55a84ada 100644 --- a/libazureinit/src/lib.rs +++ b/libazureinit/src/lib.rs @@ -8,6 +8,7 @@ pub mod config; pub use config::{HostnameProvisioner, PasswordProvisioner, UserProvisioner}; pub mod error; pub mod health; +pub use health::{report_failure_from_state, report_ready_from_state}; pub(crate) mod http; pub mod imds; mod kvp; @@ -20,9 +21,10 @@ pub use provision::{ user::User, Provision, }; -mod status; +pub mod status; pub use status::{ get_vm_id, is_provisioning_complete, mark_provisioning_complete, + mark_provisioning_failure, mark_reported, }; #[cfg(test)] diff --git a/libazureinit/src/status.rs b/libazureinit/src/status.rs index d8c103eb..e9657b96 100644 --- a/libazureinit/src/status.rs +++ b/libazureinit/src/status.rs @@ -19,7 +19,9 @@ //! - On **reboot**, if the same VM ID exists, provisioning is skipped. //! - If the **VM ID changes** (e.g., due to VM cloning), provisioning runs again. +use fs2::FileExt; use std::fs::{self, OpenOptions}; +use std::io::Write; use std::os::unix::fs::{OpenOptionsExt, PermissionsExt}; use std::path::{Path, PathBuf}; use uuid::Uuid; @@ -31,7 +33,7 @@ use crate::error::Error; /// /// If a [`Config`] is provided, this function returns `config.azure_init_data_dir.path`. /// Otherwise, it falls back to the default `/var/lib/azure-init/`. -fn get_provisioning_dir(config: Option<&Config>) -> PathBuf { +pub fn get_provisioning_dir(config: Option<&Config>) -> PathBuf { config .map(|cfg| cfg.azure_init_data_dir.path.clone()) .unwrap_or_else(|| PathBuf::from(DEFAULT_AZURE_INIT_DATA_DIR)) @@ -215,6 +217,7 @@ pub fn is_provisioning_complete(config: Option<&Config>, vm_id: &str) -> bool { /// # Parameters /// - `config`: An optional configuration reference used to determine the provisioning directory. /// If `None`, the default provisioning directory defined by `DEFAULT_AZURE_INIT_DATA_DIR` is used. 
+/// - `vm_id`: The VM ID for this provisioning instance.
 ///
 /// # Returns
 /// - `Ok(())` if the provisioning status file was successfully created.
@@ -234,7 +237,9 @@ pub fn mark_provisioning_complete(
         .mode(0o600) // Ensures correct permissions from the start
         .open(&file_path)
     {
-        Ok(_) => {
+        Ok(file) => {
+            file.lock_exclusive()?;
+
             tracing::info!(
                 target: "libazureinit::status::success",
                 "Provisioning complete. File created: {}",
@@ -254,6 +259,106 @@ pub fn mark_provisioning_complete(
     Ok(())
 }
 
+/// Marks provisioning as failed by creating a failure status file with the error report.
+///
+/// Ensures the provisioning directory exists, then writes the encoded error
+/// report to `{vm_id}.failed`, replacing any previous contents under a lock.
+///
+/// # Parameters
+/// - `config`: An optional configuration reference used to determine the provisioning directory.
+///   If `None`, the default provisioning directory defined by `DEFAULT_AZURE_INIT_DATA_DIR` is used.
+/// - `vm_id`: The VM ID for this provisioning instance.
+/// - `error_report`: The encoded error report string to write to the file.
+///
+/// # Returns
+/// - `Ok(())` if the failure status file was successfully created.
+/// - `Err(Error)` if creating, locking, or writing the failure file failed.
+pub fn mark_provisioning_failure(
+    config: Option<&Config>,
+    vm_id: &str,
+    error_report: &str,
+) -> Result<(), Error> {
+    check_provision_dir(config)?;
+    let file_path =
+        get_provisioning_dir(config).join(format!("{vm_id}.failed"));
+
+    match OpenOptions::new()
+        .create(true)
+        .write(true)
+        .truncate(false) // Cleared below, once the exclusive lock is held
+        .mode(0o600)
+        .open(&file_path)
+    {
+        Ok(mut file) => {
+            // Take the lock before discarding any previous contents.
+            file.lock_exclusive()?;
+            file.set_len(0)?;
+
+            writeln!(file, "{error_report}")?;
+            tracing::info!(
+                target: "libazureinit::status::failure",
+                "Provisioning failure recorded. File created: {}",
+                file_path.display()
+            );
+        }
+        Err(error) => {
+            tracing::error!(
+                ?error,
+                file_path=?file_path,
+                "Failed to create provisioning failure file"
+            );
+            return Err(error.into());
+        }
+    }
+
+    Ok(())
+}
+
+/// Checks if a provisioning state file has been reported to Azure.
+///
+/// This function reads the file and checks if it contains the "REPORTED" marker
+/// on a line by itself, indicating that the provisioning status has already been
+/// sent to the Azure health endpoint.
+///
+/// # Parameters
+/// - `file_path`: The path to the provisioning state file (`.provisioned` or `.failed`).
+///
+/// # Returns
+/// - `true` if the file contains the "REPORTED" marker.
+/// - `false` if the file does not contain the marker or cannot be read.
+pub fn has_been_reported(file_path: &Path) -> bool {
+    fs::read_to_string(file_path)
+        .map(|content| content.lines().any(|line| line.trim() == "REPORTED"))
+        .unwrap_or(false)
+}
+
+/// Marks a provisioning state file as reported by appending the "REPORTED" marker.
+///
+/// This function appends a new line with "REPORTED" to the file, indicating that
+/// the provisioning status has been successfully sent to the Azure health endpoint.
+///
+/// # Parameters
+/// - `file_path`: The path to the provisioning state file (`.provisioned` or `.failed`).
+///
+/// # Returns
+/// - `Ok(())` if the marker was successfully appended.
+/// - `Err(Error)` if the file could not be opened or written to.
+pub fn mark_reported(file_path: &Path) -> Result<(), Error> {
+    let mut file = OpenOptions::new().append(true).open(file_path)?;
+    file.lock_exclusive()?;
+    // `has_been_reported` only matches whole lines, so start a new line
+    // first when the existing content does not already end with one.
+    let mid_line = fs::read(file_path)?.last().is_some_and(|b| *b != b'\n');
+    writeln!(file, "{}REPORTED", if mid_line { "\n" } else { "" })?;
+
+    tracing::info!(
+        target: "libazureinit::status::reported",
+        "Marked provisioning status as reported: {}",
+        file_path.display()
+    );
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -416,4 +521,123 @@ mod tests {
             "Should not byte-swap for Gen2"
         );
     }
+
+    #[test]
+    fn test_mark_provisioning_failure() {
+        let (test_config, test_dir) = create_test_config();
+        let vm_id = "00000000-0000-0000-0000-000000000000";
+        let error_report =
+            "result=error|reason=test_failure|vm_id=00000000-0000-0000-0000-000000000000";
+
+        let file_path = test_dir.path().join(format!("{}.failed", vm_id));
+        assert!(
+            !file_path.exists(),
+            "Failed file should not exist before marking"
+        );
+
+        mark_provisioning_failure(Some(&test_config), vm_id, error_report)
+            .unwrap();
+
+        assert!(file_path.exists(), "Failed file should be created");
+
+        // Verify content
+        let content = fs::read_to_string(&file_path).unwrap();
+        assert!(
+            content.contains(error_report),
+            "File should contain the error report"
+        );
+    }
+
+    #[test]
+    fn test_has_been_reported_false() {
+        let tmpdir = TempDir::new().unwrap();
+        let file_path = tmpdir.path().join("test.provisioned");
+
+        // File with content but no REPORTED marker
+        fs::write(&file_path, "result=success|agent=Azure-Init/test").unwrap();
+
+        assert!(
+            !has_been_reported(&file_path),
+            "Should return false when REPORTED marker is absent"
+        );
+    }
+
+    #[test]
+    fn test_has_been_reported_true() {
+        let tmpdir = TempDir::new().unwrap();
+        let file_path = tmpdir.path().join("test.provisioned");
+
+        // File with REPORTED marker
+        fs::write(&file_path, "result=success|agent=Azure-Init/test\nREPORTED")
+            .unwrap();
+
+        assert!(
+            has_been_reported(&file_path),
+            "Should return true when REPORTED marker is present"
+        );
+    }
+
+    #[test]
+    fn test_mark_reported_without_trailing_newline() {
+        let tmpdir = TempDir::new().unwrap();
+        let file_path = tmpdir.path().join("test.failed");
+
+        // No trailing newline: the marker must still get its own line
+        fs::write(&file_path, "result=error|reason=test").unwrap();
+
+        mark_reported(&file_path).unwrap();
+
+        assert!(has_been_reported(&file_path));
+    }
+
+    #[test]
+    fn 
test_has_been_reported_nonexistent_file() { + let tmpdir = TempDir::new().unwrap(); + let file_path = tmpdir.path().join("nonexistent.provisioned"); + + assert!( + !has_been_reported(&file_path), + "Should return false for nonexistent file" + ); + } + + #[test] + fn test_mark_reported() { + let tmpdir = TempDir::new().unwrap(); + let file_path = tmpdir.path().join("test.provisioned"); + + // Create file with initial content + fs::write(&file_path, "result=success|agent=Azure-Init/test").unwrap(); + + // Mark as reported + mark_reported(&file_path).unwrap(); + + // Verify REPORTED marker was added + let content = fs::read_to_string(&file_path).unwrap(); + assert!( + content.contains("REPORTED"), + "File should contain REPORTED marker" + ); + assert!( + content.starts_with("result=success"), + "Original content should be preserved" + ); + } + + // Marking an already-reported file again must leave it detectable as reported + #[test] + fn test_mark_reported_idempotent() { + let tmpdir = TempDir::new().unwrap(); + let file_path = tmpdir.path().join("test.provisioned"); + + // Create file + fs::write(&file_path, "result=success|agent=Azure-Init/test").unwrap(); + + // Mark as reported twice + mark_reported(&file_path).unwrap(); + mark_reported(&file_path).unwrap(); + + // Verify file still valid + assert!(has_been_reported(&file_path)); + } } diff --git a/src/main.rs b/src/main.rs index 757834db..79445f14 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,14 +7,18 @@ use clap::{Parser, Subcommand}; use libazureinit::{ config::Config, error::Error as LibError, - get_vm_id, - health::{report_failure, report_ready}, + health::{ + report_failure, report_failure_from_state, report_ready, + report_ready_from_state, + }, imds::{query, InstanceMetadata}, - is_provisioning_complete, logging::setup_layers, - mark_provisioning_complete, media::{get_mount_device, mount_parse_ovf_env, Environment}, reqwest::{header, Client}, + status::{ + get_provisioning_dir, get_vm_id, is_provisioning_complete, + 
mark_provisioning_complete, mark_provisioning_failure, mark_reported, + }, Provision, User, }; use std::process::ExitCode; @@ -74,6 +78,11 @@ struct Cli { #[arg(long = "version", short = 'V', action = clap::ArgAction::SetTrue)] show_version: bool, + /// Report provisioning status to Azure after provisioning completes. + /// On success, reports ready. On failure, reports the error. + #[arg(long)] + report: bool, + #[command(subcommand)] command: Option, } @@ -87,6 +96,22 @@ enum Command { #[arg(long)] logs: bool, }, + /// Report provisioning status to Azure + #[command(subcommand)] + Report(ReportCommand), +} + +#[derive(Subcommand, Debug)] +enum ReportCommand { + /// Automatically detect provisioning state and report to Azure. + /// Checks for .provisioned or .failed files and reports accordingly. + Auto, + /// Report provisioning success to Azure. + /// Reads the .provisioned file and sends success status. + Ready, + /// Report provisioning failure to Azure. + /// Reads the .failed file and sends failure status. + Failure, } /// Attempts to find and parse provisioning data from an OVF environment. @@ -152,14 +177,15 @@ fn get_username( /// Cleans all provisioning state marker files from the azure-init data directory. /// -/// This removes all files ending in `.provisioned` from the directory specified -/// by `azure_init_data_dir` (typically `/var/lib/azure-init`). These marker files -/// indicate that provisioning has completed. Removing them allows azure-init to -/// re-run provisioning logic on the next boot. +/// This removes all files ending in `.provisioned` or `.failed` from the directory +/// specified by `azure_init_data_dir` (typically `/var/lib/azure-init`). These marker +/// files indicate provisioning completion or failure. Removing them allows azure-init +/// to re-run provisioning logic on the next boot. 
#[instrument] fn clean_provisioning_status(config: &Config) -> Result<(), std::io::Error> { let data_dir = &config.azure_init_data_dir.path; - let mut found = false; + let mut found_provisioned = false; + let mut found_failed = false; for entry in std::fs::read_dir(data_dir)? { let path = match entry { @@ -174,9 +200,19 @@ fn clean_provisioning_status(config: &Config) -> Result<(), std::io::Error> { } }; - if path.extension().is_some_and(|ext| ext == "provisioned") { - found = true; + let should_remove = path.extension().is_some_and(|ext| { + if ext == "provisioned" { + found_provisioned = true; + true + } else if ext == "failed" { + found_failed = true; + true + } else { + false + } + }); + if should_remove { match std::fs::remove_file(&path) { Ok(_) => { tracing::info!( @@ -202,9 +238,9 @@ fn clean_provisioning_status(config: &Config) -> Result<(), std::io::Error> { } } - if !found { + if !found_provisioned && !found_failed { tracing::info!( - "No provisioning marker files (*.provisioned) found in {:?}", + "No provisioning marker files (*.provisioned or *.failed) found in {:?}", data_dir ); } @@ -212,6 +248,118 @@ fn clean_provisioning_status(config: &Config) -> Result<(), std::io::Error> { Ok(()) } +/// Handles the `azure-init report auto` command. +/// +/// Automatically detects provisioning state and reports to Azure. +/// Checks for `.failed` file first then `.provisioned` file. +/// Reports success or failure accordingly without requiring the user to know which. 
+async fn handle_report_auto() -> ExitCode { + tracing::info!("Attempting to auto-detect and report provisioning status"); + + match report_failure_from_state().await { + Ok(_) => { + tracing::info!( + "Successfully reported provisioning failure to Azure" + ); + return ExitCode::SUCCESS; + } + Err(LibError::NoProvisioningState) => { + tracing::debug!( + "No failure state found, checking for success state" + ); + } + Err(LibError::AlreadyReported) => { + tracing::info!("Provisioning failure has already been reported"); + return ExitCode::SUCCESS; + } + Err(e) => { + tracing::error!("Failed to report failure: {e:?}"); + return ExitCode::FAILURE; + } + } + + match report_ready_from_state().await { + Ok(_) => { + tracing::info!( + "Successfully reported provisioning success to Azure" + ); + ExitCode::SUCCESS + } + Err(LibError::NoProvisioningState) => { + tracing::error!("No provisioning state found (.failed or .provisioned). Run provisioning first."); + ExitCode::FAILURE + } + Err(LibError::AlreadyReported) => { + tracing::info!("Provisioning success has already been reported"); + ExitCode::SUCCESS + } + Err(e) => { + tracing::error!("Failed to report success: {e:?}"); + ExitCode::FAILURE + } + } +} + +/// Handles the `azure-init report ready` command to report provisioning success to Azure. +/// Reads the `.provisioned` file and sends the success status. +async fn handle_report_ready() -> ExitCode { + tracing::info!("Attempting to report provisioning success to Azure"); + + match report_ready_from_state().await { + Ok(_) => { + tracing::info!( + "Successfully reported provisioning success to Azure" + ); + ExitCode::SUCCESS + } + Err(LibError::NoProvisioningState) => { + tracing::error!("No .provisioned file found. 
Provisioning may not have completed successfully."); + ExitCode::FAILURE + } + Err(LibError::AlreadyReported) => { + tracing::info!( + "Provisioning success has already been reported to Azure" + ); + ExitCode::SUCCESS + } + Err(e) => { + tracing::error!("Failed to report provisioning success: {e:?}"); + ExitCode::FAILURE + } + } +} + +/// Handles the `azure-init report failure` command to report provisioning failure to Azure. +/// Reads the `.failed` file and sends the failure status with error details. +async fn handle_report_failure() -> ExitCode { + tracing::info!("Attempting to report provisioning failure to Azure"); + + match report_failure_from_state().await { + Ok(_) => { + tracing::info!( + "Successfully reported provisioning failure to Azure" + ); + ExitCode::SUCCESS + } + Err(LibError::NoProvisioningState) => { + tracing::error!( + "No .failed file found. Provisioning may not have failed." + ); + ExitCode::FAILURE + } + Err(LibError::AlreadyReported) => { + tracing::info!( + "Provisioning failure has already been reported to Azure" + ); + ExitCode::SUCCESS + } + Err(e) => { + tracing::error!("Failed to report provisioning failure: {e:?}"); + ExitCode::FAILURE + } + } +} + /// Cleans the azure-init log file defined in the configuration. 
/// /// This removes the log file at the path configured by `azure_init_log_path`, @@ -285,14 +433,13 @@ async fn main() -> ExitCode { details: format!("{error:?}"), }; - // Report the failure to the health endpoint + // Write failure state file let report_str = err.as_encoded_report(&vm_id); - let report_result = report_failure(report_str, &cfg).await; - - if let Err(report_error) = report_result { + if let Err(mark_err) = + mark_provisioning_failure(Some(&cfg), &vm_id, &report_str) + { tracing::warn!( - "Failed to send provisioning failure report: {:?}", - report_error + "Failed to mark provisioning failure: {mark_err:?}" ); } @@ -306,70 +453,122 @@ async fn main() -> ExitCode { config ); - let exit_code = if let Some(Command::Clean { logs }) = opts.command { - if clean_provisioning_status(&config).is_err() - || (logs && clean_log_file(&config).is_err()) - { - ExitCode::FAILURE - } else { - ExitCode::SUCCESS - } - } else if is_provisioning_complete(Some(&config), &vm_id) { - tracing::info!( - "Provisioning already completed earlier. Skipping provisioning." 
- ); - ExitCode::SUCCESS - } else { - let clone_config = config.clone(); - match provision(config, &vm_id, opts).await { - Ok(_) => { - let report_result = - report_ready(&clone_config, &vm_id, None).await; - - if let Err(report_error) = report_result { - tracing::warn!( - "Failed to send provisioning success report: {:?}", - report_error - ); - } - - tracing::info!("Provisioning completed successfully"); - + let exit_code = match opts.command { + Some(Command::Clean { logs }) => { + if clean_provisioning_status(&config).is_err() + || (logs && clean_log_file(&config).is_err()) + { + ExitCode::FAILURE + } else { ExitCode::SUCCESS } - Err(e) => { - eprintln!("{e:?}"); - - let report_str = e - .downcast_ref::() - .map(|lib_error| lib_error.as_encoded_report(&vm_id)) - .unwrap_or_else(|| { - LibError::UnhandledError { - details: format!("{e:?}"), + } + Some(Command::Report(report_cmd)) => match report_cmd { + ReportCommand::Auto => handle_report_auto().await, + ReportCommand::Ready => handle_report_ready().await, + ReportCommand::Failure => handle_report_failure().await, + }, + None => { + // Default behavior: provision if not already complete + if is_provisioning_complete(Some(&config), &vm_id) { + tracing::info!( + "Provisioning already completed earlier. Skipping provisioning." 
+ ); + return ExitCode::SUCCESS; + } + let clone_config = config.clone(); + let should_report = opts.report; + match provision(config, &vm_id, opts).await { + Ok(_) => { + tracing::info!("Provisioning completed successfully"); + + // Report success if --report flag is set + if should_report { + tracing::info!( + "Reporting provisioning success to Azure" + ); + if let Err(report_err) = + report_ready(&clone_config, &vm_id, None).await + { + tracing::error!( + "Failed to report success: {report_err:?}" + ); + return ExitCode::FAILURE; } - .as_encoded_report(&vm_id) - }); - let report_result = - report_failure(report_str, &clone_config).await; - - if let Err(report_error) = report_result { - tracing::warn!( - "Failed to send provisioning failure report: {:?}", - report_error - ); + + let file_path = + get_provisioning_dir(Some(&clone_config)) + .join(format!("{vm_id}.provisioned")); + if let Err(mark_err) = mark_reported(&file_path) { + tracing::warn!( + "Failed to mark as reported: {mark_err:?}" + ); + } + tracing::info!("Successfully reported provisioning success to Azure"); + } + + ExitCode::SUCCESS } + Err(e) => { + eprintln!("{e:?}"); + + let report_str = e + .downcast_ref::() + .map(|lib_error| lib_error.as_encoded_report(&vm_id)) + .unwrap_or_else(|| { + LibError::UnhandledError { + details: format!("{e:?}"), + } + .as_encoded_report(&vm_id) + }); + + if let Err(mark_err) = mark_provisioning_failure( + Some(&clone_config), + &vm_id, + &report_str, + ) { + tracing::error!( + "Failed to mark provisioning failure: {mark_err:?}" + ); + } - tracing::error!("Provisioning failed with error: {e:?}"); + tracing::error!("Provisioning failed with error: {e:?}"); + + // Report failure if --report flag is set + if should_report { + tracing::info!( + "Reporting provisioning failure to Azure" + ); + if let Err(report_err) = + report_failure(report_str.clone(), &clone_config) + .await + { + tracing::error!( + "Failed to report failure: {report_err:?}" + ); + } else { + let 
file_path = + get_provisioning_dir(Some(&clone_config)) + .join(format!("{vm_id}.failed")); + if let Err(mark_err) = mark_reported(&file_path) { + tracing::warn!("Failed to mark failure as reported: {mark_err:?}"); + } + tracing::info!("Successfully reported provisioning failure to Azure"); + } + } - let config: u8 = exitcode::CONFIG - .try_into() - .expect("Error code must be less than 256"); - match e.root_cause().downcast_ref::() { - Some(LibError::UserMissing { user: _ }) => { - ExitCode::from(config) + let config: u8 = exitcode::CONFIG + .try_into() + .expect("Error code must be less than 256"); + match e.root_cause().downcast_ref::() { + Some(LibError::UserMissing { user: _ }) => { + ExitCode::from(config) + } + Some(LibError::NonEmptyPassword) => { + ExitCode::from(config) + } + Some(_) | None => ExitCode::FAILURE, } - Some(LibError::NonEmptyPassword) => ExitCode::from(config), - Some(_) | None => ExitCode::FAILURE, } } } diff --git a/tests/cli.rs b/tests/cli.rs index c03a9366..86868b5f 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -74,9 +74,13 @@ fn setup_clean_test() -> Result< let log_file = temp_dir.path().join("azure-init.log"); fs::create_dir_all(&data_dir)?; + // Create both .provisioned and .failed files let provisioned_file = data_dir.join("vm-id.provisioned"); File::create(provisioned_file)?; + let failed_file = data_dir.join("vm-id.failed"); + fs::write(&failed_file, "result=error|reason=test")?; + let mut log = File::create(&log_file)?; writeln!(log, "fake log line")?; @@ -97,17 +101,22 @@ fn setup_clean_test() -> Result< Ok((temp_dir, data_dir, log_file, config_path)) } -// Ensures that the `clean` command removes only the provisioned file +// Ensures that the `clean` command removes both .provisioned and .failed files #[test] fn clean_removes_only_provision_files_without_log_arg( ) -> Result<(), Box> { let (_temp_dir, _data_dir, log_file, config_path) = setup_clean_test()?; let provisioned_file = _data_dir.join("vm-id.provisioned"); + let 
failed_file = _data_dir.join("vm-id.failed"); assert!( provisioned_file.exists(), ".provisioned file should exist before cleaning" ); + assert!( + failed_file.exists(), + ".failed file should exist before cleaning" + ); assert!(log_file.exists(), "log file should exist before cleaning"); let mut cmd = Command::cargo_bin("azure-init")?; @@ -119,23 +128,29 @@ fn clean_removes_only_provision_files_without_log_arg( !provisioned_file.exists(), "Expected .provisioned file to be deleted" ); + assert!(!failed_file.exists(), "Expected .failed file to be deleted"); assert!(log_file.exists(), "log file should exist after cleaning"); Ok(()) } // Ensures that the `clean` command with the --logs arg -// removes both the provisioned file and the log file +// removes the .provisioned, .failed, and log files #[test] fn clean_removes_provision_and_log_files_with_log_arg( ) -> Result<(), Box> { let (_temp_dir, _data_dir, log_file, config_path) = setup_clean_test()?; let provisioned_file = _data_dir.join("vm-id.provisioned"); + let failed_file = _data_dir.join("vm-id.failed"); assert!( provisioned_file.exists(), ".provisioned file should exist before cleaning" ); + assert!( + failed_file.exists(), + ".failed file should exist before cleaning" + ); assert!(log_file.exists(), "log file should exist before cleaning"); let mut cmd = Command::cargo_bin("azure-init")?; @@ -147,6 +162,7 @@ fn clean_removes_provision_and_log_files_with_log_arg( !provisioned_file.exists(), "Expected .provisioned file to be deleted" ); + assert!(!failed_file.exists(), "Expected .failed file to be deleted"); assert!( !log_file.exists(), "Expected azure-init.log file to be deleted" @@ -154,3 +170,117 @@ fn clean_removes_provision_and_log_files_with_log_arg( Ok(()) } + +// Assert report command exists in help +#[test] +fn help_shows_report_command() -> Result<(), Box> { + let mut command = Command::cargo_bin("azure-init")?; + command.arg("--help"); + command + .assert() + .success() + 
.stdout(predicate::str::contains("report")) + .stdout(predicate::str::contains( + "Report provisioning status to Azure", + )); + + Ok(()) +} + +// Assert report subcommands exist +#[test] +fn report_help_shows_subcommands() -> Result<(), Box> { + let mut command = Command::cargo_bin("azure-init")?; + command.args(["report", "--help"]); + command + .assert() + .success() + .stdout(predicate::str::contains("auto")) + .stdout(predicate::str::contains("ready")) + .stdout(predicate::str::contains("failure")); + + Ok(()) +} + +// Test that report auto fails gracefully when no state files exist +#[test] +fn report_auto_fails_without_state_files( +) -> Result<(), Box> { + let temp_dir = tempdir()?; + let data_dir = temp_dir.path().join("data"); + fs::create_dir_all(&data_dir)?; + + let config_contents = format!( + r#" + [azure_init_data_dir] + path = "{}" + "#, + data_dir.display() + ); + let config_path = temp_dir.path().join("azure-init-config.toml"); + fs::write(&config_path, config_contents)?; + + let mut cmd = Command::cargo_bin("azure-init")?; + cmd.args(["--config", config_path.to_str().unwrap(), "report", "auto"]); + + cmd.assert().failure(); + + Ok(()) +} + +// Test that report ready fails gracefully when no .provisioned file exists +#[test] +fn report_ready_fails_without_provisioned_file( +) -> Result<(), Box> { + let temp_dir = tempdir()?; + let data_dir = temp_dir.path().join("data"); + fs::create_dir_all(&data_dir)?; + + let config_contents = format!( + r#" + [azure_init_data_dir] + path = "{}" + "#, + data_dir.display() + ); + let config_path = temp_dir.path().join("azure-init-config.toml"); + fs::write(&config_path, config_contents)?; + + let mut cmd = Command::cargo_bin("azure-init")?; + cmd.args(["--config", config_path.to_str().unwrap(), "report", "ready"]); + + cmd.assert().failure(); + + Ok(()) +} + +// Test that report failure fails gracefully when no .failed file exists +#[test] +fn report_failure_fails_without_failed_file( +) -> Result<(), Box> { + 
let temp_dir = tempdir()?; + let data_dir = temp_dir.path().join("data"); + fs::create_dir_all(&data_dir)?; + + let config_contents = format!( + r#" + [azure_init_data_dir] + path = "{}" + "#, + data_dir.display() + ); + let config_path = temp_dir.path().join("azure-init-config.toml"); + fs::write(&config_path, config_contents)?; + + let mut cmd = Command::cargo_bin("azure-init")?; + cmd.args([ + "--config", + config_path.to_str().unwrap(), + "report", + "failure", + ]); + + cmd.assert().failure(); + + Ok(()) +}