From d0833cbc8c53499c388601d8e5dcf098db186a1d Mon Sep 17 00:00:00 2001 From: Tiyash Basu Date: Tue, 13 May 2025 15:24:20 +0200 Subject: [PATCH 1/2] Update communication component diagnostics code enum This commit updates the `CommunicationComponentDiagnosticCode` enum in the `communication_components.proto` file to include additional diagnostic codes for better error handling and reporting. The new codes include `COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_UNKNOWN`, `COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_AUTHENTICATION_ERR`, `COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_OVER_TEMPERATURE`. Signed-off-by: Tiyash Basu --- .../communication_components.proto | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto b/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto index 32ceee36..cdbfbab9 100644 --- a/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto +++ b/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto @@ -23,17 +23,28 @@ enum CommunicationComponentDiagnosticCode { // Component is unreachable (e.g., no heartbeat from device, network down). COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_UNREACHABLE = 1; + // Communication component is unknown. This could happen in cases where the + // status could be read from the component, but it could not be identified + // as a known state. + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_UNKNOWN = 2; + + // Authentication error (e.g., invalid credentials, certificate issues). + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_AUTHENTICATION_ERR = 3; + // Configuration error detected (e.g., invalid VLAN, IP conflict). - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_CONFIGURATION_ERR = 2; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_CONFIGURATION_ERR = 3; // High packet loss detected over a sustained period. 
- COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_PACKET_LOSS = 3; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_PACKET_LOSS = 4; // Excessive latency observed (e.g., ping time above threshold). - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HIGH_LATENCY = 4; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HIGH_LATENCY = 5; // Hardware fault reported by device (e.g., port error, cable disconnected). - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HARDWARE_FAULT = 5; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HARDWARE_GENERIC_FAULT = 6; + + // Hardware fault due to overheating. + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_OVER_TEMPERATURE = 7; } // Represents an error or warning condition reported by a microgrid From d1f0e91ec84094e94f64f372d586488a7373e8ef Mon Sep 17 00:00:00 2001 From: Tiyash Basu Date: Tue, 13 May 2025 15:25:36 +0200 Subject: [PATCH 2/2] Add communication component state codes and snapshot message This commit adds a new enum `CommunicationComponentStateCode` to represent the high-level operational state of a communication component. It also introduces a new message `CommunicationComponentStateSnapshot` that captures a snapshot of the component's state, diagnostics, and timing. This is useful for monitoring and diagnosing the status of communication components in a microgrid system. Signed-off-by: Tiyash Basu --- RELEASE_NOTES.md | 1 + .../communication_components.proto | 71 +++++++++++++++++-- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 7d2c0ad9..74429d34 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -20,6 +20,7 @@ - Added warnings to sensor `SensorState`. - Added a common `TimeIntervalFilter` message in `frequenz.api.common.v1.types` to standardize time interval filtering across APIs. This uses `start_time` (inclusive) and `end_time` (exclusive) fields, aligning with ISO 8601 and common programming conventions. 
- Added new message `CommunicationComponentDiagnostic` to represent warnings and errors in microgrid communication components. +- Added new message `CommunicationComponentStateSnapshot` to represent the state of communication components. - Added new message definitions for streaming events (Deleted, Created, Updated) ## Bug Fixes diff --git a/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto b/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto index cdbfbab9..139f233b 100644 --- a/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto +++ b/proto/frequenz/api/common/v1/microgrid/communication_components/communication_components.proto @@ -12,6 +12,36 @@ syntax = "proto3"; package frequenz.api.common.v1.microgrid.communication_components; +import "google/protobuf/timestamp.proto"; + +// Defines the high-level operational state of a communication component. +// These codes can be used in a one-shot snapshot to show its current condition. +enum CommunicationComponentStateCode { + // Default unset value. + COMMUNICATION_COMPONENT_STATE_CODE_UNSPECIFIED = 0; + + // The component's internal status is unknown. + COMMUNICATION_COMPONENT_STATE_CODE_UNKNOWN = 1; + + // The component is powered on and responding to network traffic. + COMMUNICATION_COMPONENT_STATE_CODE_ONLINE = 2; + + // The component is powered off, unreachable, or administratively disabled. + COMMUNICATION_COMPONENT_STATE_CODE_OFFLINE = 3; + + // The component is in the process of establishing connections. + COMMUNICATION_COMPONENT_STATE_CODE_CONNECTING = 4; + + // The component is in the process of shutting down connections. + COMMUNICATION_COMPONENT_STATE_CODE_DISCONNECTING = 5; + + // The component is undergoing maintenance (e.g., firmware upgrade). 
+ COMMUNICATION_COMPONENT_STATE_CODE_MAINTENANCE = 6; + + // The component is up but experiencing degraded performance (e.g., high packet loss). + COMMUNICATION_COMPONENT_STATE_CODE_DEGRADED = 7; +} + // Enumerated diagnostic codes for communication components. // // These codes indicate common network- or device-level faults that may affect @@ -32,19 +62,50 @@ enum CommunicationComponentDiagnosticCode { COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_AUTHENTICATION_ERR = 3; // Configuration error detected (e.g., invalid VLAN, IP conflict). - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_CONFIGURATION_ERR = 3; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_CONFIGURATION_ERR = 4; // High packet loss detected over a sustained period. - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_PACKET_LOSS = 4; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_PACKET_LOSS = 5; // Excessive latency observed (e.g., ping time above threshold). - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HIGH_LATENCY = 5; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HIGH_LATENCY = 6; // Hardware fault reported by device (e.g., port error, cable disconnected). - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HARDWARE_GENERIC_FAULT = 6; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_HARDWARE_GENERIC_FAULT = 7; // Hardware fault due to overheating. - COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_OVER_TEMPERATURE = 7; + COMMUNICATION_COMPONENT_DIAGNOSTIC_CODE_OVER_TEMPERATURE = 8; +} + +// A single snapshot of a communication component's state, diagnostics, and timing. +// +// !!! note "Snapshot semantics" +// All fields in this message represent the component's view at one point +// in time, so there is only a single `snapshot_time`. Any parallel reads +// happening to produce this data are collapsed under that timestamp. +message CommunicationComponentStateSnapshot { + // The UTC time when this snapshot was taken. + google.protobuf.Timestamp snapshot_time = 1; + + // One or more high-level state codes active at `snapshot_time`. + // + // !!! 
note + // Typical usage is a single state (e.g., ONLINE), but multiple may + // apply (e.g., CONNECTING + DEGRADED) if that makes sense. + repeated CommunicationComponentStateCode states = 2; + + // Non-critical warnings detected for the component. + // + // !!! note + // Warnings may coexist with an ONLINE state, indicating potential + // issues that do not prevent basic operation. + repeated CommunicationComponentDiagnostic warnings = 3; + + // Critical errors currently affecting the component. + // + // !!! note + // A state code reflecting the error should accompany entries here. + repeated CommunicationComponentDiagnostic errors = 4; +} // Represents an error or warning condition reported by a microgrid