defined architecture for the central collector + documentation

GioeleB00 · GioeleB00 · commit 796cbb1ca290 · 2025-07-25T17:16:28.000+02:00
diff --git a/documentation/backend_documentation/time_series_metric_architecture.md b/documentation/backend_documentation/time_series_metric_architecture.md
@@ -0,0 +1,91 @@
+## Time‑Series Metrics: Architectural Overview
+
+Collecting high‑frequency, time‑series metrics from a complex simulation requires an architecture that is **performant, maintainable, and extensible**. Our design meets those goals by keeping metric *declaration*, *state management*, and *data collection* in strictly separate layers.
+
+### 1  Guiding Principles & Architecture
+
+1. **Minimal Hot‑Path Overhead** — every state update in the simulation core is `O(1)`.
+2. **Single Source of Truth** — one “Registry” enumerates every metric that can exist.
+3. **User‑Defined Extensibility** — advanced users can register custom metrics without touching the framework.
+4. **Predictable Memory Footprint** — data structures are pre‑allocated once, never rebuilt at each sample tick.
+
+| Layer         | Responsibility                                                   | Lifetime               |
+| ------------- | ---------------------------------------------------------------- | ---------------------- |
+| **Registry**  | Declare *which* metrics exist for each component type            | Module import (once)   |
+| **Runtime**   | Maintain the **current value** of each metric per component      | Per component instance |
+| **Collector** | Periodically read runtime values and append to time‑series lists | One per simulation run |
+
+---
+
+### 2  Case Study — Edge Metric Collection
+
+```mermaid
+graph TD
+    subgraph Init
+        A(Registry: EDGE_METRICS) -- builds --> B{Metric Dict}
+    end
+    subgraph Loop
+        C[EdgeRuntime] -- inc/dec --> D(_concurrent_connections)
+        E[SampledMetricCollector] -->|every N ms| F{iterate}
+        F -->|read property| D
+        F -->|append| B
+    end
+    C -- owns --> B
+```
+
+#### Layer Walk‑through
+
+1. **Registry (`metrics/edge.py`)**
+
+   ```python
+   EDGE_METRICS = (SampledMetricName.EDGE_CONCURRENT_CONNECTION,)
+
+   def build_edge_metrics(enabled):
+       return {m: [] for m in EDGE_METRICS if m in enabled}
+   ```
+
+2. **Runtime (`EdgeRuntime`)**
+
+   * Updates the counter `_concurrent_connections` in `O(1)`.
+   * Holds the dict produced by `build_edge_metrics`.
+   * **New:** exposes read‑only properties so external modules never touch private fields directly.
+
+   ```python
+   class EdgeRuntime:
+       @property
+       def concurrent_connections(self) -> int:
+           return self._concurrent_connections
+
+       @property
+       def enabled_metrics(self) -> dict[SampledMetricName, list[float | int]]:
+           return self._edge_enabled_metrics
+   ```
+
+3. **Collector (`SampledMetricCollector`)**
+
+   ```python
+   while True:
+       yield env.timeout(sample_period_s)
+       for edge in self.edges:
+           key = SampledMetricName.EDGE_CONCURRENT_CONNECTION
+           # properties keep encapsulation intact
+           if key in edge.enabled_metrics:
+               edge.enabled_metrics[key].append(edge.concurrent_connections)
+   ```
+
+---
+
+### 3  Why the `if key in …` Guard Still Matters
+
+Even with the new properties, the guard remains essential:
+
+* **Robustness** — prevents `KeyError` when a metric is disabled for a given edge.
+* **Extensibility** — a user can add `EDGE_PACKET_LOSS` (or any custom metric) to a subset of edges; the collector automatically respects that configuration.
+
+This single `O(1)` check keeps the system plug‑and‑play while preserving full encapsulation:
+
+* Runtime internals stay protected behind properties.
+* The collector never needs to know ahead of time which edges have a given metric enabled.
+
+---
+
diff --git a/src/app/config/constants.py b/src/app/config/constants.py
@@ -208,6 +208,17 @@ class SampledMetricName(StrEnum):
   EVENT_LOOP_IO_SLEEP = "event_loop_io_sleep"
   RAM_IN_USE = "ram_in_use"
   THROUGHPUT_RPS = "throughput_rps"
+  EDGE_CONCURRENT_CONNECTION = "edge_concurrent_connection"
+
+class SamplePeriods(IntEnum):
+  """
+  defining the value of the sample periods for the metrics for which
+  we have to extract a time series
+  """
+
+  STANDARD_TIME = 0.005 # 5 MILLISECONDS
+  MINIMUM_TIME = 0.001 # 1 MILLISECOND
+  MAXIMUM_TIME = 0.1    # 10 MILLISECONDS
 
 # ======================================================================
 # CONSTANTS FOR EVENT METRICS
diff --git a/src/app/metrics/collector.py b/src/app/metrics/collector.py
@@ -0,0 +1,55 @@
+"""Class that centralizes the collection of the time series of sampled metrics."""
+
+from collections.abc import Generator
+
+import simpy
+
+from app.config.constants import SampledMetricName
+from app.runtime.actors.edge import EdgeRuntime
+from app.schemas.simulation_settings_input import SimulationSettings
+
+# The idea behind this class is to receive the lists of runtime objects
+# that are defined in the central class that builds the simulation. In
+# this way we optimize the initialization of the various objects,
+# reducing the global overhead.
+
+
+class SampledMetricCollector:
+    """class to define a centralized object to collect sampled metrics"""
+
+    def __init__(
+        self,
+        *,
+        edges: list[EdgeRuntime],
+        env:  simpy.Environment,
+        sim_settings: SimulationSettings,
+        ) -> None:
+        """Docstring to complete"""
+        self.edges = edges
+        self.sim_settings = sim_settings
+        self.env = env
+        self._sample_period = sim_settings.sample_period_s
+
+        env.process(self._build_time_series())
+
+    def _build_time_series(self) -> Generator[simpy.Event, None, None]:
+        """Function to build time series for enabled metrics"""
+        connection_key = SampledMetricName.EDGE_CONCURRENT_CONNECTION
+        while True:
+
+            yield self.env.timeout(self._sample_period)
+
+            for edge in self.edges:
+                if connection_key in edge.enabled_metrics:
+                    edge.enabled_metrics[connection_key].append(
+                        edge.concurrent_connections,
+                    )
+
+
+
+
+
+
+
+
+
diff --git a/src/app/metrics/edge.py b/src/app/metrics/edge.py
@@ -0,0 +1,29 @@
+"""initialization of the structure to gather the metrics for the edges of the system"""
+
+from collections.abc import Iterable
+
+from app.config.constants import SampledMetricName
+
+# Initialize one time outside the function all possible metrics
+# related to the edges, the idea of this structure is to
+# guarantee scalability in the long term if multiple metrics
+# will be considered
+
+EDGE_METRICS = (
+    SampledMetricName.EDGE_CONCURRENT_CONNECTION,
+)
+
+def build_edge_metrics(
+    enabled_sample_metrics: Iterable[SampledMetricName],
+    ) -> dict[SampledMetricName, list[float | int]]:
+    """
+    Function to populate a dictionary to collect values for
+    time series of sampled metrics related to the edges of
+    the system.
+    """
+    # The edge case of the empty dict is avoided since at least
+    # one metric is always measured by default.
+    return {
+        metric: [] for metric in EDGE_METRICS
+        if metric in enabled_sample_metrics
+    }
diff --git a/src/app/runtime/actors/edge.py b/src/app/runtime/actors/edge.py
@@ -12,9 +12,11 @@
 import numpy as np
 import simpy
 
-from app.config.constants import SystemEdges
+from app.config.constants import SampledMetricName, SystemEdges
+from app.metrics.edge import build_edge_metrics
 from app.runtime.rqs_state import RequestState
 from app.samplers.common_helpers import general_sampler
+from app.schemas.simulation_settings_input import SimulationSettings
 from app.schemas.system_topology.full_system_topology import Edge
 
 if TYPE_CHECKING:
@@ -32,12 +34,26 @@ def __init__(
         edge_config: Edge,
         rng: np.random.Generator | None = None,
         target_box: simpy.Store,
+        settings: SimulationSettings,
         ) -> None:
         """Definition of the instance attributes"""
         self.env = env
         self.edge_config = edge_config
         self.target_box = target_box
         self.rng = rng or np.random.default_rng()
+        self.setting = settings
+        self._edge_enabled_metrics = build_edge_metrics(
+            settings.enabled_sample_metrics,
+        )
+        self._concurrent_connections: int = 0
+
+        # We keep a reference to `settings` because this class needs to observe, but
+        # not persist, the edge-related metrics the user has enabled.
+        # The actual persistence (appending snapshots to the time-series lists)
+        # is handled centrally in metrics/collector.py, which runs every X milliseconds.
+        # Here we only expose the current metric values, guarded by a few `if` checks
+        # that verify that each optional metric is active. For the default metric
+        # settings this is not needed, but as we scale, as explained above, we will need it.
 
     def _deliver(self, state: RequestState) -> Generator[simpy.Event, None, None]:
         """Function to deliver the state to the next node"""
@@ -54,13 +70,17 @@ def _deliver(self, state: RequestState) -> Generator[simpy.Event, None, None]:
             )
             return
 
+        self._concurrent_connections +=1
+
         transit_time = general_sampler(random_variable, self.rng)
         yield self.env.timeout(transit_time)
+
         state.record_hop(
             SystemEdges.NETWORK_CONNECTION,
             self.edge_config.id,
             self.env.now,
             )
+        self._concurrent_connections -=1
         yield self.target_box.put(state)
 
 
@@ -71,7 +91,15 @@ def transport(self, state: RequestState) -> simpy.Process:
         """
         return self.env.process(self._deliver(state))
 
+    @property
+    def enabled_metrics(self) -> dict[SampledMetricName, list[float | int]]:
+        """Return the metric store of this edge.
+
+        The property has no setter, so the attribute cannot be rebound from
+        outside; the returned dict is the internal object itself, not a copy.
+        """
+        return self._edge_enabled_metrics
 
+    @property
+    def concurrent_connections(self) -> int:
+        """Return the current value of the concurrent-connections counter.
+
+        The counter is incremented when a delivery starts its transit delay
+        and decremented once the hop has been recorded.
+        """
+        return self._concurrent_connections
 
 
 
diff --git a/src/app/schemas/simulation_settings_input.py b/src/app/schemas/simulation_settings_input.py
@@ -2,7 +2,12 @@
 
 from pydantic import BaseModel, Field
 
-from app.config.constants import EventMetricName, SampledMetricName, TimeDefaults
+from app.config.constants import (
+    EventMetricName,
+    SampledMetricName,
+    SamplePeriods,
+    TimeDefaults,
+)
 
 
 class SimulationSettings(BaseModel):
@@ -19,6 +24,7 @@ class SimulationSettings(BaseModel):
             SampledMetricName.READY_QUEUE_LEN,
             SampledMetricName.CORE_BUSY,
             SampledMetricName.RAM_IN_USE,
+            SampledMetricName.EDGE_CONCURRENT_CONNECTION,
         },
         description="Which time-series KPIs to collect by default.",
     )
@@ -29,3 +35,10 @@ class SimulationSettings(BaseModel):
         description="Which per-event KPIs to collect by default.",
     )
 
+    sample_period_s: int = Field(
+        default = SamplePeriods.STANDARD_TIME,
+        ge = SamplePeriods.MINIMUM_TIME,
+        le = SamplePeriods.MAXIMUM_TIME,
+        description="constant interval of time to build time series for metrics",
+    )
+
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -23,6 +23,7 @@
 from app.config.constants import (
     EventMetricName,
     SampledMetricName,
+    SamplePeriods,
     TimeDefaults,
 )
 from app.config.settings import settings
@@ -195,6 +196,7 @@ def sim_settings(
         total_simulation_time=TimeDefaults.MIN_SIMULATION_TIME,
         enabled_sample_metrics=enabled_sample_metrics,
         enabled_event_metrics=enabled_event_metrics,
+        sample_period_s=SamplePeriods.STANDARD_TIME,
     )
 
 
diff --git a/tests/unit/runtime/engine/test_edge.py b/tests/unit/runtime/engine/test_edge.py

Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@`
`23`	`23`	`from app.config.constants import (`
`24`	`24`	`EventMetricName,`
`25`	`25`	`SampledMetricName,`
	`26`	`+ SamplePeriods,`
`26`	`27`	`TimeDefaults,`
`27`	`28`	`)`
`28`	`29`	`from app.config.settings import settings`
`@@ -195,6 +196,7 @@ def sim_settings(`
`195`	`196`	`total_simulation_time=TimeDefaults.MIN_SIMULATION_TIME,`
`196`	`197`	`enabled_sample_metrics=enabled_sample_metrics,`
`197`	`198`	`enabled_event_metrics=enabled_event_metrics,`
	`199`	`+ sample_period_s=SamplePeriods.STANDARD_TIME,`
`198`	`200`	`)`
`199`	`201`
`200`	`202`