NETIZEN-11
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎config/config.yaml‎
Lines changed: 6 additions & 1 deletion b/‎config/config.yaml‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎krkn/prometheus/collector.py‎
Lines changed: 79 additions & 0 deletions b/‎krkn/prometheus/collector.py‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎krkn/resiliency/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎krkn/resiliency/__init__.py‎
Lines changed: 4 additions & 0 deletions
@@ -17,6 +17,7 @@ __pycache__/*
 kube-burner*
 kube_burner*
 recommender_*.json
+resiliency*.json
 
 # Project files
 .ropeproject
 
@@ -55,6 +55,10 @@ kraken:
        -  kubevirt_vm_outage:
               - scenarios/kubevirt/kubevirt-vm-outage.yaml
 
+resiliency:
+  resiliency_run_mode: standalone  # Options: standalone, controller, disabled
+  resiliency_file: config/alerts.yaml  # Path to the SLO definitions file; falls back to performance_monitoring.alert_profile if not specified
+
 cerberus:
     cerberus_enabled: False                                # Enable it when cerberus is previously installed
     cerberus_url:                                          # When cerberus_enabled is set to True, provide the url where cerberus publishes go/no-go signal
@@ -131,4 +135,5 @@ kubevirt_checks:                                            # Utilizing virt che
     disconnected: False                                     # Boolean of how to try to connect to the VMIs; if True will use the ip_address to try ssh from within a node, if false will use the name and uses virtctl to try to connect; Default is False
     ssh_node: ""                                            # If set, will be a backup way to ssh to a node. Will want to set to a node that isn't targeted in chaos
     node_names: ""
-    exit_on_failure:                                        # If value is True and VMI's are failing post chaos returns failure, values can be True/False
+    exit_on_failure:                                        # If value is True and VMI's are failing post chaos returns failure, values can be True/False
+    
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+import datetime
+import logging
+from typing import Dict, Any, List, Optional
+
+from krkn_lib.prometheus.krkn_prometheus import KrknPrometheus
+
+
+# -----------------------------------------------------------------------------
+# SLO evaluation helpers (used by krkn.resiliency)
+# -----------------------------------------------------------------------------
+
+
+def slo_passed(prometheus_result: List[Any]) -> Optional[bool]:
+    if not prometheus_result:
+        return None
+    has_samples = False
+    for series in prometheus_result:
+        if "values" in series:
+            has_samples = True
+            for _ts, val in series["values"]:
+                try:
+                    if float(val) > 0:
+                        return False
+                except (TypeError, ValueError):
+                    continue
+        elif "value" in series:
+            has_samples = True
+            try:
+                return float(series["value"][1]) == 0
+            except (TypeError, ValueError):
+                return False
+
+    # If we reached here and never saw any samples, skip
+    return None if not has_samples else True
+
+
+def evaluate_slos(
+    prom_cli: KrknPrometheus,
+    slo_list: List[Dict[str, Any]],
+    start_time: datetime.datetime,
+    end_time: datetime.datetime,
+) -> Dict[str, bool]:
+    """Evaluate a list of SLO expressions against Prometheus.
+
+    Args:
+        prom_cli: Configured Prometheus client.
+        slo_list: List of dicts with keys ``name``, ``expr``.
+        start_time: Start timestamp.
+        end_time: End timestamp.
+        granularity: Step in seconds for range queries.
+    Returns:
+        Mapping name -> bool indicating pass status.
+        True means good we passed the SLO test otherwise failed the SLO
+    """
+    results: Dict[str, bool] = {}
+    logging.info("Evaluating %d SLOs over window %s – %s", len(slo_list), start_time, end_time)
+    for slo in slo_list:
+        expr = slo["expr"]
+        name = slo["name"]
+        try:
+            response = prom_cli.process_prom_query_in_range(
+                expr,
+                start_time=start_time,
+                end_time=end_time,
+            )
+
+            passed = slo_passed(response)
+            if passed is None:
+                # Absence of data indicates the condition did not trigger; treat as pass.
+                logging.debug("SLO '%s' query returned no data; assuming pass.", name)
+                results[name] = True
+            else:
+                results[name] = passed
+        except Exception as exc:  
+            logging.error("PromQL query failed for SLO '%s': %s", name, exc)
+            results[name] = False  
+    return results
@@ -0,0 +1,4 @@
+"""krkn.resiliency package public interface."""
+
+from .resiliency import Resiliency  # noqa: F401
+from .score import calculate_resiliency_score  # noqa: F401