Skip to content

Commit 471f506

Browse files
authored
Update COS agent lib (#1487)
1 parent 60b0fb8 commit 471f506

File tree

1 file changed

+62
-19
lines changed

1 file changed

+62
-19
lines changed

lib/charms/grafana_agent/v0/cos_agent.py

Lines changed: 62 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,9 @@ def __init__(self, *args):
211211
```
212212
"""
213213

214+
import copy
214215
import enum
216+
import hashlib
215217
import json
216218
import logging
217219
import socket
@@ -254,7 +256,7 @@ class _MetricsEndpointDict(TypedDict):
254256

255257
LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
256258
LIBAPI = 0
257-
LIBPATCH = 22
259+
LIBPATCH = 25
258260

259261
PYDEPS = ["cosl >= 0.0.50", "pydantic"]
260262

@@ -264,12 +266,6 @@ class _MetricsEndpointDict(TypedDict):
264266
logger = logging.getLogger(__name__)
265267
SnapEndpoint = namedtuple("SnapEndpoint", "owner, name")
266268

267-
# Note: MutableMapping is imported from the typing module and not collections.abc
268-
# because subscripting collections.abc.MutableMapping was added in python 3.9, but
269-
# most of our charms are based on 20.04, which has python 3.8.
270-
271-
_RawDatabag = MutableMapping[str, str]
272-
273269

274270
class TransportProtocolType(str, enum.Enum):
275271
"""Receiver Type."""
@@ -305,6 +301,22 @@ class TransportProtocolType(str, enum.Enum):
305301
ReceiverProtocol = Literal["otlp_grpc", "otlp_http", "zipkin", "jaeger_thrift_http", "jaeger_grpc"]
306302

307303

304+
def _dedupe_list(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
305+
"""Deduplicate items in the list via object identity."""
306+
unique_items = []
307+
for item in items:
308+
if item not in unique_items:
309+
unique_items.append(item)
310+
return unique_items
311+
312+
313+
def _dict_hash_except_key(scrape_config: Dict[str, Any], key: Optional[str]):
314+
"""Get a hash of the scrape_config dict, except for the specified key."""
315+
cfg_for_hash = {k: v for k, v in scrape_config.items() if k != key}
316+
serialized = json.dumps(cfg_for_hash, sort_keys=True)
317+
return hashlib.blake2b(serialized.encode(), digest_size=4).hexdigest()
318+
319+
308320
class TracingError(Exception):
309321
"""Base class for custom errors raised by tracing."""
310322

@@ -619,7 +631,8 @@ def __init__(
619631
refresh_events: Optional[List] = None,
620632
tracing_protocols: Optional[List[str]] = None,
621633
*,
622-
scrape_configs: Optional[Union[List[dict], Callable]] = None,
634+
scrape_configs: Optional[Union[List[dict], Callable[[], List[Dict[str, Any]]]]] = None,
635+
extra_alert_groups: Optional[Callable[[], Dict[str, Any]]] = None,
623636
):
624637
"""Create a COSAgentProvider instance.
625638
@@ -640,6 +653,9 @@ def __init__(
640653
scrape_configs: List of standard scrape_configs dicts or a callable
641654
that returns the list in case the configs need to be generated dynamically.
642655
The contents of this list will be merged with the contents of `metrics_endpoints`.
656+
extra_alert_groups: A callable that returns a dict of alert rule groups in case the
657+
alerts need to be generated dynamically. The contents of this dict will be merged
658+
with generic and bundled alert rules.
643659
"""
644660
super().__init__(charm, relation_name)
645661
dashboard_dirs = dashboard_dirs or ["./src/grafana_dashboards"]
@@ -648,6 +664,7 @@ def __init__(
648664
self._relation_name = relation_name
649665
self._metrics_endpoints = metrics_endpoints or []
650666
self._scrape_configs = scrape_configs or []
667+
self._extra_alert_groups = extra_alert_groups or {}
651668
self._metrics_rules = metrics_rules_dir
652669
self._logs_rules = logs_rules_dir
653670
self._recursive = recurse_rules_dirs
@@ -689,12 +706,34 @@ def _on_refresh(self, event):
689706
) as e:
690707
logger.error("Invalid relation data provided: %s", e)
691708

709+
def _deterministic_scrape_configs(
    self, scrape_configs: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Return scrape_configs with stable, content-derived job names.

    Each config's job name becomes ``<app>_<job_or_default>_<8-hex-hash>``,
    where the hash covers every field except any pre-existing ``job_name``
    and is computed from a deterministic JSON serialization. Because the
    suffix depends only on the config's contents, names are stable across
    serializations; the result is sorted by job name for the same reason.
    The input list is not mutated (configs are deep-copied).
    """
    app_name = self._charm.app.name
    stamped: List[Dict[str, Any]] = []
    for cfg in scrape_configs:
        cfg = copy.deepcopy(cfg)
        base = cfg.get("job_name", "default")
        short_id = _dict_hash_except_key(cfg, "job_name")
        cfg["job_name"] = f"{app_name}_{base}_{short_id}"
        stamped.append(cfg)
    return sorted(stamped, key=lambda c: c.get("job_name", ""))
729+
692730
@property
693731
def _scrape_jobs(self) -> List[Dict]:
694-
"""Return a prometheus_scrape-like data structure for jobs.
732+
"""Return a list of scrape_configs.
695733
696734
https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
697735
"""
736+
# Optionally allow the charm to set the scrape_configs
698737
if callable(self._scrape_configs):
699738
scrape_configs = self._scrape_configs()
700739
else:
@@ -712,26 +751,30 @@ def _scrape_jobs(self) -> List[Dict]:
712751

713752
scrape_configs = scrape_configs or []
714753

715-
# Augment job name to include the app name and a unique id (index)
716-
for idx, scrape_config in enumerate(scrape_configs):
717-
scrape_config["job_name"] = "_".join(
718-
[self._charm.app.name, str(idx), scrape_config.get("job_name", "default")]
719-
)
720-
721-
return scrape_configs
754+
return self._deterministic_scrape_configs(scrape_configs)
722755

723756
@property
def _metrics_alert_rules(self) -> Dict:
    """Return a dict of alert rule groups.

    Combines charm-provided dynamic groups (``extra_alert_groups``), rules
    loaded from the metrics rules directory, and the bundled generic
    application rules, deduplicating any overlap between the two sources.
    """
    # Groups generated dynamically by the charm, when a callable was supplied.
    if callable(self._extra_alert_groups):
        merged = self._extra_alert_groups()
    else:
        merged = {"groups": []}

    bundled = AlertRules(
        query_type="promql", topology=JujuTopology.from_charm(self._charm)
    )
    bundled.add_path(self._metrics_rules, recursive=self._recursive)
    # Deep-copy so repeated reads of this property never mutate the
    # module-level generic rules shared across instances.
    bundled.add(
        copy.deepcopy(generic_alert_groups.application_rules),
        group_name_prefix=JujuTopology.from_charm(self._charm).identifier,
    )

    # The charm could supply rules we also generate here, so deduplicate.
    merged["groups"] = _dedupe_list(merged["groups"] + bundled.as_dict()["groups"])

    return merged
735778

736779
@property
737780
def _log_alert_rules(self) -> Dict:

0 commit comments

Comments
 (0)