Skip to content

Commit a8c0b0b

Browse files
[MISC] Refresh all charm libs (#608)
1 parent 0e93d21 commit a8c0b0b

File tree

8 files changed

+1004
-317
lines changed

8 files changed

+1004
-317
lines changed

lib/charms/data_platform_libs/v0/data_interfaces.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def _on_topic_requested(self, event: TopicRequestedEvent):
331331

332332
# Increment this PATCH version before using `charmcraft publish-lib` or reset
333333
# to 0 if you are raising the major API version
334-
LIBPATCH = 40
334+
LIBPATCH = 41
335335

336336
PYDEPS = ["ops>=2.0.0"]
337337

@@ -609,7 +609,7 @@ def get_group(self, group: str) -> Optional[SecretGroup]:
609609
class CachedSecret:
610610
"""Locally cache a secret.
611611
612-
The data structure is precisely re-using/simulating as in the actual Secret Storage
612+
The data structure is precisely reusing/simulating as in the actual Secret Storage
613613
"""
614614

615615
KNOWN_MODEL_ERRORS = [MODEL_ERRORS["no_label_and_uri"], MODEL_ERRORS["owner_no_refresh"]]
@@ -2363,7 +2363,6 @@ def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> Non
23632363
def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None:
23642364
"""Delete data available (directly or indirectly -- i.e. secrets) from the relation for owner/this_app."""
23652365
if self.secret_fields and self.deleted_label:
2366-
23672366
_, normal_fields = self._process_secret_fields(
23682367
relation,
23692368
self.secret_fields,

lib/charms/data_platform_libs/v0/data_models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ class MergedDataBag(ProviderDataBag, RequirerDataBag):
168168

169169
# Increment this PATCH version before using `charmcraft publish-lib` or reset
170170
# to 0 if you are raising the major API version
171-
LIBPATCH = 4
171+
LIBPATCH = 5
172172

173173
PYDEPS = ["ops>=2.0.0", "pydantic>=1.10,<2"]
174174

@@ -209,7 +209,7 @@ def validate_params(cls: Type[T]):
209209
"""
210210

211211
def decorator(
212-
f: Callable[[CharmBase, ActionEvent, Union[T, ValidationError]], G]
212+
f: Callable[[CharmBase, ActionEvent, Union[T, ValidationError]], G],
213213
) -> Callable[[CharmBase, ActionEvent], G]:
214214
@wraps(f)
215215
def event_wrapper(self: CharmBase, event: ActionEvent):
@@ -287,7 +287,7 @@ def decorator(
287287
Optional[Union[UnitModel, ValidationError]],
288288
],
289289
G,
290-
]
290+
],
291291
) -> Callable[[CharmBase, RelationEvent], G]:
292292
@wraps(f)
293293
def event_wrapper(self: CharmBase, event: RelationEvent):

lib/charms/grafana_agent/v0/cos_agent.py

Lines changed: 108 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
- `COSAgentProvider`: Use in machine charms that need to have a workload's metrics
99
or logs scraped, or forward rule files or dashboards to Prometheus, Loki or Grafana through
1010
the Grafana Agent machine charm.
11+
NOTE: Be sure to add `limit: 1` in your charm for the cos-agent relation. That is the only
12+
way we currently have to prevent two different grafana agent apps deployed on the same VM.
1113
1214
- `COSAgentConsumer`: Used in the Grafana Agent machine charm to manage the requirer side of
1315
the `cos_agent` interface.
@@ -22,7 +24,6 @@
2224
Using the `COSAgentProvider` object only requires instantiating it,
2325
typically in the `__init__` method of your charm (the one which sends telemetry).
2426
25-
The constructor of `COSAgentProvider` has only one required and ten optional parameters:
2627
2728
```python
2829
def __init__(
@@ -233,8 +234,8 @@ def __init__(self, *args):
233234
)
234235

235236
import pydantic
236-
from cosl import GrafanaDashboard, JujuTopology
237-
from cosl.rules import AlertRules
237+
from cosl import DashboardPath40UID, JujuTopology, LZMABase64
238+
from cosl.rules import AlertRules, generic_alert_groups
238239
from ops.charm import RelationChangedEvent
239240
from ops.framework import EventBase, EventSource, Object, ObjectEvents
240241
from ops.model import ModelError, Relation
@@ -253,9 +254,9 @@ class _MetricsEndpointDict(TypedDict):
253254

254255
LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
255256
LIBAPI = 0
256-
LIBPATCH = 11
257+
LIBPATCH = 19
257258

258-
PYDEPS = ["cosl", "pydantic"]
259+
PYDEPS = ["cosl >= 0.0.50", "pydantic"]
259260

260261
DEFAULT_RELATION_NAME = "cos-agent"
261262
DEFAULT_PEER_RELATION_NAME = "peers"
@@ -267,7 +268,6 @@ class _MetricsEndpointDict(TypedDict):
267268
logger = logging.getLogger(__name__)
268269
SnapEndpoint = namedtuple("SnapEndpoint", "owner, name")
269270

270-
271271
# Note: MutableMapping is imported from the typing module and not collections.abc
272272
# because subscripting collections.abc.MutableMapping was added in python 3.9, but
273273
# most of our charms are based on 20.04, which has python 3.8.
@@ -317,7 +317,11 @@ class NotReadyError(TracingError):
317317
"""Raised by the provider wrapper if a requirer hasn't published the required data (yet)."""
318318

319319

320-
class ProtocolNotRequestedError(TracingError):
320+
class ProtocolNotFoundError(TracingError):
321+
"""Raised if the user doesn't receive an endpoint for a protocol it requested."""
322+
323+
324+
class ProtocolNotRequestedError(ProtocolNotFoundError):
321325
"""Raised if the user attempts to obtain an endpoint for a protocol it did not request."""
322326

323327

@@ -476,7 +480,7 @@ class CosAgentProviderUnitData(DatabagModel):
476480
# this needs to make its way to the gagent leader
477481
metrics_alert_rules: dict
478482
log_alert_rules: dict
479-
dashboards: List[GrafanaDashboard]
483+
dashboards: List[str]
480484
# subordinate is no longer used but we should keep it until we bump the library to ensure
481485
# we don't break compatibility.
482486
subordinate: Optional[bool] = None
@@ -509,7 +513,7 @@ class CosAgentPeersUnitData(DatabagModel):
509513
# of the outgoing o11y relations.
510514
metrics_alert_rules: Optional[dict]
511515
log_alert_rules: Optional[dict]
512-
dashboards: Optional[List[GrafanaDashboard]]
516+
dashboards: Optional[List[str]]
513517

514518
# when this whole datastructure is dumped into a databag, it will be nested under this key.
515519
# while not strictly necessary (we could have it 'flattened out' into the databag),
@@ -579,7 +583,7 @@ class Receiver(pydantic.BaseModel):
579583
"""Specification of an active receiver."""
580584

581585
protocol: ProtocolType = pydantic.Field(..., description="Receiver protocol name and type.")
582-
url: str = pydantic.Field(
586+
url: Optional[str] = pydantic.Field(
583587
...,
584588
description="""URL at which the receiver is reachable. If there's an ingress, it would be the external URL.
585589
Otherwise, it would be the service's fqdn or internal IP.
@@ -727,6 +731,10 @@ def _metrics_alert_rules(self) -> Dict:
727731
query_type="promql", topology=JujuTopology.from_charm(self._charm)
728732
)
729733
alert_rules.add_path(self._metrics_rules, recursive=self._recursive)
734+
alert_rules.add(
735+
generic_alert_groups.application_rules,
736+
group_name_prefix=JujuTopology.from_charm(self._charm).identifier,
737+
)
730738
return alert_rules.as_dict()
731739

732740
@property
@@ -737,12 +745,20 @@ def _log_alert_rules(self) -> Dict:
737745
return alert_rules.as_dict()
738746

739747
@property
740-
def _dashboards(self) -> List[GrafanaDashboard]:
741-
dashboards: List[GrafanaDashboard] = []
748+
def _dashboards(self) -> List[str]:
749+
dashboards: List[str] = []
742750
for d in self._dashboard_dirs:
743751
for path in Path(d).glob("*"):
744-
dashboard = GrafanaDashboard._serialize(path.read_bytes())
745-
dashboards.append(dashboard)
752+
with open(path, "rt") as fp:
753+
dashboard = json.load(fp)
754+
rel_path = str(
755+
path.relative_to(self._charm.charm_dir) if path.is_absolute() else path
756+
)
757+
# COSAgentProvider is somewhat analogous to GrafanaDashboardProvider. We need to overwrite the uid here
758+
# because there is currently no other way to communicate the dashboard path separately.
759+
# https://github.com/canonical/grafana-k8s-operator/pull/363
760+
dashboard["uid"] = DashboardPath40UID.generate(self._charm.meta.name, rel_path)
761+
dashboards.append(LZMABase64.compress(json.dumps(dashboard)))
746762
return dashboards
747763

748764
@property
@@ -768,7 +784,7 @@ def is_ready(self, relation: Optional[Relation] = None):
768784
"""Is this endpoint ready?"""
769785
relation = relation or self._relation
770786
if not relation:
771-
logger.debug(f"no relation on {self._relation_name !r}: tracing not ready")
787+
logger.debug(f"no relation on {self._relation_name!r}: tracing not ready")
772788
return False
773789
if relation.data is None:
774790
logger.error(f"relation data is None for {relation}")
@@ -802,29 +818,48 @@ def get_all_endpoints(
802818

803819
def _get_tracing_endpoint(
804820
self, relation: Optional[Relation], protocol: ReceiverProtocol
805-
) -> Optional[str]:
821+
) -> str:
822+
"""Return a tracing endpoint URL if it is available or raise a ProtocolNotFoundError."""
806823
unit_data = self.get_all_endpoints(relation)
807824
if not unit_data:
808-
return None
825+
# we didn't find the protocol because the remote end didn't publish any data yet
826+
# it might also mean that grafana-agent doesn't have a relation to the tracing backend
827+
raise ProtocolNotFoundError(protocol)
809828
receivers: List[Receiver] = [i for i in unit_data.receivers if i.protocol.name == protocol]
810829
if not receivers:
811-
logger.error(f"no receiver found with protocol={protocol!r}")
812-
return None
830+
# we didn't find the protocol because grafana-agent didn't return us the protocol that we requested
831+
# the caller might want to verify that we did indeed request this protocol
832+
raise ProtocolNotFoundError(protocol)
813833
if len(receivers) > 1:
814-
logger.error(
834+
logger.warning(
815835
f"too many receivers with protocol={protocol!r}; using first one. Found: {receivers}"
816836
)
817-
return None
818837

819838
receiver = receivers[0]
839+
if not receiver.url:
840+
# grafana-agent isn't connected to the tracing backend yet
841+
raise ProtocolNotFoundError(protocol)
820842
return receiver.url
821843

822844
def get_tracing_endpoint(
823845
self, protocol: ReceiverProtocol, relation: Optional[Relation] = None
824-
) -> Optional[str]:
825-
"""Receiver endpoint for the given protocol."""
826-
endpoint = self._get_tracing_endpoint(relation or self._relation, protocol=protocol)
827-
if not endpoint:
846+
) -> str:
847+
"""Receiver endpoint for the given protocol.
848+
849+
It could happen that this function gets called before the provider publishes the endpoints.
850+
In such a scenario, if a non-leader unit calls this function, a permission denied exception will be raised due to
851+
restricted access. To prevent this, this function needs to be guarded by the `is_ready` check.
852+
853+
Raises:
854+
ProtocolNotRequestedError:
855+
If the charm unit is the leader unit and attempts to obtain an endpoint for a protocol it did not request.
856+
ProtocolNotFoundError:
857+
If the charm attempts to obtain an endpoint when grafana-agent isn't related to a tracing backend.
858+
"""
859+
try:
860+
return self._get_tracing_endpoint(relation or self._relation, protocol=protocol)
861+
except ProtocolNotFoundError:
862+
# let's see if we didn't find it because we didn't request the endpoint
828863
requested_protocols = set()
829864
relations = [relation] if relation else self.relations
830865
for relation in relations:
@@ -839,8 +874,7 @@ def get_tracing_endpoint(
839874
if protocol not in requested_protocols:
840875
raise ProtocolNotRequestedError(protocol, relation)
841876

842-
return None
843-
return endpoint
877+
raise
844878

845879

846880
class COSAgentDataChanged(EventBase):
@@ -902,6 +936,8 @@ def __init__(
902936
events.relation_joined, self._on_relation_data_changed
903937
) # TODO: do we need this?
904938
self.framework.observe(events.relation_changed, self._on_relation_data_changed)
939+
self.framework.observe(events.relation_departed, self._on_relation_departed)
940+
905941
for event in self._refresh_events:
906942
self.framework.observe(event, self.trigger_refresh) # pyright: ignore
907943

@@ -929,6 +965,26 @@ def _on_peer_relation_changed(self, _):
929965
if self._charm.unit.is_leader():
930966
self.on.data_changed.emit() # pyright: ignore
931967

968+
def _on_relation_departed(self, event):
969+
"""Remove provider's (principal's) alert rules and dashboards from peer data when the cos-agent relation to the principal is removed."""
970+
if not self.peer_relation:
971+
event.defer()
972+
return
973+
# empty the departing unit's alert rules and dashboards from peer data
974+
data = CosAgentPeersUnitData(
975+
unit_name=event.unit.name,
976+
relation_id=str(event.relation.id),
977+
relation_name=event.relation.name,
978+
metrics_alert_rules={},
979+
log_alert_rules={},
980+
dashboards=[],
981+
)
982+
self.peer_relation.data[self._charm.unit][
983+
f"{CosAgentPeersUnitData.KEY}-{event.unit.name}"
984+
] = data.json()
985+
986+
self.on.data_changed.emit() # pyright: ignore
987+
932988
def _on_relation_data_changed(self, event: RelationChangedEvent):
933989
# Peer data is the only means of communication between subordinate units.
934990
if not self.peer_relation:
@@ -988,7 +1044,16 @@ def update_tracing_receivers(self):
9881044
CosAgentRequirerUnitData(
9891045
receivers=[
9901046
Receiver(
991-
url=f"{self._get_tracing_receiver_url(protocol)}",
1047+
# if tracing isn't ready, we don't want the wrong receiver URLs present in the databag.
1048+
# however, because of the backwards compatibility requirements, we need to still provide
1049+
# the protocols list so that the charm with older cos_agent version doesn't error its hooks.
1050+
# before this change was added, the charm with old cos_agent version threw exceptions with
1051+
# connections to grafana-agent timing out. After the change, the charm will fail validating
1052+
# databag contents (as it expects a string in URL) but that won't cause any errors as
1053+
# tracing endpoints are the only content in the grafana-agent's side of the databag.
1054+
url=f"{self._get_tracing_receiver_url(protocol)}"
1055+
if self._charm.tracing.is_ready() # type: ignore
1056+
else None,
9921057
protocol=ProtocolType(
9931058
name=protocol,
9941059
type=receiver_protocol_to_transport_protocol[protocol],
@@ -1030,8 +1095,7 @@ def _get_requested_protocols(self, relation: Relation):
10301095
if len(units) > 1:
10311096
# should never happen
10321097
raise ValueError(
1033-
f"unexpected error: subordinate relation {relation} "
1034-
f"should have exactly one unit"
1098+
f"unexpected error: subordinate relation {relation} should have exactly one unit"
10351099
)
10361100

10371101
unit = next(iter(units), None)
@@ -1287,7 +1351,7 @@ def dashboards(self) -> List[Dict[str, str]]:
12871351
seen_apps.append(app_name)
12881352

12891353
for encoded_dashboard in data.dashboards or ():
1290-
content = GrafanaDashboard(encoded_dashboard)._deserialize()
1354+
content = json.loads(LZMABase64.decompress(encoded_dashboard))
12911355

12921356
title = content.get("title", "no_title")
12931357

@@ -1314,44 +1378,32 @@ def charm_tracing_config(
13141378
If https endpoint is provided but cert_path is not found on disk:
13151379
disable charm tracing.
13161380
If https endpoint is provided and cert_path is None:
1317-
ERROR
1381+
raise TracingError
13181382
Else:
13191383
proceed with charm tracing (with or without tls, as appropriate)
13201384
13211385
Usage:
1322-
If you are using charm_tracing >= v1.9:
1323-
>>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm
1324-
>>> from lib.charms.tempo_k8s.v0.cos_agent import charm_tracing_config
1386+
>>> from lib.charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm
1387+
>>> from lib.charms.tempo_coordinator_k8s.v0.tracing import charm_tracing_config
13251388
>>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path")
13261389
>>> class MyCharm(...):
13271390
>>> _cert_path = "/path/to/cert/on/charm/container.crt"
13281391
>>> def __init__(self, ...):
1329-
>>> self.cos_agent = COSAgentProvider(...)
1392+
>>> self.tracing = TracingEndpointRequirer(...)
13301393
>>> self.my_endpoint, self.cert_path = charm_tracing_config(
1331-
... self.cos_agent, self._cert_path)
1332-
1333-
If you are using charm_tracing < v1.9:
1334-
>>> from lib.charms.tempo_k8s.v1.charm_tracing import trace_charm
1335-
>>> from lib.charms.tempo_k8s.v2.tracing import charm_tracing_config
1336-
>>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path")
1337-
>>> class MyCharm(...):
1338-
>>> _cert_path = "/path/to/cert/on/charm/container.crt"
1339-
>>> def __init__(self, ...):
1340-
>>> self.cos_agent = COSAgentProvider(...)
1341-
>>> self.my_endpoint, self.cert_path = charm_tracing_config(
1342-
... self.cos_agent, self._cert_path)
1343-
>>> @property
1344-
>>> def my_endpoint(self):
1345-
>>> return self._my_endpoint
1346-
>>> @property
1347-
>>> def cert_path(self):
1348-
>>> return self._cert_path
1349-
1394+
... self.tracing, self._cert_path)
13501395
"""
13511396
if not endpoint_requirer.is_ready():
13521397
return None, None
13531398

1354-
endpoint = endpoint_requirer.get_tracing_endpoint("otlp_http")
1399+
try:
1400+
endpoint = endpoint_requirer.get_tracing_endpoint("otlp_http")
1401+
except ProtocolNotFoundError:
1402+
logger.warn(
1403+
"Endpoint for tracing wasn't provided as tracing backend isn't ready yet. If grafana-agent isn't connected to a tracing backend, integrate it. Otherwise this issue should resolve itself in a few events."
1404+
)
1405+
return None, None
1406+
13551407
if not endpoint:
13561408
return None, None
13571409

0 commit comments

Comments
 (0)