Skip to content

Commit ac7f5a6

Browse files
committed
refreshing prometheus loader every 10 minutes so credentials dont expire
1 parent 2118c4d commit ac7f5a6

File tree

3 files changed

+19
-8
lines changed

3 files changed

+19
-8
lines changed

poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ kubernetes = "^26.1.0"
3030
prometheus-api-client = "0.5.3"
3131
numpy = ">=1.26.4,<1.27.0"
3232
alive-progress = "^3.1.2"
33-
prometrix = "^0.2.7"
33+
prometrix = "^0.2.5"
3434
slack-sdk = "^3.21.3"
3535
pandas = "2.2.2"
3636
requests = ">2.32.4"

robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
from ..prometheus_utils import ClusterNotSpecifiedException, generate_prometheus_config
2222
from .base_metric_service import MetricsService
2323

24+
PROM_REFRESH_CREDS_SEC = int(os.environ.get("PROM_REFRESH_CREDS_SEC", "600")) # 10 minutes
25+
2426
logger = logging.getLogger("krr")
2527

2628

@@ -105,30 +107,39 @@ def __init__(
105107
elif not settings.inside_cluster and self.api_client is not None:
106108
self.api_client.update_params_for_auth(headers, {}, ["BearerToken"])
107109
self.prom_config = generate_prometheus_config(url=self.url, headers=headers, metrics_service=self)
108-
self.prometheus = get_custom_prometheus_connect(self.prom_config)
110+
self.get_prometheus()
111+
112+
def get_prometheus(self):
113+
now = datetime.utcnow()
114+
if (not self.prometheus
115+
or not self._last_init_at
116+
or now - self._last_init_at >= timedelta(seconds=PROM_REFRESH_CREDS_SEC)):
117+
self.prometheus = get_custom_prometheus_connect(self.prom_config)
118+
self._last_init_at = now
119+
return self.prometheus
109120

110121
def check_connection(self):
111122
"""
112123
Checks the connection to Prometheus.
113124
Raises:
114125
PrometheusNotFound: If the connection to Prometheus cannot be established.
115126
"""
116-
self.prometheus.check_prometheus_connection()
127+
self.get_prometheus().check_prometheus_connection()
117128

118129
@retry(wait=wait_random(min=2, max=10), stop=stop_after_attempt(5))
119130
async def query(self, query: str) -> dict:
120131
loop = asyncio.get_running_loop()
121132
return await loop.run_in_executor(
122133
self.executor,
123-
lambda: self.prometheus.safe_custom_query(query=query)["result"],
134+
lambda: self.get_prometheus().safe_custom_query(query=query)["result"],
124135
)
125136

126137
@retry(wait=wait_random(min=2, max=10), stop=stop_after_attempt(5))
127138
async def query_range(self, query: str, start: datetime, end: datetime, step: timedelta) -> dict:
128139
loop = asyncio.get_running_loop()
129140
return await loop.run_in_executor(
130141
self.executor,
131-
lambda: self.prometheus.safe_custom_query_range(
142+
lambda: self.get_prometheus().safe_custom_query_range(
132143
query=query, start_time=start, end_time=end, step=f"{step.seconds}s"
133144
)["result"],
134145
)
@@ -155,7 +166,7 @@ def validate_cluster_name(self):
155166

156167
def get_cluster_names(self) -> Optional[List[str]]:
157168
try:
158-
return self.prometheus.get_label_values(label_name=settings.prometheus_label)
169+
return self.get_prometheus().get_label_values(label_name=settings.prometheus_label)
159170
except PrometheusApiClientException:
160171
logger.error("Labels api not present on prometheus client")
161172
return []
@@ -194,7 +205,7 @@ async def gather_data(
194205
"""
195206
logger.debug(f"Gathering {LoaderClass.__name__} metric for {object}")
196207
try:
197-
metric_loader = LoaderClass(self.prometheus, self.name(), self.executor)
208+
metric_loader = LoaderClass(self.get_prometheus(), self.name(), self.executor)
198209
data = await metric_loader.load_data(object, period, step)
199210
except Exception:
200211
logger.exception("Failed to gather resource history data for %s", object)

0 commit comments

Comments
 (0)