Skip to content

Commit d58f5d4

Browse files
authored
[ROB-2191] Irsa assume token refresh fix (#40)
* refreshes credentials for irsa * refactoring refresh code * safer refresh * bump version * fix refresh creds * moved timeout to env var * updating log and removing dead code
1 parent e51281e commit d58f5d4

File tree

2 files changed

+44
-36
lines changed

2 files changed

+44
-36
lines changed

prometrix/connect/aws_connect.py

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
SA_TOKEN_PATH = os.environ.get("SA_TOKEN_PATH", "/var/run/secrets/eks.amazonaws.com/serviceaccount/token")
1717
AWS_ASSUME_ROLE = os.environ.get("AWS_ASSUME_ROLE")
18+
AWS_REFRESH_CREDS_SEC = int(os.environ.get("AWS_REFRESH_CREDS_SEC", "900")) # 15 minutes
1819

1920
class AWSPrometheusConnect(CustomPrometheusConnect):
2021
def __init__(
@@ -31,25 +32,43 @@ def __init__(
3132
self.region = region
3233
self.service_name = service_name
3334

34-
if access_key and secret_key:
35-
# Backwards compatibility: use static keys
36-
self._credentials = Credentials(access_key, secret_key, token)
37-
self._has_static_keys = True
38-
self._session = None
39-
else:
40-
# IRSA
41-
session = boto3.Session()
42-
creds = session.get_credentials()
43-
if not creds:
44-
raise RuntimeError("No AWS credentials found (neither static keys nor IRSA)")
45-
self._credentials = creds
46-
self._has_static_keys = False
47-
self._session = session
35+
self._initial_access_key = access_key
36+
self._initial_secret_key = secret_key
37+
self._initial_token = token
38+
self._has_static_keys = bool(access_key and secret_key)
39+
self._session = None
40+
self._credentials = None
4841

4942
role_to_assume = assume_role_arn or AWS_ASSUME_ROLE
5043
self._role_to_assume = role_to_assume
51-
if role_to_assume:
52-
self._assume_role(role_to_assume)
44+
45+
self._last_init_at = None
46+
47+
self.init_credentials()
48+
49+
def init_credentials(self) -> None:
    """(Re)build the credentials used to sign requests.

    With static keys (``self._has_static_keys``) a ``Credentials`` object is
    rebuilt from the keys captured at construction time and no session is
    kept.  Otherwise a fresh ``boto3.Session`` is created and its resolved
    credentials are used (the IRSA path).  If ``self._role_to_assume`` is
    set, ``self._assume_role`` is then called with it.

    On success ``self._last_init_at`` is set to the current UTC time (naive
    datetime), which ``_build_auth`` uses for time-based refresh.

    Raises:
        RuntimeError: no credentials could be resolved from the session.
        Exception: anything raised while building the session or assuming
            the role is logged and re-raised.  In that case the previously
            held credentials and session are restored, so a failed refresh
            never leaves the connection with a half-updated identity.
    """
    # Local import: the file is only known to expose ``datetime`` itself.
    from datetime import timezone

    # Snapshot current state so a failure part-way through can be rolled back.
    previous_credentials = self._credentials
    previous_session = self._session
    try:
        if self._has_static_keys:
            self._credentials = Credentials(
                self._initial_access_key,
                self._initial_secret_key,
                self._initial_token,
            )
            self._session = None
        else:
            # IRSA
            session = boto3.Session()
            creds = session.get_credentials()
            if not creds:
                raise RuntimeError("No AWS credentials found (neither static keys nor IRSA)")
            self._credentials = creds
            self._session = session

        # NOTE(review): _assume_role presumably replaces self._credentials with
        # the assumed-role credentials - its body is not visible here; confirm.
        if self._role_to_assume:
            self._assume_role(self._role_to_assume)

        # datetime.utcnow() is deprecated; keep the value naive so it stays
        # comparable with other naive-UTC timestamps in this class.
        self._last_init_at = datetime.now(timezone.utc).replace(tzinfo=None)
    except Exception:
        # Roll back: without this, a failed role assumption would leave the
        # base (un-assumed) credentials installed until the next refresh.
        self._credentials = previous_credentials
        self._session = previous_session
        logging.exception("Failed to initialize credentials")
        raise
5372

5473
def _assume_role(self, role_arn: str) -> None:
5574
try:
@@ -80,6 +99,12 @@ def _assume_role(self, role_arn: str) -> None:
8099

81100
def _build_auth(self) -> SigV4Auth:
    """Build fresh SigV4 auth with current credentials (handles rotation).

    Before signing, credentials are re-initialized via ``init_credentials``
    when they have never been initialized, when at least
    ``AWS_REFRESH_CREDS_SEC`` seconds have passed since the last successful
    initialization, or when the wall clock has moved backwards (negative
    elapsed time), which would otherwise postpone the refresh indefinitely.

    A failing time-based refresh is logged and swallowed on purpose: the
    request is still signed with whatever credentials are currently held.

    Returns:
        A ``SigV4Auth`` built from a frozen snapshot of the current
        credentials, ``self.service_name`` and ``self.region``.
    """
    # Local import: the file is only known to expose ``datetime`` itself.
    from datetime import timezone

    try:
        # Naive UTC "now" (utcnow() is deprecated); stays comparable with the
        # naive timestamp stored in self._last_init_at.
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        last_init = self._last_init_at
        elapsed = None if last_init is None else (now - last_init).total_seconds()
        if elapsed is None or elapsed < 0 or elapsed >= AWS_REFRESH_CREDS_SEC:
            logging.debug("%d seconds passed; re-initializing AWS credentials", AWS_REFRESH_CREDS_SEC)
            self.init_credentials()
    except Exception:
        # Best effort: fall through and sign with the existing credentials.
        logging.exception("Time-based credential refresh failed")
    frozen = self._credentials.get_frozen_credentials()
    return SigV4Auth(frozen, self.service_name, self.region)
85110

@@ -98,24 +123,6 @@ def signed_request(
98123
params=params,
99124
)
100125

101-
def _refresh_credentials(self) -> None:
102-
"""
103-
Boto should automatically refresh expired credentials but when assuming role it cant be done automatically
104-
"""
105-
try:
106-
if not self._has_static_keys and self._session is not None:
107-
# this is also needed for assume role if base credentials fails
108-
refreshed = self._session.get_credentials()
109-
if refreshed:
110-
self._credentials = refreshed
111-
except Exception:
112-
logging.exception("Failed to refresh session credentials")
113-
if self._role_to_assume:
114-
try:
115-
self._assume_role(self._role_to_assume)
116-
except Exception:
117-
logging.exception("Failed to refresh assume role")
118-
119126
def _request_with_refresh(self, *, method, url, data=None, params=None, headers=None, verify=False):
120127
resp = self.signed_request(
121128
method=method,
@@ -126,7 +133,8 @@ def _request_with_refresh(self, *, method, url, data=None, params=None, headers=
126133
headers=headers,
127134
)
128135
if resp is not None and resp.status_code in (400, 401, 403):
129-
self._refresh_credentials()
136+
logging.warning("Auth failure %s, re-initializing credentials", resp.status_code)
137+
self.init_credentials()
130138
resp = self.signed_request(
131139
method=method,
132140
url=url,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "prometrix"
3-
version = "0.2.6"
3+
version = "0.2.7"
44
authors = ["Avi Kotlicky <[email protected]>"]
55
readme = "README.md"
66
packages = [{include = "prometrix"}]

0 commit comments

Comments
 (0)