1515
# Filesystem path taken from the SA_TOKEN_PATH environment variable; falls back
# to the EKS service-account token location when the variable is unset.
SA_TOKEN_PATH = os.getenv("SA_TOKEN_PATH", "/var/run/secrets/eks.amazonaws.com/serviceaccount/token")

# Role ARN used when no explicit assume-role ARN is passed to the connector;
# None when the variable is unset.
AWS_ASSUME_ROLE = os.getenv("AWS_ASSUME_ROLE")

# Number of seconds after which credentials are re-initialized before signing
# a request (default: 900 seconds, i.e. 15 minutes).
AWS_REFRESH_CREDS_SEC = int(os.getenv("AWS_REFRESH_CREDS_SEC", "900"))
1819
1920class AWSPrometheusConnect (CustomPrometheusConnect ):
2021 def __init__ (
@@ -31,25 +32,43 @@ def __init__(
3132 self .region = region
3233 self .service_name = service_name
3334
34- if access_key and secret_key :
35- # Backwards compatibility: use static keys
36- self ._credentials = Credentials (access_key , secret_key , token )
37- self ._has_static_keys = True
38- self ._session = None
39- else :
40- # IRSA
41- session = boto3 .Session ()
42- creds = session .get_credentials ()
43- if not creds :
44- raise RuntimeError ("No AWS credentials found (neither static keys nor IRSA)" )
45- self ._credentials = creds
46- self ._has_static_keys = False
47- self ._session = session
35+ self ._initial_access_key = access_key
36+ self ._initial_secret_key = secret_key
37+ self ._initial_token = token
38+ self ._has_static_keys = bool (access_key and secret_key )
39+ self ._session = None
40+ self ._credentials = None
4841
4942 role_to_assume = assume_role_arn or AWS_ASSUME_ROLE
5043 self ._role_to_assume = role_to_assume
51- if role_to_assume :
52- self ._assume_role (role_to_assume )
44+
45+ self ._last_init_at = None
46+
47+ self .init_credentials ()
48+
def init_credentials(self) -> None:
    """(Re)build the credentials used to sign requests.

    With static keys, a ``Credentials`` object is created from the keys that
    were supplied at construction time and no session is kept. Otherwise a
    new ``boto3.Session`` is created and the credentials it resolves are
    used. If a role ARN is configured, ``_assume_role`` is then called for
    it. On success ``_last_init_at`` is set to the current naive-UTC time.

    If any step fails, ``_credentials`` and ``_session`` are restored to the
    values they had on entry, so a failed refresh never leaves the instance
    half-updated (for example when a later step such as the assume-role call
    raises after the credentials were already replaced).

    Raises:
        RuntimeError: if the boto3 session yields no credentials.
        Exception: any other error from the steps above; it is logged and
            re-raised unchanged.
    """
    # Local import so this method does not depend on the module's import block.
    from datetime import timezone

    # Snapshot the current state so it can be rolled back on failure.
    previous_credentials = getattr(self, "_credentials", None)
    previous_session = getattr(self, "_session", None)

    try:
        if self._has_static_keys:
            self._credentials = Credentials(
                self._initial_access_key,
                self._initial_secret_key,
                self._initial_token,
            )
            self._session = None
        else:
            # IRSA: no static keys, use whatever the boto3 session resolves.
            session = boto3.Session()
            creds = session.get_credentials()
            if not creds:
                raise RuntimeError("No AWS credentials found (neither static keys nor IRSA)")
            self._credentials = creds
            self._session = session

        role_to_assume = self._role_to_assume
        if role_to_assume:
            self._assume_role(role_to_assume)

        # Naive UTC timestamp, equivalent to the deprecated datetime.utcnow(),
        # so it stays comparable with other naive-UTC values.
        self._last_init_at = datetime.now(timezone.utc).replace(tzinfo=None)
    except Exception:
        # Roll back: keep signing with the last known-good credentials rather
        # than a partially initialized set.
        self._credentials = previous_credentials
        self._session = previous_session
        logging.exception("Failed to initialize credentials")
        raise
5372
5473 def _assume_role (self , role_arn : str ) -> None :
5574 try :
@@ -80,6 +99,12 @@ def _assume_role(self, role_arn: str) -> None:
8099
def _build_auth(self) -> SigV4Auth:
    """Builds fresh SigV4 auth with current credentials (handles rotation).

    Before signing, credentials are re-initialized through
    ``init_credentials`` when they have never been initialized
    (``_last_init_at`` is ``None``) or when at least ``AWS_REFRESH_CREDS_SEC``
    seconds have elapsed since the last successful initialization. A failed
    refresh is logged and not propagated; signing then proceeds with the
    credentials currently held.

    Returns:
        A ``SigV4Auth`` built from the frozen credentials, ``service_name``
        and ``region``.
    """
    # Local import so this method does not depend on the module's import block.
    from datetime import timezone

    try:
        last_init_at = self._last_init_at
        if last_init_at is None:
            logging.debug("AWS credentials not initialized yet; initializing")
            self.init_credentials()
        else:
            # Naive UTC "now", equivalent to the deprecated datetime.utcnow(),
            # so it can be subtracted from the naive _last_init_at value.
            now = datetime.now(timezone.utc).replace(tzinfo=None)
            age_seconds = (now - last_init_at).total_seconds()
            if age_seconds >= AWS_REFRESH_CREDS_SEC:
                logging.debug(
                    "%.0f seconds since last credential init (threshold %d); re-initializing AWS credentials",
                    age_seconds,
                    AWS_REFRESH_CREDS_SEC,
                )
                self.init_credentials()
    except Exception:
        # Best effort: a failed periodic refresh must not block the request.
        logging.exception("Time-based credential refresh failed")

    frozen = self._credentials.get_frozen_credentials()
    return SigV4Auth(frozen, self.service_name, self.region)
85110
@@ -98,24 +123,6 @@ def signed_request(
98123 params = params ,
99124 )
100125
101- def _refresh_credentials (self ) -> None :
102- """
103- Boto should automatically refresh expired credentials but when assuming role it cant be done automatically
104- """
105- try :
106- if not self ._has_static_keys and self ._session is not None :
107- # this is also needed for assume role if base credentials fails
108- refreshed = self ._session .get_credentials ()
109- if refreshed :
110- self ._credentials = refreshed
111- except Exception :
112- logging .exception ("Failed to refresh session credentials" )
113- if self ._role_to_assume :
114- try :
115- self ._assume_role (self ._role_to_assume )
116- except Exception :
117- logging .exception ("Failed to refresh assume role" )
118-
119126 def _request_with_refresh (self , * , method , url , data = None , params = None , headers = None , verify = False ):
120127 resp = self .signed_request (
121128 method = method ,
@@ -126,7 +133,8 @@ def _request_with_refresh(self, *, method, url, data=None, params=None, headers=
126133 headers = headers ,
127134 )
128135 if resp is not None and resp .status_code in (400 , 401 , 403 ):
129- self ._refresh_credentials ()
136+ logging .warning ("Auth failure %s, re-initializing credentials" , resp .status_code )
137+ self .init_credentials ()
130138 resp = self .signed_request (
131139 method = method ,
132140 url = url ,
0 commit comments