Skip to content

Commit 3f15961

Browse files
authored
fix: Retry polling token (#392)
* fix: Retry polling token
  Signed-off-by: oliver könig <[email protected]>
* keep original behavior
  Signed-off-by: oliver könig <[email protected]>
---------
Signed-off-by: oliver könig <[email protected]>
1 parent 6b2240e commit 3f15961

File tree

1 file changed

+18
-9
lines changed

1 file changed

+18
-9
lines changed

nemo_run/core/execution/dgxcloud.py

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -90,15 +90,24 @@ def get_auth_token(self) -> Optional[str]:
9090
"appSecret": self.app_secret,
9191
}
9292

93-
response = requests.post(url, json=payload, headers=self._default_headers())
94-
response_text = response.text.strip()
95-
auth_token = json.loads(response_text).get("accessToken", None) # [1]
96-
if not auth_token:
97-
logger.error("Failed to retrieve auth token; response was: %s", response_text)
98-
return None
99-
100-
logger.debug("Retrieved auth token from %s", url)
101-
return auth_token
93+
n_attempts = 0
94+
while n_attempts < 3:
95+
try:
96+
response = requests.post(url, json=payload, headers=self._default_headers())
97+
response_text = response.text.strip()
98+
auth_token = json.loads(response_text).get("accessToken", None) # [1]
99+
if auth_token:
100+
return auth_token
101+
102+
raise ValueError(f"Failed to retrieve auth token; response was: {response_text}")
103+
104+
except Exception as e:
105+
logger.error("Failed to retrieve auth token; error was: %s", e)
106+
time.sleep(10)
107+
n_attempts += 1
108+
109+
logger.error("Failed to retrieve auth token after 3 attempts.")
110+
return None
102111

103112
def get_project_and_cluster_id(self, token: str) -> tuple[Optional[str], Optional[str]]:
104113
url = f"{self.base_url}/org-unit/projects"

0 commit comments

Comments
 (0)