Skip to content

Commit 7c8e269

Browse files
authored
Improve sanity_check robustness (#138)
evals: improve sanity_check robustness Signed-off-by: Jack Luar <[email protected]>
1 parent a36cc3e commit 7c8e269

File tree

1 file changed

+40
-12
lines changed

1 file changed

+40
-12
lines changed

evaluation/auto_evaluation/eval_main.py

Lines changed: 40 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -56,19 +56,47 @@ def __init__(self, base_url: str, dataset: str, reranker_base_url: str = ""):
56 56
def sanity_check(self):
57 57
cur_time = time.time()
58 58
while time.time() - cur_time < RETRY_TIMEOUT:
59-
if not requests.get(f"{self.base_url}/healthcheck").status_code == 200:
60-
raise ValueError("Endpoint is not running")
59+
try:
60+
# Check main endpoint
61+
response = requests.get(f"{self.base_url}/healthcheck", timeout=10)
62+
if response.status_code != 200:
63+
print(
64+
f"Main endpoint health check failed with status {response.status_code}"
65+
)
66+
time.sleep(RETRY_INTERVAL)
67+
continue
68+
except requests.exceptions.RequestException as e:
69+
print(f"Failed to connect to main endpoint: {e}")
70+
time.sleep(RETRY_INTERVAL)
71+
continue
72+
73+
# Check dataset exists
61 74
if not os.path.exists(self.dataset):
62-
raise ValueError("Dataset path does not exist")
63-
if (
64-
self.reranker_base_url
65-
and not requests.get(
66-
f"{self.reranker_base_url}/healthcheck"
67-
).status_code
68-
== 200
69-
):
70-
raise ValueError("Reranker endpoint is not running")
71-
time.sleep(RETRY_INTERVAL)
75+
raise ValueError(f"Dataset path does not exist: {self.dataset}")
76+
77+
# Check reranker endpoint if provided
78+
if self.reranker_base_url:
79+
try:
80+
response = requests.get(
81+
f"{self.reranker_base_url}/healthcheck", timeout=10
82+
)
83+
if response.status_code != 200:
84+
print(
85+
f"Reranker endpoint health check failed with status {response.status_code}"
86+
)
87+
time.sleep(RETRY_INTERVAL)
88+
continue
89+
except requests.exceptions.RequestException as e:
90+
print(f"Failed to connect to reranker endpoint: {e}")
91+
time.sleep(RETRY_INTERVAL)
92+
continue
93+
94+
# All checks passed
95+
print("All sanity checks passed")
96+
return
97+
98+
# Timeout reached
99+
raise TimeoutError(f"Sanity checks failed after {RETRY_TIMEOUT} seconds")
72 100

73 101
def evaluate(self, retriever: str):
74 102
retrieval_tcs = []

0 commit comments

Comments
 (0)