Skip to content

Commit 7ec0541

Browse files
authored
Update eval schema (#137)
* update eval schema
* add longer timeout for healthcheck before starting evals

---------

Signed-off-by: Jack Luar <jluar@precisioninno.com>
1 parent c1024a4 commit 7ec0541

File tree

1 file changed

+18 -12 lines changed

1 file changed

+18 -12 lines changed

evaluation/auto_evaluation/eval_main.py

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,8 @@
3232
"sim": "/graphs/sim",
3333
"ensemble": "/graphs/ensemble",
3434
}
35+
RETRY_INTERVAL = 5
36+
RETRY_TIMEOUT = 600
3537

3638

3739
class EvaluationHarness:
@@ -52,16 +54,21 @@ def __init__(self, base_url: str, dataset: str, reranker_base_url: str = ""):
5254
self.sanity_check()
5355

5456
def sanity_check(self):
55-
if not requests.get(f"{self.base_url}/healthcheck").status_code == 200:
56-
raise ValueError("Endpoint is not running")
57-
if not os.path.exists(self.dataset):
58-
raise ValueError("Dataset path does not exist")
59-
if (
60-
self.reranker_base_url
61-
and not requests.get(f"{self.reranker_base_url}/healthcheck").status_code
62-
== 200
63-
):
64-
raise ValueError("Reranker endpoint is not running")
57+
cur_time = time.time()
58+
while time.time() - cur_time < RETRY_TIMEOUT:
59+
if not requests.get(f"{self.base_url}/healthcheck").status_code == 200:
60+
raise ValueError("Endpoint is not running")
61+
if not os.path.exists(self.dataset):
62+
raise ValueError("Dataset path does not exist")
63+
if (
64+
self.reranker_base_url
65+
and not requests.get(
66+
f"{self.reranker_base_url}/healthcheck"
67+
).status_code
68+
== 200
69+
):
70+
raise ValueError("Reranker endpoint is not running")
71+
time.sleep(RETRY_INTERVAL)
6572

6673
def evaluate(self, retriever: str):
6774
retrieval_tcs = []
@@ -79,8 +86,7 @@ def evaluate(self, retriever: str):
7986
question, ground_truth = qa_pair["question"], qa_pair["ground_truth"]
8087
response, response_time = self.query(retriever, question)
8188
response_text = response["response"]
82-
context = response["context"]
83-
context_list = context[0].split("--------------------------")
89+
context_list = [r["context"] for r in response["context_sources"]]
8490

8591
# works for: precision, recall, hallucination
8692
retrieval_tc = LLMTestCase(

0 commit comments

Comments
 (0)