3232 "sim" : "/graphs/sim" ,
3333 "ensemble" : "/graphs/ensemble" ,
3434}
# Healthcheck polling policy for EvaluationHarness.sanity_check:
# poll every RETRY_INTERVAL seconds, give up after RETRY_TIMEOUT seconds.
RETRY_INTERVAL = 5    # seconds between healthcheck attempts
RETRY_TIMEOUT = 600   # total seconds to wait for the endpoints to come up
3537
3638
3739class EvaluationHarness :
@@ -52,16 +54,21 @@ def __init__(self, base_url: str, dataset: str, reranker_base_url: str = ""):
5254 self .sanity_check ()
5355
5456 def sanity_check (self ):
55- if not requests .get (f"{ self .base_url } /healthcheck" ).status_code == 200 :
56- raise ValueError ("Endpoint is not running" )
57- if not os .path .exists (self .dataset ):
58- raise ValueError ("Dataset path does not exist" )
59- if (
60- self .reranker_base_url
61- and not requests .get (f"{ self .reranker_base_url } /healthcheck" ).status_code
62- == 200
63- ):
64- raise ValueError ("Reranker endpoint is not running" )
57+ cur_time = time .time ()
58+ while time .time () - cur_time < RETRY_TIMEOUT :
59+ if not requests .get (f"{ self .base_url } /healthcheck" ).status_code == 200 :
60+ raise ValueError ("Endpoint is not running" )
61+ if not os .path .exists (self .dataset ):
62+ raise ValueError ("Dataset path does not exist" )
63+ if (
64+ self .reranker_base_url
65+ and not requests .get (
66+ f"{ self .reranker_base_url } /healthcheck"
67+ ).status_code
68+ == 200
69+ ):
70+ raise ValueError ("Reranker endpoint is not running" )
71+ time .sleep (RETRY_INTERVAL )
6572
6673 def evaluate (self , retriever : str ):
6774 retrieval_tcs = []
@@ -79,8 +86,7 @@ def evaluate(self, retriever: str):
7986 question , ground_truth = qa_pair ["question" ], qa_pair ["ground_truth" ]
8087 response , response_time = self .query (retriever , question )
8188 response_text = response ["response" ]
82- context = response ["context" ]
83- context_list = context [0 ].split ("--------------------------" )
89+ context_list = [r ["context" ] for r in response ["context_sources" ]]
8490
8591 # works for: precision, recall, hallucination
8692 retrieval_tc = LLMTestCase (
0 commit comments