1010import multiprocessing
1111import os
1212
13+ # Third Party
14+ import httpx
15+
1316# First Party
1417from instructlab .eval import (
1518 mt_bench_answers ,
@@ -110,6 +113,7 @@ def gen_answers(
110113 api_key : str | None = None ,
111114 max_workers : int | str | None = None ,
112115 serving_gpus : int | None = None ,
116+ http_client : httpx .Client | None = None ,
113117 ) -> None :
114118 """
115119 Asks questions to model
@@ -119,6 +123,7 @@ def gen_answers(
119123 api_key API token for authenticating with model server
120124 max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
121125 serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
126+ http_client Custom http client to use for requests
122127 """
123128 logger .debug (locals ())
124129 mt_bench_answers .generate_answers (
@@ -127,6 +132,7 @@ def gen_answers(
127132 api_key = api_key ,
128133 output_dir = self .output_dir ,
129134 max_workers = self ._get_effective_max_workers (max_workers , serving_gpus ),
135+ http_client = http_client ,
130136 )
131137
132138 def judge_answers (
@@ -135,6 +141,7 @@ def judge_answers(
135141 api_key : str | None = None ,
136142 max_workers : int | str | None = None ,
137143 serving_gpus : int | None = None ,
144+ http_client : httpx .Client | None = None ,
138145 ) -> tuple :
139146 """
140147 Runs MT-Bench judgment
@@ -144,6 +151,7 @@ def judge_answers(
144151 api_key API token for authenticating with model server
145152 max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
146153 serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
154+ http_client Custom http client to use for requests
147155
148156 Returns:
149157 overall_score MT-Bench score for the overall model evaluation
@@ -160,6 +168,7 @@ def judge_answers(
160168 max_workers = self ._get_effective_max_workers (max_workers , serving_gpus ),
161169 output_dir = self .output_dir ,
162170 merge_system_user_message = self .merge_system_user_message ,
171+ http_client = http_client ,
163172 )
164173
165174
@@ -202,6 +211,7 @@ def gen_answers(
202211 api_key : str | None = None ,
203212 max_workers : int | str | None = None ,
204213 serving_gpus : int | None = None ,
214+ http_client : httpx .Client | None = None ,
205215 ) -> None :
206216 """
207217 Asks questions to model
@@ -211,6 +221,7 @@ def gen_answers(
211221 api_key API token for authenticating with model server
212222 max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
213223 serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
224+ http_client Custom http client to use for requests
214225 """
215226 logger .debug (locals ())
216227 mt_bench_branch_generator .generate (
@@ -228,6 +239,7 @@ def gen_answers(
228239 data_dir = self .output_dir ,
229240 max_workers = self ._get_effective_max_workers (max_workers , serving_gpus ),
230241 bench_name = "mt_bench_branch" ,
242+ http_client = http_client ,
231243 )
232244
233245 def judge_answers (
@@ -236,6 +248,7 @@ def judge_answers(
236248 api_key : str | None = None ,
237249 max_workers : int | str | None = None ,
238250 serving_gpus : int | None = None ,
251+ http_client : httpx .Client | None = None ,
239252 ) -> tuple :
240253 """
241254 Runs MT-Bench-Branch judgment. Judgments can be compared across runs with consistent question_id -> qna file name.
@@ -245,6 +258,7 @@ def judge_answers(
245258 api_key API token for authenticating with model server
246259 max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
247260 serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
261+ http_client Custom http client to use for requests
248262
249263 Returns:
250264 overall_score Overall score from the evaluation
@@ -263,5 +277,6 @@ def judge_answers(
263277 data_dir = self .output_dir ,
264278 bench_name = "mt_bench_branch" ,
265279 merge_system_user_message = self .merge_system_user_message ,
280+ http_client = http_client ,
266281 )
267282 return overall_score , qa_pairs , error_rate
0 commit comments