Skip to content

Commit 60f28ef

Browse files
author
Olivier Chafik
committed
tool-bench: add --ctk, --ctv, --fa flags
1 parent fc19192 commit 60f28ef

File tree

2 files changed

+18
-0
lines changed

2 files changed

+18
-0
lines changed

examples/server/tests/utils.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,9 @@ class ServerProcess:
6464
id_slot: int | None = None
6565
cache_prompt: bool | None = None
6666
n_slots: int | None = None
67+
ctk: str | None = None
68+
ctv: str | None = None
69+
fa: bool | None = None
6770
server_continuous_batching: bool | None = False
6871
server_embeddings: bool | None = False
6972
server_reranking: bool | None = False
@@ -151,6 +154,12 @@ def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None:
151154
server_args.extend(["--ctx-size", self.n_ctx])
152155
if self.n_slots:
153156
server_args.extend(["--parallel", self.n_slots])
157+
if self.ctk:
158+
server_args.extend(["-ctk", self.ctk])
159+
if self.ctv:
160+
server_args.extend(["-ctv", self.ctv])
161+
if self.fa is not None:
162+
server_args.append("-fa")
154163
if self.n_predict:
155164
server_args.extend(["--n-predict", self.n_predict])
156165
if self.slot_save_path:

scripts/tool_bench.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,9 @@ def run(
211211
temp: Annotated[Optional[List[float]], typer.Option(help="Set of temperatures to test")] = None,
212212
top_p: Annotated[Optional[float], typer.Option(help="top_p")] = None,
213213
top_k: Annotated[Optional[int], typer.Option(help="top_k")] = None,
214+
ctk: Annotated[Optional[str], typer.Option(help="ctk")] = None,
215+
ctv: Annotated[Optional[str], typer.Option(help="ctv")] = None,
216+
fa: Annotated[Optional[bool], typer.Option(help="fa")] = None,
214217
seed: Annotated[Optional[int], typer.Option(help="Random seed")] = None,
215218
port: Annotated[int, typer.Option(help="llama-server port")] = 8084,
216219
force: Annotated[bool, typer.Option(help="Force overwrite of output file")] = False,
@@ -284,6 +287,9 @@ def elapsed():
284287
temp=t,
285288
top_p=top_p,
286289
top_k=top_k,
290+
ctk=ctk,
291+
ctv=ctv,
292+
seed=seed,
287293
success_ratio=float(success_count) / n,
288294
avg_time=mean(success_times + failure_times),
289295
median_time=median(success_times + failure_times),
@@ -307,6 +313,9 @@ def elapsed():
307313
server.n_ctx = n_ctx
308314
server.n_slots = 1
309315
server.jinja = True
316+
server.ctk = ctk
317+
server.ctv = ctv
318+
server.fa = fa
310319
server.n_predict = n_predict
311320
server.model_hf_repo = hf
312321
server.model_hf_file = None

0 commit comments

Comments
 (0)