
Commit d0a686b
Author: ochafik
Message: Update tool_bench.py
1 parent: 12deff6

File tree: 1 file changed (+11, -0 lines)

scripts/tool_bench.py

Lines changed: 11 additions & 0 deletions
@@ -12,6 +12,7 @@
 export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server
 export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}
 
+./scripts/tool_bench.py run --n 10 --temp -1 --temp 0 --temp 1 --temp 2 --temp 5 --llama-baseline $PWD/buildMaster/bin/llama-server --output qwen14b.jsonl --hf bartowski/Qwen2.5-14B-Instruct-GGUF:Q4_K_L
 ./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 1.5B Q4_K_M" --output qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF --ollama qwen2.5:1.5b-instruct-q4_K_M
 ./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 Coder 7B Q4_K_M" --output qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF --ollama qwen2.5-coder:7b
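Note: the usage line added above benchmarks Qwen 2.5 14B against a separately built baseline server (buildMaster). As a hypothetical Python driver for that same invocation, with the arguments copied verbatim from the docstring example:

    import os
    import subprocess

    # Hypothetical driver; it shells out to tool_bench.py with exactly the
    # arguments of the usage line added in this hunk.
    cwd = os.getcwd()
    env = dict(os.environ, LLAMA_SERVER_BIN_PATH=os.path.join(cwd, "build/bin/llama-server"))
    subprocess.run(
        [
            "./scripts/tool_bench.py", "run",
            "--n", "10",
            "--temp", "-1", "--temp", "0", "--temp", "1", "--temp", "2", "--temp", "5",
            "--llama-baseline", os.path.join(cwd, "buildMaster/bin/llama-server"),
            "--output", "qwen14b.jsonl",
            "--hf", "bartowski/Qwen2.5-14B-Instruct-GGUF:Q4_K_L",
        ],
        env=env,
        check=True,
    )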
@@ -205,6 +206,7 @@ def run(
     model: Annotated[Optional[str], typer.Option(help="Name of the model to test (server agnostic)")] = None,
     hf: Annotated[Optional[str], typer.Option(help="GGUF huggingface model repo id (+ optional quant) to test w/ llama-server")] = None,
     chat_template: Annotated[Optional[str], typer.Option(help="Chat template override for llama-server")] = None,
+    chat_template_file: Annotated[Optional[str], typer.Option(help="Chat template file override for llama-server")] = None,
     ollama: Annotated[Optional[str], typer.Option(help="Ollama model tag to test")] = None,
     llama_baseline: Annotated[Optional[str], typer.Option(help="llama-server baseline binary path to use as baseline")] = None,
     n: Annotated[int, typer.Option(help="Number of times to run each test")] = 10,
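For readers unfamiliar with the pattern: typer derives the flag name from the parameter name, so the new chat_template_file option surfaces on the CLI as --chat-template-file. A minimal standalone sketch of that surface (the script and its print body are illustrative, not tool_bench.py itself):

    from typing import Annotated, Optional

    import typer

    app = typer.Typer()

    @app.command()
    def run(
        chat_template: Annotated[Optional[str], typer.Option(help="Chat template override for llama-server")] = None,
        chat_template_file: Annotated[Optional[str], typer.Option(help="Chat template file override for llama-server")] = None,
    ):
        # typer exposes these parameters as --chat-template / --chat-template-file.
        print(f"chat_template={chat_template!r} chat_template_file={chat_template_file!r}")

    if __name__ == "__main__":
        app()

With a single command, typer runs it directly, e.g. `python sketch.py --chat-template-file my_template.jinja` (file name hypothetical).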
@@ -229,6 +231,12 @@ def run(
     # n_ctx = 8192
     n_ctx = 2048
 
+    if model is None:
+        if hf is not None:
+            model = hf.split("/")[-1]
+        elif ollama is not None:
+            model = ollama
+
     assert force or append or not output.exists(), f"Output file already exists: {output}; use --force to overwrite"
 
     with output.open('a' if append else 'w') as output_file:
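The block added above gives results a model name even when --model is omitted: it falls back to the last path segment of the HF repo id (quant suffix included), then to the Ollama tag. The same fallback restated as a standalone helper, where the name default_model_name is invented for illustration but the body is the diff's own logic:

    from typing import Optional

    def default_model_name(model: Optional[str], hf: Optional[str], ollama: Optional[str]) -> Optional[str]:
        if model is None:
            if hf is not None:
                # Last path segment of the repo id, quant suffix included.
                model = hf.split("/")[-1]
            elif ollama is not None:
                model = ollama
        return model

    assert default_model_name(None, "bartowski/Qwen2.5-14B-Instruct-GGUF:Q4_K_L", None) == "Qwen2.5-14B-Instruct-GGUF:Q4_K_L"
    assert default_model_name(None, None, "qwen2.5:1.5b-instruct-q4_K_M") == "qwen2.5:1.5b-instruct-q4_K_M"
    assert default_model_name("Custom", "x/y", None) == "Custom"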
@@ -320,6 +328,7 @@ def elapsed():
             server.model_hf_repo = hf
             server.model_hf_file = None
             server.chat_template = chat_template
+            server.chat_template_file = chat_template_file
             server.server_path = server_path
             if port is not None:
                 server.server_port = port
@@ -335,6 +344,7 @@ def elapsed():
                 temp=t,
                 output_kwargs=dict(
                     chat_template=chat_template,
+                    chat_template_file=chat_template_file,
                 ),
                 request_kwargs=dict(
                     ignore_chat_grammar=ignore_chat_grammar,
@@ -355,6 +365,7 @@ def elapsed():
                 temp=t,
                 output_kwargs=dict(
                     chat_template=None,
+                    chat_template_file=None,
                 ),
                 request_kwargs=dict(
                     model=ollama,
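The output_kwargs changes in the last two hunks record which template override was in effect for each benchmark row: the llama-server path logs the actual values, while the Ollama path logs None, since the override only applies to llama-server. A hedged sketch of what a recorded JSONL row could look like under that assumption (field names beyond the kwargs are invented):

    import json

    # Illustrative only: emit one JSONL row tagged with the template settings,
    # mirroring how output_kwargs distinguishes llama-server rows (real values)
    # from Ollama rows (None).
    def write_row(output_file, *, model_id, temp, output_kwargs):
        output_file.write(json.dumps(dict(model_id=model_id, temp=temp, **output_kwargs)) + "\n")

    with open("example.jsonl", "w") as f:
        write_row(f, model_id="Qwen 2.5 Coder 7B Q4_K_M", temp=0,
                  output_kwargs=dict(chat_template=None, chat_template_file="template.jinja"))
        write_row(f, model_id="Qwen 2.5 Coder 7B Q4_K_M", temp=0,
                  output_kwargs=dict(chat_template=None, chat_template_file=None))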
