Skip to content

Commit d8d60d7

Browse files
committed
eager mode
1 parent e84d9db commit d8d60d7

File tree

5 files changed

+66
-61
lines changed

5 files changed

+66
-61
lines changed

cli/homl_cli/daemon_pb2.py

Lines changed: 24 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cli/homl_cli/main.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -405,24 +405,26 @@ def print_post_install_message():
405405

406406

407407

408-
def start_model(model_name):
408+
def start_model(model_name, eager):
409409
"""Starts a model with the vLLM server. Used by both run and chat commands."""
410410
stub = get_client_stub()
411411
if stub:
412412
spinner = Spinner(f"Starting model '{model_name}' (vLLM is a bit slow to start)...")
413413
spinner.start()
414414
try:
415-
response = stub.StartModel(daemon_pb2.StartModelRequest(model_name=model_name))
415+
response = stub.StartModel(daemon_pb2.StartModelRequest(model_name=model_name, eager_mode=eager))
416416
finally:
417417
spinner.stop()
418418
click.echo(response.message)
419419
return response.pid
420+
return 0
420421

421422
@main.command()
422423
@click.argument('model_name')
423-
def run(model_name):
424+
@click.option('--eager', is_flag=True, help="Start the model in eager mode: faster startup but higher latency, similar throughput.")
425+
def run(model_name, eager):
424426
"""Starts a model with the vLLM server."""
425-
start_model(model_name)
427+
start_model(model_name, eager=eager)
426428

427429
@main.command()
428430
def ps():
@@ -456,7 +458,7 @@ def ps():
456458
def chat(model_name):
457459
"""Starts a chat session with a model using the OpenAI-compatible API."""
458460
# Start the model using the helper (no spinner needed here, already in helper)
459-
if start_model(model_name) == 0:
461+
if start_model(model_name, True) == 0:
460462
return
461463
port = config.get_config_value("port", 7456)
462464
api_url = f"http://localhost:{port}/v1/chat/completions"

protos/daemon.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ message PullModelProgress {
5454

5555
message StartModelRequest {
5656
string model_name = 1;
57+
bool eager_mode = 2;
5758
}
5859

5960
message StartModelResponse {

server/homl_server/daemon_pb2.py

Lines changed: 24 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)