From 27156a0842086e0c6431f7c55c0d03368a1033c9 Mon Sep 17 00:00:00 2001
From: Michael Trommer
Date: Thu, 4 Sep 2025 01:52:11 +0200
Subject: [PATCH 1/3] Make api_base URL configurable via environment variable LLAMACPP_HOST

---
 llm_llama_server.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llm_llama_server.py b/llm_llama_server.py
index 83306ca..99aa3b4 100644
--- a/llm_llama_server.py
+++ b/llm_llama_server.py
@@ -1,4 +1,5 @@
 import llm
+import os
 
 from llm.default_plugins.openai_models import Chat, AsyncChat
 
@@ -7,10 +8,13 @@ class LlamaServer(Chat):
     key = "sk-llama-server"
 
     def __init__(self, **kwargs):
+        host = os.getenv("LLAMACPP_HOST", "http://localhost:8080")
+        api_base_url = f"{host.rstrip('/')}/v1"
+
         super().__init__(
             model_name="llama-server",
             model_id=self.model_id,
-            api_base="http://localhost:8080/v1",
+            api_base=api_base_url,
             **kwargs,
         )
 

From 64764a2e7ae93904e9ab82c4c0f9c0079f937c3a Mon Sep 17 00:00:00 2001
From: Michael Trommer
Date: Thu, 4 Sep 2025 02:32:41 +0200
Subject: [PATCH 2/3] Make api_base URL configurable via LLAMACPP_HOST in AsyncLlamaServer as well

---
 llm_llama_server.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llm_llama_server.py b/llm_llama_server.py
index 99aa3b4..79d4b62 100644
--- a/llm_llama_server.py
+++ b/llm_llama_server.py
@@ -27,10 +27,13 @@ class AsyncLlamaServer(AsyncChat):
     key = "sk-llama-server"
 
     def __init__(self, **kwargs):
+        host = os.getenv("LLAMACPP_HOST", "http://localhost:8080")
+        api_base_url = f"{host.rstrip('/')}/v1"
+
         super().__init__(
             model_name="llama-server",
             model_id=self.model_id,
-            api_base="http://localhost:8080/v1",
+            api_base=api_base_url,
             **kwargs,
         )
 

From d4dd26ef047bbe93e0faf8f522f78c293e6418f6 Mon Sep 17 00:00:00 2001
From: Michael Trommer
Date: Thu, 4 Sep 2025 02:52:06 +0200
Subject: [PATCH 3/3] Update README for LLAMACPP_HOST environment variable

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 544ec26..b41d425 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ llm install llm-llama-server
 ```
 
 ## Usage
-You'll need to be running a [llama-server](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) on port 8080 to use this plugin.
+You'll need a running instance of [llama-server](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md). By default, the plugin connects to `http://localhost:8080`. To use a different host or port, set the `LLAMACPP_HOST` environment variable, for example: `export LLAMACPP_HOST="http://your-server-ip:8080"`.
 
 You can `brew install llama.cpp` to obtain that binary. Then run it like this:
 ```bash
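
Reviewer note (not part of the patch series): a minimal sketch of how the new variable is expected to behave, assuming the plugin's model alias is `llama-server` and using a placeholder host URL:

```bash
# With LLAMACPP_HOST set, the plugin builds api_base from it
# (the host URL below is a placeholder; substitute your own).
export LLAMACPP_HOST="http://your-server-ip:8080"
llm -m llama-server "Say hello"   # requests go to http://your-server-ip:8080/v1

# With it unset, the plugin falls back to the previous default.
unset LLAMACPP_HOST
llm -m llama-server "Say hello"   # requests go to http://localhost:8080/v1
```

Because the patches call `host.rstrip('/')` before appending `/v1`, a trailing slash in the variable (e.g. `http://your-server-ip:8080/`) is tolerated without producing a double slash.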