From 27156a0842086e0c6431f7c55c0d03368a1033c9 Mon Sep 17 00:00:00 2001
From: Michael Trommer
Date: Thu, 4 Sep 2025 01:52:11 +0200
Subject: [PATCH 1/3] Make api_base URL configurable via environment variable LLAMACPP_HOST

---
 llm_llama_server.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llm_llama_server.py b/llm_llama_server.py
index 83306ca..99aa3b4 100644
--- a/llm_llama_server.py
+++ b/llm_llama_server.py
@@ -1,4 +1,5 @@
 import llm
+import os
 
 from llm.default_plugins.openai_models import Chat, AsyncChat
 
@@ -7,10 +8,13 @@ class LlamaServer(Chat):
     key = "sk-llama-server"
 
     def __init__(self, **kwargs):
+        host = os.getenv("LLAMACPP_HOST", "http://localhost:8080")
+        api_base_url = f"{host.rstrip('/')}/v1"
+
         super().__init__(
             model_name="llama-server",
             model_id=self.model_id,
-            api_base="http://localhost:8080/v1",
+            api_base=api_base_url,
             **kwargs,
         )
 

From 64764a2e7ae93904e9ab82c4c0f9c0079f937c3a Mon Sep 17 00:00:00 2001
From: Michael Trommer
Date: Thu, 4 Sep 2025 02:32:41 +0200
Subject: [PATCH 2/3] Make api_base URL configurable via LLAMACPP_HOST in AsyncLlamaServer as well

---
 llm_llama_server.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llm_llama_server.py b/llm_llama_server.py
index 99aa3b4..79d4b62 100644
--- a/llm_llama_server.py
+++ b/llm_llama_server.py
@@ -27,10 +27,13 @@ class AsyncLlamaServer(AsyncChat):
     key = "sk-llama-server"
 
     def __init__(self, **kwargs):
+        host = os.getenv("LLAMACPP_HOST", "http://localhost:8080")
+        api_base_url = f"{host.rstrip('/')}/v1"
+
         super().__init__(
             model_name="llama-server",
             model_id=self.model_id,
-            api_base="http://localhost:8080/v1",
+            api_base=api_base_url,
             **kwargs,
         )
 

From d4dd26ef047bbe93e0faf8f522f78c293e6418f6 Mon Sep 17 00:00:00 2001
From: Michael Trommer
Date: Thu, 4 Sep 2025 02:52:06 +0200
Subject: [PATCH 3/3] Update README for LLAMACPP_HOST environment variable

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 544ec26..b41d425 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ llm install llm-llama-server
 ```
 
 ## Usage
-You'll need to be running a [llama-server](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) on port 8080 to use this plugin.
+You'll need a running instance of [llama-server](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md). By default, the plugin connects to `http://localhost:8080`. To use a different host or port, set the `LLAMACPP_HOST` environment variable, for example: `export LLAMACPP_HOST="http://your-server-ip:8080"`.
 
 You can `brew install llama.cpp` to obtain that binary. Then run it like this:
 ```bash
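
Reviewer note (not part of the patch series): a minimal sketch of how the new variable is expected to behave, assuming the plugin's model alias is `llama-server` and using a placeholder host URL:

```bash
# With LLAMACPP_HOST set, the plugin builds api_base from it
# (the host URL below is a placeholder; substitute your own).
export LLAMACPP_HOST="http://your-server-ip:8080"
llm -m llama-server "Say hello"   # requests go to http://your-server-ip:8080/v1

# With it unset, the plugin falls back to the previous default.
unset LLAMACPP_HOST
llm -m llama-server "Say hello"   # requests go to http://localhost:8080/v1
```

Because the patches call `host.rstrip('/')` before appending `/v1`, a trailing slash in the variable (e.g. `http://your-server-ip:8080/`) is tolerated without producing a double slash.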