
Commit fb4432d

Added model_options param (#8)
1 parent: 251b034

6 files changed: +129 -16 lines changed

ai_server/server.py

Lines changed: 38 additions & 5 deletions
@@ -55,16 +55,23 @@ def chat_with_llama_server_http(
     timeout: int = 300,
     image_files: Optional[list] = None,
     json_schema: Optional[dict] = None,
+    model_options: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama-server HTTP API."""
     if not LLAMA_SERVER_URL:
         raise Exception("LLAMA_SERVER_URL environment variable not set")

     try:
         messages = _build_messages(content, system_prompt, image_files=[])  # TODO: Pass image files
-        payload = {'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512}
+
+        if not model_options:
+            model_options = {}
+
+        payload = {'model': model, 'messages': messages, **model_options}
         if json_schema:
             payload['json_schema'] = json_schema[SCHEMA_KEY]
+        if 'stream' not in payload:
+            payload['stream'] = False

         response = requests.post(
             f'{LLAMA_SERVER_URL}/v1/chat/completions',
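The `**model_options` unpacking lets a caller override or extend any field of the request body, and the hard-coded `'stream': False, 'max_tokens': 512` defaults from the old one-liner are gone: `stream` is re-applied only when the caller has not set it, and `max_tokens` now has no default at all. A minimal sketch of the merge semantics, with hypothetical option values:

# Sketch of the payload merge above; the option values are hypothetical.
model = 'deepseek-v3'
messages = [{'role': 'user', 'content': 'Hello'}]
model_options = {'temperature': 0.2, 'max_tokens': 256}

payload = {'model': model, 'messages': messages, **model_options}
if 'stream' not in payload:
    payload['stream'] = False  # defaulted only when the caller left it unset

assert payload == {
    'model': 'deepseek-v3',
    'messages': [{'role': 'user', 'content': 'Hello'}],
    'temperature': 0.2,
    'max_tokens': 256,
    'stream': False,
}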
@@ -106,6 +113,7 @@ def chat_with_ollama(
     system_prompt: Optional[str] = None,
     image_files: Optional[list] = None,
     json_schema: Optional[dict] = None,
+    model_options: Optional[dict] = None,
 ) -> str:
     """Handle chat using ollama."""
     messages = _build_messages(content, system_prompt, image_files)
@@ -115,6 +123,7 @@
         messages=messages,
         stream=False,
         format=json_schema[SCHEMA_KEY] if json_schema else None,
+        options=model_options,
     )
     return response.message.content
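Here the dict is handed to the Ollama client untouched via `options=`. A usage sketch, assuming an Ollama-served model name for illustration; `temperature`, `top_p`, and `num_predict` are standard Ollama option keys:

# Sketch: passing sampling options through chat_with_ollama.
# The dict goes verbatim into ollama.chat(..., options=...).
reply = chat_with_ollama(
    'deepseek-coder',  # assumed model name, for illustration only
    'Summarize this commit in one sentence',
    model_options={'temperature': 0.2, 'top_p': 0.9, 'num_predict': 128},
)
print(reply)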
@@ -125,6 +134,7 @@ def chat_with_llamacpp(
     system_prompt: Optional[str] = None,
     timeout: int = 300,
     image_files: Optional[list] = None,
+    model_options: Optional[dict] = None,
     json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama.cpp CLI."""
@@ -142,6 +152,10 @@
     if system_prompt:
         cmd.extend(['--system-prompt', system_prompt])

+    if model_options:
+        for key, value in model_options.items():
+            cmd.extend(['--model-option', key, value])
+
     if image_files:
         pass  # TODO: pass image files
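Two caveats worth noting in this branch: `--model-option` appears to be this project's own CLI convention rather than a stock llama.cpp flag, and `subprocess` argument lists must contain only strings, so numeric option values would need `str()` before `cmd.extend`. A defensive sketch of the same loop:

# Sketch: string-converting option values before building the command line.
# subprocess.run() raises TypeError on non-str items in the argument list.
model_options = {'temperature': 0.7, 'top_p': 0.9}  # hypothetical values
cmd = ['llama-cli', '-m', 'model.gguf']  # assumed base command
for key, value in model_options.items():
    cmd.extend(['--model-option', key, str(value)])
# cmd == ['llama-cli', '-m', 'model.gguf',
#         '--model-option', 'temperature', '0.7',
#         '--model-option', 'top_p', '0.9']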
@@ -171,6 +185,7 @@ def chat_with_model(
     llama_mode: str = "cli",
     system_prompt: Optional[str] = None,
     image_files: Optional[list] = None,
+    model_options: Optional[dict] = None,
     json_schema: Optional[dict] = None,
 ) -> str:
     """Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt."""
@@ -179,18 +194,33 @@
             if not LLAMA_SERVER_URL:
                 raise Exception("LLAMA_SERVER_URL environment variable not set for server mode")
             return chat_with_llama_server_http(
-                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+                model,
+                content,
+                system_prompt=system_prompt,
+                image_files=image_files,
+                json_schema=json_schema,
+                model_options=model_options,
             )
         elif llama_mode == "cli":
             return chat_with_llamacpp(
-                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+                model,
+                content,
+                system_prompt=system_prompt,
+                image_files=image_files,
+                json_schema=json_schema,
+                model_options=model_options,
             )
         else:
             raise ValueError(f"Invalid llama_mode: '{llama_mode}'. Valid options are 'server' or 'cli'.")
     else:
         # Model not available in llama.cpp, use ollama
         return chat_with_ollama(
-            model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+            model,
+            content,
+            system_prompt=system_prompt,
+            image_files=image_files,
+            json_schema=json_schema,
+            model_options=model_options,
         )
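With all three branches updated, a caller threads the options dict through `chat_with_model` once and the router forwards it to whichever backend it selects. A usage sketch (model names are hypothetical):

# Sketch: the same options dict reaches whichever backend the router picks.
options = {'temperature': 0.3}

# Dispatched to chat_with_llama_server_http when the model is in llama.cpp:
chat_with_model('deepseek-v3', 'Explain this code', llama_mode='server', model_options=options)

# Dispatched to chat_with_llamacpp (the default mode):
chat_with_model('deepseek-v3', 'Explain this code', llama_mode='cli', model_options=options)

# Falls back to chat_with_ollama when the model is not available in llama.cpp:
chat_with_model('mistral', 'Explain this code', model_options=options)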
@@ -216,14 +246,17 @@ def chat():
     llama_mode = request.form.get('llama_mode', 'cli')
     system_prompt = request.form.get('system_prompt')
     image_files = list(request.files.values())
+    model_options = request.form.get('model_options')
     json_schema = request.form.get('json_schema')
     if json_schema:
         json_schema = json.loads(json_schema)

     if not content.strip():
         abort(400, description='Missing prompt content')

-    response_content = chat_with_model(model, content, llama_mode, system_prompt, image_files, json_schema)
+    response_content = chat_with_model(
+        model, content, llama_mode, system_prompt, image_files, model_options=model_options, json_schema=json_schema
+    )
     return jsonify(response_content)
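Note that `request.form.get('model_options')` yields a raw string (or `None`): unlike `json_schema`, this hunk does not `json.loads` the value before forwarding it, so a client sending a JSON-encoded dict would presumably rely on a later decoding step. A client-side sketch, assuming the route is mounted at `/chat` on a local server:

# Sketch of a client call; URL, port, and field values are assumptions.
# model_options is JSON-encoded because it travels as an ordinary form field.
import json
import requests

resp = requests.post(
    'http://localhost:5000/chat',
    data={
        'model': 'deepseek-v3',
        'content': 'Explain this code',
        'llama_mode': 'server',
        'model_options': json.dumps({'temperature': 0.2}),
    },
)
print(resp.json())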
test/test_cli_mode.py

Lines changed: 42 additions & 4 deletions
@@ -109,7 +109,12 @@ def test_cli_mode_uses_llamacpp_when_available(self):
         assert result == "CLI response from DeepSeek V3"
         self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL)
         self.mock_chat_llamacpp.assert_called_once_with(
-            TEST_LLAMACPP_MODEL, 'Write a function', system_prompt=None, image_files=None, json_schema=None
+            TEST_LLAMACPP_MODEL,
+            'Write a function',
+            system_prompt=None,
+            image_files=None,
+            json_schema=None,
+            model_options=None,
         )

     def test_cli_mode_fallback_to_ollama_when_unavailable(self):
@@ -122,7 +127,12 @@ def test_cli_mode_fallback_to_ollama_when_unavailable(self):
         assert result == "Ollama response from DeepSeek Coder"
         self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL)
         self.mock_chat_ollama.assert_called_once_with(
-            TEST_OLLAMA_MODEL, 'Help with coding', system_prompt=None, image_files=None, json_schema=None
+            TEST_OLLAMA_MODEL,
+            'Help with coding',
+            system_prompt=None,
+            image_files=None,
+            json_schema=None,
+            model_options=None,
         )

     def test_default_mode_is_cli(self):
@@ -135,7 +145,30 @@ def test_default_mode_is_cli(self):
         assert result == "Default CLI mode response"
         self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL)
         self.mock_chat_llamacpp.assert_called_once_with(
-            TEST_LLAMACPP_MODEL, 'Help me', system_prompt=None, image_files=None, json_schema=None
+            TEST_LLAMACPP_MODEL, 'Help me', system_prompt=None, image_files=None, json_schema=None, model_options=None
+        )
+        self.mock_chat_llamacpp.assert_called_once_with(
+            TEST_LLAMACPP_MODEL, 'Help me', system_prompt=None, image_files=None, model_options=None, json_schema=None
+        )
+
+    def test_model_options(self):
+        """Test that model_options are passed correctly in CLI mode."""
+        self.mock_available.return_value = True
+        self.mock_chat_llamacpp.return_value = "Model options test response"
+
+        test_options = {"temperature": 0.7, "top_p": 0.9}
+
+        result = chat_with_model(TEST_LLAMACPP_MODEL, 'Help me', model_options=test_options)
+
+        assert result == "Model options test response"
+        self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL)
+        self.mock_chat_llamacpp.assert_called_once_with(
+            TEST_LLAMACPP_MODEL,
+            'Help me',
+            system_prompt=None,
+            image_files=None,
+            json_schema=None,
+            model_options=test_options,
         )
@@ -192,5 +225,10 @@ def test_cli_mode_passes_json_schema_to_ollama(self, tmp_path):
         assert result == "schema-aware response"

         mock_ollama.assert_called_once_with(
-            TEST_OLLAMA_MODEL, "Give me an answer", system_prompt=None, image_files=None, json_schema=test_schema
+            TEST_OLLAMA_MODEL,
+            "Give me an answer",
+            system_prompt=None,
+            image_files=None,
+            json_schema=test_schema,
+            model_options=None,
         )

test/test_core.py

Lines changed: 6 additions & 2 deletions
@@ -83,6 +83,7 @@ def test_chat_with_ollama_success(self, mock_ollama):
             model=TEST_OLLAMA_MODEL,
             messages=[{'role': 'user', 'content': 'Help me write a Python function'}],
             stream=False,
+            options=None,
             format=None,
         )
@@ -110,12 +111,15 @@ def test_chat_with_ollama_with_json_schema(self, mock_ollama, tmp_path):
         mock_response.message.content = "42"
         mock_ollama.return_value = mock_response

-        result = chat_with_ollama(TEST_OLLAMA_MODEL, "What is the meaning of life?", json_schema=test_schema)
+        result = chat_with_ollama(
+            TEST_OLLAMA_MODEL, "What is the meaning of life?", json_schema=test_schema, model_options=None
+        )

         assert result == "42"
         mock_ollama.assert_called_once_with(
             model=TEST_OLLAMA_MODEL,
             messages=[{"role": "user", "content": "What is the meaning of life?"}],
             stream=False,
-            format={"type": "object", "properties": {"answer": {"type": "string"}}},
+            format={'type': 'object', 'properties': {'answer': {'type': 'string'}}},
+            options=None,
         )

test/test_server_mode.py

Lines changed: 30 additions & 2 deletions
@@ -112,7 +112,12 @@ def test_server_mode_uses_llamacpp_when_available(self):
         assert result == "Server response from DeepSeek V3"
         self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL)
         self.mock_chat_server.assert_called_once_with(
-            TEST_LLAMACPP_MODEL, 'Explain code', system_prompt=None, image_files=None, json_schema=None
+            TEST_LLAMACPP_MODEL,
+            'Explain code',
+            system_prompt=None,
+            image_files=None,
+            json_schema=None,
+            model_options=None,
         )

     def test_server_mode_fallback_to_ollama_when_unavailable(self):
@@ -125,7 +130,30 @@ def test_server_mode_fallback_to_ollama_when_unavailable(self):
         assert result == "Ollama fallback response"
         self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL)
         self.mock_chat_ollama.assert_called_once_with(
-            TEST_OLLAMA_MODEL, 'Debug code', system_prompt=None, image_files=None, json_schema=None
+            TEST_OLLAMA_MODEL, 'Debug code', system_prompt=None, image_files=None, json_schema=None, model_options=None
+        )
+        self.mock_chat_ollama.assert_called_once_with(
+            TEST_OLLAMA_MODEL, 'Debug code', system_prompt=None, image_files=None, json_schema=None, model_options=None
+        )
+
+    def test_server_mode_fallback_to_ollama_with_model_options(self):
+        """Test server mode falls back to ollama and passes model_options correctly when model is unavailable in llama.cpp."""
+        self.mock_available.return_value = False
+        self.mock_chat_ollama.return_value = "Ollama fallback response"
+
+        test_options = {"temperature": 0.3, "top_k": 50}
+
+        result = chat_with_model(TEST_OLLAMA_MODEL, 'Debug code', llama_mode='server', model_options=test_options)
+
+        assert result == "Ollama fallback response"
+        self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL)
+        self.mock_chat_ollama.assert_called_once_with(
+            TEST_OLLAMA_MODEL,
+            'Debug code',
+            system_prompt=None,
+            image_files=None,
+            model_options=test_options,
+            json_schema=None,
         )

     def test_server_mode_requires_server_url(self):

test/test_system_prompt.py

Lines changed: 6 additions & 1 deletion
@@ -78,5 +78,10 @@ def test_chat_with_model_routing(self, mock_available, mock_chat):

         chat_with_model(TEST_MODEL, TEST_USER_CONTENT, 'cli', TEST_SYSTEM_PROMPT)
         mock_chat.assert_called_once_with(
-            TEST_MODEL, TEST_USER_CONTENT, system_prompt=TEST_SYSTEM_PROMPT, image_files=None, json_schema=None
+            TEST_MODEL,
+            TEST_USER_CONTENT,
+            system_prompt=TEST_SYSTEM_PROMPT,
+            image_files=None,
+            json_schema=None,
+            model_options=None,
         )

test/test_system_prompt_api.py

Lines changed: 7 additions & 2 deletions
@@ -1,3 +1,4 @@
+import json
 from unittest.mock import patch

 import pytest
@@ -39,7 +40,9 @@ def test_api_with_system_prompt(self, mock_chat, mock_redis, client):

         assert response.status_code == 200

-        mock_chat.assert_called_once_with(TEST_MODEL, TEST_USER_CONTENT, 'cli', TEST_SYSTEM_PROMPT, [], None)
+        mock_chat.assert_called_once_with(
+            TEST_MODEL, TEST_USER_CONTENT, 'cli', TEST_SYSTEM_PROMPT, [], json_schema=None, model_options=None
+        )

     @patch('ai_server.server.REDIS_CONNECTION')
     @patch('ai_server.server.chat_with_model')
@@ -54,7 +57,9 @@ def test_api_without_system_prompt(self, mock_chat, mock_redis, client):

         assert response.status_code == 200

-        mock_chat.assert_called_once_with(TEST_MODEL, TEST_USER_CONTENT, 'cli', None, [], None)
+        mock_chat.assert_called_once_with(
+            TEST_MODEL, TEST_USER_CONTENT, 'cli', None, [], model_options=None, json_schema=None
+        )

     @patch('ai_server.server.REDIS_CONNECTION')
     def test_api_authentication_still_required(self, mock_redis, client):
