@@ -55,16 +55,23 @@ def chat_with_llama_server_http(
     timeout: int = 300,
     image_files: Optional[list] = None,
     json_schema: Optional[dict] = None,
+    model_options: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama-server HTTP API."""
     if not LLAMA_SERVER_URL:
         raise Exception("LLAMA_SERVER_URL environment variable not set")
 
     try:
         messages = _build_messages(content, system_prompt, image_files=[])  # TODO: Pass image files
-        payload = {'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512}
+
+        if not model_options:
+            model_options = {}
+
+        payload = {'model': model, 'messages': messages, **model_options}
         if json_schema:
             payload['json_schema'] = json_schema[SCHEMA_KEY]
+        if 'stream' not in payload:
+            payload['stream'] = False
 
         response = requests.post(
             f'{LLAMA_SERVER_URL}/v1/chat/completions',
@@ -106,6 +113,7 @@ def chat_with_ollama(
     system_prompt: Optional[str] = None,
     image_files: Optional[list] = None,
     json_schema: Optional[dict] = None,
+    model_options: Optional[dict] = None,
 ) -> str:
     """Handle chat using ollama."""
     messages = _build_messages(content, system_prompt, image_files)
@@ -115,6 +123,7 @@ def chat_with_ollama(
         messages=messages,
         stream=False,
         format=json_schema[SCHEMA_KEY] if json_schema else None,
+        options=model_options,
     )
     return response.message.content
 
@@ -125,6 +134,7 @@ def chat_with_llamacpp(
     system_prompt: Optional[str] = None,
     timeout: int = 300,
     image_files: Optional[list] = None,
+    model_options: Optional[dict] = None,
     json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama.cpp CLI."""
@@ -142,6 +152,10 @@ def chat_with_llamacpp(
     if system_prompt:
         cmd.extend(['--system-prompt', system_prompt])
 
+    if model_options:
+        for key, value in model_options.items():
+            cmd.extend(['--model-option', key, value])
+
     if image_files:
         pass  # TODO: pass image files
 
@@ -171,6 +185,7 @@ def chat_with_model(
     llama_mode: str = "cli",
     system_prompt: Optional[str] = None,
     image_files: Optional[list] = None,
+    model_options: Optional[dict] = None,
     json_schema: Optional[dict] = None,
 ) -> str:
     """Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt."""
@@ -179,18 +194,33 @@ def chat_with_model(
             if not LLAMA_SERVER_URL:
                 raise Exception("LLAMA_SERVER_URL environment variable not set for server mode")
             return chat_with_llama_server_http(
-                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+                model,
+                content,
+                system_prompt=system_prompt,
+                image_files=image_files,
+                json_schema=json_schema,
+                model_options=model_options,
             )
         elif llama_mode == "cli":
             return chat_with_llamacpp(
-                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+                model,
+                content,
+                system_prompt=system_prompt,
+                image_files=image_files,
+                json_schema=json_schema,
+                model_options=model_options,
             )
         else:
             raise ValueError(f"Invalid llama_mode: '{llama_mode}'. Valid options are 'server' or 'cli'.")
     else:
         # Model not available in llama.cpp, use ollama
         return chat_with_ollama(
-            model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+            model,
+            content,
+            system_prompt=system_prompt,
+            image_files=image_files,
+            json_schema=json_schema,
+            model_options=model_options,
         )
 
 
@@ -216,14 +246,17 @@ def chat():
     llama_mode = request.form.get('llama_mode', 'cli')
     system_prompt = request.form.get('system_prompt')
     image_files = list(request.files.values())
+    model_options = request.form.get('model_options')
     json_schema = request.form.get('json_schema')
     if json_schema:
         json_schema = json.loads(json_schema)
 
     if not content.strip():
         abort(400, description='Missing prompt content')
 
-    response_content = chat_with_model(model, content, llama_mode, system_prompt, image_files, json_schema)
+    response_content = chat_with_model(
+        model, content, llama_mode, system_prompt, image_files, model_options=model_options, json_schema=json_schema
+    )
     return jsonify(response_content)
 
 