 
 import base64
 import glob
+import json
 import os
 import subprocess
 from typing import Optional
@@ -32,6 +33,7 @@
     if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://'))
     else _llama_server_url
 )
+SCHEMA_KEY = "schema"
 
 
 def _build_messages(content: str, system_prompt: Optional[str] = None, image_files: Optional[list] = None) -> list:
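Note: every backend below pulls the actual JSON Schema out of a wrapper dict via `SCHEMA_KEY`, so callers are expected to pass something shaped roughly like this hypothetical example (field names are made up for illustration):

```python
# Hypothetical wrapper dict: SCHEMA_KEY ("schema") holds the actual JSON Schema.
weather_schema = {
    "schema": {
        "type": "object",
        "properties": {
            "city": {"type": "string"},
            "temperature_c": {"type": "number"},
        },
        "required": ["city", "temperature_c"],
    }
}
```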
@@ -52,17 +54,21 @@ def chat_with_llama_server_http(
     system_prompt: Optional[str] = None,
     timeout: int = 300,
     image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama-server HTTP API."""
     if not LLAMA_SERVER_URL:
         raise Exception("LLAMA_SERVER_URL environment variable not set")
 
     try:
         messages = _build_messages(content, system_prompt, image_files=[])  # TODO: Pass image files
+        payload = {'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512}
+        if json_schema:
+            payload['json_schema'] = json_schema[SCHEMA_KEY]
 
         response = requests.post(
             f'{LLAMA_SERVER_URL}/v1/chat/completions',
-            json={'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512},
+            json=payload,
             headers={'Content-Type': 'application/json'},
             timeout=timeout,
         )
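A minimal sketch of the request body this branch would produce, assuming llama-server honors a top-level `json_schema` field on `/v1/chat/completions` (the model name and schema here are placeholders, not from this repo):

```python
import json

# Body assembled by chat_with_llama_server_http when a schema is supplied.
payload = {
    "model": "qwen2.5-7b-instruct",  # placeholder model name
    "messages": [{"role": "user", "content": "List three colors as JSON."}],
    "stream": False,
    "max_tokens": 512,
    # contents of json_schema[SCHEMA_KEY]
    "json_schema": {
        "type": "object",
        "properties": {"colors": {"type": "array", "items": {"type": "string"}}},
        "required": ["colors"],
    },
}
print(json.dumps(payload, indent=2))
```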
@@ -95,12 +101,21 @@ def is_llamacpp_available(model: str) -> bool:
 
 
 def chat_with_ollama(
-    model: str, content: str, system_prompt: Optional[str] = None, image_files: Optional[list] = None
+    model: str,
+    content: str,
+    system_prompt: Optional[str] = None,
+    image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using ollama."""
     messages = _build_messages(content, system_prompt, image_files)
 
-    response = ollama.chat(model=model, messages=messages, stream=False)
+    response = ollama.chat(
+        model=model,
+        messages=messages,
+        stream=False,
+        format=json_schema[SCHEMA_KEY] if json_schema else None,
+    )
     return response.message.content
 
 
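Usage sketch for the ollama path: passing the schema through `format` relies on Ollama's structured outputs (newer servers accept a JSON Schema dict there; older ones only accept `format="json"`). The model tag is a placeholder:

```python
import json

schema = {
    "schema": {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    }
}

raw = chat_with_ollama(
    model="llama3.2",  # placeholder model tag
    content="Reply with a JSON object containing an 'answer' field.",
    json_schema=schema,
)
# With structured outputs enforced, the reply should parse as JSON.
print(json.loads(raw)["answer"])
```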
@@ -110,6 +125,7 @@ def chat_with_llamacpp(
     system_prompt: Optional[str] = None,
     timeout: int = 300,
     image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama.cpp CLI."""
     model_path = resolve_model_path(model)
@@ -118,6 +134,9 @@ def chat_with_llamacpp(
         raise ValueError(f"Model not found: {model}")
 
     cmd = [LLAMA_CPP_CLI, '-m', model_path, '--n-gpu-layers', '40', '-p', content, '-n', '512', '--single-turn']
+    if json_schema:
+        raw_schema = json_schema[SCHEMA_KEY] if SCHEMA_KEY in json_schema else json_schema
+        cmd += ["--json-schema", json.dumps(raw_schema)]
 
     # Add system prompt if provided
     if system_prompt:
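For the CLI path, llama.cpp's `--json-schema` flag constrains generation to the given schema, and the branch above serializes the unwrapped schema with `json.dumps`. A sketch of the resulting invocation (binary name and model path are placeholders):

```python
import json
import shlex

raw_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
cmd = [
    "llama-cli", "-m", "/models/model.gguf",  # placeholder binary and model path
    "--n-gpu-layers", "40", "-p", "Name one planet as JSON.",
    "-n", "512", "--single-turn",
    "--json-schema", json.dumps(raw_schema),
]
print(shlex.join(cmd))  # show the command as it would appear on a shell
```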
@@ -152,20 +171,27 @@ def chat_with_model(
     llama_mode: str = "cli",
     system_prompt: Optional[str] = None,
     image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt."""
     if is_llamacpp_available(model):
         if llama_mode == "server":
             if not LLAMA_SERVER_URL:
                 raise Exception("LLAMA_SERVER_URL environment variable not set for server mode")
-            return chat_with_llama_server_http(model, content, system_prompt=system_prompt, image_files=image_files)
+            return chat_with_llama_server_http(
+                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+            )
         elif llama_mode == "cli":
-            return chat_with_llamacpp(model, content, system_prompt=system_prompt, image_files=image_files)
+            return chat_with_llamacpp(
+                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+            )
         else:
             raise ValueError(f"Invalid llama_mode: '{llama_mode}'. Valid options are 'server' or 'cli'.")
     else:
         # Model not available in llama.cpp, use ollama
-        return chat_with_ollama(model, content, system_prompt=system_prompt, image_files=image_files)
+        return chat_with_ollama(
+            model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+        )
 
 
 def authenticate() -> str:
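Usage sketch for the router: the new `json_schema` kwarg is simply forwarded to whichever backend `llama_mode` selects (schema and model name are placeholders):

```python
schema = {
    "schema": {
        "type": "object",
        "properties": {"summary": {"type": "string"}},
        "required": ["summary"],
    }
}

reply = chat_with_model(
    model="my-model",  # placeholder model name
    content="Summarize llama.cpp in one JSON field called 'summary'.",
    llama_mode="cli",
    json_schema=schema,
)
```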
@@ -190,11 +216,14 @@ def chat():
     llama_mode = request.form.get('llama_mode', 'cli')
     system_prompt = request.form.get('system_prompt')
     image_files = list(request.files.values())
+    json_schema = request.form.get('json_schema')
+    if json_schema:
+        json_schema = json.loads(json_schema)
 
     if not content.strip():
         abort(400, description='Missing prompt content')
 
-    response_content = chat_with_model(model, content, llama_mode, system_prompt, image_files)
+    response_content = chat_with_model(model, content, llama_mode, system_prompt, image_files, json_schema)
     return jsonify(response_content)
 
 
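Client-side sketch for the updated endpoint: the handler reads `json_schema` from a form field and `json.loads` it, so the wrapper dict has to be sent as a JSON string. The URL, route, and `model` field name are assumptions; the real endpoint may also require the token from `authenticate()`:

```python
import json
import requests

schema = {
    "schema": {
        "type": "object",
        "properties": {"title": {"type": "string"}},
        "required": ["title"],
    }
}

resp = requests.post(
    "http://localhost:5000/chat",  # assumed host and route
    data={
        "model": "my-model",  # field name assumed; not shown in this hunk
        "content": "Invent a book title and return it as JSON.",
        "llama_mode": "cli",
        "json_schema": json.dumps(schema),  # must be sent as a JSON string
    },
)
print(resp.json())
```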