Add json schema #7
Changes from 13 commits
Changes to the server module (`ai_server.server`, per the import path used in the tests):

```diff
@@ -2,6 +2,7 @@
 import base64
 import glob
+import json
 import os
 import subprocess
 from typing import Optional
```
```diff
@@ -32,6 +33,7 @@
     if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://'))
     else _llama_server_url
 )
+SCHEMA_KEY = "schema"


 def _build_messages(content: str, system_prompt: Optional[str] = None, image_files: Optional[list] = None) -> list:
```
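The new `SCHEMA_KEY` constant implies that callers pass the raw JSON Schema wrapped under a `"schema"` key, which is also the shape used by the new test further down. A minimal illustration of that wrapper (values copied from the test):

```python
SCHEMA_KEY = "schema"

# Wrapper shape used throughout this PR: the raw JSON Schema sits under the "schema" key.
json_schema = {SCHEMA_KEY: {"type": "object", "properties": {"answer": {"type": "string"}}}}

raw_schema = json_schema[SCHEMA_KEY]
assert raw_schema == {"type": "object", "properties": {"answer": {"type": "string"}}}
```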
```diff
@@ -52,17 +54,21 @@ def chat_with_llama_server_http(
     system_prompt: Optional[str] = None,
     timeout: int = 300,
     image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama-server HTTP API."""
     if not LLAMA_SERVER_URL:
         raise Exception("LLAMA_SERVER_URL environment variable not set")

     try:
         messages = _build_messages(content, system_prompt, image_files=[])  # TODO: Pass image files
+        payload = {'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512}
+        if json_schema:
+            payload['json_schema'] = json_schema[SCHEMA_KEY]

         response = requests.post(
             f'{LLAMA_SERVER_URL}/v1/chat/completions',
-            json={'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512},
+            json=payload,
             headers={'Content-Type': 'application/json'},
             timeout=timeout,
         )
```
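For reference, in server mode the JSON body sent to llama-server now carries the unwrapped schema under a top-level `json_schema` field. A sketch of the resulting payload (model name and prompt are placeholders; the field name follows this PR's payload construction, not an externally documented API):

```python
import json

json_schema = {"schema": {"type": "object", "properties": {"answer": {"type": "string"}}}}

# Mirrors the payload built in chat_with_llama_server_http after this change.
payload = {
    "model": "example-model",  # placeholder model name
    "messages": [{"role": "user", "content": "Give me an answer"}],
    "stream": False,
    "max_tokens": 512,
}
if json_schema:
    payload["json_schema"] = json_schema["schema"]

print(json.dumps(payload, indent=2))
```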
```diff
@@ -95,12 +101,21 @@ def is_llamacpp_available(model: str) -> bool:


 def chat_with_ollama(
-    model: str, content: str, system_prompt: Optional[str] = None, image_files: Optional[list] = None
+    model: str,
+    content: str,
+    system_prompt: Optional[str] = None,
+    image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using ollama."""
     messages = _build_messages(content, system_prompt, image_files)

-    response = ollama.chat(model=model, messages=messages, stream=False)
+    response = ollama.chat(
+        model=model,
+        messages=messages,
+        stream=False,
+        format=json_schema['schema'] if json_schema else None,
+    )
     return response.message.content
```
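A usage sketch of the ollama path, assuming an ollama Python client recent enough that `chat()` accepts a JSON Schema dict via `format` (the mechanism this diff relies on); the model name is a placeholder:

```python
import json

import ollama  # assumes a client version whose chat() accepts a JSON Schema via `format`

schema = {"type": "object", "properties": {"answer": {"type": "string"}}}

response = ollama.chat(
    model="llama3.2",  # placeholder model name
    messages=[{"role": "user", "content": "Give me an answer"}],
    stream=False,
    format=schema,  # same keyword chat_with_ollama now forwards
)
print(json.loads(response.message.content))
```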
```diff
@@ -110,6 +125,7 @@ def chat_with_llamacpp(
     system_prompt: Optional[str] = None,
     timeout: int = 300,
     image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Handle chat using llama.cpp CLI."""
     model_path = resolve_model_path(model)
```
```diff
@@ -118,6 +134,9 @@ def chat_with_llamacpp(
         raise ValueError(f"Model not found: {model}")

     cmd = [LLAMA_CPP_CLI, '-m', model_path, '--n-gpu-layers', '40', '-p', content, '-n', '512', '--single-turn']
+    if json_schema:
+        raw_schema = json_schema[SCHEMA_KEY] if SCHEMA_KEY in json_schema else json_schema
+        cmd += ["--json-schema", json.dumps(raw_schema)]

     # Add system prompt if provided
     if system_prompt:
```

Collaborator (inline comment on the `--json-schema` line): Same here.
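For reference, a sketch of the command list this produces for a wrapped schema (the CLI binary name and model path are placeholders, and `--json-schema` is the llama.cpp CLI flag this code targets):

```python
import json

LLAMA_CPP_CLI = "llama-cli"                    # placeholder binary name
model_path = "/data1/GGUF/example/model.gguf"  # hypothetical path
content = "Give me an answer"
json_schema = {"schema": {"type": "object", "properties": {"answer": {"type": "string"}}}}

cmd = [LLAMA_CPP_CLI, '-m', model_path, '--n-gpu-layers', '40', '-p', content, '-n', '512', '--single-turn']
if json_schema:
    # Accept either the wrapped form ({"schema": {...}}) or a bare schema dict.
    raw_schema = json_schema["schema"] if "schema" in json_schema else json_schema
    cmd += ["--json-schema", json.dumps(raw_schema)]

print(cmd)
```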
```diff
@@ -152,20 +171,27 @@ def chat_with_model(
     llama_mode: str = "cli",
     system_prompt: Optional[str] = None,
     image_files: Optional[list] = None,
+    json_schema: Optional[dict] = None,
 ) -> str:
     """Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt."""
     if is_llamacpp_available(model):
         if llama_mode == "server":
             if not LLAMA_SERVER_URL:
                 raise Exception("LLAMA_SERVER_URL environment variable not set for server mode")
-            return chat_with_llama_server_http(model, content, system_prompt=system_prompt, image_files=image_files)
+            return chat_with_llama_server_http(
+                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+            )
         elif llama_mode == "cli":
-            return chat_with_llamacpp(model, content, system_prompt=system_prompt, image_files=image_files)
+            return chat_with_llamacpp(
+                model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+            )
         else:
             raise ValueError(f"Invalid llama_mode: '{llama_mode}'. Valid options are 'server' or 'cli'.")
     else:
         # Model not available in llama.cpp, use ollama
-        return chat_with_ollama(model, content, system_prompt=system_prompt, image_files=image_files)
+        return chat_with_ollama(
+            model, content, system_prompt=system_prompt, image_files=image_files, json_schema=json_schema
+        )


 def authenticate() -> str:
```
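A usage sketch of the routing entry point after this change (the model name is a placeholder; the wrapped schema form matches the new test):

```python
import json

from ai_server.server import chat_with_model  # import path taken from the tests

schema = {"schema": {"type": "object", "properties": {"answer": {"type": "string"}}}}

reply = chat_with_model(
    "example-model",  # placeholder model name
    "Give me an answer",
    llama_mode="server",
    json_schema=schema,
)
# With a constrained backend the reply should parse as JSON matching the schema.
print(json.loads(reply))
```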
```diff
@@ -190,11 +216,14 @@ def chat():
     llama_mode = request.form.get('llama_mode', 'cli')
     system_prompt = request.form.get('system_prompt')
     image_files = list(request.files.values())
+    json_schema = request.form.get('json_schema')
+    if json_schema:
+        json_schema = json.loads(json_schema)

     if not content.strip():
         abort(400, description='Missing prompt content')

-    response_content = chat_with_model(model, content, llama_mode, system_prompt, image_files)
+    response_content = chat_with_model(model, content, llama_mode, system_prompt, image_files, json_schema)
     return jsonify(response_content)
```
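From a client's perspective, the schema is sent as a JSON string in a form field and parsed server-side with `json.loads`. A sketch of such a request (the server URL and route, and the `model`/`content` field names, are assumptions; only `llama_mode`, `system_prompt`, and `json_schema` appear in this diff):

```python
import json

import requests

schema = {"schema": {"type": "object", "properties": {"answer": {"type": "string"}}}}

resp = requests.post(
    "http://localhost:5000/chat",  # hypothetical server URL and route
    data={
        "model": "example-model",           # assumed field name
        "content": "Give me an answer",     # assumed field name
        "llama_mode": "server",
        "json_schema": json.dumps(schema),  # sent as a string, parsed with json.loads() on the server
    },
    timeout=300,
)
print(resp.json())
```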
Changes to the tests:
```diff
@@ -111,7 +111,9 @@ def test_server_mode_uses_llamacpp_when_available(self):

         assert result == "Server response from DeepSeek V3"
         self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL)
-        self.mock_chat_server.assert_called_once_with(TEST_LLAMACPP_MODEL, 'Explain code', system_prompt=None, image_files=None)
+        self.mock_chat_server.assert_called_once_with(
+            TEST_LLAMACPP_MODEL, 'Explain code', system_prompt=None, image_files=None, json_schema=None
+        )

     def test_server_mode_fallback_to_ollama_when_unavailable(self):
         """Test server mode falls back to ollama when model not available in llama.cpp."""
```
```diff
@@ -122,7 +124,9 @@ def test_server_mode_fallback_to_ollama_when_unavailable(self):

         assert result == "Ollama fallback response"
         self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL)
-        self.mock_chat_ollama.assert_called_once_with(TEST_OLLAMA_MODEL, 'Debug code', system_prompt=None, image_files=None)
+        self.mock_chat_ollama.assert_called_once_with(
+            TEST_OLLAMA_MODEL, 'Debug code', system_prompt=None, image_files=None, json_schema=None
+        )

     def test_server_mode_requires_server_url(self):
         """Test server mode requires LLAMA_SERVER_URL to be set."""
```
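Both updated assertions rely on `self.mock_chat_server` / `self.mock_chat_ollama` mocks whose setup is not part of this diff. A sketch of one way such a setup could be wired (the class name and patch targets are assumptions inferred from how the tests use them):

```python
from unittest.mock import patch


class TestChatRouting:  # hypothetical class name; the real test class is not shown in this diff
    def setup_method(self):
        # Patch the availability check and the backend entry points inside ai_server.server.
        self._p_available = patch('ai_server.server.is_llamacpp_available')
        self._p_server = patch('ai_server.server.chat_with_llama_server_http')
        self._p_ollama = patch('ai_server.server.chat_with_ollama')
        self.mock_available = self._p_available.start()
        self.mock_chat_server = self._p_server.start()
        self.mock_chat_ollama = self._p_ollama.start()

    def teardown_method(self):
        self._p_ollama.stop()
        self._p_server.stop()
        self._p_available.stop()
```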
```diff
@@ -178,3 +182,33 @@ def test_complete_server_fallback_flow_to_ollama(self, mock_glob, mock_ollama, m
         assert result == "Ollama server fallback integration test successful!"
         mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_OLLAMA_MODEL}/*.gguf')
         mock_ollama.assert_called_once()
+
+    def test_server_mode_passes_json_schema_to_llama_server(self, tmp_path, mock_requests_post, mock_llama_server_url):
+        """
+        chat_with_model (server mode) should forward a json_schema file path
+        and llama-server should receive the parsed schema in its JSON body.
+        """
+        test_schema = {"schema": {"type": "object", "properties": {"answer": {"type": "string"}}}}
+
+        with patch('ai_server.server.is_llamacpp_available', return_value=True):
+            mock_response = MagicMock()
+            mock_response.status_code = 200
+            mock_response.json.return_value = {"choices": [{"message": {"content": "Schema-aware server reply"}}]}
+            mock_requests_post.return_value = mock_response
+
+            result = chat_with_model(
+                TEST_LLAMACPP_MODEL, "Give me an answer", llama_mode="server", json_schema=test_schema
+            )
+
+            assert result == "Schema-aware server reply"
+
+            # Verify POST call
+            mock_requests_post.assert_called_once()
+            args, kwargs = mock_requests_post.call_args
+            assert args[0] == "http://localhost:8080/v1/chat/completions"
+
+            body = kwargs["json"]
+            print(body)
+
+            assert body["model"] == TEST_LLAMACPP_MODEL
+            assert body["messages"][0]["content"] == "Give me an answer"
+            assert body["json_schema"] == {"type": "object", "properties": {"answer": {"type": "string"}}}
```
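The new test also depends on `mock_requests_post` and `mock_llama_server_url` fixtures that are not shown here. A minimal sketch of what they might look like (fixture names and patch targets are assumptions inferred from how the test uses them):

```python
import pytest
from unittest.mock import patch


@pytest.fixture
def mock_requests_post():
    # Intercept the requests.post used inside ai_server.server so no HTTP call is made.
    with patch('ai_server.server.requests.post') as mock_post:
        yield mock_post


@pytest.fixture
def mock_llama_server_url(monkeypatch):
    # Point the module-level LLAMA_SERVER_URL at the address the test asserts against.
    monkeypatch.setattr('ai_server.server.LLAMA_SERVER_URL', 'http://localhost:8080')
    return 'http://localhost:8080'
```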
Review comments:

Same here.

It's still 'schema'.