@@ -1,3 +1,6 @@
+from __future__ import annotations
+
+import base64
 import glob
 import os
 import subprocess
@@ -31,24 +34,31 @@
 )
 
 
-def _build_messages(content: str, system_prompt: Optional[str] = None) -> list:
+def _build_messages(content: str, system_prompt: Optional[str] = None, image_files: Optional[list] = None) -> list:
     """Build messages list with optional system prompt."""
     messages = []
     if system_prompt:
         messages.append({'role': 'system', 'content': system_prompt})
     messages.append({'role': 'user', 'content': content})
+
+    if image_files:
+        messages[-1]["images"] = [base64.b64encode(image_file.read()).decode("utf-8") for image_file in image_files]
     return messages
 
 
 def chat_with_llama_server_http(
-    model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300
+    model: str,
+    content: str,
+    system_prompt: Optional[str] = None,
+    timeout: int = 300,
+    image_files: Optional[list] = None,
 ) -> str:
     """Handle chat using llama-server HTTP API."""
     if not LLAMA_SERVER_URL:
         raise Exception("LLAMA_SERVER_URL environment variable not set")
 
     try:
-        messages = _build_messages(content, system_prompt)
+        messages = _build_messages(content, system_prompt, image_files=[])  # TODO: Pass image files
 
         response = requests.post(
             f'{LLAMA_SERVER_URL}/v1/chat/completions',
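Note (not part of this commit): the server path above still stubs out image handling (`image_files=[]` with a TODO). A minimal sketch of one way it could be completed, assuming the llama-server instance runs a multimodal model (e.g. started with --mmproj) and accepts OpenAI-style image_url content parts carrying base64 data URIs; the helper name _build_openai_image_message and the image/png MIME type are illustrative assumptions.

import base64
from typing import Optional

def _build_openai_image_message(content: str, image_files: Optional[list] = None) -> dict:
    """Build a user message whose content is a list of text and image parts (OpenAI-style)."""
    parts = [{'type': 'text', 'text': content}]
    for image_file in image_files or []:
        encoded = base64.b64encode(image_file.read()).decode('utf-8')
        parts.append({
            'type': 'image_url',
            # A data URI keeps the request self-contained; the MIME type is assumed here.
            'image_url': {'url': f'data:image/png;base64,{encoded}'},
        })
    return {'role': 'user', 'content': parts}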
@@ -84,15 +94,23 @@ def is_llamacpp_available(model: str) -> bool:
     return resolve_model_path(model) is not None
 
 
-def chat_with_ollama(model: str, content: str, system_prompt: Optional[str] = None) -> str:
+def chat_with_ollama(
+    model: str, content: str, system_prompt: Optional[str] = None, image_files: Optional[list] = None
+) -> str:
     """Handle chat using ollama."""
-    messages = _build_messages(content, system_prompt)
+    messages = _build_messages(content, system_prompt, image_files)
 
     response = ollama.chat(model=model, messages=messages, stream=False)
     return response.message.content
 
 
-def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str:
+def chat_with_llamacpp(
+    model: str,
+    content: str,
+    system_prompt: Optional[str] = None,
+    timeout: int = 300,
+    image_files: Optional[list] = None,
+) -> str:
     """Handle chat using llama.cpp CLI."""
     model_path = resolve_model_path(model)
 
@@ -105,6 +123,9 @@ def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] =
     if system_prompt:
         cmd.extend(['--system-prompt', system_prompt])
 
+    if image_files:
+        pass  # TODO: pass image files
+
     try:
         result = subprocess.run(cmd, capture_output=True, text=False, timeout=timeout, check=True)
 
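Note (not part of this commit): the CLI branch likewise leaves its TODO in place. One possible completion, sketched under the assumption that the configured llama.cpp binary is a multimodal-capable CLI that accepts an --image <path> flag (plain llama-cli builds may not); uploads are written to temporary files because the CLI expects paths rather than bytes, and the helper name _append_image_args is hypothetical.

import tempfile

def _append_image_args(cmd: list, image_files: list) -> list:
    """Write each upload to a temp file, add an --image flag per file, and return the temp paths for later cleanup."""
    temp_paths = []
    for image_file in image_files:
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            tmp.write(image_file.read())
            temp_paths.append(tmp.name)
        cmd.extend(['--image', tmp.name])
    return temp_paths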
@@ -125,20 +146,26 @@ def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] =
         raise Exception("Llama.cpp CLI not found")
 
 
-def chat_with_model(model: str, content: str, llama_mode: str = "cli", system_prompt: Optional[str] = None) -> str:
+def chat_with_model(
+    model: str,
+    content: str,
+    llama_mode: str = "cli",
+    system_prompt: Optional[str] = None,
+    image_files: Optional[list] = None,
+) -> str:
     """Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt."""
     if is_llamacpp_available(model):
         if llama_mode == "server":
             if not LLAMA_SERVER_URL:
                 raise Exception("LLAMA_SERVER_URL environment variable not set for server mode")
-            return chat_with_llama_server_http(model, content, system_prompt)
+            return chat_with_llama_server_http(model, content, system_prompt, image_files=image_files)
         elif llama_mode == "cli":
-            return chat_with_llamacpp(model, content, system_prompt)
+            return chat_with_llamacpp(model, content, system_prompt, image_files=image_files)
         else:
             raise ValueError(f"Invalid llama_mode: '{llama_mode}'. Valid options are 'server' or 'cli'.")
     else:
         # Model not available in llama.cpp, use ollama
-        return chat_with_ollama(model, content, system_prompt)
+        return chat_with_ollama(model, content, system_prompt, image_files)
 
 
 def authenticate() -> str:
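For reference, callers now hand chat_with_model open binary file objects (anything with a .read() method works, matching _build_messages). An illustrative call, where the model name 'llava' and the file name are assumptions.

with open('photo.png', 'rb') as image:
    reply = chat_with_model('llava', 'Describe this image.', llama_mode='cli', image_files=[image])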
@@ -158,16 +185,16 @@ def authenticate() -> str:
 def chat():
     """Handle chat request with optional llama_mode and system prompt parameters."""
     authenticate()
-    params = request.get_json()
-    model = params.get('model', DEFAULT_MODEL)
-    content = params.get('content', '')
-    llama_mode = params.get('llama_mode', 'cli')
-    system_prompt = params.get('system_prompt')
+    model = request.form.get('model', DEFAULT_MODEL)
+    content = request.form.get('content', '')
+    llama_mode = request.form.get('llama_mode', 'cli')
+    system_prompt = request.form.get('system_prompt')
+    image_files = list(request.files.values())
 
     if not content.strip():
         abort(400, description='Missing prompt content')
 
-    response_content = chat_with_model(model, content, llama_mode, system_prompt)
+    response_content = chat_with_model(model, content, llama_mode, system_prompt, image_files)
     return jsonify(response_content)
 
 
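Because the endpoint now reads request.form and request.files instead of a JSON body, clients must switch to multipart form data. A sketch of such a call with the requests library; the base URL, route, and bearer-token header are assumptions about the deployment, not part of this commit.

import requests

with open('photo.png', 'rb') as image:
    resp = requests.post(
        'http://localhost:5000/chat',
        headers={'Authorization': 'Bearer <token>'},
        data={'content': 'Describe this image.', 'llama_mode': 'cli'},
        files={'image0': ('photo.png', image, 'image/png')},
    )
print(resp.json())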