diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..283b33c --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,37 @@ +name: Tests + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + push: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.13"] + + steps: + - uses: actions/checkout@v4.2.2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5.3.0 + with: + python-version: ${{ matrix.python-version }} + - name: Cache pip + uses: actions/cache@v4.2.3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools wheel + pip install -e .[dev] + - name: Run tests + run: | + pytest -vv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ab86f37 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,32 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 # Use the ref you want to point at + hooks: + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-illegal-windows-names + - id: check-json + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + - id: check-yaml + - id: destroyed-symlinks + - id: detect-private-key + - id: end-of-file-fixer + - id: forbid-submodules + - id: trailing-whitespace + + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 25.1.0 + hooks: + - id: black + args: [--safe, --quiet] + + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + + +ci: + autoupdate_schedule: quarterly diff --git a/README.md b/README.md index 0aabafd..af79764 100644 --- a/README.md +++ b/README.md @@ -1 +1,21 @@ -# ai-server \ No newline at end of file +# ai-server + +## Developers + +To install project dependencies, including development dependencies: + +```console +$ pip install -e .[dev] +``` + +To install pre-commit hooks: + +```console +$ pre-commit install +``` + +To run the test suite: + +```console +$ pytest +``` diff --git a/ai_server/__main__.py b/ai_server/__main__.py index 5f79e09..9395c8f 100644 --- a/ai_server/__main__.py +++ b/ai_server/__main__.py @@ -1,4 +1,3 @@ from .server import app - app.run(debug=True, host="0.0.0.0") diff --git a/ai_server/redis_helper.py b/ai_server/redis_helper.py index 529bfbb..9f5476b 100644 --- a/ai_server/redis_helper.py +++ b/ai_server/redis_helper.py @@ -2,7 +2,6 @@ import redis - REDIS_URL = os.environ["REDIS_URL"] REDIS_CONNECTION = redis.Redis.from_url(REDIS_URL) diff --git a/ai_server/server.py b/ai_server/server.py index 8338ab6..39bd85e 100644 --- a/ai_server/server.py +++ b/ai_server/server.py @@ -1,11 +1,12 @@ -from flask import Flask, request, jsonify, abort -import ollama -import subprocess +import glob import os -import requests +import subprocess from typing import Optional + +import ollama +import requests from dotenv import load_dotenv -import glob +from flask import Flask, abort, jsonify, request from .redis_helper import REDIS_CONNECTION @@ -23,7 +24,12 @@ # Llama server configuration _llama_server_url = os.getenv('LLAMA_SERVER_URL') # e.g., http://localhost:8080 or localhost:8080 -LLAMA_SERVER_URL = f"http://{_llama_server_url}" if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) else _llama_server_url 
+LLAMA_SERVER_URL = ( + f"http://{_llama_server_url}" + if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) + else _llama_server_url +) + def _build_messages(content: str, system_prompt: Optional[str] = None) -> list: """Build messages list with optional system prompt.""" @@ -33,26 +39,24 @@ def _build_messages(content: str, system_prompt: Optional[str] = None) -> list: messages.append({'role': 'user', 'content': content}) return messages -def chat_with_llama_server_http(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str: + +def chat_with_llama_server_http( + model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300 +) -> str: """Handle chat using llama-server HTTP API.""" if not LLAMA_SERVER_URL: raise Exception("LLAMA_SERVER_URL environment variable not set") - + try: messages = _build_messages(content, system_prompt) - + response = requests.post( f'{LLAMA_SERVER_URL}/v1/chat/completions', - json={ - 'model': model, - 'messages': messages, - 'stream': False, - 'max_tokens': 512 - }, + json={'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512}, headers={'Content-Type': 'application/json'}, - timeout=timeout + timeout=timeout, ) - + if response.status_code == 200: data = response.json() if 'choices' in data and len(data['choices']) > 0: @@ -61,68 +65,55 @@ def chat_with_llama_server_http(model: str, content: str, system_prompt: Optiona raise Exception("Invalid response format from llama-server") else: raise Exception(f"Llama-server HTTP error") - + except requests.Timeout: raise Exception(f"Llama-server request timed out for model {model}") except requests.RequestException as e: raise Exception(f"Llama-server request failed: {str(e)}") + def resolve_model_path(model: str) -> Optional[str]: """Resolve model name to full GGUF file path using glob pattern.""" pattern = os.path.join(GGUF_DIR, model, "*.gguf") matches = glob.glob(pattern) return matches[0] if matches else None + def is_llamacpp_available(model: str) -> bool: """Check if model is available in llama.cpp.""" return resolve_model_path(model) is not None + def chat_with_ollama(model: str, content: str, system_prompt: Optional[str] = None) -> str: """Handle chat using ollama.""" messages = _build_messages(content, system_prompt) - - response = ollama.chat( - model=model, - messages=messages, - stream=False - ) + + response = ollama.chat(model=model, messages=messages, stream=False) return response.message.content + def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str: """Handle chat using llama.cpp CLI.""" model_path = resolve_model_path(model) - + if not model_path: raise ValueError(f"Model not found: {model}") - - cmd = [ - LLAMA_CPP_CLI, - '-m', model_path, - '--n-gpu-layers', '40', - '-p', content, - '-n', '512', - '--single-turn' - ] - + + cmd = [LLAMA_CPP_CLI, '-m', model_path, '--n-gpu-layers', '40', '-p', content, '-n', '512', '--single-turn'] + # Add system prompt if provided if system_prompt: cmd.extend(['--system-prompt', system_prompt]) - + try: - result = subprocess.run( - cmd, - capture_output=True, - text=False, - timeout=timeout, - check=True - ) - + result = subprocess.run(cmd, capture_output=True, text=False, timeout=timeout, check=True) + stdout_text = result.stdout.decode('utf-8', errors='replace') # Strip whitespace and return the response response = stdout_text.strip() return response if response else "No response generated." 
- + except subprocess.TimeoutExpired: raise Exception(f"Llama.cpp request timed out for model {model}") except subprocess.CalledProcessError as e: @@ -133,6 +124,7 @@ def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] = except FileNotFoundError: raise Exception("Llama.cpp CLI not found") + def chat_with_model(model: str, content: str, llama_mode: str = "cli", system_prompt: Optional[str] = None) -> str: """Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt.""" if is_llamacpp_available(model): @@ -171,13 +163,14 @@ def chat(): content = params.get('content', '') llama_mode = params.get('llama_mode', 'cli') system_prompt = params.get('system_prompt') - + if not content.strip(): abort(400, description='Missing prompt content') response_content = chat_with_model(model, content, llama_mode, system_prompt) return jsonify(response_content) + @app.errorhandler(Exception) def internal_error(error): return jsonify({"error": str(error)}), 500 diff --git a/pyproject.toml b/pyproject.toml index eb959ed..09c94eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,19 @@ dependencies = [ "requests", ] +[project.optional-dependencies] +dev = [ + "pre-commit", + "pytest", +] [project.urls] Homepage = "https://github.com/MarkUsProject/ai-server" Issues = "https://github.com/MarkUsProject/ai-server/issues" + +[tool.black] +line-length = 120 +skip-string-normalization = true + +[tool.isort] +profile = "black" diff --git a/test/test_cli_mode.py b/test/test_cli_mode.py index dfb3404..9cd05f3 100644 --- a/test/test_cli_mode.py +++ b/test/test_cli_mode.py @@ -1,14 +1,12 @@ -import pytest import os import subprocess -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + +import pytest os.environ.setdefault('REDIS_URL', 'redis://localhost:6379') -from ai_server.server import ( - chat_with_llamacpp, - chat_with_model -) +from ai_server.server import chat_with_llamacpp, chat_with_model # Test models TEST_LLAMACPP_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' @@ -21,18 +19,21 @@ def mock_subprocess(): with patch('ai_server.server.subprocess.run') as mock: yield mock + @pytest.fixture def mock_resolve_model_path(): """Mock resolve_model_path for CLI tests.""" with patch('ai_server.server.resolve_model_path') as mock: yield mock + @pytest.fixture def mock_glob(): """Mock glob.glob for model discovery tests.""" with patch('ai_server.server.glob.glob') as mock: yield mock + @pytest.fixture def mock_ollama(): """Mock ollama.chat for fallback tests.""" @@ -42,21 +43,21 @@ def mock_ollama(): class TestLlamaCppCLI: """Test llama.cpp CLI execution.""" - + def test_chat_with_llamacpp_success(self, mock_resolve_model_path, mock_subprocess): """Test successful CLI chat with llama.cpp.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' mock_resolve_model_path.return_value = model_path - + mock_result = MagicMock() mock_result.stdout = b'I can help you with DeepSeek V3.' mock_subprocess.return_value = mock_result - + result = chat_with_llamacpp(TEST_LLAMACPP_MODEL, 'Hello, can you help me code?') - + assert result == "I can help you with DeepSeek V3." 
mock_resolve_model_path.assert_called_once_with(TEST_LLAMACPP_MODEL) - + # Verify correct CLI command structure args, kwargs = mock_subprocess.call_args cmd = args[0] @@ -64,69 +65,69 @@ def test_chat_with_llamacpp_success(self, mock_resolve_model_path, mock_subproce assert '-m' in cmd and model_path in cmd assert '--n-gpu-layers' in cmd and '40' in cmd assert '--single-turn' in cmd - + def test_chat_with_llamacpp_model_not_found(self, mock_resolve_model_path): """Test CLI chat when model is not found.""" mock_resolve_model_path.return_value = None - + with pytest.raises(ValueError, match="Model not found: nonexistent-model"): chat_with_llamacpp('nonexistent-model', 'Hello') - + def test_chat_with_llamacpp_subprocess_error(self, mock_resolve_model_path, mock_subprocess): """Test CLI chat when subprocess fails.""" mock_resolve_model_path.return_value = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + error = subprocess.CalledProcessError(1, 'cmd') error.stderr = b'CUDA out of memory' mock_subprocess.side_effect = error - + with pytest.raises(Exception, match=f"Llama.cpp failed for {TEST_LLAMACPP_MODEL}: CUDA out of memory"): chat_with_llamacpp(TEST_LLAMACPP_MODEL, 'Hello') class TestCLIModeRouting: """Test CLI mode routing and fallback logic.""" - + @pytest.fixture(autouse=True) def setup_routing_mocks(self): """Set up common mocks for routing tests.""" - with patch('ai_server.server.chat_with_llamacpp') as mock_chat_llamacpp, \ - patch('ai_server.server.is_llamacpp_available') as mock_available, \ - patch('ai_server.server.chat_with_ollama') as mock_chat_ollama: + with patch('ai_server.server.chat_with_llamacpp') as mock_chat_llamacpp, patch( + 'ai_server.server.is_llamacpp_available' + ) as mock_available, patch('ai_server.server.chat_with_ollama') as mock_chat_ollama: self.mock_chat_llamacpp = mock_chat_llamacpp - self.mock_available = mock_available + self.mock_available = mock_available self.mock_chat_ollama = mock_chat_ollama yield - + def test_cli_mode_uses_llamacpp_when_available(self): """Test CLI mode routes to llama.cpp when model is available.""" self.mock_available.return_value = True self.mock_chat_llamacpp.return_value = "CLI response from DeepSeek V3" - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Write a function', llama_mode='cli') - + assert result == "CLI response from DeepSeek V3" self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL) self.mock_chat_llamacpp.assert_called_once_with(TEST_LLAMACPP_MODEL, 'Write a function') - + def test_cli_mode_fallback_to_ollama_when_unavailable(self): """Test CLI mode falls back to ollama when model not available in llama.cpp.""" self.mock_available.return_value = False self.mock_chat_ollama.return_value = "Ollama response from DeepSeek Coder" - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Help with coding', llama_mode='cli') - + assert result == "Ollama response from DeepSeek Coder" self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL) self.mock_chat_ollama.assert_called_once_with(TEST_OLLAMA_MODEL, 'Help with coding') - + def test_default_mode_is_cli(self): """Test that default mode is CLI when no llama_mode specified.""" self.mock_available.return_value = True self.mock_chat_llamacpp.return_value = "Default CLI mode response" - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Help me') # No llama_mode specified - + assert result == "Default CLI mode response" self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL) 
self.mock_chat_llamacpp.assert_called_once_with(TEST_LLAMACPP_MODEL, 'Help me') @@ -134,35 +135,35 @@ def test_default_mode_is_cli(self): class TestCLIModeIntegration: """Test complete CLI mode integration flows.""" - + def test_complete_cli_flow_with_real_model(self, mock_glob, mock_subprocess): """Test complete CLI flow: model resolution → CLI execution.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + mock_glob.return_value = [model_path] mock_result = MagicMock() mock_result.stdout = b'Complete integration test successful with DeepSeek V3!' mock_subprocess.return_value = mock_result - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Integration test', llama_mode='cli') - + assert result == "Complete integration test successful with DeepSeek V3!" # Verify glob called twice: once for availability check, once for CLI execution assert mock_glob.call_count == 2 mock_subprocess.assert_called_once() - + def test_complete_cli_fallback_flow_to_ollama(self, mock_glob, mock_ollama): """Test complete CLI fallback flow: model not found → fallback to ollama.""" # Mock model not found in llama.cpp mock_glob.return_value = [] - + # Mock successful ollama response mock_response = MagicMock() mock_response.message.content = "Ollama CLI fallback integration test successful!" mock_ollama.return_value = mock_response - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Fallback test', llama_mode='cli') - + assert result == "Ollama CLI fallback integration test successful!" mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_OLLAMA_MODEL}/*.gguf') - mock_ollama.assert_called_once() + mock_ollama.assert_called_once() diff --git a/test/test_core.py b/test/test_core.py index b25dd6c..7e88cb1 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -1,14 +1,11 @@ -import pytest import os -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + +import pytest os.environ.setdefault('REDIS_URL', 'redis://localhost:6379') -from ai_server.server import ( - resolve_model_path, - is_llamacpp_available, - chat_with_ollama -) +from ai_server.server import chat_with_ollama, is_llamacpp_available, resolve_model_path # Test models TEST_LLAMACPP_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' @@ -21,6 +18,7 @@ def mock_glob(): with patch('ai_server.server.glob.glob') as mock: yield mock + @pytest.fixture def mock_ollama(): """Mock ollama.chat for ollama tests.""" @@ -30,73 +28,73 @@ def mock_ollama(): class TestModelResolution: """Test core model resolution functionality.""" - + def test_resolve_model_path_found(self, mock_glob): """Test model path resolution when model exists.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' mock_glob.return_value = [model_path] - + result = resolve_model_path(TEST_LLAMACPP_MODEL) - + assert result == model_path mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/*.gguf') - + def test_resolve_model_path_not_found(self, mock_glob): """Test model path resolution when model doesn't exist.""" mock_glob.return_value = [] - + result = resolve_model_path('nonexistent-model') - + assert result is None - + def test_is_llamacpp_available_true(self): """Test model availability check when model exists.""" with patch('ai_server.server.resolve_model_path') as mock_resolve: mock_resolve.return_value = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + result = is_llamacpp_available(TEST_LLAMACPP_MODEL) - + assert result is True 
mock_resolve.assert_called_once_with(TEST_LLAMACPP_MODEL) - + def test_is_llamacpp_available_false(self): """Test model availability check when model doesn't exist.""" with patch('ai_server.server.resolve_model_path') as mock_resolve: mock_resolve.return_value = None - + result = is_llamacpp_available('nonexistent-model') - + assert result is False class TestOllamaCore: """Test core ollama functionality used as fallback.""" - + def test_chat_with_ollama_success(self, mock_ollama): """Test successful chat with ollama.""" mock_response = MagicMock() mock_response.message.content = "Hello! I'm DeepSeek Coder V2. I can help you with coding tasks." mock_ollama.return_value = mock_response - + result = chat_with_ollama(TEST_OLLAMA_MODEL, 'Help me write a Python function') - + assert result == "Hello! I'm DeepSeek Coder V2. I can help you with coding tasks." mock_ollama.assert_called_once_with( model=TEST_OLLAMA_MODEL, messages=[{'role': 'user', 'content': 'Help me write a Python function'}], - stream=False + stream=False, ) - + def test_chat_with_ollama_service_unavailable(self, mock_ollama): """Test ollama chat when service is unavailable.""" mock_ollama.side_effect = Exception("Ollama service is not running") - + with pytest.raises(Exception, match="Ollama service is not running"): chat_with_ollama(TEST_OLLAMA_MODEL, 'Hello') - + def test_chat_with_ollama_model_not_found(self, mock_ollama): """Test ollama chat when model is not found.""" mock_ollama.side_effect = Exception("model 'nonexistent:latest' not found") - + with pytest.raises(Exception, match="model 'nonexistent:latest' not found"): - chat_with_ollama('nonexistent:latest', 'Hello') + chat_with_ollama('nonexistent:latest', 'Hello') diff --git a/test/test_server_mode.py b/test/test_server_mode.py index 53b4338..194031f 100644 --- a/test/test_server_mode.py +++ b/test/test_server_mode.py @@ -1,13 +1,11 @@ -import pytest import os -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + +import pytest os.environ.setdefault('REDIS_URL', 'redis://localhost:6379') -from ai_server.server import ( - chat_with_llama_server_http, - chat_with_model -) +from ai_server.server import chat_with_llama_server_http, chat_with_model # Test models TEST_LLAMACPP_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' @@ -20,18 +18,21 @@ def mock_requests_post(): with patch('ai_server.server.requests.post') as mock: yield mock + @pytest.fixture def mock_llama_server_url(): """Mock LLAMA_SERVER_URL for server tests.""" with patch('ai_server.server.LLAMA_SERVER_URL', 'http://localhost:8080'): yield + @pytest.fixture def mock_glob(): """Mock glob.glob for model discovery tests.""" with patch('ai_server.server.glob.glob') as mock: yield mock + @pytest.fixture def mock_ollama(): """Mock ollama.chat for fallback tests.""" @@ -41,142 +42,139 @@ def mock_ollama(): class TestLlamaServerHTTP: """Test llama.cpp server HTTP functionality.""" - + def test_chat_with_llama_server_http_success(self, mock_requests_post, mock_llama_server_url): """Test successful HTTP chat with llama-server.""" mock_response = MagicMock() mock_response.status_code = 200 - mock_response.json.return_value = { - 'choices': [{'message': {'content': 'Server response from DeepSeek V3'}}] - } + mock_response.json.return_value = {'choices': [{'message': {'content': 'Server response from DeepSeek V3'}}]} mock_requests_post.return_value = mock_response - + result = chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello from server') - + assert result == "Server response from DeepSeek 
V3" - + # Verify correct API call args, kwargs = mock_requests_post.call_args assert args[0] == 'http://localhost:8080/v1/chat/completions' assert kwargs['json']['model'] == TEST_LLAMACPP_MODEL assert kwargs['json']['messages'][0]['content'] == 'Hello from server' - + def test_chat_with_llama_server_http_no_url(self): """Test HTTP chat when LLAMA_SERVER_URL is not set.""" with patch('ai_server.server.LLAMA_SERVER_URL', None): with pytest.raises(Exception, match="LLAMA_SERVER_URL environment variable not set"): chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello') - + def test_chat_with_llama_server_http_error_response(self, mock_requests_post, mock_llama_server_url): """Test HTTP chat when server returns error.""" mock_response = MagicMock() mock_response.status_code = 500 mock_requests_post.return_value = mock_response - + with pytest.raises(Exception, match="Llama-server HTTP error"): chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello') - + def test_chat_with_llama_server_http_invalid_response_format(self, mock_requests_post, mock_llama_server_url): """Test HTTP chat when server returns invalid response format.""" mock_response = MagicMock() mock_response.status_code = 200 mock_response.json.return_value = {'error': 'Invalid request'} # Missing choices mock_requests_post.return_value = mock_response - + with pytest.raises(Exception, match="Invalid response format from llama-server"): chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello') class TestServerModeRouting: """Test server mode routing and fallback logic.""" - + @pytest.fixture(autouse=True) def setup_routing_mocks(self): """Set up common mocks for routing tests.""" - with patch('ai_server.server.chat_with_llama_server_http') as mock_chat_server, \ - patch('ai_server.server.is_llamacpp_available') as mock_available, \ - patch('ai_server.server.chat_with_ollama') as mock_chat_ollama, \ - patch('ai_server.server.LLAMA_SERVER_URL', 'http://localhost:8080'): + with patch('ai_server.server.chat_with_llama_server_http') as mock_chat_server, patch( + 'ai_server.server.is_llamacpp_available' + ) as mock_available, patch('ai_server.server.chat_with_ollama') as mock_chat_ollama, patch( + 'ai_server.server.LLAMA_SERVER_URL', 'http://localhost:8080' + ): self.mock_chat_server = mock_chat_server self.mock_available = mock_available self.mock_chat_ollama = mock_chat_ollama yield - + def test_server_mode_uses_llamacpp_when_available(self): """Test server mode routes to llama-server when model is available.""" self.mock_available.return_value = True self.mock_chat_server.return_value = "Server response from DeepSeek V3" - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Explain code', llama_mode='server') - + assert result == "Server response from DeepSeek V3" self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL) self.mock_chat_server.assert_called_once_with(TEST_LLAMACPP_MODEL, 'Explain code') - + def test_server_mode_fallback_to_ollama_when_unavailable(self): """Test server mode falls back to ollama when model not available in llama.cpp.""" self.mock_available.return_value = False self.mock_chat_ollama.return_value = "Ollama fallback response" - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Debug code', llama_mode='server') - + assert result == "Ollama fallback response" self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL) self.mock_chat_ollama.assert_called_once_with(TEST_OLLAMA_MODEL, 'Debug code') - + def test_server_mode_requires_server_url(self): """Test server mode requires LLAMA_SERVER_URL to be set.""" 
with patch('ai_server.server.LLAMA_SERVER_URL', None): self.mock_available.return_value = True - + with pytest.raises(Exception, match="LLAMA_SERVER_URL environment variable not set"): chat_with_model(TEST_LLAMACPP_MODEL, 'Hello', llama_mode='server') - + def test_invalid_llama_mode_raises_error(self): """Test that invalid llama_mode raises ValueError.""" self.mock_available.return_value = True - + with pytest.raises(ValueError, match="Invalid llama_mode: 'invalid'"): chat_with_model(TEST_LLAMACPP_MODEL, 'Hello', llama_mode='invalid') class TestServerModeIntegration: """Test complete server mode integration flows.""" - + def test_complete_server_flow_with_real_model(self, mock_glob, mock_requests_post, mock_llama_server_url): """Test complete server flow: model resolution → HTTP API call.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + # Mock model found (only checked once for availability in server mode) mock_glob.return_value = [model_path] - + # Mock successful HTTP response mock_response = MagicMock() mock_response.status_code = 200 - mock_response.json.return_value = { - 'choices': [{'message': {'content': 'Server integration test successful!'}}] - } + mock_response.json.return_value = {'choices': [{'message': {'content': 'Server integration test successful!'}}]} mock_requests_post.return_value = mock_response - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Integration test', llama_mode='server') - + assert result == "Server integration test successful!" # In server mode, glob.glob only called once for is_llamacpp_available mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/*.gguf') mock_requests_post.assert_called_once() - + def test_complete_server_fallback_flow_to_ollama(self, mock_glob, mock_ollama, mock_llama_server_url): """Test complete server fallback flow: model not found → fallback to ollama.""" # Mock model not found in llama.cpp mock_glob.return_value = [] - + # Mock successful ollama response mock_response = MagicMock() mock_response.message.content = "Ollama server fallback integration test successful!" mock_ollama.return_value = mock_response - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Fallback test', llama_mode='server') - + assert result == "Ollama server fallback integration test successful!" mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_OLLAMA_MODEL}/*.gguf') - mock_ollama.assert_called_once() + mock_ollama.assert_called_once() diff --git a/test/test_system_prompt.py b/test/test_system_prompt.py index 3b8f684..20372d2 100644 --- a/test/test_system_prompt.py +++ b/test/test_system_prompt.py @@ -1,5 +1,6 @@ +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock TEST_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' TEST_SYSTEM_PROMPT = "You are a helpful coding assistant." diff --git a/test/test_system_prompt_api.py b/test/test_system_prompt_api.py index a4c62e3..d84c7d3 100644 --- a/test/test_system_prompt_api.py +++ b/test/test_system_prompt_api.py @@ -1,6 +1,7 @@ -import pytest from unittest.mock import patch +import pytest + TEST_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' TEST_SYSTEM_PROMPT = "You are a helpful coding assistant." 
TEST_USER_CONTENT = "Write a function" @@ -18,6 +19,7 @@ def setup_env(self, monkeypatch): def client(self): """Create test client for Flask app.""" from ai_server.server import app + app.config['TESTING'] = True with app.test_client() as client: yield client @@ -29,23 +31,15 @@ def test_api_with_system_prompt(self, mock_chat, mock_redis, client): mock_redis.get.return_value = b'test_user' mock_chat.return_value = "def function(): pass" - response = client.post('/chat', + response = client.post( + '/chat', headers={'X-API-KEY': 'test-key'}, - json={ - 'model': TEST_MODEL, - 'content': TEST_USER_CONTENT, - 'system_prompt': TEST_SYSTEM_PROMPT - } + json={'model': TEST_MODEL, 'content': TEST_USER_CONTENT, 'system_prompt': TEST_SYSTEM_PROMPT}, ) assert response.status_code == 200 - mock_chat.assert_called_once_with( - TEST_MODEL, - TEST_USER_CONTENT, - 'cli', - TEST_SYSTEM_PROMPT - ) + mock_chat.assert_called_once_with(TEST_MODEL, TEST_USER_CONTENT, 'cli', TEST_SYSTEM_PROMPT) @patch('ai_server.server.REDIS_CONNECTION') @patch('ai_server.server.chat_with_model') @@ -54,35 +48,23 @@ def test_api_without_system_prompt(self, mock_chat, mock_redis, client): mock_redis.get.return_value = b'test_user' mock_chat.return_value = "def function(): pass" - response = client.post('/chat', - headers={'X-API-KEY': 'test-key'}, - json={ - 'model': TEST_MODEL, - 'content': TEST_USER_CONTENT - } + response = client.post( + '/chat', headers={'X-API-KEY': 'test-key'}, json={'model': TEST_MODEL, 'content': TEST_USER_CONTENT} ) assert response.status_code == 200 - mock_chat.assert_called_once_with( - TEST_MODEL, - TEST_USER_CONTENT, - 'cli', - None - ) + mock_chat.assert_called_once_with(TEST_MODEL, TEST_USER_CONTENT, 'cli', None) @patch('ai_server.server.REDIS_CONNECTION') def test_api_authentication_still_required(self, mock_redis, client): """Test that authentication is still required with system_prompt.""" mock_redis.get.return_value = None - response = client.post('/chat', + response = client.post( + '/chat', headers={'X-API-KEY': 'invalid-key'}, - json={ - 'model': TEST_MODEL, - 'content': TEST_USER_CONTENT, - 'system_prompt': TEST_SYSTEM_PROMPT - } + json={'model': TEST_MODEL, 'content': TEST_USER_CONTENT, 'system_prompt': TEST_SYSTEM_PROMPT}, ) assert response.status_code == 500