diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..283b33c --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,37 @@ +name: Tests + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + push: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.13"] + + steps: + - uses: actions/checkout@v4.2.2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5.3.0 + with: + python-version: ${{ matrix.python-version }} + - name: Cache pip + uses: actions/cache@v4.2.3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools wheel + pip install -e .[dev] + - name: Run tests + run: | + pytest -vv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ab86f37 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,32 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 # Use the ref you want to point at + hooks: + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-illegal-windows-names + - id: check-json + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + - id: check-yaml + - id: destroyed-symlinks + - id: detect-private-key + - id: end-of-file-fixer + - id: forbid-submodules + - id: trailing-whitespace + + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 25.1.0 + hooks: + - id: black + args: [--safe, --quiet] + + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + + +ci: + autoupdate_schedule: quarterly diff --git a/README.md b/README.md index 0aabafd..af79764 100644 --- a/README.md +++ b/README.md @@ -1 +1,21 @@ -# ai-server \ No newline at end of file +# ai-server + +## Developers + +To install project dependencies, including development dependencies: + +```console +$ pip install -e .[dev] +``` + +To install pre-commit hooks: + +```console +$ pre-commit install +``` + +To run the test suite: + +```console +$ pytest +``` diff --git a/ai_server/__main__.py b/ai_server/__main__.py index 5f79e09..9395c8f 100644 --- a/ai_server/__main__.py +++ b/ai_server/__main__.py @@ -1,4 +1,3 @@ from .server import app - app.run(debug=True, host="0.0.0.0") diff --git a/ai_server/redis_helper.py b/ai_server/redis_helper.py index 529bfbb..9f5476b 100644 --- a/ai_server/redis_helper.py +++ b/ai_server/redis_helper.py @@ -2,7 +2,6 @@ import redis - REDIS_URL = os.environ["REDIS_URL"] REDIS_CONNECTION = redis.Redis.from_url(REDIS_URL) diff --git a/ai_server/server.py b/ai_server/server.py index 8338ab6..39bd85e 100644 --- a/ai_server/server.py +++ b/ai_server/server.py @@ -1,11 +1,12 @@ -from flask import Flask, request, jsonify, abort -import ollama -import subprocess +import glob import os -import requests +import subprocess from typing import Optional + +import ollama +import requests from dotenv import load_dotenv -import glob +from flask import Flask, abort, jsonify, request from .redis_helper import REDIS_CONNECTION @@ -23,7 +24,12 @@ # Llama server configuration _llama_server_url = os.getenv('LLAMA_SERVER_URL') # e.g., http://localhost:8080 or localhost:8080 -LLAMA_SERVER_URL = f"http://{_llama_server_url}" if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) else _llama_server_url 
+LLAMA_SERVER_URL = ( + f"http://{_llama_server_url}" + if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) + else _llama_server_url +) + def _build_messages(content: str, system_prompt: Optional[str] = None) -> list: """Build messages list with optional system prompt.""" @@ -33,26 +39,24 @@ def _build_messages(content: str, system_prompt: Optional[str] = None) -> list: messages.append({'role': 'user', 'content': content}) return messages -def chat_with_llama_server_http(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str: + +def chat_with_llama_server_http( + model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300 +) -> str: """Handle chat using llama-server HTTP API.""" if not LLAMA_SERVER_URL: raise Exception("LLAMA_SERVER_URL environment variable not set") - + try: messages = _build_messages(content, system_prompt) - + response = requests.post( f'{LLAMA_SERVER_URL}/v1/chat/completions', - json={ - 'model': model, - 'messages': messages, - 'stream': False, - 'max_tokens': 512 - }, + json={'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512}, headers={'Content-Type': 'application/json'}, - timeout=timeout + timeout=timeout, ) - + if response.status_code == 200: data = response.json() if 'choices' in data and len(data['choices']) > 0: @@ -61,68 +65,55 @@ def chat_with_llama_server_http(model: str, content: str, system_prompt: Optiona raise Exception("Invalid response format from llama-server") else: raise Exception(f"Llama-server HTTP error") - + except requests.Timeout: raise Exception(f"Llama-server request timed out for model {model}") except requests.RequestException as e: raise Exception(f"Llama-server request failed: {str(e)}") + def resolve_model_path(model: str) -> Optional[str]: """Resolve model name to full GGUF file path using glob pattern.""" pattern = os.path.join(GGUF_DIR, model, "*.gguf") matches = glob.glob(pattern) return matches[0] if matches else None + def is_llamacpp_available(model: str) -> bool: """Check if model is available in llama.cpp.""" return resolve_model_path(model) is not None + def chat_with_ollama(model: str, content: str, system_prompt: Optional[str] = None) -> str: """Handle chat using ollama.""" messages = _build_messages(content, system_prompt) - - response = ollama.chat( - model=model, - messages=messages, - stream=False - ) + + response = ollama.chat(model=model, messages=messages, stream=False) return response.message.content + def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str: """Handle chat using llama.cpp CLI.""" model_path = resolve_model_path(model) - + if not model_path: raise ValueError(f"Model not found: {model}") - - cmd = [ - LLAMA_CPP_CLI, - '-m', model_path, - '--n-gpu-layers', '40', - '-p', content, - '-n', '512', - '--single-turn' - ] - + + cmd = [LLAMA_CPP_CLI, '-m', model_path, '--n-gpu-layers', '40', '-p', content, '-n', '512', '--single-turn'] + # Add system prompt if provided if system_prompt: cmd.extend(['--system-prompt', system_prompt]) - + try: - result = subprocess.run( - cmd, - capture_output=True, - text=False, - timeout=timeout, - check=True - ) - + result = subprocess.run(cmd, capture_output=True, text=False, timeout=timeout, check=True) + stdout_text = result.stdout.decode('utf-8', errors='replace') # Strip whitespace and return the response response = stdout_text.strip() return response if response else "No response generated." 
- + except subprocess.TimeoutExpired: raise Exception(f"Llama.cpp request timed out for model {model}") except subprocess.CalledProcessError as e: @@ -133,6 +124,7 @@ def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] = except FileNotFoundError: raise Exception("Llama.cpp CLI not found") + def chat_with_model(model: str, content: str, llama_mode: str = "cli", system_prompt: Optional[str] = None) -> str: """Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt.""" if is_llamacpp_available(model): @@ -171,13 +163,14 @@ def chat(): content = params.get('content', '') llama_mode = params.get('llama_mode', 'cli') system_prompt = params.get('system_prompt') - + if not content.strip(): abort(400, description='Missing prompt content') response_content = chat_with_model(model, content, llama_mode, system_prompt) return jsonify(response_content) + @app.errorhandler(Exception) def internal_error(error): return jsonify({"error": str(error)}), 500 diff --git a/pyproject.toml b/pyproject.toml index eb959ed..09c94eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,19 @@ dependencies = [ "requests", ] +[project.optional-dependencies] +dev = [ + "pre-commit", + "pytest", +] [project.urls] Homepage = "https://github.com/MarkUsProject/ai-server" Issues = "https://github.com/MarkUsProject/ai-server/issues" + +[tool.black] +line-length = 120 +skip-string-normalization = true + +[tool.isort] +profile = "black" diff --git a/test/test_cli_mode.py b/test/test_cli_mode.py index dfb3404..9cd05f3 100644 --- a/test/test_cli_mode.py +++ b/test/test_cli_mode.py @@ -1,14 +1,12 @@ -import pytest import os import subprocess -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + +import pytest os.environ.setdefault('REDIS_URL', 'redis://localhost:6379') -from ai_server.server import ( - chat_with_llamacpp, - chat_with_model -) +from ai_server.server import chat_with_llamacpp, chat_with_model # Test models TEST_LLAMACPP_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' @@ -21,18 +19,21 @@ def mock_subprocess(): with patch('ai_server.server.subprocess.run') as mock: yield mock + @pytest.fixture def mock_resolve_model_path(): """Mock resolve_model_path for CLI tests.""" with patch('ai_server.server.resolve_model_path') as mock: yield mock + @pytest.fixture def mock_glob(): """Mock glob.glob for model discovery tests.""" with patch('ai_server.server.glob.glob') as mock: yield mock + @pytest.fixture def mock_ollama(): """Mock ollama.chat for fallback tests.""" @@ -42,21 +43,21 @@ def mock_ollama(): class TestLlamaCppCLI: """Test llama.cpp CLI execution.""" - + def test_chat_with_llamacpp_success(self, mock_resolve_model_path, mock_subprocess): """Test successful CLI chat with llama.cpp.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' mock_resolve_model_path.return_value = model_path - + mock_result = MagicMock() mock_result.stdout = b'I can help you with DeepSeek V3.' mock_subprocess.return_value = mock_result - + result = chat_with_llamacpp(TEST_LLAMACPP_MODEL, 'Hello, can you help me code?') - + assert result == "I can help you with DeepSeek V3." 
mock_resolve_model_path.assert_called_once_with(TEST_LLAMACPP_MODEL) - + # Verify correct CLI command structure args, kwargs = mock_subprocess.call_args cmd = args[0] @@ -64,69 +65,69 @@ def test_chat_with_llamacpp_success(self, mock_resolve_model_path, mock_subproce assert '-m' in cmd and model_path in cmd assert '--n-gpu-layers' in cmd and '40' in cmd assert '--single-turn' in cmd - + def test_chat_with_llamacpp_model_not_found(self, mock_resolve_model_path): """Test CLI chat when model is not found.""" mock_resolve_model_path.return_value = None - + with pytest.raises(ValueError, match="Model not found: nonexistent-model"): chat_with_llamacpp('nonexistent-model', 'Hello') - + def test_chat_with_llamacpp_subprocess_error(self, mock_resolve_model_path, mock_subprocess): """Test CLI chat when subprocess fails.""" mock_resolve_model_path.return_value = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + error = subprocess.CalledProcessError(1, 'cmd') error.stderr = b'CUDA out of memory' mock_subprocess.side_effect = error - + with pytest.raises(Exception, match=f"Llama.cpp failed for {TEST_LLAMACPP_MODEL}: CUDA out of memory"): chat_with_llamacpp(TEST_LLAMACPP_MODEL, 'Hello') class TestCLIModeRouting: """Test CLI mode routing and fallback logic.""" - + @pytest.fixture(autouse=True) def setup_routing_mocks(self): """Set up common mocks for routing tests.""" - with patch('ai_server.server.chat_with_llamacpp') as mock_chat_llamacpp, \ - patch('ai_server.server.is_llamacpp_available') as mock_available, \ - patch('ai_server.server.chat_with_ollama') as mock_chat_ollama: + with patch('ai_server.server.chat_with_llamacpp') as mock_chat_llamacpp, patch( + 'ai_server.server.is_llamacpp_available' + ) as mock_available, patch('ai_server.server.chat_with_ollama') as mock_chat_ollama: self.mock_chat_llamacpp = mock_chat_llamacpp - self.mock_available = mock_available + self.mock_available = mock_available self.mock_chat_ollama = mock_chat_ollama yield - + def test_cli_mode_uses_llamacpp_when_available(self): """Test CLI mode routes to llama.cpp when model is available.""" self.mock_available.return_value = True self.mock_chat_llamacpp.return_value = "CLI response from DeepSeek V3" - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Write a function', llama_mode='cli') - + assert result == "CLI response from DeepSeek V3" self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL) self.mock_chat_llamacpp.assert_called_once_with(TEST_LLAMACPP_MODEL, 'Write a function') - + def test_cli_mode_fallback_to_ollama_when_unavailable(self): """Test CLI mode falls back to ollama when model not available in llama.cpp.""" self.mock_available.return_value = False self.mock_chat_ollama.return_value = "Ollama response from DeepSeek Coder" - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Help with coding', llama_mode='cli') - + assert result == "Ollama response from DeepSeek Coder" self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL) self.mock_chat_ollama.assert_called_once_with(TEST_OLLAMA_MODEL, 'Help with coding') - + def test_default_mode_is_cli(self): """Test that default mode is CLI when no llama_mode specified.""" self.mock_available.return_value = True self.mock_chat_llamacpp.return_value = "Default CLI mode response" - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Help me') # No llama_mode specified - + assert result == "Default CLI mode response" self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL) 
self.mock_chat_llamacpp.assert_called_once_with(TEST_LLAMACPP_MODEL, 'Help me') @@ -134,35 +135,35 @@ def test_default_mode_is_cli(self): class TestCLIModeIntegration: """Test complete CLI mode integration flows.""" - + def test_complete_cli_flow_with_real_model(self, mock_glob, mock_subprocess): """Test complete CLI flow: model resolution → CLI execution.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + mock_glob.return_value = [model_path] mock_result = MagicMock() mock_result.stdout = b'Complete integration test successful with DeepSeek V3!' mock_subprocess.return_value = mock_result - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Integration test', llama_mode='cli') - + assert result == "Complete integration test successful with DeepSeek V3!" # Verify glob called twice: once for availability check, once for CLI execution assert mock_glob.call_count == 2 mock_subprocess.assert_called_once() - + def test_complete_cli_fallback_flow_to_ollama(self, mock_glob, mock_ollama): """Test complete CLI fallback flow: model not found → fallback to ollama.""" # Mock model not found in llama.cpp mock_glob.return_value = [] - + # Mock successful ollama response mock_response = MagicMock() mock_response.message.content = "Ollama CLI fallback integration test successful!" mock_ollama.return_value = mock_response - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Fallback test', llama_mode='cli') - + assert result == "Ollama CLI fallback integration test successful!" mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_OLLAMA_MODEL}/*.gguf') - mock_ollama.assert_called_once() + mock_ollama.assert_called_once() diff --git a/test/test_core.py b/test/test_core.py index b25dd6c..7e88cb1 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -1,14 +1,11 @@ -import pytest import os -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + +import pytest os.environ.setdefault('REDIS_URL', 'redis://localhost:6379') -from ai_server.server import ( - resolve_model_path, - is_llamacpp_available, - chat_with_ollama -) +from ai_server.server import chat_with_ollama, is_llamacpp_available, resolve_model_path # Test models TEST_LLAMACPP_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' @@ -21,6 +18,7 @@ def mock_glob(): with patch('ai_server.server.glob.glob') as mock: yield mock + @pytest.fixture def mock_ollama(): """Mock ollama.chat for ollama tests.""" @@ -30,73 +28,73 @@ def mock_ollama(): class TestModelResolution: """Test core model resolution functionality.""" - + def test_resolve_model_path_found(self, mock_glob): """Test model path resolution when model exists.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' mock_glob.return_value = [model_path] - + result = resolve_model_path(TEST_LLAMACPP_MODEL) - + assert result == model_path mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/*.gguf') - + def test_resolve_model_path_not_found(self, mock_glob): """Test model path resolution when model doesn't exist.""" mock_glob.return_value = [] - + result = resolve_model_path('nonexistent-model') - + assert result is None - + def test_is_llamacpp_available_true(self): """Test model availability check when model exists.""" with patch('ai_server.server.resolve_model_path') as mock_resolve: mock_resolve.return_value = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + result = is_llamacpp_available(TEST_LLAMACPP_MODEL) - + assert result is True 
mock_resolve.assert_called_once_with(TEST_LLAMACPP_MODEL) - + def test_is_llamacpp_available_false(self): """Test model availability check when model doesn't exist.""" with patch('ai_server.server.resolve_model_path') as mock_resolve: mock_resolve.return_value = None - + result = is_llamacpp_available('nonexistent-model') - + assert result is False class TestOllamaCore: """Test core ollama functionality used as fallback.""" - + def test_chat_with_ollama_success(self, mock_ollama): """Test successful chat with ollama.""" mock_response = MagicMock() mock_response.message.content = "Hello! I'm DeepSeek Coder V2. I can help you with coding tasks." mock_ollama.return_value = mock_response - + result = chat_with_ollama(TEST_OLLAMA_MODEL, 'Help me write a Python function') - + assert result == "Hello! I'm DeepSeek Coder V2. I can help you with coding tasks." mock_ollama.assert_called_once_with( model=TEST_OLLAMA_MODEL, messages=[{'role': 'user', 'content': 'Help me write a Python function'}], - stream=False + stream=False, ) - + def test_chat_with_ollama_service_unavailable(self, mock_ollama): """Test ollama chat when service is unavailable.""" mock_ollama.side_effect = Exception("Ollama service is not running") - + with pytest.raises(Exception, match="Ollama service is not running"): chat_with_ollama(TEST_OLLAMA_MODEL, 'Hello') - + def test_chat_with_ollama_model_not_found(self, mock_ollama): """Test ollama chat when model is not found.""" mock_ollama.side_effect = Exception("model 'nonexistent:latest' not found") - + with pytest.raises(Exception, match="model 'nonexistent:latest' not found"): - chat_with_ollama('nonexistent:latest', 'Hello') + chat_with_ollama('nonexistent:latest', 'Hello') diff --git a/test/test_server_mode.py b/test/test_server_mode.py index 53b4338..194031f 100644 --- a/test/test_server_mode.py +++ b/test/test_server_mode.py @@ -1,13 +1,11 @@ -import pytest import os -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + +import pytest os.environ.setdefault('REDIS_URL', 'redis://localhost:6379') -from ai_server.server import ( - chat_with_llama_server_http, - chat_with_model -) +from ai_server.server import chat_with_llama_server_http, chat_with_model # Test models TEST_LLAMACPP_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' @@ -20,18 +18,21 @@ def mock_requests_post(): with patch('ai_server.server.requests.post') as mock: yield mock + @pytest.fixture def mock_llama_server_url(): """Mock LLAMA_SERVER_URL for server tests.""" with patch('ai_server.server.LLAMA_SERVER_URL', 'http://localhost:8080'): yield + @pytest.fixture def mock_glob(): """Mock glob.glob for model discovery tests.""" with patch('ai_server.server.glob.glob') as mock: yield mock + @pytest.fixture def mock_ollama(): """Mock ollama.chat for fallback tests.""" @@ -41,142 +42,139 @@ def mock_ollama(): class TestLlamaServerHTTP: """Test llama.cpp server HTTP functionality.""" - + def test_chat_with_llama_server_http_success(self, mock_requests_post, mock_llama_server_url): """Test successful HTTP chat with llama-server.""" mock_response = MagicMock() mock_response.status_code = 200 - mock_response.json.return_value = { - 'choices': [{'message': {'content': 'Server response from DeepSeek V3'}}] - } + mock_response.json.return_value = {'choices': [{'message': {'content': 'Server response from DeepSeek V3'}}]} mock_requests_post.return_value = mock_response - + result = chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello from server') - + assert result == "Server response from DeepSeek 
V3" - + # Verify correct API call args, kwargs = mock_requests_post.call_args assert args[0] == 'http://localhost:8080/v1/chat/completions' assert kwargs['json']['model'] == TEST_LLAMACPP_MODEL assert kwargs['json']['messages'][0]['content'] == 'Hello from server' - + def test_chat_with_llama_server_http_no_url(self): """Test HTTP chat when LLAMA_SERVER_URL is not set.""" with patch('ai_server.server.LLAMA_SERVER_URL', None): with pytest.raises(Exception, match="LLAMA_SERVER_URL environment variable not set"): chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello') - + def test_chat_with_llama_server_http_error_response(self, mock_requests_post, mock_llama_server_url): """Test HTTP chat when server returns error.""" mock_response = MagicMock() mock_response.status_code = 500 mock_requests_post.return_value = mock_response - + with pytest.raises(Exception, match="Llama-server HTTP error"): chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello') - + def test_chat_with_llama_server_http_invalid_response_format(self, mock_requests_post, mock_llama_server_url): """Test HTTP chat when server returns invalid response format.""" mock_response = MagicMock() mock_response.status_code = 200 mock_response.json.return_value = {'error': 'Invalid request'} # Missing choices mock_requests_post.return_value = mock_response - + with pytest.raises(Exception, match="Invalid response format from llama-server"): chat_with_llama_server_http(TEST_LLAMACPP_MODEL, 'Hello') class TestServerModeRouting: """Test server mode routing and fallback logic.""" - + @pytest.fixture(autouse=True) def setup_routing_mocks(self): """Set up common mocks for routing tests.""" - with patch('ai_server.server.chat_with_llama_server_http') as mock_chat_server, \ - patch('ai_server.server.is_llamacpp_available') as mock_available, \ - patch('ai_server.server.chat_with_ollama') as mock_chat_ollama, \ - patch('ai_server.server.LLAMA_SERVER_URL', 'http://localhost:8080'): + with patch('ai_server.server.chat_with_llama_server_http') as mock_chat_server, patch( + 'ai_server.server.is_llamacpp_available' + ) as mock_available, patch('ai_server.server.chat_with_ollama') as mock_chat_ollama, patch( + 'ai_server.server.LLAMA_SERVER_URL', 'http://localhost:8080' + ): self.mock_chat_server = mock_chat_server self.mock_available = mock_available self.mock_chat_ollama = mock_chat_ollama yield - + def test_server_mode_uses_llamacpp_when_available(self): """Test server mode routes to llama-server when model is available.""" self.mock_available.return_value = True self.mock_chat_server.return_value = "Server response from DeepSeek V3" - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Explain code', llama_mode='server') - + assert result == "Server response from DeepSeek V3" self.mock_available.assert_called_once_with(TEST_LLAMACPP_MODEL) self.mock_chat_server.assert_called_once_with(TEST_LLAMACPP_MODEL, 'Explain code') - + def test_server_mode_fallback_to_ollama_when_unavailable(self): """Test server mode falls back to ollama when model not available in llama.cpp.""" self.mock_available.return_value = False self.mock_chat_ollama.return_value = "Ollama fallback response" - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Debug code', llama_mode='server') - + assert result == "Ollama fallback response" self.mock_available.assert_called_once_with(TEST_OLLAMA_MODEL) self.mock_chat_ollama.assert_called_once_with(TEST_OLLAMA_MODEL, 'Debug code') - + def test_server_mode_requires_server_url(self): """Test server mode requires LLAMA_SERVER_URL to be set.""" 
with patch('ai_server.server.LLAMA_SERVER_URL', None): self.mock_available.return_value = True - + with pytest.raises(Exception, match="LLAMA_SERVER_URL environment variable not set"): chat_with_model(TEST_LLAMACPP_MODEL, 'Hello', llama_mode='server') - + def test_invalid_llama_mode_raises_error(self): """Test that invalid llama_mode raises ValueError.""" self.mock_available.return_value = True - + with pytest.raises(ValueError, match="Invalid llama_mode: 'invalid'"): chat_with_model(TEST_LLAMACPP_MODEL, 'Hello', llama_mode='invalid') class TestServerModeIntegration: """Test complete server mode integration flows.""" - + def test_complete_server_flow_with_real_model(self, mock_glob, mock_requests_post, mock_llama_server_url): """Test complete server flow: model resolution → HTTP API call.""" model_path = f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/{TEST_LLAMACPP_MODEL}.gguf' - + # Mock model found (only checked once for availability in server mode) mock_glob.return_value = [model_path] - + # Mock successful HTTP response mock_response = MagicMock() mock_response.status_code = 200 - mock_response.json.return_value = { - 'choices': [{'message': {'content': 'Server integration test successful!'}}] - } + mock_response.json.return_value = {'choices': [{'message': {'content': 'Server integration test successful!'}}]} mock_requests_post.return_value = mock_response - + result = chat_with_model(TEST_LLAMACPP_MODEL, 'Integration test', llama_mode='server') - + assert result == "Server integration test successful!" # In server mode, glob.glob only called once for is_llamacpp_available mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_LLAMACPP_MODEL}/*.gguf') mock_requests_post.assert_called_once() - + def test_complete_server_fallback_flow_to_ollama(self, mock_glob, mock_ollama, mock_llama_server_url): """Test complete server fallback flow: model not found → fallback to ollama.""" # Mock model not found in llama.cpp mock_glob.return_value = [] - + # Mock successful ollama response mock_response = MagicMock() mock_response.message.content = "Ollama server fallback integration test successful!" mock_ollama.return_value = mock_response - + result = chat_with_model(TEST_OLLAMA_MODEL, 'Fallback test', llama_mode='server') - + assert result == "Ollama server fallback integration test successful!" mock_glob.assert_called_once_with(f'/data1/GGUF/{TEST_OLLAMA_MODEL}/*.gguf') - mock_ollama.assert_called_once() + mock_ollama.assert_called_once() diff --git a/test/test_system_prompt.py b/test/test_system_prompt.py index 3b8f684..20372d2 100644 --- a/test/test_system_prompt.py +++ b/test/test_system_prompt.py @@ -1,5 +1,6 @@ +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock TEST_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' TEST_SYSTEM_PROMPT = "You are a helpful coding assistant." diff --git a/test/test_system_prompt_api.py b/test/test_system_prompt_api.py index a4c62e3..d84c7d3 100644 --- a/test/test_system_prompt_api.py +++ b/test/test_system_prompt_api.py @@ -1,6 +1,7 @@ -import pytest from unittest.mock import patch +import pytest + TEST_MODEL = 'DeepSeek-V3-0324-UD-IQ2_XXS' TEST_SYSTEM_PROMPT = "You are a helpful coding assistant." 
TEST_USER_CONTENT = "Write a function" @@ -18,6 +19,7 @@ def setup_env(self, monkeypatch): def client(self): """Create test client for Flask app.""" from ai_server.server import app + app.config['TESTING'] = True with app.test_client() as client: yield client @@ -29,23 +31,15 @@ def test_api_with_system_prompt(self, mock_chat, mock_redis, client): mock_redis.get.return_value = b'test_user' mock_chat.return_value = "def function(): pass" - response = client.post('/chat', + response = client.post( + '/chat', headers={'X-API-KEY': 'test-key'}, - json={ - 'model': TEST_MODEL, - 'content': TEST_USER_CONTENT, - 'system_prompt': TEST_SYSTEM_PROMPT - } + json={'model': TEST_MODEL, 'content': TEST_USER_CONTENT, 'system_prompt': TEST_SYSTEM_PROMPT}, ) assert response.status_code == 200 - mock_chat.assert_called_once_with( - TEST_MODEL, - TEST_USER_CONTENT, - 'cli', - TEST_SYSTEM_PROMPT - ) + mock_chat.assert_called_once_with(TEST_MODEL, TEST_USER_CONTENT, 'cli', TEST_SYSTEM_PROMPT) @patch('ai_server.server.REDIS_CONNECTION') @patch('ai_server.server.chat_with_model') @@ -54,35 +48,23 @@ def test_api_without_system_prompt(self, mock_chat, mock_redis, client): mock_redis.get.return_value = b'test_user' mock_chat.return_value = "def function(): pass" - response = client.post('/chat', - headers={'X-API-KEY': 'test-key'}, - json={ - 'model': TEST_MODEL, - 'content': TEST_USER_CONTENT - } + response = client.post( + '/chat', headers={'X-API-KEY': 'test-key'}, json={'model': TEST_MODEL, 'content': TEST_USER_CONTENT} ) assert response.status_code == 200 - mock_chat.assert_called_once_with( - TEST_MODEL, - TEST_USER_CONTENT, - 'cli', - None - ) + mock_chat.assert_called_once_with(TEST_MODEL, TEST_USER_CONTENT, 'cli', None) @patch('ai_server.server.REDIS_CONNECTION') def test_api_authentication_still_required(self, mock_redis, client): """Test that authentication is still required with system_prompt.""" mock_redis.get.return_value = None - response = client.post('/chat', + response = client.post( + '/chat', headers={'X-API-KEY': 'invalid-key'}, - json={ - 'model': TEST_MODEL, - 'content': TEST_USER_CONTENT, - 'system_prompt': TEST_SYSTEM_PROMPT - } + json={'model': TEST_MODEL, 'content': TEST_USER_CONTENT, 'system_prompt': TEST_SYSTEM_PROMPT}, ) assert response.status_code == 500