37 changes: 37 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,37 @@
name: Tests

on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
push:
branches:
- main

jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.13"]

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/[email protected]
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/[email protected]
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
pip install -e .[dev]
- name: Run tests
run: |
pytest -vv
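For reference, the workflow's install-and-test steps correspond to roughly the following local commands (a sketch assuming Python 3.13 and a fresh checkout; the pip cache step has no direct local equivalent):

```console
$ python -m pip install --upgrade pip setuptools wheel
$ pip install -e .[dev]
$ pytest -vv
```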
32 changes: 32 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,32 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0 # Use the ref you want to point at
hooks:
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-illegal-windows-names
- id: check-json
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
- id: check-yaml
- id: destroyed-symlinks
- id: detect-private-key
- id: end-of-file-fixer
- id: forbid-submodules
- id: trailing-whitespace

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 25.1.0
hooks:
- id: black
args: [--safe, --quiet]

- repo: https://github.com/pycqa/isort
rev: 6.0.1
hooks:
- id: isort


ci:
autoupdate_schedule: quarterly
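To exercise the same hooks locally against the whole tree rather than only staged files, the usual pre-commit invocation (not part of this diff) would be:

```console
$ pre-commit install
$ pre-commit run --all-files
```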
22 changes: 21 additions & 1 deletion README.md
@@ -1 +1,21 @@
# ai-server
# ai-server

## Developers

To install project dependencies, including development dependencies:

```console
$ pip install -e .[dev]
```

To install pre-commit hooks:

```console
$ pre-commit install
```

To run the test suite:

```console
$ pytest
```
1 change: 0 additions & 1 deletion ai_server/__main__.py
@@ -1,4 +1,3 @@
from .server import app


app.run(debug=True, host="0.0.0.0")
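The entry point still starts the Flask development server when the package is executed directly. A minimal local run, assuming a reachable Redis instance since redis_helper reads REDIS_URL at import time, might look like:

```console
$ REDIS_URL=redis://localhost:6379 python -m ai_server
```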
1 change: 0 additions & 1 deletion ai_server/redis_helper.py
@@ -2,7 +2,6 @@

import redis


REDIS_URL = os.environ["REDIS_URL"]

REDIS_CONNECTION = redis.Redis.from_url(REDIS_URL)
85 changes: 39 additions & 46 deletions ai_server/server.py
@@ -1,11 +1,12 @@
from flask import Flask, request, jsonify, abort
import ollama
import subprocess
import glob
import os
import requests
import subprocess
from typing import Optional

import ollama
import requests
from dotenv import load_dotenv
import glob
from flask import Flask, abort, jsonify, request

from .redis_helper import REDIS_CONNECTION

@@ -23,7 +24,12 @@

# Llama server configuration
_llama_server_url = os.getenv('LLAMA_SERVER_URL') # e.g., http://localhost:8080 or localhost:8080
LLAMA_SERVER_URL = f"http://{_llama_server_url}" if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) else _llama_server_url
LLAMA_SERVER_URL = (
f"http://{_llama_server_url}"
if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://'))
else _llama_server_url
)


def _build_messages(content: str, system_prompt: Optional[str] = None) -> list:
"""Build messages list with optional system prompt."""
@@ -33,26 +39,24 @@ def _build_messages(content: str, system_prompt: Optional[str] = None) -> list:
messages.append({'role': 'user', 'content': content})
return messages

def chat_with_llama_server_http(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str:

def chat_with_llama_server_http(
model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300
) -> str:
"""Handle chat using llama-server HTTP API."""
if not LLAMA_SERVER_URL:
raise Exception("LLAMA_SERVER_URL environment variable not set")

try:
messages = _build_messages(content, system_prompt)

response = requests.post(
f'{LLAMA_SERVER_URL}/v1/chat/completions',
json={
'model': model,
'messages': messages,
'stream': False,
'max_tokens': 512
},
json={'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512},
headers={'Content-Type': 'application/json'},
timeout=timeout
timeout=timeout,
)

if response.status_code == 200:
data = response.json()
if 'choices' in data and len(data['choices']) > 0:
@@ -61,68 +65,55 @@ def chat_with_llama_server_http(model: str, content: str, system_prompt: Optiona
raise Exception("Invalid response format from llama-server")
else:
raise Exception(f"Llama-server HTTP error")

except requests.Timeout:
raise Exception(f"Llama-server request timed out for model {model}")
except requests.RequestException as e:
raise Exception(f"Llama-server request failed: {str(e)}")


def resolve_model_path(model: str) -> Optional[str]:
"""Resolve model name to full GGUF file path using glob pattern."""
pattern = os.path.join(GGUF_DIR, model, "*.gguf")
matches = glob.glob(pattern)
return matches[0] if matches else None


def is_llamacpp_available(model: str) -> bool:
"""Check if model is available in llama.cpp."""
return resolve_model_path(model) is not None


def chat_with_ollama(model: str, content: str, system_prompt: Optional[str] = None) -> str:
"""Handle chat using ollama."""
messages = _build_messages(content, system_prompt)

response = ollama.chat(
model=model,
messages=messages,
stream=False
)

response = ollama.chat(model=model, messages=messages, stream=False)
return response.message.content


def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str:
"""Handle chat using llama.cpp CLI."""
model_path = resolve_model_path(model)

if not model_path:
raise ValueError(f"Model not found: {model}")

cmd = [
LLAMA_CPP_CLI,
'-m', model_path,
'--n-gpu-layers', '40',
'-p', content,
'-n', '512',
'--single-turn'
]


cmd = [LLAMA_CPP_CLI, '-m', model_path, '--n-gpu-layers', '40', '-p', content, '-n', '512', '--single-turn']

# Add system prompt if provided
if system_prompt:
cmd.extend(['--system-prompt', system_prompt])

try:
result = subprocess.run(
cmd,
capture_output=True,
text=False,
timeout=timeout,
check=True
)

result = subprocess.run(cmd, capture_output=True, text=False, timeout=timeout, check=True)

stdout_text = result.stdout.decode('utf-8', errors='replace')

# Strip whitespace and return the response
response = stdout_text.strip()
return response if response else "No response generated."

except subprocess.TimeoutExpired:
raise Exception(f"Llama.cpp request timed out for model {model}")
except subprocess.CalledProcessError as e:
@@ -133,6 +124,7 @@ def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] =
except FileNotFoundError:
raise Exception("Llama.cpp CLI not found")


def chat_with_model(model: str, content: str, llama_mode: str = "cli", system_prompt: Optional[str] = None) -> str:
"""Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt."""
if is_llamacpp_available(model):
@@ -171,13 +163,14 @@ def chat():
content = params.get('content', '')
llama_mode = params.get('llama_mode', 'cli')
system_prompt = params.get('system_prompt')

if not content.strip():
abort(400, description='Missing prompt content')

response_content = chat_with_model(model, content, llama_mode, system_prompt)
return jsonify(response_content)


@app.errorhandler(Exception)
def internal_error(error):
return jsonify({"error": str(error)}), 500
12 changes: 12 additions & 0 deletions pyproject.toml
@@ -26,7 +26,19 @@ dependencies = [
"requests",
]

[project.optional-dependencies]
dev = [
"pre-commit",
"pytest",
]

[project.urls]
Homepage = "https://github.com/MarkUsProject/ai-server"
Issues = "https://github.com/MarkUsProject/ai-server/issues"

[tool.black]
line-length = 120
skip-string-normalization = true

[tool.isort]
profile = "black"