37 changes: 37 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,37 @@
name: Tests

on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
push:
branches:
- main

jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.13"]

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/[email protected]
with:
python-version: ${{ matrix.python-version }}
- name: Cache pip
uses: actions/[email protected]
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
pip install -e .[dev]
- name: Run tests
run: |
pytest -vv
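For reference, the workflow's install-and-test steps correspond to roughly the following local commands (a sketch assuming Python 3.13 and a fresh checkout; the pip cache step has no direct local equivalent):

```console
$ python -m pip install --upgrade pip setuptools wheel
$ pip install -e .[dev]
$ pytest -vv
```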
32 changes: 32 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,32 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0 # Use the ref you want to point at
hooks:
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-illegal-windows-names
- id: check-json
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
- id: check-yaml
- id: destroyed-symlinks
- id: detect-private-key
- id: end-of-file-fixer
- id: forbid-submodules
- id: trailing-whitespace

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 25.1.0
hooks:
- id: black
args: [--safe, --quiet]

- repo: https://github.com/pycqa/isort
rev: 6.0.1
hooks:
- id: isort


ci:
autoupdate_schedule: quarterly
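To exercise the same hooks locally against the whole tree rather than only staged files, the usual pre-commit invocation (not part of this diff) would be:

```console
$ pre-commit install
$ pre-commit run --all-files
```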
22 changes: 21 additions & 1 deletion README.md
@@ -1 +1,21 @@
# ai-server
# ai-server

## Developers

To install project dependencies, including development dependencies:

```console
$ pip install -e .[dev]
```

To install pre-commit hooks:

```console
$ pre-commit install
```

To run the test suite:

```console
$ pytest
```
1 change: 0 additions & 1 deletion ai_server/__main__.py
@@ -1,4 +1,3 @@
from .server import app


app.run(debug=True, host="0.0.0.0")
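The entry point still starts the Flask development server when the package is executed directly. A minimal local run, assuming a reachable Redis instance since redis_helper reads REDIS_URL at import time, might look like:

```console
$ REDIS_URL=redis://localhost:6379 python -m ai_server
```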
1 change: 0 additions & 1 deletion ai_server/redis_helper.py
@@ -2,7 +2,6 @@

import redis


REDIS_URL = os.environ["REDIS_URL"]

REDIS_CONNECTION = redis.Redis.from_url(REDIS_URL)
85 changes: 39 additions & 46 deletions ai_server/server.py
@@ -1,11 +1,12 @@
from flask import Flask, request, jsonify, abort
import ollama
import subprocess
import glob
import os
import requests
import subprocess
from typing import Optional

import ollama
import requests
from dotenv import load_dotenv
import glob
from flask import Flask, abort, jsonify, request

from .redis_helper import REDIS_CONNECTION

@@ -23,7 +24,12 @@

# Llama server configuration
_llama_server_url = os.getenv('LLAMA_SERVER_URL') # e.g., http://localhost:8080 or localhost:8080
LLAMA_SERVER_URL = f"http://{_llama_server_url}" if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) else _llama_server_url
LLAMA_SERVER_URL = (
f"http://{_llama_server_url}"
if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://'))
else _llama_server_url
)


def _build_messages(content: str, system_prompt: Optional[str] = None) -> list:
"""Build messages list with optional system prompt."""
@@ -33,26 +39,24 @@ def _build_messages(content: str, system_prompt: Optional[str] = None) -> list:
messages.append({'role': 'user', 'content': content})
return messages

def chat_with_llama_server_http(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str:

def chat_with_llama_server_http(
model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300
) -> str:
"""Handle chat using llama-server HTTP API."""
if not LLAMA_SERVER_URL:
raise Exception("LLAMA_SERVER_URL environment variable not set")

try:
messages = _build_messages(content, system_prompt)

response = requests.post(
f'{LLAMA_SERVER_URL}/v1/chat/completions',
json={
'model': model,
'messages': messages,
'stream': False,
'max_tokens': 512
},
json={'model': model, 'messages': messages, 'stream': False, 'max_tokens': 512},
headers={'Content-Type': 'application/json'},
timeout=timeout
timeout=timeout,
)

if response.status_code == 200:
data = response.json()
if 'choices' in data and len(data['choices']) > 0:
@@ -61,68 +65,55 @@ def chat_with_llama_server_http(model: str, content: str, system_prompt: Optiona
raise Exception("Invalid response format from llama-server")
else:
raise Exception(f"Llama-server HTTP error")

except requests.Timeout:
raise Exception(f"Llama-server request timed out for model {model}")
except requests.RequestException as e:
raise Exception(f"Llama-server request failed: {str(e)}")


def resolve_model_path(model: str) -> Optional[str]:
"""Resolve model name to full GGUF file path using glob pattern."""
pattern = os.path.join(GGUF_DIR, model, "*.gguf")
matches = glob.glob(pattern)
return matches[0] if matches else None


def is_llamacpp_available(model: str) -> bool:
"""Check if model is available in llama.cpp."""
return resolve_model_path(model) is not None


def chat_with_ollama(model: str, content: str, system_prompt: Optional[str] = None) -> str:
"""Handle chat using ollama."""
messages = _build_messages(content, system_prompt)

response = ollama.chat(
model=model,
messages=messages,
stream=False
)

response = ollama.chat(model=model, messages=messages, stream=False)
return response.message.content


def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] = None, timeout: int = 300) -> str:
"""Handle chat using llama.cpp CLI."""
model_path = resolve_model_path(model)

if not model_path:
raise ValueError(f"Model not found: {model}")

cmd = [
LLAMA_CPP_CLI,
'-m', model_path,
'--n-gpu-layers', '40',
'-p', content,
'-n', '512',
'--single-turn'
]


cmd = [LLAMA_CPP_CLI, '-m', model_path, '--n-gpu-layers', '40', '-p', content, '-n', '512', '--single-turn']

# Add system prompt if provided
if system_prompt:
cmd.extend(['--system-prompt', system_prompt])

try:
result = subprocess.run(
cmd,
capture_output=True,
text=False,
timeout=timeout,
check=True
)

result = subprocess.run(cmd, capture_output=True, text=False, timeout=timeout, check=True)

stdout_text = result.stdout.decode('utf-8', errors='replace')

# Strip whitespace and return the response
response = stdout_text.strip()
return response if response else "No response generated."

except subprocess.TimeoutExpired:
raise Exception(f"Llama.cpp request timed out for model {model}")
except subprocess.CalledProcessError as e:
@@ -133,6 +124,7 @@ def chat_with_llamacpp(model: str, content: str, system_prompt: Optional[str] =
except FileNotFoundError:
raise Exception("Llama.cpp CLI not found")


def chat_with_model(model: str, content: str, llama_mode: str = "cli", system_prompt: Optional[str] = None) -> str:
"""Route chat request based on llama_mode: server (external), cli, or ollama fallback; and with optional system prompt."""
if is_llamacpp_available(model):
@@ -171,13 +163,14 @@ def chat():
content = params.get('content', '')
llama_mode = params.get('llama_mode', 'cli')
system_prompt = params.get('system_prompt')

if not content.strip():
abort(400, description='Missing prompt content')

response_content = chat_with_model(model, content, llama_mode, system_prompt)
return jsonify(response_content)


@app.errorhandler(Exception)
def internal_error(error):
return jsonify({"error": str(error)}), 500
12 changes: 12 additions & 0 deletions pyproject.toml
@@ -26,7 +26,19 @@ dependencies = [
"requests",
]

[project.optional-dependencies]
dev = [
"pre-commit",
"pytest",
]

[project.urls]
Homepage = "https://github.com/MarkUsProject/ai-server"
Issues = "https://github.com/MarkUsProject/ai-server/issues"

[tool.black]
line-length = 120
skip-string-normalization = true

[tool.isort]
profile = "black"