Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
334 changes: 286 additions & 48 deletions conf/openrouter_models.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "9.8.2"
__version__ = "9.9.0"
# Last update date in ISO format
__updated__ = "2025-12-15"
__updated__ = "2026-03-06"
# Primary maintainer
__author__ = "Fahad Gilani"

Expand Down
24 changes: 16 additions & 8 deletions docs/custom_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,26 @@ The curated defaults in `conf/openrouter_models.json` include popular entries su

| Alias | Canonical Model | Highlights |
|-------|-----------------|------------|
| `opus`, `claude-opus` | `anthropic/claude-opus-4.1` | Flagship Claude reasoning model with vision |
| `sonnet`, `sonnet4.5` | `anthropic/claude-sonnet-4.5` | Balanced Claude with high context window |
| `opus`, `claude-opus` | `anthropic/claude-opus-4.6` | Latest Anthropic flagship (1M context, vision). `opus4.5` → 4.5, `opus4.1` → 4.1 |
| `sonnet` | `anthropic/claude-sonnet-4.6` | Frontier Sonnet (1M context, vision). `sonnet4.5` → 4.5 |
| `haiku` | `anthropic/claude-3.5-haiku` | Fast Claude option with vision |
| `pro`, `gemini` | `google/gemini-2.5-pro` | Frontier Gemini with extended thinking |
| `pro`, `gemini` | `google/gemini-3.1-pro-preview` | Latest Gemini Pro with 1M context, thinking. `gemini3.0` → 3.0 |
| `flash` | `google/gemini-2.5-flash` | Ultra-fast Gemini with vision |
| `mistral` | `mistralai/mistral-large-2411` | Frontier Mistral (text only) |
| `llama3` | `meta-llama/llama-3-70b` | Large open-weight text model |
| `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | Search-augmented model |
| `gpt5`, `gpt5.4` | `openai/gpt-5.4` | Unified frontier model (1M context, 128K output). `gpt5.0` → 5.0 |
| `gpt5pro` | `openai/gpt-5.4-pro` | Enhanced reasoning variant (1M context). `gpt5.2-pro` → 5.2 Pro |
| `codex`, `codex-5.3` | `openai/gpt-5.3-codex` | Latest agentic coding model (Responses API). `codex-5.0` → 5.0 |
| `gpt5.2`, `gpt-5.2`, `5.2` | `openai/gpt-5.2` | Flagship GPT-5.2 with reasoning and vision |
| `gpt5.1-codex`, `codex-5.1` | `openai/gpt-5.1-codex` | Agentic coding specialization (Responses API) |
| `codex-mini`, `gpt5.1-codex-mini` | `openai/gpt-5.1-codex-mini` | Cost-efficient Codex variant with streaming |
| `codex-mini` | `openai/gpt-5.1-codex-mini` | Cost-efficient Codex variant with streaming |
| `mistral` | `mistralai/mistral-large-2411` | Frontier Mistral (text only) |
| `devstral` | `mistralai/devstral-2512` | 123B agentic coding model (262K context) |
| `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
| `deepseek-v3`, `dsv3` | `deepseek/deepseek-v3.2-exp` | DeepSeek V3.2 with strong reasoning (164K context) |
| `qwen` | `qwen/qwen3.6-plus` | Alibaba's latest frontier model (1M context). `qwen3.5` → 3.5 |
| `minimax` | `minimax/minimax-m2.7` | 1T param model matching GPT-5.3-Codex. `m2.5` → 2.5 |
| `hermes` | `nousresearch/hermes-4-405b` | Hybrid reasoning with deliberative alignment (131K context) |
| `qwen-coder` | `qwen/qwen3-coder` | Frontier open-weight coding model, 480B MoE (262K context) |
| `flash-lite` | `google/gemini-3.1-flash-lite-preview` | Cost-efficient Gemini flash variant (1M context) |

Consult the JSON file for the full list, aliases, and capability flags. Add new entries as OpenRouter releases additional models.

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[project]
name = "pal-mcp-server"
version = "9.8.2"
version = "9.9.0"
description = "AI-powered MCP server with multiple model providers"
requires-python = ">=3.9"
requires-python = ">=3.10"
dependencies = [
"mcp>=1.0.0",
"google-genai>=1.19.0",
Expand Down
64 changes: 62 additions & 2 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,66 @@ async def handle_list_tools() -> list[Tool]:
return tools


async def _execute_with_progress(coro):
    """
    Run a tool coroutine while sending MCP progress notifications every ~15 seconds.

    This prevents Claude Desktop's ~4 minute timeout from killing long-running
    tool calls (e.g. slow model API responses that take 4-8 minutes).

    Args:
        coro: The already-created tool coroutine (e.g. ``tool.execute(arguments)``).
            It is either awaited directly (no session available) or wrapped in a
            task so we can heartbeat while it runs.

    Returns:
        Whatever the wrapped coroutine returns.

    Raises:
        Whatever the wrapped coroutine raises; also re-raises
        ``asyncio.CancelledError`` after cancelling the inner task if this
        wrapper itself is cancelled.
    """
    # Pull the progress token / session / request id from the current MCP
    # request context. Any of these may be absent: request_context raises
    # LookupError outside a request, and meta/progressToken are optional.
    progress_token = None
    session = None
    request_id = None
    try:
        ctx = server.request_context
        if ctx.meta:
            progress_token = ctx.meta.progressToken
        session = ctx.session
        request_id = ctx.request_id
    except (LookupError, AttributeError):
        pass

    # Without a session there is nobody to notify — just run the tool inline.
    if session is None:
        return await coro

    # Claude Desktop omits progressToken but still times out after ~4 min.
    # Use request_id as fallback — sending any notification keeps stdio alive.
    if progress_token is None:
        progress_token = request_id if request_id is not None else "pal-progress"

    # Run the tool as a task so we can wake up every 15s to heartbeat.
    task = asyncio.create_task(coro)
    tick = 0  # number of 15-second intervals elapsed
    notification_failed = False  # latch: warn about send failures only once
    try:
        while not task.done():
            try:
                # shield() so the timeout cancels only this wait, never the
                # underlying tool task; a fresh shield is created per iteration.
                await asyncio.wait_for(asyncio.shield(task), timeout=15.0)
            except asyncio.TimeoutError:
                # Still running — emit a heartbeat and keep waiting.
                tick += 1
                elapsed = tick * 15
                try:
                    await session.send_progress_notification(
                        progress_token=progress_token,
                        progress=float(tick),
                        message=f"Model is thinking... ({elapsed}s elapsed)",
                        related_request_id=request_id,
                    )
                except Exception as exc:
                    # A failed notification must not kill the tool call;
                    # log the first failure, stay quiet about the rest.
                    if not notification_failed:
                        logger.warning(f"Progress notification failed: {exc}")
                        notification_failed = True
    except asyncio.CancelledError:
        # Our caller was cancelled: propagate cancellation to the tool task
        # and drain it so its teardown exceptions don't go unobserved.
        task.cancel()
        try:
            await task
        except Exception as exc:
            logger.debug(f"Task raised during cancellation teardown: {exc}")
        raise

    # Task completed: return its value (or re-raise its exception).
    return task.result()


@server.call_tool()
async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
"""
Expand Down Expand Up @@ -807,7 +867,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
if not tool.requires_model():
logger.debug(f"Tool {name} doesn't require model resolution - skipping model validation")
# Execute tool directly without model context
return await tool.execute(arguments)
return await _execute_with_progress(tool.execute(arguments))

# Handle auto mode at MCP boundary - resolve to specific model
if model_name.lower() == "auto":
Expand Down Expand Up @@ -862,7 +922,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
raise ToolExecutionError(ToolOutput(**file_size_check).model_dump_json())

# Execute tool with pre-resolved model context
result = await tool.execute(arguments)
result = await _execute_with_progress(tool.execute(arguments))
logger.info(f"Tool '{name}' execution completed")

# Log completion to activity file
Expand Down
1 change: 0 additions & 1 deletion simulator_tests/test_chat_simple_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
- Conversation context preservation across turns
"""


from .conversation_base_test import ConversationBaseTest


Expand Down
1 change: 0 additions & 1 deletion simulator_tests/test_conversation_chain_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
- Properly traverse parent relationships for history reconstruction
"""


from .conversation_base_test import ConversationBaseTest


Expand Down
1 change: 0 additions & 1 deletion simulator_tests/test_cross_tool_comprehensive.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
5. Proper tool chaining with context
"""


from .conversation_base_test import ConversationBaseTest


Expand Down
1 change: 0 additions & 1 deletion simulator_tests/test_ollama_custom_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
- Model alias resolution for local models
"""


from .base_test import BaseSimulatorTest


Expand Down
1 change: 0 additions & 1 deletion simulator_tests/test_openrouter_fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
- Auto mode correctly selects OpenRouter models
"""


from .base_test import BaseSimulatorTest


Expand Down
1 change: 0 additions & 1 deletion simulator_tests/test_openrouter_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
- Error handling when models are not available
"""


from .base_test import BaseSimulatorTest


Expand Down
1 change: 0 additions & 1 deletion simulator_tests/test_xai_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
- API integration and response validation
"""


from .base_test import BaseSimulatorTest


Expand Down
10 changes: 5 additions & 5 deletions tests/test_custom_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def test_model_alias_resolution(self):
provider = CustomProvider(api_key="test-key", base_url="http://localhost:11434/v1")

# Test that aliases resolve properly
# "llama" now resolves to "meta-llama/llama-3-70b" (the OpenRouter model)
resolved = provider._resolve_model_name("llama")
assert resolved == "meta-llama/llama-3-70b"
# "deepseek" resolves to "deepseek/deepseek-r1-0528" (the OpenRouter model)
resolved = provider._resolve_model_name("deepseek")
assert resolved == "deepseek/deepseek-r1-0528"

# Test local model alias
resolved_local = provider._resolve_model_name("local-llama")
Expand Down Expand Up @@ -216,8 +216,8 @@ def custom_provider_factory(api_key=None):
custom_provider = custom_provider_factory()
openrouter_provider = OpenRouterProvider(api_key="test-openrouter-key")

assert not custom_provider.validate_model_name("llama")
assert openrouter_provider.validate_model_name("llama")
assert not custom_provider.validate_model_name("deepseek")
assert openrouter_provider.validate_model_name("deepseek")


class TestConfigureProvidersFunction:
Expand Down
12 changes: 4 additions & 8 deletions tests/test_directory_expansion_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,27 +37,23 @@ def temp_directory_with_files(self, project_path):
files = []
for i in range(5):
swift_file = temp_path / f"File{i}.swift"
swift_file.write_text(
f"""
swift_file.write_text(f"""
import Foundation

class TestClass{i} {{
func testMethod{i}() -> String {{
return "test{i}"
}}
}}
"""
)
""")
files.append(str(swift_file))

# Create a Python file as well
python_file = temp_path / "helper.py"
python_file.write_text(
"""
python_file.write_text("""
def helper_function():
return "helper"
"""
)
""")
files.append(str(python_file))

try:
Expand Down
6 changes: 2 additions & 4 deletions tests/test_docker_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,11 @@ def temp_project_dir():

# Create base files
(temp_path / "server.py").write_text("# Mock server.py")
(temp_path / "Dockerfile").write_text(
"""
(temp_path / "Dockerfile").write_text("""
FROM python:3.11-slim
COPY server.py /app/
CMD ["python", "/app/server.py"]
"""
)
""")

yield temp_path

Expand Down
18 changes: 12 additions & 6 deletions tests/test_model_resolution_bug.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,27 @@ def setup_method(self):
self.consensus_tool = ConsensusTool()

def test_openrouter_registry_resolves_gemini_alias(self):
"""Test that OpenRouter registry properly resolves 'gemini' to 'google/gemini-3-pro-preview'."""
"""Test that OpenRouter registry properly resolves 'gemini' to the latest Gemini Pro."""
# Test the registry directly
provider = OpenRouterProvider("test_key")

# Test alias resolution
# Test alias resolution - 'gemini' and 'pro' now point to 3.1
resolved_model_name = provider._resolve_model_name("gemini")
assert (
resolved_model_name == "google/gemini-3-pro-preview"
), f"Expected 'google/gemini-3-pro-preview', got '{resolved_model_name}'"
resolved_model_name == "google/gemini-3.1-pro-preview"
), f"Expected 'google/gemini-3.1-pro-preview', got '{resolved_model_name}'"

# Test that it also works with 'pro' alias
resolved_pro = provider._resolve_model_name("pro")
assert (
resolved_pro == "google/gemini-3-pro-preview"
), f"Expected 'google/gemini-3-pro-preview', got '{resolved_pro}'"
resolved_pro == "google/gemini-3.1-pro-preview"
), f"Expected 'google/gemini-3.1-pro-preview', got '{resolved_pro}'"

# Test version-specific alias still works for backward compat
resolved_3_0 = provider._resolve_model_name("gemini3.0")
assert (
resolved_3_0 == "google/gemini-3-pro-preview"
), f"Expected 'google/gemini-3-pro-preview', got '{resolved_3_0}'"

# DELETED: test_provider_registry_returns_openrouter_for_gemini
# This test had a flawed mock setup - it mocked get_provider() but called get_provider_for_model().
Expand Down
38 changes: 22 additions & 16 deletions tests/test_openrouter_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,28 +78,32 @@ def test_model_alias_resolution(self):
"""Test model alias resolution."""
provider = OpenRouterProvider(api_key="test-key")

# Test alias resolution
assert provider._resolve_model_name("opus") == "anthropic/claude-opus-4.5"
# Test alias resolution - generic aliases point to latest versions
assert provider._resolve_model_name("opus") == "anthropic/claude-opus-4.6"
assert provider._resolve_model_name("opus4.6") == "anthropic/claude-opus-4.6"
assert provider._resolve_model_name("opus4.5") == "anthropic/claude-opus-4.5"
assert provider._resolve_model_name("opus4.1") == "anthropic/claude-opus-4.1"
assert provider._resolve_model_name("sonnet") == "anthropic/claude-sonnet-4.5"
assert provider._resolve_model_name("sonnet") == "anthropic/claude-sonnet-4.6"
assert provider._resolve_model_name("sonnet4.6") == "anthropic/claude-sonnet-4.6"
assert provider._resolve_model_name("sonnet4.5") == "anthropic/claude-sonnet-4.5"
assert provider._resolve_model_name("sonnet4.1") == "anthropic/claude-sonnet-4.1"
assert provider._resolve_model_name("o3") == "openai/o3"
assert provider._resolve_model_name("o3-mini") == "openai/o3-mini"
assert provider._resolve_model_name("o3mini") == "openai/o3-mini"
assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
assert provider._resolve_model_name("haiku") == "anthropic/claude-3.5-haiku"
assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411"
assert provider._resolve_model_name("grok-4") == "x-ai/grok-4"
assert provider._resolve_model_name("grok4") == "x-ai/grok-4"
assert provider._resolve_model_name("grok") == "x-ai/grok-4"
assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528"
assert provider._resolve_model_name("r1") == "deepseek/deepseek-r1-0528"
assert provider._resolve_model_name("pro") == "google/gemini-3.1-pro-preview"
assert provider._resolve_model_name("gemini3.0") == "google/gemini-3-pro-preview"

# Test case-insensitive
assert provider._resolve_model_name("OPUS") == "anthropic/claude-opus-4.5"
assert provider._resolve_model_name("SONNET") == "anthropic/claude-sonnet-4.5"
assert provider._resolve_model_name("OPUS") == "anthropic/claude-opus-4.6"
assert provider._resolve_model_name("SONNET") == "anthropic/claude-sonnet-4.6"
assert provider._resolve_model_name("O3") == "openai/o3"
assert provider._resolve_model_name("Mistral") == "mistralai/mistral-large-2411"

Expand Down Expand Up @@ -307,18 +311,18 @@ def test_registry_capabilities(self):

registry = OpenRouterModelRegistry()

# Test known model (opus alias now points to 4.5)
# Test known model (opus alias now points to 4.6)
caps = registry.get_capabilities("opus")
assert caps is not None
assert caps.model_name == "anthropic/claude-opus-4.5"
assert caps.context_window == 200000 # Claude's context window
assert caps.model_name == "anthropic/claude-opus-4.6"
assert caps.context_window == 1000000 # Claude 4.6 context window

# Test using full model name for 4.5
caps = registry.get_capabilities("anthropic/claude-opus-4.5")
assert caps is not None
assert caps.model_name == "anthropic/claude-opus-4.5"

# Test opus4.5 alias
# Test opus4.5 alias (backward compat)
caps = registry.get_capabilities("opus4.5")
assert caps is not None
assert caps.model_name == "anthropic/claude-opus-4.5"
Expand All @@ -343,12 +347,14 @@ def test_multiple_aliases_same_model(self):

registry = OpenRouterModelRegistry()

# All these should resolve to Claude Sonnet 4.5
sonnet_45_aliases = ["sonnet", "sonnet4.5"]
for alias in sonnet_45_aliases:
config = registry.resolve(alias)
assert config is not None
assert config.model_name == "anthropic/claude-sonnet-4.5"
# 'sonnet' now resolves to 4.6, 'sonnet4.5' still resolves to 4.5
config = registry.resolve("sonnet")
assert config is not None
assert config.model_name == "anthropic/claude-sonnet-4.6"

config = registry.resolve("sonnet4.5")
assert config is not None
assert config.model_name == "anthropic/claude-sonnet-4.5"

# Test Sonnet 4.1 alias
config = registry.resolve("sonnet4.1")
Expand Down
Loading
Loading