Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
# Get your X.AI API key from: https://console.x.ai/
XAI_API_KEY=your_xai_api_key_here

# Get your Nebius Token Factory API key from: https://tokenfactory.nebius.com/
# Provides access to open-source models: Qwen3, DeepSeek, Llama, GLM, GPT-OSS
NEBIUS_API_KEY=your_nebius_api_key_here
# Optional: path to a custom Nebius model catalog (JSON)
# NEBIUS_MODELS_CONFIG_PATH=/path/to/custom_nebius_models.json
# Optional: comma-separated list restricting which Nebius models may be used
# NEBIUS_ALLOWED_MODELS=nebius-qwen3,nebius-deepseek,nebius-llama

# Get your DIAL API key and configure host URL
# DIAL provides unified access to multiple AI models through a single API
DIAL_API_KEY=your_dial_api_key_here
Expand Down Expand Up @@ -105,6 +111,21 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
# - grok3 (shorthand for grok-3)
# - grokfast (shorthand for grok-3-fast)
#
# Supported Nebius Token Factory models:
# - Qwen/Qwen3-235B-A22B-Instruct-2507 (262K context, flagship reasoning)
# - Qwen/Qwen3-235B-A22B-Thinking-2507 (262K context, with extended thinking)
# - openai/gpt-oss-120b (128K context, OpenAI's open-source model)
# - deepseek-ai/DeepSeek-R1-0528 (128K context, reasoning with visible thought)
# - deepseek-ai/DeepSeek-V3-0324 (128K context, general purpose)
# - meta-llama/Llama-3.3-70B-Instruct (128K context, Meta flagship)
# - THUDM/GLM-4.5 (128K context, vision + function calling)
# - nebius-qwen3 (shorthand for Qwen3-235B)
# - nebius-deepseek (shorthand for DeepSeek-V3)
# - nebius-deepseek-r1 (shorthand for DeepSeek-R1)
# - nebius-llama (shorthand for Llama-3.3-70B)
# - nebius-gpt-oss (shorthand for GPT-OSS-120B)
# - nebius-glm (shorthand for GLM-4.5)
#
# Supported DIAL models (when available in your DIAL deployment):
# - o3-2025-04-16 (200K context, latest O3 release)
# - o4-mini-2025-04-16 (200K context, latest O4 mini)
Expand Down
17 changes: 17 additions & 0 deletions conf/custom_models.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,23 @@
"max_image_size_mb": 0.0,
"description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)",
"intelligence_score": 6
},
{
"model_name": "deepseek-v3.2:cloud",
"aliases": [
"deepseek-v3",
"deepseek",
"ds-v3"
],
"context_window": 163840,
"max_output_tokens": 65536,
"supports_extended_thinking": true,
"supports_json_mode": true,
"supports_function_calling": true,
"supports_images": false,
"max_image_size_mb": 0.0,
"description": "DeepSeek V3.2 via Ollama cloud - advanced reasoning with 160K context",
"intelligence_score": 19
}
]
}
3 changes: 3 additions & 0 deletions conf/dial_models.json
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
"model_name": "gemini-2.5-pro-preview-03-25-google-search",
"friendly_name": "DIAL (Gemini 2.5 Pro Search)",
"aliases": ["gemini-2.5-pro-search"],
"excluded_tools": ["consensus"],
"intelligence_score": 17,
"description": "Gemini 2.5 Pro with Google Search via DIAL",
"context_window": 1000000,
Expand All @@ -137,6 +138,7 @@
"model_name": "gemini-2.5-pro-preview-05-06",
"friendly_name": "DIAL (Gemini 2.5 Pro)",
"aliases": ["gemini-2.5-pro"],
"excluded_tools": ["consensus"],
"intelligence_score": 18,
"description": "Gemini 2.5 Pro via DIAL - Deep reasoning",
"context_window": 1000000,
Expand All @@ -153,6 +155,7 @@
"model_name": "gemini-2.5-flash-preview-05-20",
"friendly_name": "DIAL (Gemini Flash 2.5)",
"aliases": ["gemini-2.5-flash"],
"excluded_tools": ["consensus"],
"intelligence_score": 10,
"description": "Gemini 2.5 Flash via DIAL - Ultra-fast",
"context_window": 1000000,
Expand Down
28 changes: 26 additions & 2 deletions conf/gemini_models.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,28 @@
}
},
"models": [
{
"model_name": "gemini-3.1-pro",
"friendly_name": "Gemini Pro 3.1",
"aliases": [
"gemini3.1",
"gemini-3.1"
],
"intelligence_score": 20,
"description": "Gemini 3.1 Pro (1M context) - Google's latest flagship with deep reasoning and multimodal support",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 32768,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"allow_code_generation": true,
"max_image_size_mb": 32.0
},
{
"model_name": "gemini-3-pro-preview",
"friendly_name": "Gemini Pro 3.0 Preview",
Expand All @@ -33,7 +55,7 @@
"gemini3",
"gemini-pro"
],
"intelligence_score": 18,
"intelligence_score": 17,
"description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
"context_window": 1048576,
"max_output_tokens": 65536,
Expand All @@ -54,7 +76,8 @@
"aliases": [
"gemini-pro-2.5"
],
"intelligence_score": 18,
"excluded_tools": ["consensus"],
"intelligence_score": 17,
"description": "Older Model. 1M context - Complex problems, architecture, deep analysis",
"context_window": 1048576,
"max_output_tokens": 65536,
Expand Down Expand Up @@ -116,6 +139,7 @@
"flash",
"flash2.5"
],
"excluded_tools": ["consensus"],
"intelligence_score": 10,
"description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"context_window": 1048576,
Expand Down
Loading
Loading