BeehiveInnovations · Tony363 · Feb 7, 2026 · Apr 6, 2026
diff --git a/.env.example b/.env.example
@@ -29,6 +29,12 @@ AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
 # Get your X.AI API key from: https://console.x.ai/
 XAI_API_KEY=your_xai_api_key_here
 
+# Get your Nebius Token Factory API key from: https://tokenfactory.nebius.com/
+# Provides access to open-source models: Qwen3, DeepSeek, Llama, GLM, GPT-OSS
+NEBIUS_API_KEY=your_nebius_api_key_here
+# NEBIUS_MODELS_CONFIG_PATH=/path/to/custom_nebius_models.json  # Optional: Custom model catalog
+# NEBIUS_ALLOWED_MODELS=nebius-qwen3,nebius-deepseek,nebius-llama  # Optional: Comma-separated allow-list of Nebius models
+
 # Get your DIAL API key and configure host URL
 # DIAL provides unified access to multiple AI models through a single API
 DIAL_API_KEY=your_dial_api_key_here
@@ -105,6 +111,21 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
 #   - grok3           (shorthand for grok-3)
 #   - grokfast        (shorthand for grok-3-fast)
 #
+# Supported Nebius Token Factory models:
+#   - Qwen/Qwen3-235B-A22B-Instruct-2507 (262K context, flagship reasoning)
+#   - Qwen/Qwen3-235B-A22B-Thinking-2507 (262K context, with extended thinking)
+#   - openai/gpt-oss-120b     (128K context, OpenAI's open-source model)
+#   - deepseek-ai/DeepSeek-R1-0528 (128K context, reasoning with visible thought)
+#   - deepseek-ai/DeepSeek-V3-0324 (128K context, general purpose)
+#   - meta-llama/Llama-3.3-70B-Instruct (128K context, Meta flagship)
+#   - THUDM/GLM-4.5           (128K context, vision + function calling)
+#   - nebius-qwen3            (shorthand for Qwen3-235B)
+#   - nebius-deepseek         (shorthand for DeepSeek-V3)
+#   - nebius-deepseek-r1      (shorthand for DeepSeek-R1)
+#   - nebius-llama            (shorthand for Llama-3.3-70B)
+#   - nebius-gpt-oss          (shorthand for GPT-OSS-120B)
+#   - nebius-glm              (shorthand for GLM-4.5)
+#
 # Supported DIAL models (when available in your DIAL deployment):
 #   - o3-2025-04-16   (200K context, latest O3 release)
 #   - o4-mini-2025-04-16 (200K context, latest O4 mini)

diff --git a/conf/custom_models.json b/conf/custom_models.json
@@ -36,6 +36,23 @@
       "max_image_size_mb": 0.0,
       "description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)",
       "intelligence_score": 6
+    },
+    {
+      "model_name": "deepseek-v3.2:cloud",
+      "aliases": [
+        "deepseek-v3",
+        "deepseek",
+        "ds-v3"
+      ],
+      "context_window": 163840,
+      "max_output_tokens": 65536,
+      "supports_extended_thinking": true,
+      "supports_json_mode": true,
+      "supports_function_calling": true,
+      "supports_images": false,
+      "max_image_size_mb": 0.0,
+      "description": "DeepSeek V3.2 via Ollama cloud - advanced reasoning, 160K context window (text-only)",
+      "intelligence_score": 19
     }
   ]
 }
diff --git a/conf/dial_models.json b/conf/dial_models.json
@@ -121,6 +121,7 @@
       "model_name": "gemini-2.5-pro-preview-03-25-google-search",
       "friendly_name": "DIAL (Gemini 2.5 Pro Search)",
       "aliases": ["gemini-2.5-pro-search"],
+      "excluded_tools": ["consensus"],
       "intelligence_score": 17,
       "description": "Gemini 2.5 Pro with Google Search via DIAL",
       "context_window": 1000000,
@@ -137,6 +138,7 @@
       "model_name": "gemini-2.5-pro-preview-05-06",
       "friendly_name": "DIAL (Gemini 2.5 Pro)",
       "aliases": ["gemini-2.5-pro"],
+      "excluded_tools": ["consensus"],
       "intelligence_score": 18,
       "description": "Gemini 2.5 Pro via DIAL - Deep reasoning",
       "context_window": 1000000,
@@ -153,6 +155,7 @@
       "model_name": "gemini-2.5-flash-preview-05-20",
       "friendly_name": "DIAL (Gemini Flash 2.5)",
       "aliases": ["gemini-2.5-flash"],
+      "excluded_tools": ["consensus"],
       "intelligence_score": 10,
       "description": "Gemini 2.5 Flash via DIAL - Ultra-fast",
       "context_window": 1000000,

diff --git a/conf/gemini_models.json b/conf/gemini_models.json
@@ -25,6 +25,28 @@
     }
   },
   "models": [
+    {
+      "model_name": "gemini-3.1-pro",
+      "friendly_name": "Gemini Pro 3.1",
+      "aliases": [
+        "gemini3.1",
+        "gemini-3.1"
+      ],
+      "intelligence_score": 20,
+      "description": "Gemini 3.1 Pro (1M context) - Google's latest flagship with deep reasoning and multimodal support",
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "max_thinking_tokens": 32768,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "allow_code_generation": true,
+      "max_image_size_mb": 32.0
+    },
     {
       "model_name": "gemini-3-pro-preview",
       "friendly_name": "Gemini Pro 3.0 Preview",
@@ -33,7 +55,7 @@
         "gemini3",
         "gemini-pro"
       ],
-      "intelligence_score": 18,
+      "intelligence_score": 17,
       "description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
       "context_window": 1048576,
       "max_output_tokens": 65536,
@@ -54,7 +76,8 @@
       "aliases": [
         "gemini-pro-2.5"
       ],
-      "intelligence_score": 18,
+      "excluded_tools": ["consensus"],
+      "intelligence_score": 17,
       "description": "Older Model. 1M context - Complex problems, architecture, deep analysis",
       "context_window": 1048576,
       "max_output_tokens": 65536,
@@ -116,6 +139,7 @@
         "flash",
         "flash2.5"
       ],
+      "excluded_tools": ["consensus"],
       "intelligence_score": 10,
       "description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
       "context_window": 1048576,