
Commit bacdef6

Merge pull request #2157 from mito-ds/dev
Release Jan 23, 2026
2 parents 3aa79da + 1f1c301

83 files changed: +3811, -908 lines changed


.cursor/commands/verify.md

Lines changed: 17 additions & 10 deletions
@@ -42,17 +42,24 @@ After each interaction:
 
 ### 7. Rebuild and Re-test
 
-After making fixes:
-
-1. **Setup Environment**:
-   - For frontend changes: Wait a few seconds for rebuild to complete
-   - For backend changes: Restart and relaunch the server
-
-2. **Refresh browser**:
-   - Take new snapshot
+After making code changes and before testing again:
+
+1. **For Frontend Changes** (TypeScript/React/CSS/JavaScript):
+   - Wait a few seconds for the build to complete (check the TypeScript terminal)
+   - **Refresh the browser** to load the updated code:
+     - Use browser navigation to reload the page, or
+     - Use browser refresh functionality
+   - Retest the feature
+
+
+2. **For Backend Changes** (Python/server code):
+   - **Shut down the Jupyter server** (stop the running JupyterLab process)
+   - **Relaunch the Jupyter server** to load the updated backend code
+   - Navigate to the JupyterLab URL again
+   - Wait 3-5 seconds for full page load
+   - Take a new snapshot
    - Re-test the feature
-   - Verify fix worked
 
 ### 8. Iterate Until Complete
 
-Repeat steps 4-7 until the feature works correctly.
+Repeat steps 4-7 until the feature works correctly.
.github/workflows/test-litellm-llm-providers.yml

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
+name: Test - Mito AI Frontend Playwright with LiteLLM
+
+on:
+  push:
+    branches: [ dev ]
+    paths:
+      - 'mito-ai/**'
+      - 'tests/llm_providers_tests/litellm_llm_providers.spec.ts'
+      - '.github/workflows/test-litellm-llm-providers.yml'
+  pull_request:
+    paths:
+      - 'mito-ai/**'
+      - 'tests/llm_providers_tests/litellm_llm_providers.spec.ts'
+      - '.github/workflows/test-litellm-llm-providers.yml'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test-mitoai-frontend-jupyterlab-litellm:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 60
+    strategy:
+      matrix:
+        python-version: ['3.10', '3.12']
+      fail-fast: false
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+          cache-dependency-path: |
+            mito-ai/setup.py
+            tests/requirements.txt
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: 'npm'
+          cache-dependency-path: mito-ai/package-lock.json
+      - name: Upgrade pip
+        run: |
+          python -m pip install --upgrade pip
+      - name: Install dependencies
+        run: |
+          cd tests
+          bash mac-setup.sh
+      - name: Install mitosheet-helper-enterprise
+        run: |
+          cd tests
+          source venv/bin/activate
+          pip install mitosheet-helper-enterprise
+      - name: Install JupyterLab
+        run: |
+          python -m pip install jupyterlab
+      - name: Install Node.js dependencies
+        run: |
+          cd mito-ai
+          jlpm install
+      - name: Setup JupyterLab
+        run: |
+          cd tests
+          source venv/bin/activate
+          pip install setuptools==68.0.0
+          cd ../mito-ai
+          jupyter labextension develop . --overwrite
+          jupyter server extension enable --py mito_ai
+      - name: Start a server and run LiteLLM provider tests
+        run: |
+          cd tests
+          source venv/bin/activate
+          jupyter lab --config jupyter_server_test_config.py &
+          jlpm run test:litellm-llm-providers
+        env:
+          LITELLM_BASE_URL: ${{ secrets.LITELLM_BASE_URL }}
+          LITELLM_MODELS: ${{ secrets.LITELLM_MODELS }}
+          LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
+      - name: Upload test-results
+        uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: mitoai-jupyterlab-playwright-litellm-report-${{ matrix.python-version }}-${{ github.run_id }}
+          path: tests/playwright-report/
+          retention-days: 14

mito-ai/.eslintignore

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+node_modules
+venv
+dist
+coverage
+**/*.d.ts
+tests
+**/__tests__
+ui-tests
+lib
+buildcache
+*.tsbuildinfo

mito-ai/docs/litellm-deployment.md

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
+# LiteLLM Enterprise Deployment Guide
+
+This guide explains how to configure Mito AI for enterprise deployments with strict data privacy and security requirements.
+
+## Overview
+
+Enterprise mode in Mito AI provides:
+
+1. **LLM Model Lockdown**: AI calls ONLY go to IT-approved LLM models
+2. **Telemetry Elimination**: No telemetry is sent to Mito servers
+3. **User Protection**: End users cannot change to unapproved LLM models
+
+## Enabling Enterprise Mode
+
+Enterprise mode is automatically enabled when the `mitosheet-helper-enterprise` package is installed. This package must be installed by your IT team with appropriate permissions.
+
+```bash
+pip install mitosheet-helper-enterprise
+```
+
+## LiteLLM Configuration
+
+When enterprise mode is enabled, you can optionally configure LiteLLM to route all AI calls to your approved LLM endpoint. LiteLLM configuration is **optional**: if it is not configured, users continue using the normal Mito server flow.
+
+### Environment Variables
+
+Configure the following environment variables on the Jupyter server:
+
+#### IT-Controlled Variables (Set by IT Team)
+
+- **`LITELLM_BASE_URL`**: The base URL of your LiteLLM server endpoint
+  - Example: `https://your-litellm-server.com`
+  - Must be OpenAI-compatible
+
+- **`LITELLM_MODELS`**: Comma-separated list of approved model names
+  - Model names must include a provider prefix (e.g., `"openai/gpt-4o"`)
+  - Example: `"openai/gpt-4o,openai/gpt-4o-mini,anthropic/claude-3-5-sonnet"`
+  - Format: Comma-separated string (whitespace is automatically trimmed)
+  - The first model in the list is the default model.
+
+#### User-Controlled Variables (Set by Each End User)
+
+- **`LITELLM_API_KEY`**: User's API key for authentication with the LiteLLM server
+  - Each user sets their own API key
+  - Keys are never sent to Mito servers
+
+## Security Guarantees
+
+1. **Defense in Depth**:
+   - Backend validates all model selections (even if the frontend is bypassed)
+   - Frontend UI only shows approved models
+   - All API calls go to the LiteLLM base URL
+   - If a user does not set a valid API key, the app still does not send requests to the Mito server; it shows an error message instead.
+
+
+2. **Telemetry Elimination**:
+   - Early return in telemetry functions when enterprise mode is active
+   - No analytics library calls are made
+   - No network requests to external telemetry servers
+
+3. **Model Lockdown**:
+   - Backend validates all model selections against the approved list
+   - Backend rejects model change requests for unapproved models
+   - Frontend shows only approved models in the model selector
+
+4. **API Key Management**:
+   - Users set their own `LITELLM_API_KEY` environment variable for authentication
+   - IT controls the LiteLLM endpoint and approved models; users control authentication
+   - Keys are never sent to Mito servers
+
+## Verification
+
+### Check Enterprise Mode Status
+
+When you start JupyterLab, check the server logs for:
+
+```
+Enterprise mode enabled
+LiteLLM configured: endpoint=https://your-litellm-server.com, models=['openai/gpt-4o', 'openai/gpt-4o-mini']
+```
+
+### Verify Model Selection
+
+1. Open the Mito AI chat in JupyterLab
+2. Click on the model selector
+3. Verify only approved models from `LITELLM_MODELS` are displayed
+4. Verify you cannot select unapproved models
mito-ai/mito_ai/__init__.py

Lines changed: 16 additions & 22 deletions
@@ -4,11 +4,11 @@
 from typing import List, Dict
 from jupyter_server.utils import url_path_join
 from mito_ai.completions.handlers import CompletionHandler
-from mito_ai.completions.providers import OpenAIProvider
+from mito_ai.provider_manager import ProviderManager
 from mito_ai.completions.message_history import GlobalMessageHistory
 from mito_ai.app_deploy.handlers import AppDeployHandler
-from mito_ai.streamlit_preview.handlers import StreamlitPreviewHandler
 from mito_ai.log.urls import get_log_urls
+from mito_ai.utils.litellm_utils import is_litellm_configured
 from mito_ai.version_check import VersionCheckHandler
 from mito_ai.db.urls import get_db_urls
 from mito_ai.settings.urls import get_settings_urls
@@ -20,6 +20,8 @@
 from mito_ai.user.urls import get_user_urls
 from mito_ai.chat_history.urls import get_chat_history_urls
 from mito_ai.chart_wizard.urls import get_chart_wizard_urls
+from mito_ai.utils.version_utils import is_enterprise
+from mito_ai import constants
 
 # Force Matplotlib to use the Jupyter inline backend.
 # Background: importing Streamlit sets os.environ["MPLBACKEND"] = "Agg" very early.
@@ -33,16 +35,6 @@
 import os
 os.environ["MPLBACKEND"] = "module://matplotlib_inline.backend_inline"
 
-try:
-    from _version import __version__
-except ImportError:
-    # Fallback when using the package in dev mode without installing in editable mode with pip. It is highly recommended to install
-    # the package from a stable release or in editable mode: https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs
-    import warnings
-
-    warnings.warn("Importing 'mito_ai' outside a proper installation.")
-    __version__ = "dev"
-
 def _jupyter_labextension_paths() -> List[Dict[str, str]]:
     return [{"src": "labextension", "dest": "mito_ai"}]
 
@@ -65,7 +57,7 @@ def _load_jupyter_server_extension(server_app) -> None: # type: ignore
     web_app = server_app.web_app
     base_url = web_app.settings["base_url"]
 
-    open_ai_provider = OpenAIProvider(config=server_app.config)
+    provider_manager = ProviderManager(config=server_app.config)
 
     # Create a single GlobalMessageHistory instance for the entire server
     # This ensures thread-safe access to the .mito/ai-chats directory
@@ -76,18 +68,13 @@ def _load_jupyter_server_extension(server_app) -> None: # type: ignore
         (
             url_path_join(base_url, "mito-ai", "completions"),
             CompletionHandler,
-            {"llm": open_ai_provider, "message_history": global_message_history},
+            {"llm": provider_manager, "message_history": global_message_history},
         ),
         (
            url_path_join(base_url, "mito-ai", "app-deploy"),
            AppDeployHandler,
            {}
        ),
-        (
-            url_path_join(base_url, "mito-ai", "streamlit-preview"),
-            StreamlitPreviewHandler,
-            {}
-        ),
         (
             url_path_join(base_url, "mito-ai", "version-check"),
             VersionCheckHandler,
@@ -104,13 +91,20 @@ def _load_jupyter_server_extension(server_app) -> None: # type: ignore
     handlers.extend(get_db_urls(base_url)) # type: ignore
     handlers.extend(get_settings_urls(base_url)) # type: ignore
     handlers.extend(get_rules_urls(base_url)) # type: ignore
-    handlers.extend(get_log_urls(base_url, open_ai_provider.key_type)) # type: ignore
+    handlers.extend(get_log_urls(base_url, provider_manager.key_type)) # type: ignore
     handlers.extend(get_auth_urls(base_url)) # type: ignore
-    handlers.extend(get_streamlit_preview_urls(base_url)) # type: ignore
+    handlers.extend(get_streamlit_preview_urls(base_url, provider_manager)) # type: ignore
     handlers.extend(get_file_uploads_urls(base_url)) # type: ignore
     handlers.extend(get_user_urls(base_url)) # type: ignore
     handlers.extend(get_chat_history_urls(base_url, global_message_history)) # type: ignore
-    handlers.extend(get_chart_wizard_urls(base_url, open_ai_provider)) # type: ignore
+    handlers.extend(get_chart_wizard_urls(base_url, provider_manager)) # type: ignore
 
     web_app.add_handlers(host_pattern, handlers)
+
+    # Log enterprise mode status and LiteLLM configuration
+    if is_enterprise():
+        server_app.log.info("Enterprise mode enabled")
+        if is_litellm_configured():
+            server_app.log.info(f"LiteLLM configured: endpoint={constants.LITELLM_BASE_URL}, models={constants.LITELLM_MODELS}")
+
     server_app.log.info("Loaded the mito_ai server extension")
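
The new logging at the bottom of `_load_jupyter_server_extension` gates the LiteLLM message on `is_litellm_configured()`, whose body is not part of this commit. A plausible sketch, assuming it only tests the two IT-controlled constants the log line prints:

```python
# Hypothetical sketch of mito_ai.utils.litellm_utils.is_litellm_configured.
# The real function is not shown in this commit and may perform more checks.
from mito_ai import constants

def is_litellm_configured() -> bool:
    # Treat LiteLLM routing as configured once IT has set both the endpoint
    # and a non-empty approved-model list.
    return bool(constants.LITELLM_BASE_URL) and bool(constants.LITELLM_MODELS)
```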

mito-ai/mito_ai/anthropic_client.py

Lines changed: 24 additions & 14 deletions
@@ -9,7 +9,7 @@
 from mito_ai.completions.models import ResponseFormatInfo, CompletionReply, CompletionStreamChunk, CompletionItem, MessageType
 from mito_ai.completions.prompt_builders.prompt_section_registry import get_max_trim_after_messages
 from openai.types.chat import ChatCompletionMessageParam
-from mito_ai.utils.anthropic_utils import get_anthropic_completion_from_mito_server, select_correct_model, stream_anthropic_completion_from_mito_server, get_anthropic_completion_function_params
+from mito_ai.utils.anthropic_utils import get_anthropic_completion_from_mito_server, select_correct_model, stream_anthropic_completion_from_mito_server, get_anthropic_completion_function_params, LARGE_CONTEXT_MODEL, EXTENDED_CONTEXT_BETA
 
 # Max tokens is a required parameter for the Anthropic API.
 # We set it to a high number so that we can edit large code cells
@@ -220,7 +220,10 @@ def __init__(self, api_key: Optional[str], timeout: int = 30, max_retries: int =
         self.max_retries = max_retries
         self.client: Optional[anthropic.Anthropic]
         if api_key:
-            self.client = anthropic.Anthropic(api_key=api_key)
+            # Use a higher timeout to avoid the 10-minute streaming requirement for long requests
+            # The default SDK timeout is 600s (10 minutes), but we set it higher for agent mode
+            # TODO: We should update agent mode to use streaming like anthropic suggests
+            self.client = anthropic.Anthropic(api_key=api_key, timeout=1200.0) # 20 minutes
         else:
             self.client = None
 
@@ -249,7 +252,8 @@ async def request_completions(
         if self.api_key:
             # Unpack provider_data for direct API call
             assert self.client is not None
-            response = self.client.messages.create(**provider_data)
+            # Beta API accepts MessageParam (compatible at runtime with BetaMessageParam)
+            response = self.client.beta.messages.create(**provider_data) # type: ignore[arg-type]
 
             if provider_data.get("tool_choice") is not None:
                 result = extract_and_parse_anthropic_json_response(response)
@@ -284,21 +288,27 @@ async def stream_completions(self, messages: List[ChatCompletionMessageParam], m
 
         if self.api_key:
             assert self.client is not None
-            stream = self.client.messages.create(
-                model=model,
-                max_tokens=MAX_TOKENS,
-                temperature=0,
-                system=anthropic_system_prompt,
-                messages=anthropic_messages,
-                stream=True
-            )
+            # Beta API accepts MessageParam (compatible at runtime with BetaMessageParam)
+            # Enable extended context beta when using LARGE_CONTEXT_MODEL
+            create_params = {
+                "model": model,
+                "max_tokens": MAX_TOKENS,
+                "temperature": 0,
+                "system": anthropic_system_prompt,
+                "messages": anthropic_messages, # type: ignore[arg-type]
+                "stream": True
+            }
+            if model == LARGE_CONTEXT_MODEL:
+                create_params["betas"] = [EXTENDED_CONTEXT_BETA]
+            stream = self.client.beta.messages.create(**create_params) # type: ignore[call-overload]
 
             for chunk in stream:
-                if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
-                    content = chunk.delta.text
+                # Type checking for beta API streaming chunks (runtime type checking, types are compatible)
+                if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta": # type: ignore[union-attr]
+                    content = chunk.delta.text # type: ignore[union-attr]
                     accumulated_response += content
 
-                is_finished = chunk.type == "message_stop"
+                is_finished = chunk.type == "message_stop" # type: ignore[union-attr]
 
                 reply_fn(CompletionStreamChunk(
                     parent_id=message_id,