140 changes: 140 additions & 0 deletions IDEAL_FINAL_STATE.md
@@ -0,0 +1,140 @@
# Dispatcher Refactoring: Ideal State

**Goal:** Replace 47-provider `if/elif` chain with an O(1) dispatcher lookup.

---

## Impact

**Performance**

- Lookup: O(n) → O(1)
- Average speedup: 24x
- Worst case: 47x
- The if/elif chain is effectively a linear search over all providers, so each new provider adds to the worst-case lookup time; a dispatch-table lookup stays constant (see the sketch below)
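
A rough way to sanity-check these numbers is a micro-benchmark comparing a linear scan over 47 names against a dict lookup for the last provider. This is a hypothetical illustration, not part of the PR; provider names and handlers are stand-ins, and absolute timings will differ from the real chain:

```python
# Hypothetical micro-benchmark: linear scan (the if/elif chain) vs. dict lookup.
import timeit

providers = [f"provider_{i}" for i in range(47)]
handlers = {name: (lambda **ctx: None) for name in providers}
pairs = list(handlers.items())  # simulates the if/elif chain, checked in order

def linear_dispatch(name):
    for candidate, handler in pairs:   # O(n): walks every branch until a match
        if candidate == name:
            return handler
    raise ValueError(name)

def table_dispatch(name):
    return handlers[name]              # O(1): single hash lookup

worst_case = providers[-1]             # provider at the end of the chain
print("linear:", timeit.timeit(lambda: linear_dispatch(worst_case), number=100_000))
print("table: ", timeit.timeit(lambda: table_dispatch(worst_case), number=100_000))
```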

---

## Current State

```python
def completion(...):
# 1,416 lines: setup, validation (KEEP)

# 2,300 lines: provider routing (REPLACE)
if custom_llm_provider == "azure":
# 120 lines
elif custom_llm_provider == "anthropic":
# 58 lines
# ... 45 more elif blocks ...
```

---

## Target State

```python
def completion(...):
# Setup, validation (unchanged)

# Single dispatcher call (replaces all if/elif)
response = ProviderDispatcher.dispatch(
custom_llm_provider=custom_llm_provider,
model=model,
messages=messages,
# ... pass all params ...
)
return response
```
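
During the migration, the dispatcher can sit in front of the old chain rather than replacing it outright. A minimal sketch of that fallback pattern, relying on the `ValueError` contract documented in `provider_dispatcher.py`; `_legacy_provider_routing` is a hypothetical name for the existing if/elif code:

```python
# Migration-period sketch: migrated providers go through the dispatcher,
# everything else falls back to the existing if/elif chain until it is ported.
try:
    response = ProviderDispatcher.dispatch(
        custom_llm_provider=custom_llm_provider,
        model=model,
        messages=messages,
        # ... pass all params ...
    )
except ValueError:
    # Raised by dispatch() for providers not yet in the table; a real
    # implementation might check table membership instead of catching this.
    response = _legacy_provider_routing(custom_llm_provider, model, messages)
return response
```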

---

## Progress

**Current (POC)**

- OpenAI migrated
- 99 lines removed
- All tests passing

---

## Detailed Final Structure

### main.py Structure (After Full Migration)

```python
# ========================================
# ENDPOINT FUNCTIONS (~2,800 lines total)
# ========================================

def completion(...): # ~500 lines
# Setup (400 lines)
# Dispatch (30 lines)
# Error handling (70 lines)

def embedding(...): # ~150 lines
# Setup (100 lines)
# Dispatch (20 lines)
# Error handling (30 lines)

def image_generation(...): # ~100 lines
# Setup (70 lines)
# Dispatch (20 lines)
# Error handling (10 lines)

def transcription(...): # ~150 lines
# Simpler - fewer providers

def speech(...): # ~150 lines
# Simpler - fewer providers

# Other helper functions (1,750 lines)
# ========================================
# TOTAL: ~2,800 lines (from 6,272)
# ========================================
```

### provider_dispatcher.py Structure

```python
# ========================================
# PROVIDER DISPATCHER (~3,500 lines total)
# ========================================

class ProviderDispatcher:
"""Unified dispatcher for all endpoints"""

# COMPLETION HANDLERS (~2,000 lines)
_completion_dispatch = {
"openai": _handle_openai_completion, # DONE
"azure": _handle_azure_completion,
"anthropic": _handle_anthropic_completion,
# ... 44 more
}

# EMBEDDING HANDLERS (~800 lines)
_embedding_dispatch = {
"openai": _handle_openai_embedding,
"azure": _handle_azure_embedding,
"vertex_ai": _handle_vertex_embedding,
# ... 21 more
}

# IMAGE GENERATION HANDLERS (~400 lines)
_image_dispatch = {
"openai": _handle_openai_image,
"azure": _handle_azure_image,
# ... 13 more
}

# SHARED UTILITIES (~300 lines)
@staticmethod
def _get_openai_credentials(**ctx):
"""Shared across completion, embedding, image_gen"""
pass

@staticmethod
def _get_azure_credentials(**ctx):
"""Shared across completion, embedding, image_gen"""
pass
```
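
One of the shared utilities above can be grounded directly in the handler code further down: the OpenAI credential fallback chain used inline by `_handle_openai` could be factored out so the embedding and image handlers reuse it. A sketch, shown as a plain function for brevity (in the dispatcher it would be a `@staticmethod`):

```python
# Sketch of _get_openai_credentials: the same api_base / organization / api_key
# fallbacks used inline in _handle_openai, factored into one reusable helper.
import litellm
from litellm.secret_managers.main import get_secret


def _get_openai_credentials(**ctx):
    api_base = (
        ctx.get("api_base")
        or litellm.api_base
        or get_secret("OPENAI_BASE_URL")
        or get_secret("OPENAI_API_BASE")
        or "https://api.openai.com/v1"
    )
    organization = (
        ctx.get("organization")
        or litellm.organization
        or get_secret("OPENAI_ORGANIZATION")
        or None
    )
    api_key = (
        ctx.get("api_key")
        or litellm.api_key
        or litellm.openai_key
        or get_secret("OPENAI_API_KEY")
    )
    return api_base, organization, api_key
```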
257 changes: 257 additions & 0 deletions litellm/llms/provider_dispatcher.py
Contributor

do we need a new 'dispatch' here vs. just using the providerconfig.get_chat_provider_config?

def get_provider_chat_config( # noqa: PLR0915

Reference implementation on rerank:

rerank_provider_config: BaseRerankConfig = (

Contributor

+1 can we just re-use the existing OpenAI config file to implement the dispatcher?

Collaborator Author

Great idea, I hadn't seen that on the codebase.

@@ -0,0 +1,257 @@
"""
Provider Dispatcher - O(1) provider routing for completion()

Replaces the O(n) if/elif chain in main.py with a fast dispatch table.
This allows adding providers without modifying the main completion() function.

Usage:
response = ProviderDispatcher.dispatch(
custom_llm_provider="azure",
model=model,
messages=messages,
...
)
"""

from typing import Union
from litellm.types.utils import ModelResponse
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper


class ProviderDispatcher:
"""
Fast O(1) provider routing using a dispatch table.

    Starts with OpenAI as a proof of concept; the remaining 46 providers will be added incrementally.
"""

_dispatch_table = None # Lazy initialization

@classmethod
def _initialize_dispatch_table(cls):
"""Initialize dispatch table on first use"""
if cls._dispatch_table is not None:
return

# All OpenAI-compatible providers use the same handler
cls._dispatch_table = {
"openai": cls._handle_openai,
"custom_openai": cls._handle_openai,
"deepinfra": cls._handle_openai,
"perplexity": cls._handle_openai,
"nvidia_nim": cls._handle_openai,
"cerebras": cls._handle_openai,
"baseten": cls._handle_openai,
"sambanova": cls._handle_openai,
"volcengine": cls._handle_openai,
"anyscale": cls._handle_openai,
"together_ai": cls._handle_openai,
"nebius": cls._handle_openai,
"wandb": cls._handle_openai,
# TODO: Add remaining providers incrementally
# "azure": cls._handle_azure,
# "anthropic": cls._handle_anthropic,
# "bedrock": cls._handle_bedrock,
# ... etc
}

@classmethod
def dispatch(cls, custom_llm_provider: str, **context) -> Union[ModelResponse, CustomStreamWrapper]:
"""
Dispatch to the appropriate provider handler.

Args:
custom_llm_provider: Provider name (e.g., 'azure', 'openai')
**context: All parameters from completion() - model, messages, api_key, etc.

Returns:
ModelResponse or CustomStreamWrapper for streaming

Raises:
ValueError: If provider not in dispatch table (use old if/elif as fallback)
"""
cls._initialize_dispatch_table()

# _dispatch_table is guaranteed to be initialized after _initialize_dispatch_table()
assert cls._dispatch_table is not None, "Dispatch table should be initialized"

handler = cls._dispatch_table.get(custom_llm_provider)
if handler is None:
raise ValueError(
f"Provider '{custom_llm_provider}' not yet migrated to dispatch table. "
f"Available providers: {list(cls._dispatch_table.keys())}"
)

return handler(**context)

@staticmethod
def _handle_openai(**ctx) -> Union[ModelResponse, CustomStreamWrapper]:
"""
Handle OpenAI completions.

Complete logic extracted from main.py lines 2029-2135
"""
# CIRCULAR IMPORT WORKAROUND:
# We cannot directly import OpenAIChatCompletion class here because:
# 1. main.py imports from provider_dispatcher.py (this file)
# 2. provider_dispatcher.py would import from openai.py
# 3. openai.py might import from main.py -> circular dependency
#
# SOLUTION: Use the module-level instances that are already created in main.py
# These instances are created at module load time (lines 235, 265) and are
# available via litellm.main module reference.
#
# This is "hacky" but necessary because:
# - We're refactoring a 6,000+ line file incrementally
# - Breaking circular imports requires careful ordering
# - Using existing instances avoids recreating handler objects
# - Future refactoring can move these to a proper registry pattern

import litellm
from litellm.secret_managers.main import get_secret, get_secret_bool
from litellm.utils import add_openai_metadata
import openai

# Access pre-instantiated handlers from main.py (created at lines 235, 265)
from litellm import main as litellm_main
openai_chat_completions = litellm_main.openai_chat_completions
base_llm_http_handler = litellm_main.base_llm_http_handler

# Extract context
model = ctx['model']
messages = ctx['messages']
api_key = ctx.get('api_key')
api_base = ctx.get('api_base')
headers = ctx.get('headers')
model_response = ctx['model_response']
optional_params = ctx['optional_params']
litellm_params = ctx['litellm_params']
logging = ctx['logging_obj']
acompletion = ctx.get('acompletion', False)
timeout = ctx.get('timeout')
client = ctx.get('client')
extra_headers = ctx.get('extra_headers')
print_verbose = ctx.get('print_verbose')
logger_fn = ctx.get('logger_fn')
custom_llm_provider = ctx.get('custom_llm_provider', 'openai')
shared_session = ctx.get('shared_session')
custom_prompt_dict = ctx.get('custom_prompt_dict')
encoding = ctx.get('encoding')
stream = ctx.get('stream')
provider_config = ctx.get('provider_config')
metadata = ctx.get('metadata')
organization = ctx.get('organization')

# Get API base with fallbacks
api_base = (
api_base
or litellm.api_base
or get_secret("OPENAI_BASE_URL")
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)

# Get organization
organization = (
organization
or litellm.organization
or get_secret("OPENAI_ORGANIZATION")
or None
)
openai.organization = organization

# Get API key
api_key = (
api_key
or litellm.api_key
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
)

headers = headers or litellm.headers

if extra_headers is not None:
optional_params["extra_headers"] = extra_headers

# PREVIEW: Allow metadata to be passed to OpenAI
if litellm.enable_preview_features and metadata is not None:
optional_params["metadata"] = add_openai_metadata(metadata)

# Load config
config = litellm.OpenAIConfig.get_config()
for k, v in config.items():
if k not in optional_params:
optional_params[k] = v

# Check if using experimental base handler
use_base_llm_http_handler = get_secret_bool(
"EXPERIMENTAL_OPENAI_BASE_LLM_HTTP_HANDLER"
)

try:
if use_base_llm_http_handler:
# Type checking disabled - complex handler signatures
response = base_llm_http_handler.completion( # type: ignore
model=model,
messages=messages,
api_base=api_base, # type: ignore
custom_llm_provider=custom_llm_provider,
model_response=model_response,
encoding=encoding,
logging_obj=logging,
optional_params=optional_params,
timeout=timeout, # type: ignore
litellm_params=litellm_params,
shared_session=shared_session,
acompletion=acompletion,
stream=stream,
api_key=api_key, # type: ignore
headers=headers,
client=client,
provider_config=provider_config,
)
else:
# Type checking disabled - complex handler signatures
response = openai_chat_completions.completion( # type: ignore
model=model,
messages=messages,
headers=headers,
model_response=model_response,
print_verbose=print_verbose,
api_key=api_key, # type: ignore
api_base=api_base, # type: ignore
acompletion=acompletion,
logging_obj=logging,
optional_params=optional_params,
litellm_params=litellm_params,
logger_fn=logger_fn,
timeout=timeout, # type: ignore
custom_prompt_dict=custom_prompt_dict, # type: ignore
client=client,
organization=organization, # type: ignore
custom_llm_provider=custom_llm_provider,
shared_session=shared_session,
)
except Exception as e:
# Log the original exception
logging.post_call(
input=messages,
api_key=api_key,
original_response=str(e),
additional_args={"headers": headers},
)
raise e

# Post-call logging for streaming
if optional_params.get("stream", False):
logging.post_call(
input=messages,
api_key=api_key,
original_response=response,
additional_args={"headers": headers},
)

# Type ignore: Handler methods have broad return types (ModelResponse | CustomStreamWrapper | Coroutine | etc)
# but in practice for chat completions, we only get ModelResponse or CustomStreamWrapper
return response # type: ignore
