
Commit 390e47c

Merge pull request #14416 from xprilion/wandb-inference
Add W&B Inference to LiteLLM
2 parents fa20abf + 0f1de92

12 files changed: +395 -0 lines changed

litellm/__init__.py

Lines changed: 8 additions & 0 deletions
@@ -60,6 +60,7 @@
     empower_models,
     together_ai_models,
     baseten_models,
+    WANDB_MODELS,
    REPEATED_STREAMING_CHUNK_LIMIT,
     request_timeout,
     open_ai_embedding_models,
@@ -242,6 +243,7 @@
 snowflake_key: Optional[str] = None
 gradient_ai_api_key: Optional[str] = None
 nebius_key: Optional[str] = None
+wandb_key: Optional[str] = None
 heroku_key: Optional[str] = None
 cometapi_key: Optional[str] = None
 ovhcloud_key: Optional[str] = None
@@ -524,6 +526,7 @@ def identify(event_details):
 oci_models: Set = set()
 vercel_ai_gateway_models: Set = set()
 volcengine_models: Set = set()
+wandb_models: Set = set(WANDB_MODELS)
 ovhcloud_models: Set = set()
 ovhcloud_embedding_models: Set = set()
@@ -740,6 +743,8 @@ def add_known_models():
             oci_models.add(key)
         elif value.get("litellm_provider") == "volcengine":
             volcengine_models.add(key)
+        elif value.get("litellm_provider") == "wandb":
+            wandb_models.add(key)
         elif value.get("litellm_provider") == "ovhcloud":
             ovhcloud_models.add(key)
         elif value.get("litellm_provider") == "ovhcloud-embedding-models":
@@ -838,6 +843,7 @@ def add_known_models():
     | heroku_models
     | vercel_ai_gateway_models
     | volcengine_models
+    | wandb_models
     | ovhcloud_models
 )
@@ -920,6 +926,7 @@ def add_known_models():
     "cometapi": cometapi_models,
     "oci": oci_models,
     "volcengine": volcengine_models,
+    "wandb": wandb_models,
     "ovhcloud": ovhcloud_models | ovhcloud_embedding_models,
 }
@@ -1259,6 +1266,7 @@ def add_known_models():
 from .llms.watsonx.embed.transformation import IBMWatsonXEmbeddingConfig
 from .llms.github_copilot.chat.transformation import GithubCopilotConfig
 from .llms.nebius.chat.transformation import NebiusConfig
+from .llms.wandb.chat.transformation import WandbConfig
 from .llms.dashscope.chat.transformation import DashScopeChatConfig
 from .llms.moonshot.chat.transformation import MoonshotChatConfig
 from .llms.v0.chat.transformation import V0ChatConfig
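
For context, a minimal sketch of what this wiring exposes at import time. It assumes the provider-to-models dict patched in the @@ -920 hunk is the public litellm.models_by_provider; that name is not shown in the hunk itself.

import litellm

# wandb_models is seeded from the static WANDB_MODELS constant and extended
# by add_known_models() for any "wandb" entries in the model-cost map.
assert "openai/gpt-oss-120b" in litellm.wandb_models

# "wandb" now resolves in the provider -> models mapping (assumed name).
assert "wandb" in litellm.models_by_provider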

litellm/constants.py

Lines changed: 36 additions & 0 deletions
@@ -313,6 +313,7 @@
     "morph",
     "lambda_ai",
     "vercel_ai_gateway",
+    "wandb",
     "ovhcloud",
 ]
@@ -448,6 +449,7 @@
     "https://api.lambda.ai/v1",
     "https://api.hyperbolic.xyz/v1",
     "https://ai-gateway.vercel.sh/v1",
+    "https://api.inference.wandb.ai/v1",
 ]
@@ -492,6 +494,7 @@
     "hyperbolic",
     "vercel_ai_gateway",
     "aiml",
+    "wandb",
 ]
 openai_text_completion_compatible_providers: List = (
     [  # providers that support `/v1/completions`
@@ -507,6 +510,7 @@
         "v0",
         "lambda_ai",
         "hyperbolic",
+        "wandb",
     ]
 )
 _openai_like_providers: List = [
@@ -757,6 +761,38 @@
     ]
 )

+WANDB_MODELS: set = set(
+    [
+        # openai models
+        "openai/gpt-oss-120b",
+        "openai/gpt-oss-20b",
+
+        # zai-org models
+        "zai-org/GLM-4.5",
+
+        # Qwen models
+        "Qwen/Qwen3-235B-A22B-Instruct-2507",
+        "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+        "Qwen/Qwen3-235B-A22B-Thinking-2507",
+
+        # moonshotai
+        "moonshotai/Kimi-K2-Instruct",
+
+        # meta models
+        "meta-llama/Llama-3.1-8B-Instruct",
+        "meta-llama/Llama-3.3-70B-Instruct",
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+
+        # deepseek-ai
+        "deepseek-ai/DeepSeek-V3.1",
+        "deepseek-ai/DeepSeek-R1-0528",
+        "deepseek-ai/DeepSeek-V3-0324",
+
+        # microsoft
+        "microsoft/Phi-4-mini-instruct",
+    ]
+)
+
 BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
     "cohere",
     "anthropic",

litellm/litellm_core_utils/get_llm_provider_logic.py

Lines changed: 10 additions & 0 deletions
@@ -252,6 +252,9 @@ def get_llm_provider(  # noqa: PLR0915
         elif endpoint == "https://ai-gateway.vercel.sh/v1":
             custom_llm_provider = "vercel_ai_gateway"
             dynamic_api_key = get_secret_str("VERCEL_AI_GATEWAY_API_KEY")
+        elif endpoint == "https://api.inference.wandb.ai/v1":
+            custom_llm_provider = "wandb"
+            dynamic_api_key = get_secret_str("WANDB_API_KEY")

     if api_base is not None and not isinstance(api_base, str):
         raise Exception(
@@ -773,6 +776,13 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
         ) = litellm.AIMLChatConfig()._get_openai_compatible_provider_info(
             api_base, api_key
         )
+    elif custom_llm_provider == "wandb":
+        api_base = (
+            api_base
+            or get_secret("WANDB_API_BASE")
+            or "https://api.inference.wandb.ai/v1"
+        )  # type: ignore
+        dynamic_api_key = api_key or get_secret_str("WANDB_API_KEY")

     if api_base is not None and not isinstance(api_base, str):
         raise Exception("api base needs to be a string. api_base={}".format(api_base))

litellm/litellm_core_utils/get_supported_openai_params.py

Lines changed: 3 additions & 0 deletions
@@ -149,6 +149,9 @@ def get_supported_openai_params(  # noqa: PLR0915
     elif custom_llm_provider == "nebius":
         if request_type == "chat_completion":
             return litellm.NebiusConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "wandb":
+        if request_type == "chat_completion":
+            return litellm.WandbConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "replicate":
         return litellm.ReplicateConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "huggingface":
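
A small usage sketch of the new branch. Since WandbConfig subclasses OpenAIGPTConfig, the returned list is whatever the standard OpenAI chat config advertises:

from litellm import get_supported_openai_params

params = get_supported_openai_params(
    model="openai/gpt-oss-120b",
    custom_llm_provider="wandb",
    request_type="chat_completion",
)
assert "temperature" in params  # standard OpenAI chat params pass through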

litellm/llms/wandb/__init__.py

Whitespace-only changes.

litellm/llms/wandb/chat/__init__.py

Whitespace-only changes.
litellm/llms/wandb/chat/transformation.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+"""
+Wandb Chat Completions API - Transformation
+
+This is OpenAI compatible - no translation needed / occurs
+"""
+
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+
+
+class WandbConfig(OpenAIGPTConfig):
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """
+        map max_completion_tokens param to max_tokens
+        """
+        supported_openai_params = self.get_supported_openai_params(model=model)
+        for param, value in non_default_params.items():
+            if param == "max_completion_tokens":
+                optional_params["max_tokens"] = value
+            elif param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
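
Concretely, the only W&B-specific translation is renaming max_completion_tokens; every other supported parameter passes through unchanged. A minimal invocation of the config above:

from litellm import WandbConfig

optional_params = WandbConfig().map_openai_params(
    non_default_params={"max_completion_tokens": 256, "temperature": 0.2},
    optional_params={},
    model="openai/gpt-oss-120b",
    drop_params=False,
)
assert optional_params == {"max_tokens": 256, "temperature": 0.2}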

litellm/main.py

Lines changed: 22 additions & 0 deletions
@@ -1981,6 +1981,7 @@ def completion(  # type: ignore # noqa: PLR0915
         or custom_llm_provider == "openai"
         or custom_llm_provider == "together_ai"
         or custom_llm_provider == "nebius"
+        or custom_llm_provider == "wandb"
         or custom_llm_provider in litellm.openai_compatible_providers
         or "ft:gpt-3.5-turbo" in model  # finetune gpt-3.5-turbo
     ):  # allow user to make an openai call with a custom base
@@ -4400,6 +4401,27 @@ def embedding(  # noqa: PLR0915
             or "api.studio.nebius.ai/v1"
         )

+        response = openai_chat_completions.embedding(
+            model=model,
+            input=input,
+            api_base=api_base,
+            api_key=api_key,
+            logging_obj=logging,
+            timeout=timeout,
+            model_response=EmbeddingResponse(),
+            optional_params=optional_params,
+            client=client,
+            aembedding=aembedding,
+        )
+    elif custom_llm_provider == "wandb":
+        api_key = api_key or litellm.api_key or get_secret_str("WANDB_API_KEY")
+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret_str("WANDB_API_BASE")
+            or "https://api.inference.wandb.ai/v1"
+        )
+
         response = openai_chat_completions.embedding(
             model=model,
             input=input,
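
With the routing above in place, the standard entry points work with a wandb/ prefix once WANDB_API_KEY is set. A hedged end-to-end sketch (the key value is a placeholder; embedding() resolves the key and base the same way, falling back to https://api.inference.wandb.ai/v1):

import os
import litellm

os.environ["WANDB_API_KEY"] = "<your-wandb-key>"  # placeholder

# Chat completion, routed through the OpenAI-compatible path enabled in completion().
response = litellm.completion(
    model="wandb/openai/gpt-oss-120b",
    messages=[{"role": "user", "content": "Say hello"}],
    max_completion_tokens=64,  # WandbConfig maps this to max_tokens
)
print(response.choices[0].message.content)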

litellm/types/utils.py

Lines changed: 1 addition & 0 deletions
@@ -2397,6 +2397,7 @@ class LlmProviders(str, Enum):
     AUTO_ROUTER = "auto_router"
     VERCEL_AI_GATEWAY = "vercel_ai_gateway"
     DOTPROMPT = "dotprompt"
+    WANDB = "wandb"
     OVHCLOUD = "ovhcloud"

litellm/utils.py

Lines changed: 16 additions & 0 deletions
@@ -3275,6 +3275,7 @@ def pre_process_optional_params(
         and custom_llm_provider != "openrouter"
         and custom_llm_provider != "vercel_ai_gateway"
         and custom_llm_provider != "nebius"
+        and custom_llm_provider != "wandb"
         and custom_llm_provider not in litellm.openai_compatible_providers
     ):
         if custom_llm_provider == "ollama":
@@ -4446,6 +4447,9 @@ def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
     # nebius
     elif llm_provider == "nebius":
         api_key = api_key or litellm.nebius_key or get_secret("NEBIUS_API_KEY")
+    # wandb
+    elif llm_provider == "wandb":
+        api_key = api_key or litellm.wandb_key or get_secret("WANDB_API_KEY")
     return api_key
@@ -5530,6 +5534,11 @@ def validate_environment(  # noqa: PLR0915
             keys_in_environment = True
         else:
             missing_keys.append("NEBIUS_API_KEY")
+    elif custom_llm_provider == "wandb":
+        if "WANDB_API_KEY" in os.environ:
+            keys_in_environment = True
+        else:
+            missing_keys.append("WANDB_API_KEY")
     elif custom_llm_provider == "dashscope":
         if "DASHSCOPE_API_KEY" in os.environ:
             keys_in_environment = True
@@ -5644,6 +5653,11 @@ def validate_environment(  # noqa: PLR0915
             keys_in_environment = True
         else:
             missing_keys.append("NEBIUS_API_KEY")
+    elif model in litellm.wandb_models:
+        if "WANDB_API_KEY" in os.environ:
+            keys_in_environment = True
+        else:
+            missing_keys.append("WANDB_API_KEY")

     def filter_missing_keys(keys: List[str], exclude_pattern: str) -> List[str]:
         """Filter out keys that contain the exclude_pattern (case insensitive)."""
@@ -7046,6 +7060,8 @@ def get_provider_chat_config(  # noqa: PLR0915
         return litellm.NovitaConfig()
     elif litellm.LlmProviders.NEBIUS == provider:
         return litellm.NebiusConfig()
+    elif litellm.LlmProviders.WANDB == provider:
+        return litellm.WandbConfig()
     elif litellm.LlmProviders.DASHSCOPE == provider:
         return litellm.DashScopeChatConfig()
     elif litellm.LlmProviders.MOONSHOT == provider:
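
Finally, a sketch exercising the two helper paths patched above; validate_environment returns the keys_in_environment / missing_keys dict built in the hunks, and get_api_key consults litellm.wandb_key before the environment variable:

import litellm
from litellm.utils import get_api_key

# Reports WANDB_API_KEY as missing when it is not exported.
print(litellm.validate_environment(model="wandb/openai/gpt-oss-120b"))

# The module-level key takes precedence over the environment variable.
litellm.wandb_key = "<placeholder-key>"  # hypothetical value for illustration
assert get_api_key(llm_provider="wandb", dynamic_api_key=None) == "<placeholder-key>"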
