
Commit c821f1d

[Feature]: Support GPT-OSS models on vertex ai (#14184)
* add VertexAIGPTOSSTransformation
* fix: optional_params
* fix: is_vertex_partner_model
* test_partner_models_httpx
* docs GPT oss docs
* test_vertex_ai_gpt_oss_reasoning_effort
* add vertex ai models
1 parent 4b7c114 commit c821f1d

File tree: 10 files changed, +455 −2 lines changed


docs/my-website/docs/providers/vertex_partner.md

Lines changed: 136 additions & 0 deletions

@@ -15,6 +15,7 @@ import TabItem from '@theme/TabItem';
| Mistral | `vertex_ai/mistral-*` | [Vertex AI - Mistral Models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/mistral) |
| AI21 (Jamba) | `vertex_ai/jamba-*` | [Vertex AI - AI21 Models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/ai21) |
| Qwen | `vertex_ai/qwen/*` | [Vertex AI - Qwen Models](https://cloud.google.com/vertex-ai/generative-ai/docs/maas/qwen) |
| OpenAI (GPT-OSS) | `vertex_ai/openai/gpt-oss-*` | [Vertex AI - GPT-OSS Models](https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/) |
| Model Garden | `vertex_ai/openai/{MODEL_ID}` or `vertex_ai/{MODEL_ID}` | [Vertex Model Garden](https://cloud.google.com/model-garden?hl=en) |

## Vertex AI - Anthropic (Claude)
@@ -658,6 +659,141 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
</Tabs>

## VertexAI GPT-OSS Models

| Property | Details |
|----------|---------|
| Provider Route | `vertex_ai/openai/{MODEL}` |
| Vertex Documentation | [Vertex AI - GPT-OSS Models](https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/) |

**LiteLLM supports all Vertex AI GPT-OSS models.** Ensure you use the `vertex_ai/openai/` prefix for all Vertex AI GPT-OSS models.

| Model Name | Usage |
|------------------|------------------------------|
| vertex_ai/openai/gpt-oss-20b-maas | `completion('vertex_ai/openai/gpt-oss-20b-maas', messages)` |

#### Usage

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""

model = "openai/gpt-oss-20b-maas"

vertex_ai_project = "your-vertex-project"  # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location"  # can also set this as os.environ["VERTEXAI_LOCATION"]

response = completion(
    model="vertex_ai/" + model,
    messages=[{"role": "user", "content": "hi"}],
    vertex_ai_project=vertex_ai_project,
    vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)
```

</TabItem>
<TabItem value="proxy" label="Proxy">

**1. Add to config**

```yaml
model_list:
  - model_name: gpt-oss
    litellm_params:
      model: vertex_ai/openai/gpt-oss-20b-maas
      vertex_ai_project: "my-test-project"
      vertex_ai_location: "us-central1"
```

**2. Start proxy**

```bash
litellm --config /path/to/config.yaml

# RUNNING at http://0.0.0.0:4000
```

**3. Test it!**

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-oss",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```

`"model"` is the `model_name` you set in the proxy config.
737+
738+
</TabItem>
739+
</Tabs>
740+
741+
#### Usage - `reasoning_effort`
742+
743+
GPT-OSS models support the `reasoning_effort` parameter for enhanced reasoning capabilities.
744+
745+
<Tabs>
746+
<TabItem value="sdk" label="SDK">
747+
748+
```python
749+
from litellm import completion
750+
751+
response = completion(
752+
model="vertex_ai/openai/gpt-oss-20b-maas",
753+
messages=[{"role": "user", "content": "Solve this complex problem step by step"}],
754+
reasoning_effort="low", # Options: "minimal", "low", "medium", "high"
755+
vertex_ai_project="your-vertex-project",
756+
vertex_ai_location="us-central1",
757+
)
758+
```
759+
760+
</TabItem>
761+
762+
<TabItem value="proxy" label="PROXY">
763+
764+
1. Setup config.yaml
765+
766+
```yaml
767+
model_list:
768+
- model_name: gpt-oss
769+
litellm_params:
770+
model: vertex_ai/openai/gpt-oss-20b-maas
771+
vertex_ai_project: "my-test-project"
772+
vertex_ai_location: "us-central1"
773+
```
774+
775+
2. Start proxy
776+
777+
```bash
778+
litellm --config /path/to/config.yaml
779+
```
780+
781+
3. Test it!
782+
783+
```bash
784+
curl http://0.0.0.0:4000/v1/chat/completions \
785+
-H "Content-Type: application/json" \
786+
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
787+
-d '{
788+
"model": "gpt-oss",
789+
"messages": [{"role": "user", "content": "Solve this complex problem step by step"}],
790+
"reasoning_effort": "low"
791+
}'
792+
```
793+
794+
</TabItem>
795+
</Tabs>
796+
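A minimal client-side guard for these `reasoning_effort` values can be sketched as follows; the allowed set is taken from the SDK example's comment, and the `check_reasoning_effort` helper is illustrative only, not part of litellm:

```python
# Guard for reasoning_effort before sending a request.
# Allowed values come from the example above; this helper is a sketch, not litellm API.
ALLOWED_REASONING_EFFORT = {"minimal", "low", "medium", "high"}

def check_reasoning_effort(value: str) -> str:
    if value not in ALLOWED_REASONING_EFFORT:
        raise ValueError(f"unsupported reasoning_effort: {value!r}")
    return value

print(check_reasoning_effort("low"))
```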
## Model Garden

:::tip

litellm/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -458,6 +458,7 @@ def identify(event_details):
vertex_deepseek_models: Set = set()
vertex_ai_ai21_models: Set = set()
vertex_mistral_models: Set = set()
vertex_openai_models: Set = set()
ai21_models: Set = set()
ai21_chat_models: Set = set()
nlp_cloud_models: Set = set()

@@ -604,6 +605,9 @@ def add_known_models():
        elif value.get("litellm_provider") == "vertex_ai-image-models":
            key = key.replace("vertex_ai/", "")
            vertex_ai_image_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-openai_models":
            key = key.replace("vertex_ai/", "")
            vertex_openai_models.add(key)
        elif value.get("litellm_provider") == "ai21":
            if value.get("mode") == "chat":
                ai21_chat_models.add(key)
litellm/llms/vertex_ai/vertex_ai_partner_models/gpt_oss/transformation.py

Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
import litellm
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig


class VertexAIGPTOSSTransformation(OpenAIGPTConfig):
    """
    Transformation for GPT-OSS models on Vertex AI.

    https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas
    """
    def __init__(self):
        super().__init__()

    def get_supported_openai_params(self, model: str) -> list:
        base_gpt_series_params = super().get_supported_openai_params(model=model)
        gpt_oss_only_params = ["reasoning_effort"]
        base_gpt_series_params.extend(gpt_oss_only_params)

        #########################################################
        # VertexAI - GPT-OSS does not support tool calls
        #########################################################
        if litellm.supports_function_calling(model=model) is False:
            TOOL_CALLING_PARAMS_TO_REMOVE = ["tool", "tool_choice", "function_call", "functions"]
            base_gpt_series_params = [param for param in base_gpt_series_params if param not in TOOL_CALLING_PARAMS_TO_REMOVE]

        return base_gpt_series_params
litellm/llms/vertex_ai/vertex_ai_partner_models/main.py

Lines changed: 2 additions & 0 deletions

@@ -49,6 +49,7 @@ def is_vertex_partner_model(model: str):
        or model.startswith("jamba")
        or model.startswith("claude")
        or model.startswith("qwen")
        or model.startswith("openai")
    ):
        return True
    return False

@@ -59,6 +60,7 @@ def should_use_openai_handler(model: str):
        "llama",
        "deepseek-ai",
        "qwen",
        "openai",
    ]
    if any(provider in model for provider in OPENAI_LIKE_VERTEX_PROVIDERS):
        return True

litellm/model_prices_and_context_window_backup.json

Lines changed: 22 additions & 0 deletions

@@ -9884,6 +9884,28 @@
    "supports_tool_choice": true,
    "supports_prompt_caching": true
},
"vertex_ai/openai/gpt-oss-20b-maas": {
    "max_tokens": 32768,
    "max_input_tokens": 131072,
    "max_output_tokens": 32768,
    "input_cost_per_token": 0.075e-06,
    "output_cost_per_token": 0.30e-06,
    "litellm_provider": "vertex_ai-openai_models",
    "mode": "chat",
    "supports_reasoning": true,
    "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas"
},
"vertex_ai/openai/gpt-oss-120b-maas": {
    "max_tokens": 32768,
    "max_input_tokens": 131072,
    "max_output_tokens": 32768,
    "input_cost_per_token": 0.15e-06,
    "output_cost_per_token": 0.60e-06,
    "litellm_provider": "vertex_ai-openai_models",
    "mode": "chat",
    "supports_reasoning": true,
    "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas"
},
"vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas": {
    "max_tokens": 32768,
    "max_input_tokens": 262144,
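With the per-token prices above, request cost is a straight multiply-and-add. A quick sketch using the `gpt-oss-20b-maas` rates:

```python
# Cost sketch using the gpt-oss-20b-maas prices above (USD per token).
INPUT_COST_PER_TOKEN = 0.075e-06
OUTPUT_COST_PER_TOKEN = 0.30e-06

def request_cost(input_tokens: int, output_tokens: int) -> float:
    return input_tokens * INPUT_COST_PER_TOKEN + output_tokens * OUTPUT_COST_PER_TOKEN

# 1M input + 1M output tokens: $0.075 + $0.30 = $0.375
print(round(request_cost(1_000_000, 1_000_000), 6))
```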

litellm/proxy/proxy_config.yaml

Lines changed: 0 additions & 1 deletion

@@ -3,4 +3,3 @@ model_list:
    litellm_params:
      model: openai/*
      api_base: https://exampleopenaiendpoint-production-0ee2.up.railway.app/
-     mock_response: "hi"

litellm/utils.py

Lines changed: 16 additions & 0 deletions

@@ -3601,6 +3601,17 @@ def _check_valid_arg(supported_params: List[str]):
                    else False
                ),
            )
        elif provider_config is not None:
            optional_params = provider_config.map_openai_params(
                non_default_params=non_default_params,
                optional_params=optional_params,
                model=model,
                drop_params=(
                    drop_params
                    if drop_params is not None and isinstance(drop_params, bool)
                    else False
                ),
            )
        else:  # use generic openai-like param mapping
            optional_params = litellm.VertexAILlama3Config().map_openai_params(
                non_default_params=non_default_params,

@@ -6864,6 +6875,11 @@ def get_provider_chat_config(  # noqa: PLR0915
        return litellm.VertexGeminiConfig()
    elif "claude" in model:
        return litellm.VertexAIAnthropicConfig()
    elif "gpt-oss" in model:
        from litellm.llms.vertex_ai.vertex_ai_partner_models.gpt_oss.transformation import (
            VertexAIGPTOSSTransformation,
        )
        return VertexAIGPTOSSTransformation()
    elif model in litellm.vertex_mistral_models:
        if "codestral" in model:
            return litellm.CodestralTextCompletionConfig()
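The `get_provider_chat_config` change follows the existing substring-dispatch pattern. A sketch with stand-in config classes (the real ones are litellm's `VertexGeminiConfig`, `VertexAIAnthropicConfig`, and `VertexAIGPTOSSTransformation`):

```python
# Sketch of the substring dispatch in get_provider_chat_config.
# Class names are stand-ins for litellm's actual config classes.
class GeminiConfig: ...
class AnthropicConfig: ...
class GPTOSSConfig: ...

def pick_config(model: str):
    if "gemini" in model:
        return GeminiConfig()
    elif "claude" in model:
        return AnthropicConfig()
    elif "gpt-oss" in model:  # branch added by this commit
        return GPTOSSConfig()
    raise ValueError(f"no config for {model}")

print(type(pick_config("openai/gpt-oss-120b-maas")).__name__)  # GPTOSSConfig
```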

model_prices_and_context_window.json

Lines changed: 22 additions & 0 deletions

@@ -9884,6 +9884,28 @@
    "supports_tool_choice": true,
    "supports_prompt_caching": true
},
"vertex_ai/openai/gpt-oss-20b-maas": {
    "max_tokens": 32768,
    "max_input_tokens": 131072,
    "max_output_tokens": 32768,
    "input_cost_per_token": 0.075e-06,
    "output_cost_per_token": 0.30e-06,
    "litellm_provider": "vertex_ai-openai_models",
    "mode": "chat",
    "supports_reasoning": true,
    "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas"
},
"vertex_ai/openai/gpt-oss-120b-maas": {
    "max_tokens": 32768,
    "max_input_tokens": 131072,
    "max_output_tokens": 32768,
    "input_cost_per_token": 0.15e-06,
    "output_cost_per_token": 0.60e-06,
    "litellm_provider": "vertex_ai-openai_models",
    "mode": "chat",
    "supports_reasoning": true,
    "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas"
},
"vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas": {
    "max_tokens": 32768,
    "max_input_tokens": 262144,

tests/local_testing/test_amazing_vertex_completion.py

Lines changed: 3 additions & 1 deletion

@@ -840,7 +840,8 @@ async def test_gemini_pro_function_calling_httpx(model, sync_mode):
    [
        ("vertex_ai/mistral-large-2411", "us-central1"),
        ("vertex_ai/mistral-nemo@2407", "us-central1"),
-       ("vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas", "us-south1")
+       ("vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas", "us-south1"),
+       ("vertex_ai/openai/gpt-oss-20b-maas", "us-central1"),
    ],
)
@pytest.mark.parametrize(

@@ -911,6 +912,7 @@ async def test_partner_models_httpx(model, region, sync_mode):
        ("vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas", "us-east5"),
        ("vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas", "us-south1"),
        ("vertex_ai/mistral-large-2411", "us-central1"),  # critical - we had this issue: https://github.com/BerriAI/litellm/issues/13888
        ("vertex_ai/openai/gpt-oss-20b-maas", "us-central1"),
    ],
)
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)