
Commit bb5127b

Merge branch 'BerriAI:main' into citation-supported-text-3

2 parents: 4497dcf + 63c4a30

56 files changed: +3076 additions, -642 deletions

.circleci/config.yml

Lines changed: 1 addition & 0 deletions

@@ -1913,6 +1913,7 @@ jobs:
   -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
   -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
+  -e DEFAULT_NUM_WORKERS_LITELLM_PROXY=1 \
   -e USE_DDTRACE=True \
   -e DD_API_KEY=$DD_API_KEY \
   -e DD_SITE=$DD_SITE \
.github/workflows/test-litellm.yml

Lines changed: 1 addition & 0 deletions

@@ -31,6 +31,7 @@ jobs:
           poetry run pip install "pytest-retry==1.6.3"
           poetry run pip install pytest-xdist
           poetry run pip install "google-genai==1.22.0"
+          poetry run pip install "google-cloud-aiplatform>=1.38"
           poetry run pip install "fastapi-offline==1.7.3"
       - name: Setup litellm-enterprise as local package
         run: |

docs/my-website/docs/completion/input.md

Lines changed: 3 additions & 0 deletions

@@ -106,6 +106,7 @@ def completion(
     parallel_tool_calls: Optional[bool] = None,
     logprobs: Optional[bool] = None,
     top_logprobs: Optional[int] = None,
+    safety_identifier: Optional[str] = None,
     deployment_id=None,
     # soon to be deprecated params by OpenAI
     functions: Optional[List] = None,
@@ -196,6 +197,8 @@ def completion(

 - `top_logprobs`: *int (optional)* - An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.

+- `safety_identifier`: *string (optional)* - A unique identifier for tracking and managing safety-related requests. This parameter helps with safety monitoring and compliance tracking.
+
 - `headers`: *dict (optional)* - A dictionary of headers to be sent with the request.

 - `extra_headers`: *dict (optional)* - Alternative to `headers`, used to send extra headers in LLM API request.
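For illustration, a minimal SDK sketch of the newly documented parameter, assuming `safety_identifier` is forwarded like the other optional OpenAI params above; the model name and identifier value are placeholders:

```python
from litellm import completion

# `safety_identifier` is a stable per-end-user string used for safety
# monitoring and compliance tracking; "user-12345" is a made-up example.
response = completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "hi"}],
    safety_identifier="user-12345",
)
print(response.choices[0].message.content)
```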

docs/my-website/docs/providers/vertex_partner.md

Lines changed: 136 additions & 0 deletions

@@ -15,6 +15,7 @@ import TabItem from '@theme/TabItem';
 | Mistral | `vertex_ai/mistral-*` | [Vertex AI - Mistral Models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/mistral) |
 | AI21 (Jamba) | `vertex_ai/jamba-*` | [Vertex AI - AI21 Models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/ai21) |
 | Qwen | `vertex_ai/qwen/*` | [Vertex AI - Qwen Models](https://cloud.google.com/vertex-ai/generative-ai/docs/maas/qwen) |
+| OpenAI (GPT-OSS) | `vertex_ai/openai/gpt-oss-*` | [Vertex AI - GPT-OSS Models](https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/) |
 | Model Garden | `vertex_ai/openai/{MODEL_ID}` or `vertex_ai/{MODEL_ID}` | [Vertex Model Garden](https://cloud.google.com/model-garden?hl=en) |

 ## Vertex AI - Anthropic (Claude)
@@ -658,6 +659,141 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 </Tabs>


+## VertexAI GPT-OSS Models
+
+| Property | Details |
+|----------|---------|
+| Provider Route | `vertex_ai/openai/{MODEL}` |
+| Vertex Documentation | [Vertex AI - GPT-OSS Models](https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/) |
+
+**LiteLLM supports all Vertex AI GPT-OSS models.** Ensure you use the `vertex_ai/openai/` prefix for all Vertex AI GPT-OSS models.
+
+| Model Name | Usage |
+|------------------|------------------------------|
+| vertex_ai/openai/gpt-oss-20b-maas | `completion('vertex_ai/openai/gpt-oss-20b-maas', messages)` |
+
+#### Usage
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+import os
+
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""
+
+model = "openai/gpt-oss-20b-maas"
+
+vertex_ai_project = "your-vertex-project"  # can also set this as os.environ["VERTEXAI_PROJECT"]
+vertex_ai_location = "your-vertex-location"  # can also set this as os.environ["VERTEXAI_LOCATION"]
+
+response = completion(
+    model="vertex_ai/" + model,
+    messages=[{"role": "user", "content": "hi"}],
+    vertex_ai_project=vertex_ai_project,
+    vertex_ai_location=vertex_ai_location,
+)
+print("\nModel Response", response)
+```
+</TabItem>
+<TabItem value="proxy" label="Proxy">
+
+**1. Add to config**
+
+```yaml
+model_list:
+  - model_name: gpt-oss
+    litellm_params:
+      model: vertex_ai/openai/gpt-oss-20b-maas
+      vertex_ai_project: "my-test-project"
+      vertex_ai_location: "us-central1"
+```
+
+**2. Start proxy**
+
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING at http://0.0.0.0:4000
+```
+
+**3. Test it!**
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+    --header 'Authorization: Bearer sk-1234' \
+    --header 'Content-Type: application/json' \
+    --data '{
+        "model": "gpt-oss", # 👈 the 'model_name' in config
+        "messages": [
+            {
+                "role": "user",
+                "content": "what llm are you"
+            }
+        ]
+    }'
+```
+
+</TabItem>
+</Tabs>
+
+#### Usage - `reasoning_effort`
+
+GPT-OSS models support the `reasoning_effort` parameter for enhanced reasoning capabilities.
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+
+response = completion(
+    model="vertex_ai/openai/gpt-oss-20b-maas",
+    messages=[{"role": "user", "content": "Solve this complex problem step by step"}],
+    reasoning_effort="low",  # Options: "minimal", "low", "medium", "high"
+    vertex_ai_project="your-vertex-project",
+    vertex_ai_location="us-central1",
+)
+```
+
+</TabItem>
+
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: gpt-oss
+    litellm_params:
+      model: vertex_ai/openai/gpt-oss-20b-maas
+      vertex_ai_project: "my-test-project"
+      vertex_ai_location: "us-central1"
+```
+
+2. Start proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!

+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
+  -d '{
+    "model": "gpt-oss",
+    "messages": [{"role": "user", "content": "Solve this complex problem step by step"}],
+    "reasoning_effort": "low"
+  }'
+```
+
+</TabItem>
+</Tabs>
+
 ## Model Garden

 :::tip
docs/my-website/docs/proxy/config_settings.md

Lines changed: 4 additions & 0 deletions

@@ -573,6 +573,10 @@ router_settings:
 | LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM
 | LITELLM_LOG | Enable detailed logging for LiteLLM
 | LITELLM_LOG_FILE | File path to write LiteLLM logs to. When set, logs will be written to both console and the specified file
+| LITELLM_LOGGER_NAME | Name for the OTEL logger
+| LITELLM_METER_NAME | Name for the OTEL meter
+| LITELLM_OTEL_INTEGRATION_ENABLE_EVENTS | Optionally enable semantic logs for OTEL
+| LITELLM_OTEL_INTEGRATION_ENABLE_METRICS | Optionally enable semantic metrics for OTEL
 | LITELLM_MASTER_KEY | Master key for proxy authentication
 | LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development)
 | LITELLM_RATE_LIMIT_WINDOW_SIZE | Rate limit window size for LiteLLM. Default is 60
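For illustration, a minimal sketch of wiring up the four new OTEL variables from Python, assuming the integration reads them from the environment before LiteLLM is imported; all values are placeholders:

```python
import os

# Placeholder names for the OTEL logger/meter; any string works.
os.environ["LITELLM_LOGGER_NAME"] = "litellm-otel-logger"
os.environ["LITELLM_METER_NAME"] = "litellm-otel-meter"

# Opt in to semantic logs and metrics for the OTEL integration.
os.environ["LITELLM_OTEL_INTEGRATION_ENABLE_EVENTS"] = "true"
os.environ["LITELLM_OTEL_INTEGRATION_ENABLE_METRICS"] = "true"

import litellm  # imported after env setup so the integration sees the flags
```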

docs/my-website/docs/proxy/enterprise.md

Lines changed: 87 additions & 0 deletions

@@ -439,6 +439,33 @@ response = client.chat.completions.create(

 print(response)
 ```
+
+**Using Headers:**
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="sk-1234",
+    base_url="http://0.0.0.0:4000"
+)
+
+# Pass spend logs metadata via headers
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {
+            "role": "user",
+            "content": "this is a test request, write a short poem"
+        }
+    ],
+    extra_headers={
+        "x-litellm-spend-logs-metadata": '{"user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion"}'
+    }
+)
+
+print(response)
+```
+
 </TabItem>


@@ -478,6 +505,43 @@ async function runOpenAI() {
 // Call the asynchronous function
 runOpenAI();
 ```
+
+**Using Headers:**
+
+```js
+const openai = require('openai');
+
+async function runOpenAI() {
+  const client = new openai.OpenAI({
+    apiKey: 'sk-1234',
+    baseURL: 'http://0.0.0.0:4000'
+  });
+
+  try {
+    const response = await client.chat.completions.create({
+      model: 'gpt-3.5-turbo',
+      messages: [
+        {
+          role: 'user',
+          content: "this is a test request, write a short poem"
+        },
+      ]
+    }, {
+      headers: {
+        'x-litellm-spend-logs-metadata': '{"user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion"}'
+      }
+    });
+    console.log(response);
+  } catch (error) {
+    console.log("got this exception from server");
+    console.error(error);
+  }
+}
+
+// Call the asynchronous function
+runOpenAI();
+```
+
 </TabItem>

 <TabItem value="Curl" label="Curl Request">
@@ -502,6 +566,29 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
     }
 }'
 ```
+
+</TabItem>
+
+<TabItem value="headers" label="Using Headers">
+
+Pass `x-litellm-spend-logs-metadata` as a request header with a JSON string:
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --header 'Authorization: Bearer sk-1234' \
+    --header 'x-litellm-spend-logs-metadata: {"user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion"}' \
+    --data '{
+        "model": "gpt-3.5-turbo",
+        "messages": [
+            {
+                "role": "user",
+                "content": "what llm are you"
+            }
+        ]
+    }'
+```
+
 </TabItem>
 <TabItem value="langchain" label="Langchain">
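One practical note on these examples: the header value must be a JSON string, so in Python it is safer to build it with `json.dumps` than to hand-write it. A minimal sketch, reusing the placeholder key and base URL from above:

```python
import json

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# json.dumps guarantees correct quoting/escaping for the header value.
metadata = {"user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion"}

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    extra_headers={"x-litellm-spend-logs-metadata": json.dumps(metadata)},
)
print(response)
```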

docs/my-website/docs/proxy/request_headers.md

Lines changed: 2 additions & 0 deletions

@@ -14,6 +14,8 @@ Special headers that are supported by LiteLLM.

 `x-litellm-num-retries`: Optional[int]: The number of retries for the request.

+`x-litellm-spend-logs-metadata`: Optional[str]: JSON string containing custom metadata to include in spend logs. Example: `{"user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion"}`. [Learn More](../proxy/enterprise#tracking-spend-with-custom-metadata)
+
 ## Anthropic Headers

 `anthropic-version` Optional[str]: The version of the Anthropic API to use.
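For illustration, a minimal sketch sending both LiteLLM headers documented in this file through the OpenAI SDK's `extra_headers`; header values are strings, so the retry count is serialized as `"3"`, and the key and base URL are placeholders:

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    extra_headers={
        "x-litellm-num-retries": "3",  # retry count, sent as a string header value
        "x-litellm-spend-logs-metadata": '{"user_id": "12345", "project_id": "proj_abc"}',
    },
)
print(response)
```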

litellm/__init__.py

Lines changed: 5 additions & 34 deletions

@@ -67,6 +67,7 @@
     bedrock_embedding_models,
     known_tokenizer_config,
     BEDROCK_INVOKE_PROVIDERS_LITERAL,
+    BEDROCK_CONVERSE_MODELS,
     DEFAULT_MAX_TOKENS,
     DEFAULT_SOFT_BUDGET,
     DEFAULT_ALLOWED_FAILS,
@@ -432,40 +433,6 @@ def identify(event_details):
 project = None
 config_path = None
 vertex_ai_safety_settings: Optional[dict] = None
-BEDROCK_CONVERSE_MODELS = [
-    "openai.gpt-oss-20b-1:0",
-    "openai.gpt-oss-120b-1:0",
-    "anthropic.claude-opus-4-1-20250805-v1:0",
-    "anthropic.claude-opus-4-20250514-v1:0",
-    "anthropic.claude-sonnet-4-20250514-v1:0",
-    "anthropic.claude-3-7-sonnet-20250219-v1:0",
-    "anthropic.claude-3-5-haiku-20241022-v1:0",
-    "anthropic.claude-3-5-sonnet-20241022-v2:0",
-    "anthropic.claude-3-5-sonnet-20240620-v1:0",
-    "anthropic.claude-3-opus-20240229-v1:0",
-    "anthropic.claude-3-sonnet-20240229-v1:0",
-    "anthropic.claude-3-haiku-20240307-v1:0",
-    "anthropic.claude-v2",
-    "anthropic.claude-v2:1",
-    "anthropic.claude-v1",
-    "anthropic.claude-instant-v1",
-    "ai21.jamba-instruct-v1:0",
-    "ai21.jamba-1-5-mini-v1:0",
-    "ai21.jamba-1-5-large-v1:0",
-    "meta.llama3-70b-instruct-v1:0",
-    "meta.llama3-8b-instruct-v1:0",
-    "meta.llama3-1-8b-instruct-v1:0",
-    "meta.llama3-1-70b-instruct-v1:0",
-    "meta.llama3-1-405b-instruct-v1:0",
-    "meta.llama3-70b-instruct-v1:0",
-    "mistral.mistral-large-2407-v1:0",
-    "mistral.mistral-large-2402-v1:0",
-    "mistral.mistral-small-2402-v1:0",
-    "meta.llama3-2-1b-instruct-v1:0",
-    "meta.llama3-2-3b-instruct-v1:0",
-    "meta.llama3-2-11b-instruct-v1:0",
-    "meta.llama3-2-90b-instruct-v1:0",
-]

 ####### COMPLETION MODELS ###################
 from typing import Set
@@ -491,6 +458,7 @@ def identify(event_details):
 vertex_deepseek_models: Set = set()
 vertex_ai_ai21_models: Set = set()
 vertex_mistral_models: Set = set()
+vertex_openai_models: Set = set()
 ai21_models: Set = set()
 ai21_chat_models: Set = set()
 nlp_cloud_models: Set = set()
@@ -637,6 +605,9 @@ def add_known_models():
     elif value.get("litellm_provider") == "vertex_ai-image-models":
         key = key.replace("vertex_ai/", "")
         vertex_ai_image_models.add(key)
+    elif value.get("litellm_provider") == "vertex_ai-openai_models":
+        key = key.replace("vertex_ai/", "")
+        vertex_openai_models.add(key)
     elif value.get("litellm_provider") == "ai21":
         if value.get("mode") == "chat":
             ai21_chat_models.add(key)
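To make the last hunk concrete, here is a standalone sketch of the classification pattern `add_known_models()` uses: entries are bucketed by their `litellm_provider` field, with the `vertex_ai/` route prefix stripped before registration. The `model_cost` dict below is a two-entry stub, not LiteLLM's real cost map:

```python
# Stub of the metadata map; LiteLLM's real `model_cost` is loaded from its
# model-cost JSON and is much larger.
model_cost = {
    "vertex_ai/openai/gpt-oss-20b-maas": {"litellm_provider": "vertex_ai-openai_models"},
    "vertex_ai/imagegeneration@006": {"litellm_provider": "vertex_ai-image-models"},
}

vertex_openai_models: set = set()
vertex_ai_image_models: set = set()

for key, value in model_cost.items():
    provider = value.get("litellm_provider")
    if provider == "vertex_ai-image-models":
        # strip the route prefix so the bare model id is registered
        vertex_ai_image_models.add(key.replace("vertex_ai/", ""))
    elif provider == "vertex_ai-openai_models":
        vertex_openai_models.add(key.replace("vertex_ai/", ""))

print(vertex_openai_models)    # {'openai/gpt-oss-20b-maas'}
print(vertex_ai_image_models)  # {'imagegeneration@006'}
```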
