BerriAI
diff --git a/‎.circleci/config.yml‎
Lines changed: 2 additions & 0 deletions b/‎.circleci/config.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎deploy/charts/litellm-helm/Chart.yaml‎
Lines changed: 1 addition & 1 deletion b/‎deploy/charts/litellm-helm/Chart.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎deploy/charts/litellm-helm/README.md‎
Lines changed: 5 additions & 0 deletions b/‎deploy/charts/litellm-helm/README.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎deploy/charts/litellm-helm/templates/NOTES.txt‎
Lines changed: 1 addition & 0 deletions b/‎deploy/charts/litellm-helm/templates/NOTES.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎deploy/charts/litellm-helm/templates/poddisruptionbudget.yaml‎
Lines changed: 33 additions & 0 deletions b/‎deploy/charts/litellm-helm/templates/poddisruptionbudget.yaml‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎deploy/charts/litellm-helm/tests/pdb_tests.yaml‎
Lines changed: 45 additions & 0 deletions b/‎deploy/charts/litellm-helm/tests/pdb_tests.yaml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎deploy/charts/litellm-helm/values.yaml‎
Lines changed: 8 additions & 1 deletion b/‎deploy/charts/litellm-helm/values.yaml‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎docs/my-website/docs/exception_mapping.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/my-website/docs/exception_mapping.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/my-website/docs/proxy/config_settings.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/my-website/docs/proxy/config_settings.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/my-website/docs/proxy/deploy.md‎
Lines changed: 1 addition & 4 deletions b/‎docs/my-website/docs/proxy/deploy.md‎
Lines changed: 1 addition & 4 deletions
@@ -1477,6 +1477,7 @@ jobs:
             docker run -d \
               -p 4000:4000 \
               -e DATABASE_URL=$PROXY_DATABASE_URL \
+              -e DEFAULT_NUM_WORKERS_LITELLM_PROXY=1 \
               -e DISABLE_SCHEMA_UPDATE="True" \
               -v $(pwd)/litellm/proxy/example_config_yaml/bad_schema.prisma:/app/schema.prisma \
               -v $(pwd)/litellm/proxy/example_config_yaml/bad_schema.prisma:/app/litellm/proxy/schema.prisma \
@@ -2962,6 +2963,7 @@ jobs:
           command: |
             docker run --name my-app \
               -p 4000:4000 \
+              -e DEFAULT_NUM_WORKERS_LITELLM_PROXY=1 \
               -e DATABASE_URL="postgresql://wrong:wrong@wrong:5432/wrong" \
               myapp:latest \
               --port 4000 > docker_output.log 2>&1 || true
 
@@ -18,7 +18,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.4.5
+version: 0.4.6
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 
@@ -41,6 +41,11 @@ If `db.useStackgresOperator` is used (not yet implemented):
 | `proxyConfigMap.key`        | Key in the ConfigMap that contains the proxy config file.                                                                                                                                                      | `"config.yaml"` |
 | `proxy_config.*`            | See [values.yaml](./values.yaml) for default settings. Rendered into the ConfigMap’s `config.yaml` only when `proxyConfigMap.create=true`. See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples. | `N/A` |
 | `extraContainers[]`         | An array of additional containers to be deployed as sidecars alongside the LiteLLM Proxy.
+| `pdb.enabled`                   | Enable a PodDisruptionBudget for the LiteLLM proxy Deployment                                                                 | `false` |
+| `pdb.minAvailable`             | Minimum number/percentage of pods that must be available during **voluntary** disruptions (choose **one** of minAvailable/maxUnavailable) | `null`  |
+| `pdb.maxUnavailable`           | Maximum number/percentage of pods that can be unavailable during **voluntary** disruptions (choose **one** of minAvailable/maxUnavailable) | `null`  |
+| `pdb.annotations`              | Extra metadata annotations to add to the PDB                                                                                   | `{}`    |
+| `pdb.labels`                   | Extra metadata labels to add to the PDB                                                                                        | `{}`    |
 
 #### Example `proxy_config` ConfigMap from values (default):
 
 
@@ -20,3 +20,4 @@
   echo "Visit http://127.0.0.1:8080 to use your application"
   kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
 {{- end }}
+PDB: {{ if .Values.pdb.enabled }}enabled{{ else }}disabled{{ end }}. Configure via .Values.pdb.*
@@ -0,0 +1,33 @@
+{{- /*
+PodDisruptionBudget for the LiteLLM proxy pods.
+Rendered only when .Values.pdb.enabled is true; the budget comes from .Values.pdb.{minAvailable|maxUnavailable}.
+Set only one of them: minAvailable wins if both are set, an explicit 0 is honored, and maxUnavailable: 1 is used if neither is set.
+*/ -}}
+{{- if .Values.pdb.enabled }}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "litellm.fullname" . }}
+  labels:
+    {{- include "litellm.labels" . | nindent 4 }}
+  {{- with .Values.pdb.labels }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.pdb.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  selector:
+    matchLabels:
+      {{- /* Match the Deployment selector to target the same pod set */ -}}
+      {{- include "litellm.selectorLabels" . | nindent 6 }}
+  {{- if or .Values.pdb.minAvailable (eq (toString .Values.pdb.minAvailable) "0") }}
+  minAvailable: {{ .Values.pdb.minAvailable }}
+  {{- else if or .Values.pdb.maxUnavailable (eq (toString .Values.pdb.maxUnavailable) "0") }}
+  maxUnavailable: {{ .Values.pdb.maxUnavailable }}
+  {{- else }}
+  # Safe default if enabled but not configured
+  maxUnavailable: 1
+  {{- end }}
+{{- end }}
@@ -0,0 +1,45 @@
+suite: "pdb enabled"  # the PDB template renders when pdb.enabled is true
+templates:
+  - poddisruptionbudget.yaml
+tests:
+  - it: "renders a PDB with maxUnavailable=1"
+    set:
+      pdb.enabled: true
+      pdb.maxUnavailable: 1
+    asserts:
+      - hasDocuments: { count: 1 }  # exactly one manifest is produced
+      - isKind: { of: PodDisruptionBudget }
+      - equal: { path: apiVersion, value: policy/v1 }
+      - equal: { path: spec.maxUnavailable, value: 1 }  # the configured value is passed through
+      - equal:
+          path: spec.selector.matchLabels  # selector comes from the litellm.selectorLabels helper
+          value:
+            app.kubernetes.io/name: litellm
+            app.kubernetes.io/instance: RELEASE-NAME  # presumably the test runner's default release name; confirm
+
+---
+suite: "pdb disabled"  # nothing is rendered when pdb.enabled is false
+templates:
+  - poddisruptionbudget.yaml
+tests:
+  - it: "does not render when disabled"
+    set:
+      pdb.enabled: false
+    asserts:
+      - hasDocuments: { count: 0 }  # the whole template is wrapped in `if .Values.pdb.enabled`
+
+---
+suite: "pdb minAvailable precedence"  # minAvailable wins when both fields are set
+templates:
+  - poddisruptionbudget.yaml
+tests:
+  - it: "uses minAvailable when both are set"
+    set:
+      pdb.enabled: true
+      pdb.minAvailable: "50%"
+      pdb.maxUnavailable: 1
+    asserts:
+      - isKind: { of: PodDisruptionBudget }
+      - equal: { path: apiVersion, value: policy/v1 }
+      - equal: { path: spec.minAvailable, value: "50%" }  # percentage string is emitted as given
+      - isNull: { path: spec.maxUnavailable }  # maxUnavailable must not be emitted alongside minAvailable
@@ -240,4 +240,11 @@ extraEnvVars: {
     #   value: EXTRA_ENV_VAR_VALUE
 }
 
-
+# Pod Disruption Budget for the LiteLLM proxy pods (rendered only when enabled is true)
+pdb:
+  enabled: false
+  # Set exactly one of the following. If both are set, minAvailable takes precedence; if neither is set, the chart falls back to maxUnavailable: 1.
+  minAvailable: null     # e.g. "50%" or 1
+  maxUnavailable: null   # e.g. 1 or "20%"
+  annotations: {}        # extra metadata annotations added to the PDB
+  labels: {}             # extra metadata labels added to the PDB
@@ -12,6 +12,7 @@ All exceptions can be imported from `litellm` - e.g. `from litellm import BadReq
 | 400 | UnsupportedParamsError | litellm.BadRequestError | Raised when unsupported params are passed |
 | 400         | ContextWindowExceededError| litellm.BadRequestError | Special error type for context window exceeded error messages - enables context window fallbacks |
 | 400         | ContentPolicyViolationError| litellm.BadRequestError | Special error type for content policy violation error messages - enables content policy fallbacks |
+| 400         | ImageFetchError | litellm.BadRequestError | Raised when there are errors fetching or processing images |
 | 400 | InvalidRequestError | openai.BadRequestError | Deprecated error, use BadRequestError instead |
 | 401         | AuthenticationError      | openai.AuthenticationError |
 | 403         | PermissionDeniedError    | openai.PermissionDeniedError |
 
@@ -431,6 +431,7 @@ router_settings:
 | DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT | Default token count for mock response completions. Default is 20
 | DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT | Default token count for mock response prompts. Default is 10
 | DEFAULT_MODEL_CREATED_AT_TIME | Default creation timestamp for models. Default is 1677610602
+| DEFAULT_NUM_WORKERS_LITELLM_PROXY | Default number of workers for LiteLLM proxy. Default is 4. **We strongly recommend setting the number of workers to the number of vCPUs available**
 | DEFAULT_PROMPT_INJECTION_SIMILARITY_THRESHOLD | Default threshold for prompt injection similarity. Default is 0.7
 | DEFAULT_POLLING_INTERVAL | Default polling interval for schedulers in seconds. Default is 0.03
 | DEFAULT_REASONING_EFFORT_DISABLE_THINKING_BUDGET | Default reasoning effort disable thinking budget. Default is 0
 
@@ -12,10 +12,7 @@ To start using Litellm, run the following commands in a shell:
 
 ```bash
 # Get the docker compose file
-git clone https://github.com/BerriAI/litellm
-
-# Go to folder
-cd litellm
+curl -O https://raw.githubusercontent.com/BerriAI/litellm/main/docker-compose.yml
 
 # Add the master key - you can change this after setup
 echo 'LITELLM_MASTER_KEY="sk-1234"' > .env