diff --git a/README.md b/README.md
index 67a1b9a..1f46a80 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 ## Charts
 
-- vLLM: inference and serving engine for LLMs (NVIDIA device plugin required).
+- vLLM (NVIDIA device plugin required): inference and serving engine for LLMs.
 
 `helm upgrade --install vllm charts/vllm --namespace vllm --create-namespace -f values/production/vllm.yaml`
 
@@ -10,10 +10,14 @@
 `helm upgrade --install cloudnative-pg charts/cloudnative-pg --namespace cnpg-system --create-namespace -f values/production/cloudnative-pg.yaml`
 
-- PostgreSQL: CloudNativePG required
+- PostgreSQL (CloudNativePG required)
 
 `helm upgrade --install pg-cluster charts/pg-cluster --namespace pg-cluster --create-namespace -f values/production/pg-cluster.yaml`
 
+- LiteLLM (PostgreSQL required): LLM gateway for calling 100+ LLMs, with load balancing and cost tracking across projects.
+
+`helm upgrade --install litellm charts/litellm --namespace litellm --create-namespace -f values/production/litellm.yaml`
+
 ## Dependencies
 
 - NVIDIA device plugin: used to manage NVIDIA GPUs in a K8s cluster.
diff --git a/charts/litellm/.helmignore b/charts/litellm/.helmignore
new file mode 100644
index 0000000..0e8a0eb
--- /dev/null
+++ b/charts/litellm/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/charts/litellm/Chart.lock b/charts/litellm/Chart.lock
new file mode 100644
index 0000000..f13578d
--- /dev/null
+++ b/charts/litellm/Chart.lock
@@ -0,0 +1,9 @@
+dependencies:
+- name: postgresql
+  repository: oci://registry-1.docker.io/bitnamicharts
+  version: 14.3.1
+- name: redis
+  repository: oci://registry-1.docker.io/bitnamicharts
+  version: 18.19.1
+digest: sha256:8660fe6287f9941d08c0902f3f13731079b8cecd2a5da2fbc54e5b7aae4a6f62
+generated: "2024-03-10T02:28:52.275022+05:30"
diff --git a/charts/litellm/Chart.yaml b/charts/litellm/Chart.yaml
new file mode 100644
index 0000000..aa81e4e
--- /dev/null
+++ b/charts/litellm/Chart.yaml
@@ -0,0 +1,37 @@
+apiVersion: v2
+
+# We can't call ourselves just "litellm" because then we couldn't publish to the
+# same OCI repository as the "litellm" OCI image
+name: litellm-helm
+description: Call all LLM APIs using the OpenAI format
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.4.7
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: v1.50.2
+
+dependencies:
+  - name: "postgresql"
+    version: ">=13.3.0"
+    repository: oci://registry-1.docker.io/bitnamicharts
+    condition: db.deployStandalone
+  - name: redis
+    version: ">=18.0.0"
+    repository: oci://registry-1.docker.io/bitnamicharts
+    condition: redis.enabled
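Both Bitnami subcharts declared above are gated by values flags (`db.deployStandalone`, `redis.enabled`) and are only rendered when those conditions hold. A hedged sketch of working with them; the flag names come from this chart's values, the rest is generic Helm usage:

```bash
# Fetch the pinned subchart versions recorded in Chart.lock
helm dependency build charts/litellm

# Install with the bundled Postgres disabled and the Redis subchart enabled
helm upgrade --install litellm charts/litellm \
  --namespace litellm --create-namespace \
  --set db.deployStandalone=false \
  --set redis.enabled=true
```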
diff --git a/charts/litellm/README.md b/charts/litellm/README.md
new file mode 100644
index 0000000..352c3e9
--- /dev/null
+++ b/charts/litellm/README.md
@@ -0,0 +1,187 @@
+# Helm Chart for LiteLLM
+
+> [!IMPORTANT]
+> This chart is community maintained. Please open an issue if you run into a bug.
+> We recommend using [Docker or Kubernetes for production deployments](https://docs.litellm.ai/docs/proxy/prod).
+
+## Prerequisites
+
+- Kubernetes 1.21+
+- Helm 3.8.0+
+
+If `db.deployStandalone` is used:
+- PV provisioner support in the underlying infrastructure
+
+If `db.useStackgresOperator` is used (not yet implemented):
+- The Stackgres Operator must already be installed in the Kubernetes Cluster. This chart will **not** install the operator if it is missing.
+
+## Parameters
+
+### LiteLLM Proxy Deployment Settings
+
+| Name | Description | Value |
+| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- |
+| `replicaCount` | The number of LiteLLM Proxy pods to be deployed | `1` |
+| `masterkeySecretName` | The name of the Kubernetes Secret that contains the Master API Key for LiteLLM. If not specified, use the generated secret name. | N/A |
+| `masterkeySecretKey` | The key within the Kubernetes Secret that contains the Master API Key for LiteLLM. If not specified, use `masterkey` as the key. | N/A |
+| `masterkey` | The Master API Key for LiteLLM. If not specified, a random key in the `sk-...` format is generated. | N/A |
+| `environmentSecrets` | An optional array of Secret object names. The keys and values in these secrets will be presented to the LiteLLM proxy pod as environment variables. See below for an example Secret object. | `[]` |
+| `environmentConfigMaps` | An optional array of ConfigMap object names. The keys and values in these configmaps will be presented to the LiteLLM proxy pod as environment variables. See below for an example ConfigMap object. | `[]` |
+| `image.repository` | LiteLLM Proxy image repository | `ghcr.io/berriai/litellm-database` |
+| `image.pullPolicy` | LiteLLM Proxy image pull policy | `IfNotPresent` |
+| `image.tag` | Overrides the image tag, whose default is the latest version of LiteLLM at the time this chart was published. | `""` |
+| `imagePullSecrets` | Registry credentials for the LiteLLM and initContainer images. | `[]` |
+| `serviceAccount.create` | Whether or not to create a Kubernetes Service Account for this deployment. The default is `false` because LiteLLM has no need to access the Kubernetes API. | `false` |
+| `service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
+| `service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the proxy will listen on. | `4000` |
+| `service.loadBalancerClass` | Optional LoadBalancer implementation class (only used when `service.type` is `LoadBalancer`) | `""` |
+| `ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
+| `proxyConfigMap.create` | When `true`, render a ConfigMap from `.Values.proxy_config` and mount it. | `true` |
+| `proxyConfigMap.name` | When `create=false`, name of the existing ConfigMap to mount. | `""` |
+| `proxyConfigMap.key` | Key in the ConfigMap that contains the proxy config file. | `"config.yaml"` |
+| `proxy_config.*` | See [values.yaml](./values.yaml) for default settings. Rendered into the ConfigMap’s `config.yaml` only when `proxyConfigMap.create=true`. See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples. | `N/A` |
+| `extraContainers[]` | An array of additional containers to be deployed as sidecars alongside the LiteLLM Proxy. | `[]` |
+| `pdb.enabled` | Enable a PodDisruptionBudget for the LiteLLM proxy Deployment | `false` |
+| `pdb.minAvailable` | Minimum number/percentage of pods that must be available during **voluntary** disruptions (choose **one** of minAvailable/maxUnavailable) | `null` |
+| `pdb.maxUnavailable` | Maximum number/percentage of pods that can be unavailable during **voluntary** disruptions (choose **one** of minAvailable/maxUnavailable) | `null` |
+| `pdb.annotations` | Extra metadata annotations to add to the PDB | `{}` |
+| `pdb.labels` | Extra metadata labels to add to the PDB | `{}` |
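+
+For example, a minimal values snippet that enables the PodDisruptionBudget described in the `pdb.*` rows above (set exactly one of `minAvailable`/`maxUnavailable`):
+
+```yaml
+pdb:
+  enabled: true
+  maxUnavailable: 1   # or, e.g., minAvailable: "50%"; never both
+```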
+
+#### Example `proxy_config` ConfigMap from values (default):
+
+```yaml
+proxyConfigMap:
+  create: true
+  key: "config.yaml"
+
+proxy_config:
+  general_settings:
+    master_key: os.environ/PROXY_MASTER_KEY
+  model_list:
+    - model_name: gpt-3.5-turbo
+      litellm_params:
+        model: gpt-3.5-turbo
+        api_key: eXaMpLeOnLy
+```
+
+#### Example using existing `proxyConfigMap` instead of creating it:
+
+```yaml
+proxyConfigMap:
+  create: false
+  name: my-litellm-config
+  key: config.yaml
+
+# proxy_config is ignored in this mode
+```
+
+#### Example `environmentSecrets` Secret
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-envsecrets
+data:
+  AZURE_OPENAI_API_KEY: TXlTZWN1cmVLM3k=
+type: Opaque
+```
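+
+Keys from such a Secret can then be referenced in the proxy configuration with the `os.environ/` prefix. A hedged sketch reusing the `AZURE_OPENAI_API_KEY` key above (the model alias and Azure deployment name are illustrative, not chart defaults):
+
+```yaml
+environmentSecrets:
+  - litellm-envsecrets
+
+proxy_config:
+  model_list:
+    - model_name: azure-gpt             # hypothetical alias
+      litellm_params:
+        model: azure/my-deployment      # hypothetical Azure deployment
+        api_key: os.environ/AZURE_OPENAI_API_KEY
+```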
+
+### Database Settings
+
+| Name | Description | Value |
+| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- |
+| `db.useExisting` | Use an existing Postgres database. A Kubernetes Secret object must exist that contains credentials for connecting to the database. An example secret object definition is provided below. | `false` |
+| `db.endpoint` | If `db.useExisting` is `true`, this is the IP, Hostname or Service Name of the Postgres server to connect to. | `localhost` |
+| `db.database` | If `db.useExisting` is `true`, the name of the existing database to connect to. | `litellm` |
+| `db.url` | If `db.useExisting` is `true`, the connection URL of the existing database can be overridden with this value. | `postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)` |
+| `db.secret.name` | If `db.useExisting` is `true`, the name of the Kubernetes Secret that contains credentials. | `postgres` |
+| `db.secret.usernameKey` | If `db.useExisting` is `true`, the name of the key within the Kubernetes Secret that holds the username for authenticating with the Postgres instance. | `username` |
+| `db.secret.passwordKey` | If `db.useExisting` is `true`, the name of the key within the Kubernetes Secret that holds the password associated with the above user. | `password` |
+| `db.useStackgresOperator` | Not yet implemented. | `false` |
+| `db.deployStandalone` | Deploy a standalone, single instance deployment of Postgres, using the Bitnami postgresql chart. This is useful for getting started but doesn't provide HA or (by default) data backups. | `true` |
+| `postgresql.*` | If `db.deployStandalone` is `true`, configuration passed to the Bitnami postgresql chart. See the [Bitnami Documentation](https://github.com/bitnami/charts/tree/main/bitnami/postgresql) for full configuration details. See [values.yaml](./values.yaml) for the default configuration. | See [values.yaml](./values.yaml) |
+| `postgresql.auth.*` | If `db.deployStandalone` is `true`, care should be taken to ensure the default `password` and `postgres-password` values are **NOT** used. | `NoTaGrEaTpAsSwOrD` |
+
+#### Example Postgres `db.useExisting` Secret
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: postgres
+data:
+  # Password for the "postgres" user
+  postgres-password: <base64-encoded password>
+  username: litellm
+  password: <base64-encoded password>
+type: Opaque
+```
+
+#### Examples for `environmentSecrets` and `environmentConfigMaps`
+
+```yaml
+# Use config map for not-secret configuration data
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: litellm-env-configmap
+data:
+  SOME_KEY: someValue
+  ANOTHER_KEY: anotherValue
+```
+
+```yaml
+# Use secrets for things which are actually secret like API keys, credentials, etc
+# Base64 encode the values stored in a Kubernetes Secret: $ pbpaste | base64 | pbcopy
+# The --decode flag is convenient: $ pbpaste | base64 --decode
+
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-env-secret
+type: Opaque
+data:
+  SOME_PASSWORD: cDZbUGVXeU5e0ZW # base64 encoded
+  ANOTHER_PASSWORD: AAZbUGVXeU5e0ZB # base64 encoded
+```
+
+Source: [GitHub Gist from troyharvey](https://gist.github.com/troyharvey/4506472732157221e04c6b15e3b3f094)
+
+### Migration Job Settings
+
+The migration job supports both ArgoCD and Helm hooks so that database migrations run at the appropriate time during deployments; a hedged example follows, and the individual settings are listed in the table below.
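+
+For example, a values snippet that drives the migration from Helm hooks instead of ArgoCD (keys as in the table below; `weight` is optional):
+
+```yaml
+migrationJob:
+  enabled: true
+  hooks:
+    argocd:
+      enabled: false  # turn off the ArgoCD PreSync hook when not deploying via ArgoCD
+    helm:
+      enabled: true   # run as pre-install/pre-upgrade Helm hooks instead
+      weight: "1"
+```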
+
+| Name | Description | Value |
+| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- |
+| `migrationJob.enabled` | Enable or disable the schema migration Job | `true` |
+| `migrationJob.backoffLimit` | Backoff limit for Job restarts | `4` |
+| `migrationJob.ttlSecondsAfterFinished` | TTL for completed migration jobs | `120` |
+| `migrationJob.annotations` | Additional annotations for the migration job pod | `{}` |
+| `migrationJob.extraContainers` | Additional containers to run alongside the migration job | `[]` |
+| `migrationJob.hooks.argocd.enabled` | Enable ArgoCD hooks for the migration job (uses PreSync hook with BeforeHookCreation delete policy) | `true` |
+| `migrationJob.hooks.helm.enabled` | Enable Helm hooks for the migration job (uses pre-install,pre-upgrade hooks with before-hook-creation delete policy) | `false` |
+| `migrationJob.hooks.helm.weight` | Helm hook execution order (lower weights executed first). Optional - defaults to "1" if not specified. | N/A |
+
+
+## Accessing the Admin UI
+When browsing to the URL published per the settings in `ingress.*`, you will
+be prompted for **Admin Configuration**. The **Proxy Endpoint** is the internal
+(from the `litellm` pod's perspective) URL published by the `<RELEASE>-litellm`
+Kubernetes Service. If the deployment uses the default settings for this
+service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:4000`.
+
+The **Proxy Key** is the value specified for `masterkey` or, if no `masterkey`
+was provided on the helm command line, a randomly generated string in the
+`sk-...` format stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
+
+```bash
+kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.masterkey}" | base64 --decode
+```
+
+## Admin UI Limitations
+At the time of writing, the Admin UI is unable to add models. This is because
+it would need to update the `config.yaml` file, which is exposed as a ConfigMap
+and is therefore read-only. This is a limitation of this helm chart, not the
+Admin UI itself.
diff --git a/charts/litellm/ci/test-values.yaml b/charts/litellm/ci/test-values.yaml
new file mode 100644
index 0000000..33a4df9
--- /dev/null
+++ b/charts/litellm/ci/test-values.yaml
@@ -0,0 +1,15 @@
+fullnameOverride: ""
+# Disable database deployment and configuration
+db:
+  deployStandalone: false
+  useExisting: false
+
+# Test environment variables
+envVars:
+  DD_ENV: "dev_helm"
+  DD_SERVICE: "litellm"
+  USE_DDTRACE: "true"
+
+# Disable migration job since we're not using a database
+migrationJob:
+  enabled: false
\ No newline at end of file
diff --git a/charts/litellm/templates/NOTES.txt b/charts/litellm/templates/NOTES.txt
new file mode 100644
index 0000000..017bbfa
--- /dev/null
+++ b/charts/litellm/templates/NOTES.txt
@@ -0,0 +1,23 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "litellm.fullname" .
}})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "litellm.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "litellm.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "litellm.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
+PDB: {{ if .Values.pdb.enabled }}enabled{{ else }}disabled{{ end }}. Configure via .Values.pdb.*
\ No newline at end of file
diff --git a/charts/litellm/templates/_helpers.tpl b/charts/litellm/templates/_helpers.tpl
new file mode 100644
index 0000000..a1eda28
--- /dev/null
+++ b/charts/litellm/templates/_helpers.tpl
@@ -0,0 +1,84 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "litellm.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "litellm.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "litellm.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "litellm.labels" -}}
+helm.sh/chart: {{ include "litellm.chart" . }}
+{{ include "litellm.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "litellm.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "litellm.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "litellm.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "litellm.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
+
+{{/*
+Get redis service name
+*/}}
+{{- define "litellm.redis.serviceName" -}}
+{{- if and (eq .Values.redis.architecture "standalone") .Values.redis.sentinel.enabled -}}
+{{- printf "%s-%s" .Release.Name (default "redis" .Values.redis.nameOverride | trunc 63 | trimSuffix "-") -}}
+{{- else -}}
+{{- printf "%s-%s-master" .Release.Name (default "redis" .Values.redis.nameOverride | trunc 63 | trimSuffix "-") -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Get redis service port
+*/}}
+{{- define "litellm.redis.port" -}}
+{{- if .Values.redis.sentinel.enabled -}}
+{{ .Values.redis.sentinel.service.ports.sentinel }}
+{{- else -}}
+{{ .Values.redis.master.service.ports.redis }}
+{{- end -}}
+{{- end -}}
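For orientation, with release name `litellm` and the default standalone (non-sentinel) Redis subchart, the two Redis helpers above resolve to the master service name and the Bitnami chart's default master port. A sketch of the env the deployment derives from them (values assume Bitnami defaults):

```yaml
- name: REDIS_HOST
  value: litellm-redis-master   # printf "%s-%s-master" .Release.Name "redis"
- name: REDIS_PORT
  value: "6379"                 # .Values.redis.master.service.ports.redis default
```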
diff --git a/charts/litellm/templates/configmap-litellm.yaml b/charts/litellm/templates/configmap-litellm.yaml
new file mode 100644
index 0000000..cf35917
--- /dev/null
+++ b/charts/litellm/templates/configmap-litellm.yaml
@@ -0,0 +1,9 @@
+{{- if .Values.proxyConfigMap.create }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "litellm.fullname" . }}-config
+data:
+  {{ .Values.proxyConfigMap.key | default "config.yaml" }}: |
+{{ .Values.proxy_config | toYaml | indent 6 }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/litellm/templates/deployment.yaml b/charts/litellm/templates/deployment.yaml
new file mode 100644
index 0000000..6a5a6e8
--- /dev/null
+++ b/charts/litellm/templates/deployment.yaml
@@ -0,0 +1,210 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  annotations:
+    {{- toYaml .Values.deploymentAnnotations | nindent 4 }}
+  name: {{ include "litellm.fullname" . }}
+  labels:
+    {{- include "litellm.labels" . | nindent 4 }}
+spec:
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "litellm.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      annotations:
+        {{- if .Values.proxyConfigMap.create }}
+        checksum/config: {{ include (print $.Template.BasePath "/configmap-litellm.yaml") . | sha256sum }}
+        {{- end }}
+        {{- with .Values.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      labels:
+        {{- include "litellm.labels" . | nindent 8 }}
+        {{- with .Values.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ include "litellm.name" . }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          env:
+            - name: HOST
+              value: "{{ .Values.listen | default "0.0.0.0" }}"
+            - name: PORT
+              value: {{ .Values.service.port | quote}}
+            {{- if .Values.db.deployStandalone }}
+            - name: DATABASE_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: {{ include "litellm.fullname" . }}-dbcredentials
+                  key: username
+            - name: DATABASE_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: {{ include "litellm.fullname" .
}}-dbcredentials + key: password + - name: DATABASE_HOST + value: {{ .Release.Name }}-postgresql + - name: DATABASE_NAME + value: litellm + {{- else if .Values.db.useExisting }} + - name: DATABASE_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.db.secret.name }} + key: {{ .Values.db.secret.usernameKey }} + - name: DATABASE_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.db.secret.name }} + key: {{ .Values.db.secret.passwordKey }} + - name: DATABASE_HOST + {{- if .Values.db.secret.endpointKey }} + valueFrom: + secretKeyRef: + name: {{ .Values.db.secret.name }} + key: {{ .Values.db.secret.endpointKey }} + {{- else }} + value: {{ .Values.db.endpoint }} + {{- end }} + - name: DATABASE_NAME + value: {{ .Values.db.database }} + - name: DATABASE_URL + value: {{ .Values.db.url | quote }} + {{- end }} + - name: PROXY_MASTER_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.masterkeySecretName | default (printf "%s-masterkey" (include "litellm.fullname" .)) }} + key: {{ .Values.masterkeySecretKey | default "masterkey" }} + {{- if .Values.redis.enabled }} + - name: REDIS_HOST + value: {{ include "litellm.redis.serviceName" . }} + - name: REDIS_PORT + value: {{ include "litellm.redis.port" . | quote }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "redis.secretName" .Subcharts.redis }} + key: {{include "redis.secretPasswordKey" .Subcharts.redis }} + {{- end }} + {{- if .Values.envVars }} + {{- range $key, $val := .Values.envVars }} + - name: {{ $key }} + value: {{ $val | quote }} + {{- end }} + {{- end }} + {{- if .Values.separateHealthApp }} + - name: SEPARATE_HEALTH_APP + value: "1" + - name: SEPARATE_HEALTH_PORT + value: {{ .Values.separateHealthPort | default "8081" | quote }} + {{- end }} + {{- with .Values.extraEnvVars }} + {{- toYaml . | nindent 12 }} + {{- end }} + envFrom: + {{- range .Values.environmentSecrets }} + - secretRef: + name: {{ . }} + {{- end }} + {{- range .Values.environmentConfigMaps }} + - configMapRef: + name: {{ . }} + {{- end }} + args: + - --config + - /etc/litellm/config.yaml + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- if .Values.separateHealthApp }} + - name: health + containerPort: {{ .Values.separateHealthPort | default 8081 }} + protocol: TCP + {{- end }} + livenessProbe: + httpGet: + path: /health/liveliness + port: {{ if .Values.separateHealthApp }}"health"{{ else }}"http"{{ end }} + readinessProbe: + httpGet: + path: /health/readiness + port: {{ if .Values.separateHealthApp }}"health"{{ else }}"http"{{ end }} + startupProbe: + httpGet: + path: /health/readiness + port: {{ if .Values.separateHealthApp }}"health"{{ else }}"http"{{ end }} + failureThreshold: 30 + periodSeconds: 10 + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: litellm-config + mountPath: /etc/litellm/ + {{ if .Values.securityContext.readOnlyRootFilesystem }} + - name: tmp + mountPath: /tmp + - name: cache + mountPath: /.cache + - name: npm + mountPath: /.npm + {{- end }} + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.extraContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + {{ if .Values.securityContext.readOnlyRootFilesystem }} + - name: tmp + emptyDir: + sizeLimit: 500Mi + - name: cache + emptyDir: + sizeLimit: 500Mi + - name: npm + emptyDir: + sizeLimit: 500Mi + {{- end }} + - name: litellm-config + configMap: + {{- if .Values.proxyConfigMap.create }} + name: {{ include "litellm.fullname" . 
}}-config + {{- else }} + name: {{ .Values.proxyConfigMap.name }} + {{- end }} + items: + - key: {{ .Values.proxyConfigMap.key | default "config.yaml" }} + path: "config.yaml" + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/charts/litellm/templates/hpa.yaml b/charts/litellm/templates/hpa.yaml new file mode 100644 index 0000000..71e199c --- /dev/null +++ b/charts/litellm/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "litellm.fullname" . }} + labels: + {{- include "litellm.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "litellm.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/charts/litellm/templates/ingress.yaml b/charts/litellm/templates/ingress.yaml new file mode 100644 index 0000000..09e8d71 --- /dev/null +++ b/charts/litellm/templates/ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "litellm.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "litellm.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/litellm/templates/migrations-job.yaml b/charts/litellm/templates/migrations-job.yaml new file mode 100644 index 0000000..243a4ba --- /dev/null +++ b/charts/litellm/templates/migrations-job.yaml @@ -0,0 +1,109 @@ +{{- if .Values.migrationJob.enabled }} +# This job runs the Prisma migrations for the LiteLLM DB. +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "litellm.fullname" . }}-migrations + labels: + {{- include "litellm.labels" . | nindent 4 }} + annotations: + {{- if .Values.migrationJob.hooks.argocd.enabled }} + argocd.argoproj.io/hook: PreSync + argocd.argoproj.io/hook-delete-policy: BeforeHookCreation + {{- end }} + {{- if .Values.migrationJob.hooks.helm.enabled }} + helm.sh/hook: "pre-install,pre-upgrade" + helm.sh/hook-delete-policy: "before-hook-creation" + helm.sh/hook-weight: {{ .Values.migrationJob.hooks.helm.weight | default "1" | quote }} + {{- end }} + checksum/config: {{ toYaml .Values | sha256sum }} +spec: + template: + metadata: + labels: + {{- include "litellm.labels" . | nindent 8 }} + annotations: + {{- with .Values.migrationJob.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "litellm.serviceAccountName" . }} + containers: + - name: prisma-migrations + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "main-%s" .Chart.AppVersion) }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + command: ["python", "litellm/proxy/prisma_migration.py"] + workingDir: "/app" + env: + {{- if .Values.db.useExisting }} + - name: DATABASE_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.db.secret.name }} + key: {{ .Values.db.secret.usernameKey }} + - name: DATABASE_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.db.secret.name }} + key: {{ .Values.db.secret.passwordKey }} + - name: DATABASE_HOST + {{- if .Values.db.secret.endpointKey }} + valueFrom: + secretKeyRef: + name: {{ .Values.db.secret.name }} + key: {{ .Values.db.secret.endpointKey }} + {{- else }} + value: {{ .Values.db.endpoint }} + {{- end }} + - name: DATABASE_NAME + value: {{ .Values.db.database }} + - name: DATABASE_URL + value: {{ .Values.db.url | quote }} + {{- else if .Values.db.deployStandalone }} + - name: DATABASE_URL + value: postgresql://{{ .Values.postgresql.auth.username }}:{{ .Values.postgresql.auth.password }}@{{ .Release.Name }}-postgresql/{{ .Values.postgresql.auth.database }} + {{- end }} + {{- if .Values.envVars }} + {{- range $key, $val := .Values.envVars }} + - name: {{ $key }} + value: {{ $val | quote }} + {{- end }} + {{- end }} + {{- with .Values.extraEnvVars }} + {{- toYaml . 
| nindent 12 }}
+          {{- end }}
+          - name: DISABLE_SCHEMA_UPDATE
+            value: "false" # always run the migration from the hook, overriding any disableSchemaUpdate value set in values.yaml
+          {{- with .Values.volumeMounts }}
+          volumeMounts:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.migrationJob.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+        {{- with .Values.migrationJob.extraContainers }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- with .Values.volumes }}
+      volumes:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      restartPolicy: OnFailure
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+  ttlSecondsAfterFinished: {{ .Values.migrationJob.ttlSecondsAfterFinished }}
+  backoffLimit: {{ .Values.migrationJob.backoffLimit }}
+{{- end }}
diff --git a/charts/litellm/templates/poddisruptionbudget.yaml b/charts/litellm/templates/poddisruptionbudget.yaml
new file mode 100644
index 0000000..1715b94
--- /dev/null
+++ b/charts/litellm/templates/poddisruptionbudget.yaml
@@ -0,0 +1,33 @@
+{{- /*
+PodDisruptionBudget for LiteLLM proxy
+Controlled via .Values.pdb.enabled and .Values.pdb.{minAvailable|maxUnavailable}
+Only one of minAvailable / maxUnavailable should be set. If both are set, minAvailable wins.
+*/ -}}
+{{- if .Values.pdb.enabled }}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "litellm.fullname" . }}
+  labels:
+    {{- include "litellm.labels" . | nindent 4 }}
+    {{- with .Values.pdb.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.pdb.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  selector:
+    matchLabels:
+      {{- /* Match the Deployment selector to target the same pod set */ -}}
+      {{- include "litellm.selectorLabels" . | nindent 6 }}
+  {{- if .Values.pdb.minAvailable }}
+  minAvailable: {{ .Values.pdb.minAvailable }}
+  {{- else if .Values.pdb.maxUnavailable }}
+  maxUnavailable: {{ .Values.pdb.maxUnavailable }}
+  {{- else }}
+  # Safe default if enabled but not configured
+  maxUnavailable: 1
+  {{- end }}
+{{- end }}
diff --git a/charts/litellm/templates/secret-dbcredentials.yaml b/charts/litellm/templates/secret-dbcredentials.yaml
new file mode 100644
index 0000000..8851f58
--- /dev/null
+++ b/charts/litellm/templates/secret-dbcredentials.yaml
@@ -0,0 +1,12 @@
+{{- if .Values.db.deployStandalone -}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "litellm.fullname" . }}-dbcredentials
+data:
+  # Password for the "postgres" user
+  postgres-password: {{ ( index .Values.postgresql.auth "postgres-password") | default "litellm" | b64enc }}
+  username: {{ .Values.postgresql.auth.username | default "litellm" | b64enc }}
+  password: {{ .Values.postgresql.auth.password | default "litellm" | b64enc }}
+type: Opaque
+{{- end -}}
\ No newline at end of file
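The generated credentials Secret above falls back to `litellm` for any unset field, and the shipped values use a placeholder password, so real deployments should override both on the command line as the values file suggests. A hedged sketch (`openssl rand` is just one way to generate a password):

```bash
helm upgrade --install litellm charts/litellm --namespace litellm \
  --set postgresql.auth.password="$(openssl rand -hex 16)" \
  --set postgresql.auth.postgres-password="$(openssl rand -hex 16)"
```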
diff --git a/charts/litellm/templates/secret-masterkey.yaml b/charts/litellm/templates/secret-masterkey.yaml
new file mode 100644
index 0000000..7c8560c
--- /dev/null
+++ b/charts/litellm/templates/secret-masterkey.yaml
@@ -0,0 +1,10 @@
+{{- if not .Values.masterkeySecretName }}
+{{ $masterkey := (.Values.masterkey | default (printf "sk-%s" (randAlphaNum 18))) }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "litellm.fullname" . }}-masterkey
+data:
+  masterkey: {{ $masterkey | b64enc }}
+type: Opaque
+{{- end }}
diff --git a/charts/litellm/templates/service.yaml b/charts/litellm/templates/service.yaml
new file mode 100644
index 0000000..1181220
--- /dev/null
+++ b/charts/litellm/templates/service.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "litellm.fullname" . }}
+  {{- with .Values.service.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  labels:
+    {{- include "litellm.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  {{- if and (eq .Values.service.type "LoadBalancer") .Values.service.loadBalancerClass }}
+  loadBalancerClass: {{ .Values.service.loadBalancerClass }}
+  {{- end }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "litellm.selectorLabels" . | nindent 4 }}
diff --git a/charts/litellm/templates/serviceaccount.yaml b/charts/litellm/templates/serviceaccount.yaml
new file mode 100644
index 0000000..7655470
--- /dev/null
+++ b/charts/litellm/templates/serviceaccount.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "litellm.serviceAccountName" . }}
+  labels:
+    {{- include "litellm.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
+{{- end }}
diff --git a/charts/litellm/templates/tests/test-connection.yaml b/charts/litellm/templates/tests/test-connection.yaml
new file mode 100644
index 0000000..86a8f66
--- /dev/null
+++ b/charts/litellm/templates/tests/test-connection.yaml
@@ -0,0 +1,25 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "litellm.fullname" . }}-test-connection"
+  labels:
+    {{- include "litellm.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+spec:
+  containers:
+    - name: wget
+      image: busybox
+      command: ['sh', '-c']
+      args:
+        - |
+          # Wait for a bit to allow the service to be ready
+          sleep 10
+          # Try multiple times with a delay between attempts
+          for i in $(seq 1 30); do
+            wget -T 5 "{{ include "litellm.fullname" . }}:{{ .Values.service.port }}/health/readiness" && exit 0
+            echo "Attempt $i failed, waiting..."
+            sleep 2
+          done
+          exit 1
+  restartPolicy: Never
\ No newline at end of file
diff --git a/charts/litellm/templates/tests/test-env-vars.yaml b/charts/litellm/templates/tests/test-env-vars.yaml
new file mode 100644
index 0000000..9f02775
--- /dev/null
+++ b/charts/litellm/templates/tests/test-env-vars.yaml
@@ -0,0 +1,43 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "litellm.fullname" . }}-env-test"
+  labels:
+    {{- include "litellm.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+spec:
+  containers:
+    - name: test
+      image: busybox
+      command: ['sh', '-c']
+      args:
+        - |
+          # Test DD_ENV
+          if [ "$DD_ENV" != "dev_helm" ]; then
+            echo "❌ Environment variable DD_ENV mismatch. Expected: dev_helm, Got: $DD_ENV"
+            exit 1
+          fi
+          echo "✅ Environment variable DD_ENV matches expected value: $DD_ENV"
+
+          # Test DD_SERVICE
+          if [ "$DD_SERVICE" != "litellm" ]; then
+            echo "❌ Environment variable DD_SERVICE mismatch. Expected: litellm, Got: $DD_SERVICE"
+            exit 1
+          fi
+          echo "✅ Environment variable DD_SERVICE matches expected value: $DD_SERVICE"
+
+          # Test USE_DDTRACE
+          if [ "$USE_DDTRACE" != "true" ]; then
+            echo "❌ Environment variable USE_DDTRACE mismatch.
Expected: true, Got: $USE_DDTRACE" + exit 1 + fi + echo "✅ Environment variable USE_DDTRACE matches expected value: $USE_DDTRACE" + env: + - name: DD_ENV + value: {{ .Values.envVars.DD_ENV | quote }} + - name: DD_SERVICE + value: {{ .Values.envVars.DD_SERVICE | quote }} + - name: USE_DDTRACE + value: {{ .Values.envVars.USE_DDTRACE | quote }} + restartPolicy: Never \ No newline at end of file diff --git a/charts/litellm/tests/deployment_tests.yaml b/charts/litellm/tests/deployment_tests.yaml new file mode 100644 index 0000000..f9c8396 --- /dev/null +++ b/charts/litellm/tests/deployment_tests.yaml @@ -0,0 +1,139 @@ +suite: test deployment +templates: + - deployment.yaml + - configmap-litellm.yaml +tests: + - it: should work + template: deployment.yaml + set: + image.tag: test + asserts: + - isKind: + of: Deployment + - matchRegex: + path: metadata.name + pattern: -litellm$ + - equal: + path: spec.template.spec.containers[0].image + value: ghcr.io/berriai/litellm-database:test + - it: should work with tolerations + template: deployment.yaml + set: + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + asserts: + - equal: + path: spec.template.spec.tolerations[0].key + value: node-role.kubernetes.io/master + - equal: + path: spec.template.spec.tolerations[0].operator + value: Exists + - it: should work with affinity + template: deployment.yaml + set: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - antarctica-east1 + asserts: + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key + value: topology.kubernetes.io/zone + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator + value: In + - equal: + path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0] + value: antarctica-east1 + - it: should work without masterkeySecretName or masterkeySecretKey + template: deployment.yaml + set: + masterkeySecretName: "" + masterkeySecretKey: "" + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: PROXY_MASTER_KEY + valueFrom: + secretKeyRef: + name: RELEASE-NAME-litellm-masterkey + key: masterkey + - it: should work with masterkeySecretName and masterkeySecretKey + template: deployment.yaml + set: + masterkeySecretName: my-secret + masterkeySecretKey: my-key + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: PROXY_MASTER_KEY + valueFrom: + secretKeyRef: + name: my-secret + key: my-key + - it: should work with extraEnvVars + template: deployment.yaml + set: + extraEnvVars: + - name: EXTRA_ENV_VAR + valueFrom: + fieldRef: + fieldPath: metadata.labels['env'] + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: EXTRA_ENV_VAR + valueFrom: + fieldRef: + fieldPath: metadata.labels['env'] + - it: should work with both extraEnvVars and envVars + template: deployment.yaml + set: + envVars: + ENV_VAR: ENV_VAR_VALUE + extraEnvVars: + - name: EXTRA_ENV_VAR + value: EXTRA_ENV_VAR_VALUE + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: ENV_VAR + value: ENV_VAR_VALUE + - contains: + path: 
spec.template.spec.containers[0].env + content: + name: EXTRA_ENV_VAR + value: EXTRA_ENV_VAR_VALUE + - it: should mount existing configmap when create=false + template: deployment.yaml + set: + proxyConfigMap: + create: false + name: my-litellm-config + key: custom.yaml + asserts: + - contains: + path: spec.template.spec.volumes + content: + name: litellm-config + configMap: + name: my-litellm-config + items: + - key: custom.yaml + path: config.yaml + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: litellm-config + mountPath: /etc/litellm/ \ No newline at end of file diff --git a/charts/litellm/tests/masterkey-secret_tests.yaml b/charts/litellm/tests/masterkey-secret_tests.yaml new file mode 100644 index 0000000..bbbade9 --- /dev/null +++ b/charts/litellm/tests/masterkey-secret_tests.yaml @@ -0,0 +1,24 @@ +suite: test masterkey secret +templates: + - secret-masterkey.yaml +tests: + - it: should create a secret if masterkeySecretName is not set. should start with sk-xxxx (base64 encoded as c2st*) + template: secret-masterkey.yaml + set: + masterkeySecretName: "" + asserts: + - isKind: + of: Secret + - matchRegex: + path: data.masterkey + pattern: ^c2st + # Note: The masterkey is generated as "sk-<18-random-chars>" in plain text, + # but stored as base64 encoded in Kubernetes secret (requirement). + # "sk-" base64 encodes to "c2st", so we check for "^c2st" pattern. + - it: should not create a secret if masterkeySecretName is set + template: secret-masterkey.yaml + set: + masterkeySecretName: my-secret + asserts: + - hasDocuments: + count: 0 diff --git a/charts/litellm/tests/migrations-job_tests.yaml b/charts/litellm/tests/migrations-job_tests.yaml new file mode 100644 index 0000000..3a7bfa5 --- /dev/null +++ b/charts/litellm/tests/migrations-job_tests.yaml @@ -0,0 +1,127 @@ +suite: test migrations job +templates: + - migrations-job.yaml +tests: + - it: should work with envVars + template: migrations-job.yaml + set: + envVars: + TEST_ENV_VAR: "test_value" + ANOTHER_VAR: "another_value" + migrationJob: + enabled: true + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: TEST_ENV_VAR + value: "test_value" + - contains: + path: spec.template.spec.containers[0].env + content: + name: ANOTHER_VAR + value: "another_value" + + - it: should work with extraEnvVars + template: migrations-job.yaml + set: + extraEnvVars: + - name: EXTRA_ENV_VAR + valueFrom: + fieldRef: + fieldPath: metadata.labels['env'] + - name: SIMPLE_EXTRA_VAR + value: "simple_value" + migrationJob: + enabled: true + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: EXTRA_ENV_VAR + valueFrom: + fieldRef: + fieldPath: metadata.labels['env'] + - contains: + path: spec.template.spec.containers[0].env + content: + name: SIMPLE_EXTRA_VAR + value: "simple_value" + + - it: should work with both envVars and extraEnvVars + template: migrations-job.yaml + set: + envVars: + ENV_VAR: "env_var_value" + extraEnvVars: + - name: EXTRA_ENV_VAR + value: "extra_env_var_value" + migrationJob: + enabled: true + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: ENV_VAR + value: "env_var_value" + - contains: + path: spec.template.spec.containers[0].env + content: + name: EXTRA_ENV_VAR + value: "extra_env_var_value" + + - it: should not render when migrations job is disabled + template: migrations-job.yaml + set: + migrationJob: + enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should still 
include default env vars + template: migrations-job.yaml + set: + envVars: + CUSTOM_VAR: "custom_value" + migrationJob: + enabled: true + db: + useExisting: true + endpoint: "test-db" + database: "testdb" + url: "postgresql://user:pass@test-db:5432/testdb" + secret: + name: "test-secret" + usernameKey: "username" + passwordKey: "password" + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: DISABLE_SCHEMA_UPDATE + value: "false" + - contains: + path: spec.template.spec.containers[0].env + content: + name: DATABASE_HOST + value: "test-db" + - contains: + path: spec.template.spec.containers[0].env + content: + name: CUSTOM_VAR + value: "custom_value" + + - it: should not include DATABASE_URL when deployStandalone is false + template: migrations-job.yaml + set: + migrationJob: + enabled: true + db: + deployStandalone: false + useExisting: false + asserts: + - notContains: + path: spec.template.spec.containers[0].env + content: + name: DATABASE_URL \ No newline at end of file diff --git a/charts/litellm/tests/pdb_tests.yaml b/charts/litellm/tests/pdb_tests.yaml new file mode 100644 index 0000000..5e042e8 --- /dev/null +++ b/charts/litellm/tests/pdb_tests.yaml @@ -0,0 +1,45 @@ +suite: "pdb enabled" +templates: + - poddisruptionbudget.yaml +tests: + - it: "renders a PDB with maxUnavailable=1" + set: + pdb.enabled: true + pdb.maxUnavailable: 1 + asserts: + - hasDocuments: { count: 1 } + - isKind: { of: PodDisruptionBudget } + - equal: { path: apiVersion, value: policy/v1 } + - equal: { path: spec.maxUnavailable, value: 1 } + - equal: + path: spec.selector.matchLabels + value: + app.kubernetes.io/name: litellm + app.kubernetes.io/instance: RELEASE-NAME + +--- +suite: "pdb disabled" +templates: + - poddisruptionbudget.yaml +tests: + - it: "does not render when disabled" + set: + pdb.enabled: false + asserts: + - hasDocuments: { count: 0 } + +--- +suite: "pdb minAvailable precedence" +templates: + - poddisruptionbudget.yaml +tests: + - it: "uses minAvailable when both are set" + set: + pdb.enabled: true + pdb.minAvailable: "50%" + pdb.maxUnavailable: 1 + asserts: + - isKind: { of: PodDisruptionBudget } + - equal: { path: apiVersion, value: policy/v1 } + - equal: { path: spec.minAvailable, value: "50%" } + - isNull: { path: spec.maxUnavailable } diff --git a/charts/litellm/tests/service_tests.yaml b/charts/litellm/tests/service_tests.yaml new file mode 100644 index 0000000..43ed018 --- /dev/null +++ b/charts/litellm/tests/service_tests.yaml @@ -0,0 +1,116 @@ +suite: Service Configuration Tests +templates: + - service.yaml +tests: + - it: should create a default ClusterIP service + template: service.yaml + asserts: + - isKind: + of: Service + - equal: + path: spec.type + value: ClusterIP + - equal: + path: spec.ports[0].port + value: 4000 + - equal: + path: spec.ports[0].targetPort + value: http + - equal: + path: spec.ports[0].protocol + value: TCP + - equal: + path: spec.ports[0].name + value: http + - isNull: + path: spec.loadBalancerClass + + - it: should create a NodePort service when specified + template: service.yaml + set: + service.type: NodePort + asserts: + - isKind: + of: Service + - equal: + path: spec.type + value: NodePort + - isNull: + path: spec.loadBalancerClass + + - it: should create a LoadBalancer service when specified + template: service.yaml + set: + service.type: LoadBalancer + asserts: + - isKind: + of: Service + - equal: + path: spec.type + value: LoadBalancer + - isNull: + path: spec.loadBalancerClass + + - it: should add 
loadBalancerClass when specified with LoadBalancer type + template: service.yaml + set: + service.type: LoadBalancer + service.loadBalancerClass: tailscale + asserts: + - isKind: + of: Service + - equal: + path: spec.type + value: LoadBalancer + - equal: + path: spec.loadBalancerClass + value: tailscale + + - it: should not add loadBalancerClass when specified with ClusterIP type + template: service.yaml + set: + service.type: ClusterIP + service.loadBalancerClass: tailscale + asserts: + - isKind: + of: Service + - equal: + path: spec.type + value: ClusterIP + - isNull: + path: spec.loadBalancerClass + + - it: should use custom port when specified + template: service.yaml + set: + service.port: 8080 + asserts: + - equal: + path: spec.ports[0].port + value: 8080 + + - it: should add service annotations when specified + template: service.yaml + set: + service.annotations: + cloud.google.com/load-balancer-type: "Internal" + service.beta.kubernetes.io/aws-load-balancer-internal: "true" + asserts: + - isKind: + of: Service + - equal: + path: metadata.annotations + value: + cloud.google.com/load-balancer-type: "Internal" + service.beta.kubernetes.io/aws-load-balancer-internal: "true" + + - it: should use the correct selector labels + template: service.yaml + asserts: + - isNotNull: + path: spec.selector + - equal: + path: spec.selector + value: + app.kubernetes.io/name: litellm + app.kubernetes.io/instance: RELEASE-NAME diff --git a/values/production/litellm.yaml b/values/production/litellm.yaml new file mode 100644 index 0000000..9690fd6 --- /dev/null +++ b/values/production/litellm.yaml @@ -0,0 +1,250 @@ +# Default values for litellm. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + # Use "ghcr.io/berriai/litellm-database" for optimized image with database + repository: ghcr.io/berriai/litellm-database + pullPolicy: Always + # Overrides the image tag whose default is the chart appVersion. + # tag: "main-latest" + tag: "" + +imagePullSecrets: [] +nameOverride: "litellm" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: false + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# annotations for litellm deployment +deploymentAnnotations: {} +# annotations for litellm pods +podAnnotations: {} +podLabels: {} + +# At the time of writing, the litellm docker image requires write access to the +# filesystem on startup so that prisma can install some dependencies. +podSecurityContext: {} +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: false + # runAsNonRoot: true + # runAsUser: 1000 + +# A list of Kubernetes Secret objects that will be exported to the LiteLLM proxy +# pod as environment variables. These secrets can then be referenced in the +# configuration file (or "litellm" ConfigMap) with `os.environ/` +environmentSecrets: [] + # - litellm-env-secret + +# A list of Kubernetes ConfigMap objects that will be exported to the LiteLLM proxy +# pod as environment variables. 
The ConfigMap kv-pairs can then be referenced in the +# configuration file (or "litellm" ConfigMap) with `os.environ/` +environmentConfigMaps: [] + # - litellm-env-configmap + +service: + type: ClusterIP + port: 4000 + # If service type is `LoadBalancer` you can + # optionally specify loadBalancerClass + # loadBalancerClass: tailscale + +# Separate health app configuration +# When enabled, health checks will use a separate port and the application +# will receive SEPARATE_HEALTH_APP=1 and SEPARATE_HEALTH_PORT from environment variables +separateHealthApp: false +separateHealthPort: 8081 + +ingress: + enabled: false + className: "nginx" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: api.example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +# masterkey: changeit + +# if set, use this secret for the master key; otherwise, autogenerate a new one +masterkeySecretName: "" + +# if set, use this secret key for the master key; otherwise, use the default key +masterkeySecretKey: "" + +proxyConfigMap: + # when true, creates a new configmap + create: true + # if create is false and name is set, use existing ConfigMap + # create: false + # name: "" + # key: "config.yaml" + +# The elements within proxy_config are rendered as config.yaml for the proxy +# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml +# Reference: https://docs.litellm.ai/docs/proxy/configs +proxy_config: + model_list: + # At least one model must exist for the proxy to start. + - model_name: opt-125m + litellm_params: + model: opt-125m + api_key: eXaMpLeOnLy + # - model_name: fake-openai-endpoint + # litellm_params: + # model: openai/fake + # api_key: fake-key + # api_base: https://exampleopenaiendpoint-production.up.railway.app/ + general_settings: + master_key: os.environ/PROXY_MASTER_KEY + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: [] +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts on the output Deployment definition. +volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +db: + # Use an existing postgres server/cluster + useExisting: false + + # How to connect to the existing postgres server/cluster + endpoint: localhost + database: litellm + url: postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME) + secret: + name: postgres + usernameKey: username + passwordKey: password + # Optional: when set, DATABASE_HOST will be sourced from this secret key instead of db.endpoint + endpointKey: "" + + # Use the Stackgres Helm chart to deploy an instance of a Stackgres cluster. 
+  # The Stackgres Operator must already be installed within the target
+  # Kubernetes cluster.
+  # TODO: Stackgres deployment currently unsupported
+  useStackgresOperator: false
+
+  # Use the Postgres Helm chart to create a single-node, standalone Postgres
+  # instance. See the "postgresql" top level key for additional configuration.
+  deployStandalone: true
+
+# Settings for the Bitnami postgresql chart (used if db.deployStandalone is
+# true, ignored otherwise)
+postgresql:
+  architecture: standalone
+  auth:
+    username: litellm
+    database: litellm
+
+    # You should override these on the helm command line with
+    # `--set postgresql.auth.postgres-password=,postgresql.auth.password=`
+    password: NoTaGrEaTpAsSwOrD
+    postgres-password: NoTaGrEaTpAsSwOrD
+
+  # A secret is created by this chart (litellm-helm) with the credentials that
+  # the new Postgres instance should use.
+  # existingSecret: ""
+  # secretKeys:
+  #   userPasswordKey: password
+
+# Requires `cache: true` in the config file.
+# Either enable this, or pass a secret with REDIS_HOST, REDIS_PORT, REDIS_PASSWORD
+# or REDIS_URL (together with `cache: true`) to use an existing Redis instance.
+redis:
+  enabled: false
+  architecture: standalone
+
+# Prisma migration job settings
+migrationJob:
+  enabled: true # Enable or disable the schema migration Job
+  retries: 3 # Number of retries for the Job in case of failure
+  backoffLimit: 4 # Backoff limit for Job restarts
+  disableSchemaUpdate: false # Skip schema migrations for specific environments. When true, the job will exit with code 0.
+  annotations: {}
+  ttlSecondsAfterFinished: 120
+  resources: {}
+  #   requests:
+  #     cpu: 100m
+  #     memory: 100Mi
+  extraContainers: []
+
+  # Hook configuration
+  hooks:
+    argocd:
+      enabled: true
+    helm:
+      enabled: false
+
+# Additional environment variables to be added to the deployment as a map of key-value pairs
+envVars: {}
+# USE_DDTRACE: "true"
+
+# Additional environment variables to be added to the deployment as a list of k8s env vars
+extraEnvVars: []
+# - name: EXTRA_ENV_VAR
+#   value: EXTRA_ENV_VAR_VALUE
+
+# Pod Disruption Budget
+pdb:
+  enabled: false
+  # Set exactly one of the following. If both are set, minAvailable takes precedence.
+  minAvailable: null # e.g. "50%" or 1
+  maxUnavailable: null # e.g. 1 or "20%"
+  annotations: {}
+  labels: {}
diff --git a/values/staging/cloudnative-pg.yaml b/values/staging/cloudnative-pg.yaml
new file mode 100644
index 0000000..cbba750
--- /dev/null
+++ b/values/staging/cloudnative-pg.yaml
@@ -0,0 +1,689 @@
+#
+# Copyright © contributors to CloudNativePG, established as
+# CloudNativePG a Series of LF Projects, LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Default values for CloudNativePG.
+# This is a YAML-formatted file.
+# Please declare variables to be passed to your templates.
+
+replicaCount: 1
+
+image:
+  repository: ghcr.io/cloudnative-pg/cloudnative-pg
+  pullPolicy: IfNotPresent
+  # -- Overrides the image tag whose default is the chart appVersion.
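+  # For example, to pin the operator to a fixed release instead of tracking
+  # the chart appVersion (the version shown is illustrative only):
+  # tag: "1.25.0"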
+ tag: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" +namespaceOverride: "" + +hostNetwork: false +dnsPolicy: "" + +# -- Update strategy for the operator. +# ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy +# For example: +# type: RollingUpdate +# rollingUpdate: +# maxSurge: 25% +# maxUnavailable: 25% +updateStrategy: {} + +crds: + # -- Specifies whether the CRDs should be created when installing the chart. + create: true + +# -- The webhook configuration. +webhook: + port: 9443 + mutating: + create: true + failurePolicy: Fail + validating: + create: true + failurePolicy: Fail + livenessProbe: + initialDelaySeconds: 3 + readinessProbe: + initialDelaySeconds: 3 + startupProbe: + failureThreshold: 6 + periodSeconds: 5 + +# Operator configuration. +config: + # -- Specifies whether the secret should be created. + create: true + # -- The name of the configmap/secret to use. + name: cnpg-controller-manager-config + # -- Specifies whether it should be stored in a secret, instead of a configmap. + secret: false + # -- This option determines if the operator is responsible for observing + # events across the entire Kubernetes cluster or if its focus should be + # narrowed down to the specific namespace within which it has been deployed. + clusterWide: true + # -- The content of the configmap/secret, see + # https://cloudnative-pg.io/documentation/current/operator_conf/#available-options + # for all the available options. + data: {} + # INHERITED_ANNOTATIONS: categories + # INHERITED_LABELS: environment, workload, app + # WATCH_NAMESPACE: namespace-a,namespace-b + # -- The maximum number of concurrent reconciles. Defaults to 10. + maxConcurrentReconciles: 10 + +# -- Additional arguments to be added to the operator's args list. +additionalArgs: [] + +# -- Array containing extra environment variables which can be templated. +# For example: +# - name: RELEASE_NAME +# value: "{{ .Release.Name }}" +# - name: MY_VAR +# value: "mySpecialKey" +additionalEnv: [] + +serviceAccount: + # -- Specifies whether the service account should be created. + create: true + # -- The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template. + name: "" + +rbac: + # -- Specifies whether ClusterRole and ClusterRoleBinding should be created. + create: true + # -- Aggregate ClusterRoles to Kubernetes default user-facing roles. + # Ref: https://kubernetes.io/docs/reference/access-authn-authz/rbac/#user-facing-roles + aggregateClusterRoles: false + +# -- Annotations to be added to all other resources. +commonAnnotations: {} +# -- Annotations to be added to the pod. +podAnnotations: {} +# -- Labels to be added to the pod. +podLabels: {} + +# -- Container Security Context. +containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 10001 + runAsGroup: 10001 + seccompProfile: + type: RuntimeDefault + capabilities: + drop: + - "ALL" + +# -- Security Context for the whole pod. +podSecurityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + # fsGroup: 2000 + +# -- Priority indicates the importance of a Pod relative to other Pods. 
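+# For example, to schedule the operator ahead of regular workloads, reference
+# an existing PriorityClass (the name below is hypothetical):
+# priorityClassName: infra-critical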
+priorityClassName: "" + +service: + type: ClusterIP + # -- DO NOT CHANGE THE SERVICE NAME as it is currently used to generate the certificate + # and can not be configured + name: cnpg-webhook-service + port: 443 + # -- Set the ip family policy to configure dual-stack see [Configure dual-stack](https://kubernetes.io/docs/concepts/services-networking/dual-stack/#services) + ipFamilyPolicy: "" + # -- Sets the families that should be supported and the order in which they should be applied to ClusterIP as well. Can be IPv4 and/or IPv6. + ipFamilies: [] + +resources: {} + # If you want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # + # limits: + # cpu: 100m + # memory: 200Mi + # requests: + # cpu: 100m + # memory: 100Mi + +# -- Nodeselector for the operator to be installed. +nodeSelector: {} + +# -- Topology Spread Constraints for the operator to be installed. +topologySpreadConstraints: [] + +# -- Tolerations for the operator to be installed. +tolerations: [] + +# -- Affinity for the operator to be installed. +affinity: {} + +monitoring: + + # -- Specifies whether the monitoring should be enabled. Requires Prometheus Operator CRDs. + podMonitorEnabled: false + # -- Metrics relabel configurations to apply to samples before ingestion. + podMonitorMetricRelabelings: [] + # -- Relabel configurations to apply to samples before scraping. + podMonitorRelabelings: [] + # -- Additional labels for the podMonitor + podMonitorAdditionalLabels: {} + + grafanaDashboard: + create: false + # -- Allows overriding the namespace where the ConfigMap will be created, defaulting to the same one as the Release. + namespace: "" + # -- The name of the ConfigMap containing the dashboard. + configMapName: "cnpg-grafana-dashboard" + # -- Label that ConfigMaps should have to be loaded as dashboards. DEPRECATED: Use labels instead. + sidecarLabel: "grafana_dashboard" + # -- Label value that ConfigMaps should have to be loaded as dashboards. DEPRECATED: Use labels instead. + sidecarLabelValue: "1" + # -- Labels that ConfigMaps should have to get configured in Grafana. + labels: {} + # -- Annotations that ConfigMaps can have to get configured in Grafana. + annotations: {} + +# Default monitoring queries +monitoringQueriesConfigMap: + # -- The name of the default monitoring configmap. + name: cnpg-default-monitoring + # -- A string representation of a YAML defining monitoring queries. 
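+  # Each top-level key below defines one metric family: a SQL `query` plus a
+  # `metrics` list that maps every selected column to a Prometheus usage
+  # (LABEL for dimensions, GAUGE or COUNTER for values).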
+ queries: | + backends: + query: | + SELECT sa.datname + , sa.usename + , sa.application_name + , states.state + , COALESCE(sa.count, 0) AS total + , COALESCE(sa.max_tx_secs, 0) AS max_tx_duration_seconds + FROM ( VALUES ('active') + , ('idle') + , ('idle in transaction') + , ('idle in transaction (aborted)') + , ('fastpath function call') + , ('disabled') + ) AS states(state) + LEFT JOIN ( + SELECT datname + , state + , usename + , COALESCE(application_name, '') AS application_name + , COUNT(*) + , COALESCE(EXTRACT (EPOCH FROM (max(now() - xact_start))), 0) AS max_tx_secs + FROM pg_catalog.pg_stat_activity + GROUP BY datname, state, usename, application_name + ) sa ON states.state = sa.state + WHERE sa.usename IS NOT NULL + metrics: + - datname: + usage: "LABEL" + description: "Name of the database" + - usename: + usage: "LABEL" + description: "Name of the user" + - application_name: + usage: "LABEL" + description: "Name of the application" + - state: + usage: "LABEL" + description: "State of the backend" + - total: + usage: "GAUGE" + description: "Number of backends" + - max_tx_duration_seconds: + usage: "GAUGE" + description: "Maximum duration of a transaction in seconds" + + backends_waiting: + query: | + SELECT count(*) AS total + FROM pg_catalog.pg_locks blocked_locks + JOIN pg_catalog.pg_locks blocking_locks + ON blocking_locks.locktype = blocked_locks.locktype + AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database + AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation + AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page + AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple + AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid + AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid + AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid + AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid + AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid + AND blocking_locks.pid != blocked_locks.pid + JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid + WHERE NOT blocked_locks.granted + metrics: + - total: + usage: "GAUGE" + description: "Total number of backends that are currently waiting on other queries" + + pg_database: + query: | + SELECT datname + , pg_catalog.pg_database_size(datname) AS size_bytes + , pg_catalog.age(datfrozenxid) AS xid_age + , pg_catalog.mxid_age(datminmxid) AS mxid_age + FROM pg_catalog.pg_database + WHERE datallowconn + metrics: + - datname: + usage: "LABEL" + description: "Name of the database" + - size_bytes: + usage: "GAUGE" + description: "Disk space used by the database" + - xid_age: + usage: "GAUGE" + description: "Number of transactions from the frozen XID to the current one" + - mxid_age: + usage: "GAUGE" + description: "Number of multiple transactions (Multixact) from the frozen XID to the current one" + + pg_postmaster: + query: | + SELECT EXTRACT(EPOCH FROM pg_postmaster_start_time) AS start_time + FROM pg_catalog.pg_postmaster_start_time() + metrics: + - start_time: + usage: "GAUGE" + description: "Time at which postgres started (based on epoch)" + + pg_replication: + query: "SELECT CASE WHEN ( + NOT pg_catalog.pg_is_in_recovery() + OR pg_catalog.pg_last_wal_receive_lsn() = pg_catalog.pg_last_wal_replay_lsn()) + THEN 0 + ELSE GREATEST (0, + EXTRACT(EPOCH FROM (now() - pg_catalog.pg_last_xact_replay_timestamp()))) + END AS lag, + 
pg_catalog.pg_is_in_recovery() AS in_recovery, + EXISTS (TABLE pg_stat_wal_receiver) AS is_wal_receiver_up, + (SELECT count(*) FROM pg_catalog.pg_stat_replication) AS streaming_replicas" + metrics: + - lag: + usage: "GAUGE" + description: "Replication lag behind primary in seconds" + - in_recovery: + usage: "GAUGE" + description: "Whether the instance is in recovery" + - is_wal_receiver_up: + usage: "GAUGE" + description: "Whether the instance wal_receiver is up" + - streaming_replicas: + usage: "GAUGE" + description: "Number of streaming replicas connected to the instance" + + pg_replication_slots: + query: | + SELECT slot_name, + slot_type, + database, + active, + (CASE pg_catalog.pg_is_in_recovery() + WHEN TRUE THEN pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_receive_lsn(), restart_lsn) + ELSE pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), restart_lsn) + END) as pg_wal_lsn_diff + FROM pg_catalog.pg_replication_slots + WHERE NOT temporary + metrics: + - slot_name: + usage: "LABEL" + description: "Name of the replication slot" + - slot_type: + usage: "LABEL" + description: "Type of the replication slot" + - database: + usage: "LABEL" + description: "Name of the database" + - active: + usage: "GAUGE" + description: "Flag indicating whether the slot is active" + - pg_wal_lsn_diff: + usage: "GAUGE" + description: "Replication lag in bytes" + + pg_stat_archiver: + query: | + SELECT archived_count + , failed_count + , COALESCE(EXTRACT(EPOCH FROM (now() - last_archived_time)), -1) AS seconds_since_last_archival + , COALESCE(EXTRACT(EPOCH FROM (now() - last_failed_time)), -1) AS seconds_since_last_failure + , COALESCE(EXTRACT(EPOCH FROM last_archived_time), -1) AS last_archived_time + , COALESCE(EXTRACT(EPOCH FROM last_failed_time), -1) AS last_failed_time + , COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_archived_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_archived_wal_start_lsn + , COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_failed_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_failed_wal_start_lsn + , EXTRACT(EPOCH FROM stats_reset) AS stats_reset_time + FROM pg_catalog.pg_stat_archiver + metrics: + - archived_count: + usage: "COUNTER" + description: "Number of WAL files that have been successfully archived" + - failed_count: + usage: "COUNTER" + description: "Number of failed attempts for archiving WAL files" + - seconds_since_last_archival: + usage: "GAUGE" + description: "Seconds since the last successful archival operation" + - seconds_since_last_failure: + usage: "GAUGE" + description: "Seconds since the last failed archival operation" + - last_archived_time: + usage: "GAUGE" + description: "Epoch of the last time WAL archiving succeeded" + - last_failed_time: + usage: "GAUGE" + description: "Epoch of the last time WAL archiving failed" + - last_archived_wal_start_lsn: + usage: "GAUGE" + description: "Archived WAL start LSN" + - last_failed_wal_start_lsn: + usage: "GAUGE" + description: "Last failed WAL LSN" + - stats_reset_time: + usage: "GAUGE" + description: "Time at which these statistics were last reset" + + pg_stat_bgwriter: + runonserver: "<17.0.0" + query: | + SELECT checkpoints_timed + , checkpoints_req + , checkpoint_write_time + , checkpoint_sync_time + , buffers_checkpoint + , buffers_clean + , maxwritten_clean + , buffers_backend + , buffers_backend_fsync + , buffers_alloc + FROM pg_catalog.pg_stat_bgwriter + metrics: + - checkpoints_timed: + usage: 
"COUNTER" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "COUNTER" + description: "Number of requested checkpoints that have been performed" + - checkpoint_write_time: + usage: "COUNTER" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" + - checkpoint_sync_time: + usage: "COUNTER" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" + - buffers_checkpoint: + usage: "COUNTER" + description: "Number of buffers written during checkpoints" + - buffers_clean: + usage: "COUNTER" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "COUNTER" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_backend: + usage: "COUNTER" + description: "Number of buffers written directly by a backend" + - buffers_backend_fsync: + usage: "COUNTER" + description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" + - buffers_alloc: + usage: "COUNTER" + description: "Number of buffers allocated" + + pg_stat_bgwriter_17: + runonserver: ">=17.0.0" + name: pg_stat_bgwriter + query: | + SELECT buffers_clean + , maxwritten_clean + , buffers_alloc + , EXTRACT(EPOCH FROM stats_reset) AS stats_reset_time + FROM pg_catalog.pg_stat_bgwriter + metrics: + - buffers_clean: + usage: "COUNTER" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "COUNTER" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_alloc: + usage: "COUNTER" + description: "Number of buffers allocated" + - stats_reset_time: + usage: "GAUGE" + description: "Time at which these statistics were last reset" + + pg_stat_checkpointer: + runonserver: ">=17.0.0" + query: | + SELECT num_timed AS checkpoints_timed + , num_requested AS checkpoints_req + , restartpoints_timed + , restartpoints_req + , restartpoints_done + , write_time + , sync_time + , buffers_written + , EXTRACT(EPOCH FROM stats_reset) AS stats_reset_time + FROM pg_catalog.pg_stat_checkpointer + metrics: + - checkpoints_timed: + usage: "COUNTER" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "COUNTER" + description: "Number of requested checkpoints that have been performed" + - restartpoints_timed: + usage: "COUNTER" + description: "Number of scheduled restartpoints due to timeout or after a failed attempt to perform it" + - restartpoints_req: + usage: "COUNTER" + description: "Number of requested restartpoints that have been performed" + - restartpoints_done: + usage: "COUNTER" + description: "Number of restartpoints that have been performed" + - write_time: + usage: "COUNTER" + description: "Total amount of time that has been spent in the portion of processing checkpoints and restartpoints where files are written to disk, in milliseconds" + - sync_time: + usage: "COUNTER" + description: "Total amount of time that has been spent in the portion of processing checkpoints and restartpoints where files are synchronized to disk, in milliseconds" + - buffers_written: + usage: "COUNTER" + description: "Number of buffers written during checkpoints and 
restartpoints" + - stats_reset_time: + usage: "GAUGE" + description: "Time at which these statistics were last reset" + + pg_stat_database: + query: | + SELECT datname + , xact_commit + , xact_rollback + , blks_read + , blks_hit + , tup_returned + , tup_fetched + , tup_inserted + , tup_updated + , tup_deleted + , conflicts + , temp_files + , temp_bytes + , deadlocks + , blk_read_time + , blk_write_time + FROM pg_catalog.pg_stat_database + metrics: + - datname: + usage: "LABEL" + description: "Name of this database" + - xact_commit: + usage: "COUNTER" + description: "Number of transactions in this database that have been committed" + - xact_rollback: + usage: "COUNTER" + description: "Number of transactions in this database that have been rolled back" + - blks_read: + usage: "COUNTER" + description: "Number of disk blocks read in this database" + - blks_hit: + usage: "COUNTER" + description: "Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)" + - tup_returned: + usage: "COUNTER" + description: "Number of rows returned by queries in this database" + - tup_fetched: + usage: "COUNTER" + description: "Number of rows fetched by queries in this database" + - tup_inserted: + usage: "COUNTER" + description: "Number of rows inserted by queries in this database" + - tup_updated: + usage: "COUNTER" + description: "Number of rows updated by queries in this database" + - tup_deleted: + usage: "COUNTER" + description: "Number of rows deleted by queries in this database" + - conflicts: + usage: "COUNTER" + description: "Number of queries canceled due to conflicts with recovery in this database" + - temp_files: + usage: "COUNTER" + description: "Number of temporary files created by queries in this database" + - temp_bytes: + usage: "COUNTER" + description: "Total amount of data written to temporary files by queries in this database" + - deadlocks: + usage: "COUNTER" + description: "Number of deadlocks detected in this database" + - blk_read_time: + usage: "COUNTER" + description: "Time spent reading data file blocks by backends in this database, in milliseconds" + - blk_write_time: + usage: "COUNTER" + description: "Time spent writing data file blocks by backends in this database, in milliseconds" + + pg_stat_replication: + primary: true + query: | + SELECT usename + , COALESCE(application_name, '') AS application_name + , COALESCE(client_addr::text, '') AS client_addr + , COALESCE(client_port::text, '') AS client_port + , EXTRACT(EPOCH FROM backend_start) AS backend_start + , COALESCE(pg_catalog.age(backend_xmin), 0) AS backend_xmin_age + , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), sent_lsn) AS sent_diff_bytes + , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), write_lsn) AS write_diff_bytes + , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), flush_lsn) AS flush_diff_bytes + , COALESCE(pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), replay_lsn),0) AS replay_diff_bytes + , COALESCE((EXTRACT(EPOCH FROM write_lag)),0)::float AS write_lag_seconds + , COALESCE((EXTRACT(EPOCH FROM flush_lag)),0)::float AS flush_lag_seconds + , COALESCE((EXTRACT(EPOCH FROM replay_lag)),0)::float AS replay_lag_seconds + FROM pg_catalog.pg_stat_replication + metrics: + - usename: + usage: "LABEL" + description: "Name of the replication user" + - application_name: + usage: "LABEL" + description: "Name of the application" + - 
client_addr: + usage: "LABEL" + description: "Client IP address" + - client_port: + usage: "LABEL" + description: "Client TCP port" + - backend_start: + usage: "COUNTER" + description: "Time when this process was started" + - backend_xmin_age: + usage: "COUNTER" + description: "The age of this standby's xmin horizon" + - sent_diff_bytes: + usage: "GAUGE" + description: "Difference in bytes from the last write-ahead log location sent on this connection" + - write_diff_bytes: + usage: "GAUGE" + description: "Difference in bytes from the last write-ahead log location written to disk by this standby server" + - flush_diff_bytes: + usage: "GAUGE" + description: "Difference in bytes from the last write-ahead log location flushed to disk by this standby server" + - replay_diff_bytes: + usage: "GAUGE" + description: "Difference in bytes from the last write-ahead log location replayed into the database on this standby server" + - write_lag_seconds: + usage: "GAUGE" + description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it" + - flush_lag_seconds: + usage: "GAUGE" + description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it" + - replay_lag_seconds: + usage: "GAUGE" + description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it" + + pg_settings: + query: | + SELECT name, + CASE setting WHEN 'on' THEN '1' WHEN 'off' THEN '0' ELSE setting END AS setting + FROM pg_catalog.pg_settings + WHERE vartype IN ('integer', 'real', 'bool') + ORDER BY 1 + metrics: + - name: + usage: "LABEL" + description: "Name of the setting" + - setting: + usage: "GAUGE" + description: "Setting value" + + pg_extensions: + query: | + SELECT + current_database() as datname, + name as extname, + default_version, + installed_version, + CASE + WHEN default_version = installed_version THEN 0 + ELSE 1 + END AS update_available + FROM pg_catalog.pg_available_extensions + WHERE installed_version IS NOT NULL + metrics: + - datname: + usage: "LABEL" + description: "Name of the database" + - extname: + usage: "LABEL" + description: "Extension name" + - default_version: + usage: "LABEL" + description: "Default version" + - installed_version: + usage: "LABEL" + description: "Installed version" + - update_available: + usage: "GAUGE" + description: "An update is available" + target_databases: + - '*' diff --git a/values/staging/pg-cluster.yaml b/values/staging/pg-cluster.yaml new file mode 100644 index 0000000..6e81ffe --- /dev/null +++ b/values/staging/pg-cluster.yaml @@ -0,0 +1,530 @@ +# -- Override the name of the chart +nameOverride: "" +# -- Override the full name of the chart +fullnameOverride: "" +# -- Override the namespace of the chart +namespaceOverride: "" + +### +# -- Type of the CNPG database. Available types: +# * `postgresql` +# * `postgis` +# * `timescaledb` +type: postgresql + +version: + # -- PostgreSQL major version to use + postgresql: "16" + # -- If using TimescaleDB, specify the version + timescaledb: "2.15" + # -- If using PostGIS, specify the version + postgis: "3.4" + +### +# -- Cluster mode of operation. Available modes: +# * `standalone` - default mode. Creates new or updates an existing CNPG cluster. +# * `replica` - Creates a replica cluster from an existing CNPG cluster. 
# TODO: replica mode is not yet implemented
+#  * `recovery` - Same as standalone but creates a cluster from a backup, object store or via pg_basebackup.
+mode: standalone
+
+recovery:
+  ##
+  # -- Available recovery methods:
+  # * `backup` - Recovers a CNPG cluster from a CNPG backup (PITR supported). The backup must be in the same namespace as the cluster.
+  # * `object_store` - Recovers a CNPG cluster from a barman object store (PITR supported).
+  # * `pg_basebackup` - Recovers a CNPG cluster via the streaming replication protocol. Useful if you want to
+  #   migrate databases to CloudNativePG, even from outside Kubernetes.
+  # * `import` - Import one or more databases from an existing Postgres cluster.
+  method: backup
+
+  ## -- Point in time recovery target. Specify one of the following:
+  pitrTarget:
+    # -- Time in RFC3339 format
+    time: ""
+
+  ##
+  # -- Backup Recovery Method
+  backupName: "" # Name of the backup to recover from. Required if method is `backup`.
+
+  ##
+  # -- The original cluster name when used in backups. Also known as serverName.
+  clusterName: ""
+  # -- Name of the database used by the application. Default: `app`.
+  database: app
+  # -- Name of the owner of the database in the instance to be used by applications. Defaults to the value of the `database` key.
+  owner: ""
+  # -- Overrides the provider specific default endpoint. Defaults to:
+  # S3: https://s3.<region>.amazonaws.com
+  # Leave empty if using the default S3 endpoint
+  endpointURL: ""
+  # -- Specifies a CA bundle to validate a privately signed certificate.
+  endpointCA:
+    # -- Creates a secret with the given value if true, otherwise uses an existing secret.
+    create: false
+    name: ""
+    key: ""
+    value: ""
+  # -- Overrides the provider specific default path. Defaults to:
+  # S3: s3://<bucket><path>
+  # Azure: https://<storageAccount>.<serviceName>.core.windows.net/<containerName><path>
+  # Google: gs://<bucket><path>
+  destinationPath: ""
+  # -- One of `s3`, `azure` or `google`
+  provider: s3
+  s3:
+    region: ""
+    bucket: ""
+    path: "/"
+    accessKey: ""
+    secretKey: ""
+    # -- Use role-based authentication without explicitly providing the keys
+    inheritFromIAMRole: false
+  azure:
+    path: "/"
+    connectionString: ""
+    storageAccount: ""
+    storageKey: ""
+    storageSasToken: ""
+    containerName: ""
+    serviceName: blob
+    inheritFromAzureAD: false
+  google:
+    path: "/"
+    bucket: ""
+    gkeEnvironment: false
+    applicationCredentials: ""
+  secret:
+    # -- Whether to create a secret for the backup credentials
+    create: true
+    # -- Name of the backup credentials secret
+    name: ""
+
+  # See https://cloudnative-pg.io/documentation/current/bootstrap/#bootstrap-from-a-live-cluster-pg_basebackup
+  pgBaseBackup:
+    # -- Name of the database used by the application. Default: `app`.
+    database: app
+    # -- Name of the secret containing the initial credentials for the owner of the user database. If empty, a new secret will be created from scratch.
+    secret: ""
+    # -- Name of the owner of the database in the instance to be used by applications. Defaults to the value of the `database` key.
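+    # e.g. owner: app (leave empty to default to the `database` value above)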
+    owner: ""
+    source:
+      host: ""
+      port: 5432
+      username: ""
+      database: "app"
+      sslMode: "verify-full"
+      passwordSecret:
+        # -- Whether to create a secret for the password
+        create: false
+        # -- Name of the secret containing the password
+        name: ""
+        # -- The key in the secret containing the password
+        key: "password"
+        # -- The password value to use when creating the secret
+        value: ""
+      sslKeySecret:
+        name: ""
+        key: ""
+      sslCertSecret:
+        name: ""
+        key: ""
+      sslRootCertSecret:
+        name: ""
+        key: ""
+
+  # See: https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-Import
+  import:
+    # -- One of `microservice` or `monolith`.
+    # See: https://cloudnative-pg.io/documentation/current/database_import/#how-it-works
+    type: "microservice"
+    # -- Databases to import
+    databases: []
+    # -- Roles to import
+    roles: []
+    # -- List of SQL queries to be executed as a superuser in the application database right after it is imported.
+    # To be used with extreme care. Only available in microservice type.
+    postImportApplicationSQL: []
+    # -- When set to true, only the pre-data and post-data sections of pg_restore are invoked, avoiding data import.
+    schemaOnly: false
+    # -- List of custom options to pass to the `pg_dump` command. IMPORTANT: Use these options with caution and at your
+    # own risk, as the operator does not validate their content. Be aware that certain options may conflict with the
+    # operator's intended functionality or design.
+    pgDumpExtraOptions: []
+    # -- List of custom options to pass to the `pg_restore` command. IMPORTANT: Use these options with caution and at
+    # your own risk, as the operator does not validate their content. Be aware that certain options may conflict with the
+    # operator's intended functionality or design.
+    pgRestoreExtraOptions: []
+    source:
+      host: ""
+      port: 5432
+      username: ""
+      database: ""
+      sslMode: "verify-full"
+      passwordSecret:
+        # -- Whether to create a secret for the password
+        create: false
+        # -- Name of the secret containing the password
+        name: ""
+        # -- The key in the secret containing the password
+        key: "password"
+        # -- The password value to use when creating the secret
+        value: ""
+      sslKeySecret:
+        name: ""
+        key: ""
+      sslCertSecret:
+        name: ""
+        key: ""
+      sslRootCertSecret:
+        name: ""
+        key: ""
+
+
+cluster:
+  # -- Number of instances
+  instances: 3
+
+  # -- Name of the container image, supporting both tags (`<image>:<tag>`) and digests for deterministic
+  # and repeatable deployments: `<image>:<tag>@sha256:<digest>`
+  imageName: "" # Default value depends on type (postgresql/postgis/timescaledb)
+
+  # -- Reference to an `ImageCatalog` or `ClusterImageCatalog`; if specified, takes precedence over `cluster.imageName`
+  imageCatalogRef: {}
+  # kind: ImageCatalog
+  # name: postgresql
+
+  # -- Image pull policy. One of Always, Never or IfNotPresent. If not defined, it defaults to IfNotPresent. Cannot be updated.
+  # More info: https://kubernetes.io/docs/concepts/containers/images#updating-images
+  imagePullPolicy: IfNotPresent
+
+  # -- The list of pull secrets to be used to pull the images.
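+  # For example (the secret name is hypothetical and must exist in the
+  # cluster's namespace):
+  # imagePullSecrets:
+  #   - name: ghcr-pull-secret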
+  # See: https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-LocalObjectReference
+  imagePullSecrets: []
+
+  storage:
+    size: 8Gi
+    storageClass: "nfs-csi"
+
+  walStorage:
+    enabled: false
+    size: 1Gi
+    storageClass: ""
+
+  # -- The UID of the postgres user inside the image, defaults to 26
+  postgresUID: -1
+
+  # -- The GID of the postgres user inside the image, defaults to 26
+  postgresGID: -1
+
+  # -- Customization of service definitions. Please refer to https://cloudnative-pg.io/documentation/current/service_management/
+  services: {}
+
+  # -- Resource requirements of every generated Pod.
+  # Please refer to https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ for more information.
+  # We strongly advise you use the same setting for limits and requests so that your cluster pods are given a Guaranteed QoS.
+  # See: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/
+  resources: {}
+  # limits:
+  #   cpu: 2000m
+  #   memory: 8Gi
+  # requests:
+  #   cpu: 2000m
+  #   memory: 8Gi
+
+  priorityClassName: ""
+
+  # -- Method to follow to upgrade the primary server during a rolling update procedure, after all replicas have been
+  # successfully updated. It can be switchover (default) or restart.
+  primaryUpdateMethod: switchover
+
+  # -- Strategy to follow to upgrade the primary server during a rolling update procedure, after all replicas have been
+  # successfully updated: it can be automated (unsupervised - default) or manual (supervised)
+  primaryUpdateStrategy: unsupervised
+
+  # -- The instances' log level, one of the following values: error, warning, info (default), debug, trace
+  logLevel: "info"
+
+  # -- Affinity/Anti-affinity rules for Pods.
+  # See: https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-AffinityConfiguration
+  affinity:
+    topologyKey: topology.kubernetes.io/zone
+
+  # -- Env follows the Env format to pass environment variables to the pods created in the cluster
+  env: []
+  # - name: MY_CUSTOM_FLAG
+  #   value: "enabled"
+  # - name: MY_CUSTOM_ENV
+  #   valueFrom:
+  #     configMapKeyRef:
+  #       name: my-custom-env
+  #       key: env
+  #       optional: true
+  # - name: MY_CUSTOM_SECRET_ENV
+  #   valueFrom:
+  #     secretKeyRef:
+  #       name: my-custom-secret
+  #       key: secret
+  #       optional: true
+
+  # -- EnvFrom follows the EnvFrom format to pass environment variable sources to the pods, to be used by Env
+  envFrom: []
+  # - configMapRef:
+  #     name: global-envs
+  #     optional: true
+  # - secretRef:
+  #     name: db-credentials
+  #     optional: true
+
+  # -- The configuration for the CA and related certificates.
+  # See: https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-CertificatesConfiguration
+  certificates: {}
+
+  # -- When this option is enabled, the operator will use the SuperuserSecret to update the postgres user password.
+  # If the secret is not present, the operator will automatically create one.
+  # When this option is disabled, the operator will ignore the SuperuserSecret content, delete it when automatically created,
+  # and then blank the password of the postgres user by setting it to NULL.
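+  # e.g. disable it entirely with enableSuperuserAccess: false, or point
+  # superuserSecret at an existing Secret (the name below is hypothetical):
+  # superuserSecret: "pg-superuser-credentials"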
+  enableSuperuserAccess: true
+  superuserSecret: ""
+
+  # -- Allows disabling the PDB, mainly useful for upgrades of single-instance clusters or for development purposes
+  # See: https://cloudnative-pg.io/documentation/current/kubernetes_upgrade/#pod-disruption-budgets
+  enablePDB: true
+
+  # -- This feature enables declarative management of existing roles, as well as the creation of new roles if they are not
+  # already present in the database.
+  # See: https://cloudnative-pg.io/documentation/current/declarative_role_management/
+  roles: []
+  # - name: dante
+  #   ensure: present
+  #   comment: Dante Alighieri
+  #   login: true
+  #   superuser: false
+  #   inRoles:
+  #     - pg_monitor
+  #     - pg_signal_backend
+
+  monitoring:
+    # -- Whether to enable monitoring
+    enabled: false
+    podMonitor:
+      # -- Whether to enable the PodMonitor
+      enabled: true
+      # -- The list of relabelings for the PodMonitor.
+      # Applied to samples before scraping.
+      relabelings: []
+      # -- The list of metric relabelings for the PodMonitor.
+      # Applied to samples before ingestion.
+      metricRelabelings: []
+    prometheusRule:
+      # -- Whether to enable the PrometheusRule automated alerts
+      enabled: true
+      # -- Exclude specified rules
+      excludeRules: []
+      # - CNPGClusterZoneSpreadWarning
+    # -- Whether to disable the default queries.
+    # Set it to true if you don't want to inject default queries into the cluster.
+    disableDefaultQueries: false
+    # -- Custom Prometheus metrics
+    # Will be stored in the ConfigMap
+    customQueries: []
+    # - name: "pg_cache_hit_ratio"
+    #   query: "SELECT current_database() as datname, sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio FROM pg_statio_user_tables;"
+    #   target_databases: ["*"]
+    #   predicate_query: "SELECT '{{ .Values.version.postgresql }}';"
+    #   metrics:
+    #     - datname:
+    #         usage: "LABEL"
+    #         description: "Name of the database"
+    #     - ratio:
+    #         usage: GAUGE
+    #         description: "Cache hit ratio"
+    # -- The list of secrets containing the custom queries
+    customQueriesSecret: []
+    # - name: custom-queries-secret
+    #   key: custom-queries
+
+  postgresql:
+    # -- PostgreSQL configuration options (postgresql.conf)
+    parameters: {}
+    # max_connections: 300
+    # -- Quorum-based Synchronous Replication
+    synchronous: {}
+    # method: any
+    # number: 1
+    # -- PostgreSQL Host Based Authentication rules (lines to be appended to the pg_hba.conf file)
+    pg_hba: []
+    # - host all all 10.244.0.0/16 md5
+    # -- PostgreSQL User Name Maps rules (lines to be appended to the pg_ident.conf file)
+    pg_ident: []
+    # - mymap /^(.*)@mydomain\.com$ \1
+    # -- Lists of shared preload libraries to add to the default ones
+    shared_preload_libraries: []
+    # - pgaudit
+    # -- PostgreSQL LDAP configuration (see https://cloudnative-pg.io/documentation/current/postgresql_conf/#ldap-configuration)
+    ldap: {}
+    # https://cloudnative-pg.io/documentation/current/postgresql_conf/#ldap-configuration
+    # server: 'openldap.default.svc.cluster.local'
+    # bindSearchAuth:
+    #   baseDN: 'ou=org,dc=example,dc=com'
+    #   bindDN: 'cn=admin,dc=example,dc=com'
+    #   bindPassword:
+    #     name: 'ldapBindPassword'
+    #     key: 'data'
+    # searchAttribute: 'uid'
+
+
+  # -- BootstrapInitDB is the configuration of the bootstrap process when initdb is used.
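+  # A common use is enabling extensions at bootstrap time via postInitSQL, as
+  # in the commented `CREATE EXTENSION IF NOT EXISTS vector` example below.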
+  # See: https://cloudnative-pg.io/documentation/current/bootstrap/
+  # See: https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-bootstrapinitdb
+  initdb: {}
+  # database: app
+  # owner: "" # Defaults to the database name
+  # secret:
+  #   name: "" # Name of the secret containing the initial credentials for the owner of the user database. If empty a new secret will be created from scratch
+  # options: []
+  # encoding: UTF8
+  # postInitSQL:
+  #   - CREATE EXTENSION IF NOT EXISTS vector;
+  # postInitApplicationSQL: []
+  # postInitTemplateSQL: []
+
+  # -- Configure the metadata of the generated service account
+  serviceAccountTemplate: {}
+
+  additionalLabels: {}
+  annotations: {}
+
+
+backups:
+  # -- You need to configure backups manually, so backups are disabled by default.
+  enabled: false
+
+  # -- Overrides the provider specific default endpoint. Defaults to:
+  # S3: https://s3.<region>.amazonaws.com
+  endpointURL: "" # Leave empty if using the default S3 endpoint
+  # -- Specifies a CA bundle to validate a privately signed certificate.
+  endpointCA:
+    # -- Creates a secret with the given value if true, otherwise uses an existing secret.
+    create: false
+    name: ""
+    key: ""
+    value: ""
+
+  # -- Overrides the provider specific default path. Defaults to:
+  # S3: s3://<bucket><path>
+  # Azure: https://<storageAccount>.<serviceName>.core.windows.net/<containerName><path>
+  # Google: gs://<bucket><path>
+  destinationPath: ""
+  # -- One of `s3`, `azure` or `google`
+  provider: s3
+  s3:
+    region: ""
+    bucket: ""
+    path: "/"
+    accessKey: ""
+    secretKey: ""
+    # -- Use role-based authentication without explicitly providing the keys
+    inheritFromIAMRole: false
+  azure:
+    path: "/"
+    connectionString: ""
+    storageAccount: ""
+    storageKey: ""
+    storageSasToken: ""
+    containerName: ""
+    serviceName: blob
+    inheritFromAzureAD: false
+  google:
+    path: "/"
+    bucket: ""
+    gkeEnvironment: false
+    applicationCredentials: ""
+  secret:
+    # -- Whether to create a secret for the backup credentials
+    create: true
+    # -- Name of the backup credentials secret
+    name: ""
+
+  wal:
+    # -- WAL compression method. One of `` (for no compression), `gzip`, `bzip2` or `snappy`.
+    compression: gzip
+    # -- Whether to instruct the storage provider to encrypt WAL files. One of `` (use the storage container default), `AES256` or `aws:kms`.
+    encryption: AES256
+    # -- Number of WAL files to be archived or restored in parallel.
+    maxParallel: 1
+  data:
+    # -- Data compression method. One of `` (for no compression), `gzip`, `bzip2` or `snappy`.
+    compression: gzip
+    # -- Whether to instruct the storage provider to encrypt data files. One of `` (use the storage container default), `AES256` or `aws:kms`.
+    encryption: AES256
+    # -- Number of data files to be archived or restored in parallel.
+    jobs: 2
+
+  scheduledBackups:
+    -
+      # -- Scheduled backup name
+      name: daily-backup
+      # -- Schedule in cron format
+      schedule: "0 0 0 * * *"
+      # -- Backup owner reference
+      backupOwnerReference: self
+      # -- Backup method, can be `barmanObjectStore` (default) or `volumeSnapshot`
+      method: barmanObjectStore
+
+  # -- Retention policy for backups
+  retentionPolicy: "30d"
+
+imageCatalog:
+  # -- Whether to provision an image catalog. If imageCatalog.images is empty this option will be ignored.
+  create: true
+  # -- List of images to be provisioned in an image catalog.
+  images: []
+  # - image: ghcr.io/your_repo/your_image:your_tag
+  #   major: 16
+
+# -- List of PgBouncer poolers
+poolers: []
+  # -
+  #   # -- Pooler name
+  #   name: rw
+  #   # -- PgBouncer type of service to forward traffic to.
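+  #   # `rw` routes to the primary; `ro` routes to read-only replicas.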
+ # type: rw + # # -- PgBouncer pooling mode + # poolMode: transaction + # # -- Number of PgBouncer instances + # instances: 3 + # # -- PgBouncer configuration parameters + # parameters: + # max_client_conn: "1000" + # default_pool_size: "25" + # monitoring: + # # -- Whether to enable monitoring + # enabled: false + # podMonitor: + # # -- Whether to enable the PodMonitor + # enabled: true + # # -- Custom PgBouncer deployment template. + # # Use to override image, specify resources, etc. + # template: {} + # - + # # -- Pooler name + # name: ro + # # -- PgBouncer type of service to forward traffic to. + # type: ro + # # -- PgBouncer pooling mode + # poolMode: transaction + # # -- Number of PgBouncer instances + # instances: 3 + # # -- PgBouncer configuration parameters + # parameters: + # max_client_conn: "1000" + # default_pool_size: "25" + # monitoring: + # # -- Whether to enable monitoring + # enabled: false + # podMonitor: + # # -- Whether to enable the PodMonitor + # enabled: true + # # -- Custom PgBouncer deployment template. + # # Use to override image, specify resources, etc. + # template: {} diff --git a/values/staging/vllm.yaml b/values/staging/vllm.yaml index dd4dc1e..390a523 100644 --- a/values/staging/vllm.yaml +++ b/values/staging/vllm.yaml @@ -52,6 +52,8 @@ resources: # -- Node names where pods can be scheduled nodeNames: - "ubuntu-sv14" + - "ubuntu-sv15" + - "ubuntu-sv16" # -- Autoscaling configuration autoscaling: @@ -87,8 +89,9 @@ maxUnavailablePodDisruptionBudget: "" # # -- Storage size of the s3 # awsEc2MetadataDisabled: true # pvcStorage: "1Gi" -# storageClassName: "nfs-csi" -# nfsServer: "10.24.10.14" + +# storageClassName: "nfs-csi" +# nfsServer: "10.24.10.14" # -- Host path for local storage hostPath: "/root/workspace/thuanpt/vllm/models/gpt2"