# triton-inference-server

NVIDIA Triton Inference Server

## Documentation

For Triton Inference Server documentation, please see [triton-inference-server/server](https://github.com/triton-inference-server/server).

## Installing the Chart

First, add the ClowdHaus repository to Helm:

```bash
helm repo add clowdhaus https://clowdhaus.github.io/helm-charts
```
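
If the repository has been added previously, refresh the local chart index so the latest published version is available:

```bash
helm repo update
```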

To install the chart with the release name `triton-inference-server` in the `triton` namespace with the default configuration:

```bash
helm install triton-inference-server \
  --namespace triton \
  --create-namespace \
  clowdhaus/triton-inference-server
```
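
Any value listed under [Values](#values) can be overridden at install time. As a minimal sketch (the overrides shown here are illustrative examples, not recommendations), the same install with two replicas and a `NodePort` service would be:

```bash
helm install triton-inference-server \
  --namespace triton \
  --create-namespace \
  --set replicaCount=2 \
  --set service.type=NodePort \
  clowdhaus/triton-inference-server
```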

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"nvidia.com/gpu.present","operator":"In","values":["true"]}]},{"matchExpressions":[{"key":"aws.amazon.com/neuron.present","operator":"In","values":["true"]}]}]}}}` | Affinity rules for scheduling the pod. |
| args | list | `["--model-store=/models","--model-control-mode=poll","--repository-poll-secs=30"]` | Arguments for the inference server pod. |
| autoscaling.behavior.scaleDown.policies[0].periodSeconds | int | `60` |  |
| autoscaling.behavior.scaleDown.policies[0].type | string | `"Percent"` |  |
| autoscaling.behavior.scaleDown.policies[0].value | int | `50` |  |
| autoscaling.behavior.scaleDown.stabilizationWindowSeconds | int | `180` |  |
| autoscaling.behavior.scaleUp.policies[0].periodSeconds | int | `15` |  |
| autoscaling.behavior.scaleUp.policies[0].type | string | `"Percent"` |  |
| autoscaling.behavior.scaleUp.policies[0].value | int | `100` |  |
| autoscaling.behavior.scaleUp.stabilizationWindowSeconds | int | `60` |  |
| autoscaling.enabled | bool | `false` |  |
| autoscaling.maxReplicas | int | `3` |  |
| autoscaling.metrics | list | `[]` |  |
| autoscaling.minReplicas | int | `1` |  |
| env | list | `[]` | Additional environment variables for the inference server pod. |
| envFrom | list | `[]` |  |
| fullnameOverride | string | `""` | Overrides the chart's computed fullname. |
| image.pullPolicy | string | `"IfNotPresent"` |  |
| image.repository | string | `"nvcr.io/nvidia/tritonserver"` |  |
| image.tag | string | `"25.02-py3"` |  |
| imagePullSecrets | list | `[]` | Image pull secrets for Docker images. |
| ingress.annotations | object | `{}` |  |
| ingress.className | string | `""` |  |
| ingress.enabled | bool | `false` |  |
| ingress.hosts[0].host | string | `"chart-example.local"` |  |
| ingress.hosts[0].paths[0].path | string | `"/"` |  |
| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` |  |
| ingress.tls | list | `[]` |  |
| livenessProbe.httpGet.path | string | `"/v2/health/live"` |  |
| livenessProbe.httpGet.port | string | `"http"` |  |
| nameOverride | string | `""` | Overrides the chart's name. |
| nodeSelector | object | `{}` | Node selectors to schedule the pod to nodes with labels. |
| podAnnotations | object | `{}` | Additional annotations for the pod. |
| podDisruptionBudget.create | bool | `false` | Specifies whether a pod disruption budget should be created |
| podDisruptionBudget.maxUnavailable | int | `1` |  |
| podLabels | object | `{}` | Additional labels for the pod. |
| podSecurityContext | object | `{"fsGroup":65532,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | SecurityContext for the pod. |
| readinessProbe.httpGet.path | string | `"/v2/health/ready"` |  |
| readinessProbe.httpGet.port | string | `"http"` |  |
| readinessProbe.initialDelaySeconds | int | `5` |  |
| readinessProbe.periodSeconds | int | `5` |  |
| replicaCount | int | `1` | Number of replicas. |
| resources.limits."nvidia.com/gpu" | int | `1` |  |
| securityContext.appArmorProfile | object | `{}` | AppArmor profile for the container. |
| securityContext.seLinuxOptions | object | `{}` | SELinux options for the container. |
| securityContext.seccompProfile | object | `{}` | Seccomp profile for the container. |
| service.annotations | object | `{}` | Additional annotations to add to the service |
| service.ports.grpc | int | `8001` |  |
| service.ports.http | int | `8000` |  |
| service.ports.metrics | int | `8002` |  |
| service.type | string | `"ClusterIP"` |  |
| serviceAccount.annotations | object | `{}` | Additional annotations to add to the service account |
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
| tolerations[0].effect | string | `"NoSchedule"` |  |
| tolerations[0].key | string | `"nvidia.com/gpu"` |  |
| tolerations[0].operator | string | `"Exists"` |  |
| tolerations[1].effect | string | `"NoSchedule"` |  |
| tolerations[1].key | string | `"aws.amazon.com/neuron"` |  |
| tolerations[1].operator | string | `"Exists"` |  |
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |

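For larger changes, a values file is easier to maintain than `--set` flags. The sketch below assumes a hypothetical `my-values.yaml`; the keys mirror the table above, and the chosen values (poll interval, replica counts) are examples only. Note that `autoscaling.metrics` defaults to an empty list, so it would likely also need to be populated for the HorizontalPodAutoscaler to have a metric to scale on.

```bash
# Write an example override file; every key below appears in the Values table.
# The file name and values are illustrative, not chart defaults or recommendations.
cat <<'EOF' > my-values.yaml
replicaCount: 2

# Poll the model repository every 60s instead of the default 30s.
args:
  - --model-store=/models
  - --model-control-mode=poll
  - --repository-poll-secs=60

autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 5

resources:
  limits:
    nvidia.com/gpu: 1
EOF

# Apply the overrides (install-or-upgrade is idempotent).
helm upgrade --install triton-inference-server \
  --namespace triton \
  --create-namespace \
  -f my-values.yaml \
  clowdhaus/triton-inference-server
```
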
----------------------------------------------

Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).