Skip to content

Commit 68398d5

Browse files
committed
feat: Add initial triton-inference-server chart
1 parent 6c3a6b0 commit 68398d5

File tree

17 files changed

+823
-1
lines changed

17 files changed

+823
-1
lines changed

.github/workflows/pre-commit.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: pre-commit
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- main
8+
9+
jobs:
10+
pre-commit:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- name: Checkout
14+
uses: actions/checkout@v4
15+
with:
16+
fetch-depth: 0
17+
18+
- name: Pre-commit
19+
uses: pre-commit/[email protected]

.github/workflows/release.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Release Charts
1+
name: release
22

33
on:
44
push:

.pre-commit-config.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
repos:
2+
- repo: https://github.com/pre-commit/pre-commit-hooks
3+
rev: v5.0.0
4+
hooks:
5+
- id: check-merge-conflict
6+
- id: end-of-file-fixer
7+
- id: trailing-whitespace
8+
- id: mixed-line-ending
9+
args: [--fix=lf]
10+
11+
- repo: https://github.com/norwoodj/helm-docs
12+
rev: v1.14.2
13+
hooks:
14+
- id: helm-docs
15+
args:
16+
- --chart-search-root=charts

chart.yaml

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
---
2+
# Source: triton-inference-server/templates/serviceaccount.yaml
3+
apiVersion: v1
4+
kind: ServiceAccount
5+
metadata:
6+
name: release-name-triton-inference-server
7+
namespace: triton
8+
labels:
9+
helm.sh/chart: triton-inference-server-0.1.0
10+
app.kubernetes.io/name: triton-inference-server
11+
app.kubernetes.io/instance: release-name
12+
app.kubernetes.io/version: "2.55.0"
13+
app.kubernetes.io/managed-by: Helm
14+
automountServiceAccountToken: false
15+
---
16+
# Source: triton-inference-server/templates/service.yaml
17+
apiVersion: v1
18+
kind: Service
19+
metadata:
20+
name: release-name-triton-inference-server
21+
namespace: triton
22+
labels:
23+
helm.sh/chart: triton-inference-server-0.1.0
24+
app.kubernetes.io/name: triton-inference-server
25+
app.kubernetes.io/instance: release-name
26+
app.kubernetes.io/version: "2.55.0"
27+
app.kubernetes.io/managed-by: Helm
28+
spec:
29+
type: ClusterIP
30+
ports:
31+
- name: triton-http
32+
port: 8000
33+
targetPort: http
34+
- name: triton-grpc
35+
port: 8001
36+
targetPort: grpc
37+
- name: triton-metrics
38+
port: 8002
39+
targetPort: metrics
40+
selector:
41+
app.kubernetes.io/name: triton-inference-server
42+
app.kubernetes.io/instance: release-name
43+
---
44+
# Source: triton-inference-server/templates/deployment.yaml
45+
apiVersion: apps/v1
46+
kind: Deployment
47+
metadata:
48+
name: release-name-triton-inference-server
49+
namespace: triton
50+
labels:
51+
helm.sh/chart: triton-inference-server-0.1.0
52+
app.kubernetes.io/name: triton-inference-server
53+
app.kubernetes.io/instance: release-name
54+
app.kubernetes.io/version: "2.55.0"
55+
app.kubernetes.io/managed-by: Helm
56+
spec:
57+
replicas: 1
58+
selector:
59+
matchLabels:
60+
app.kubernetes.io/name: triton-inference-server
61+
app.kubernetes.io/instance: release-name
62+
template:
63+
metadata:
64+
labels:
65+
helm.sh/chart: triton-inference-server-0.1.0
66+
app.kubernetes.io/name: triton-inference-server
67+
app.kubernetes.io/instance: release-name
68+
app.kubernetes.io/version: "2.55.0"
69+
app.kubernetes.io/managed-by: Helm
70+
spec:
71+
serviceAccountName: release-name-triton-inference-server
72+
securityContext:
73+
fsGroup: 65532
74+
runAsNonRoot: true
75+
seccompProfile:
76+
type: RuntimeDefault
77+
containers:
78+
- name: triton-inference-server
79+
args:
80+
- "tritonserver"
81+
- "--model-store=/models"
82+
- "--model-control-mode=poll"
83+
- "--repository-poll-secs=30"
84+
image: "nvcr.io/nvidia/tritonserver:25.02-py3"
85+
imagePullPolicy: IfNotPresent
86+
ports:
87+
- name: http
88+
containerPort: 8000
89+
- name: grpc
90+
containerPort: 8001
91+
- name: metrics
92+
containerPort: 8002
93+
livenessProbe:
94+
httpGet:
95+
path: /v2/health/live
96+
port: http
97+
readinessProbe:
98+
httpGet:
99+
path: /v2/health/ready
100+
port: http
101+
initialDelaySeconds: 5
102+
periodSeconds: 5
103+
resources:
104+
limits:
105+
nvidia.com/gpu: 1
106+
securityContext:
107+
privileged: false
108+
allowPrivilegeEscalation: false
109+
runAsNonRoot: true
110+
runAsUser: 65532
111+
runAsGroup: 65532
112+
capabilities:
113+
drop:
114+
- ALL
115+
volumeMounts:
116+
- mountPath: /dev/shm
117+
name: dshm
118+
volumes:
119+
- name: dshm
120+
emptyDir:
121+
medium: Memory
122+
affinity:
123+
nodeAffinity:
124+
requiredDuringSchedulingIgnoredDuringExecution:
125+
nodeSelectorTerms:
126+
- matchExpressions:
127+
- key: nvidia.com/gpu.present
128+
operator: In
129+
values:
130+
- "true"
131+
- matchExpressions:
132+
- key: aws.amazon.com/neuron.present
133+
operator: In
134+
values:
135+
- "true"
136+
tolerations:
137+
- effect: NoSchedule
138+
key: nvidia.com/gpu
139+
operator: Exists
140+
- effect: NoSchedule
141+
key: aws.amazon.com/neuron
142+
operator: Exists

charts/.gitkeep

Whitespace-only changes.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
apiVersion: v2
2+
name: triton-inference-server
3+
description: NVIDIA Triton Inference Server
4+
5+
type: application
6+
version: 0.1.0
7+
appVersion: "2.55.0"
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# triton-inference-server
2+
3+
NVIDIA Triton Inference Server
4+
5+
![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.55.0](https://img.shields.io/badge/AppVersion-2.55.0-informational?style=flat-square)
6+
7+
## Documentation
8+
9+
For Triton Inference Server documentation, please see [the Triton Inference Server repository](https://github.com/triton-inference-server/server).
10+
11+
## Installing the Chart
12+
13+
First add the ClowdHaus repository to Helm:
14+
15+
```bash
16+
helm repo add clowdhaus https://clowdhaus.github.io/helm-charts
17+
```
18+
19+
To install the chart with the release name `triton-inference-server` in the `triton` namespace and default configuration:
20+
21+
```bash
22+
helm install triton-inference-server \
23+
--namespace triton \
24+
--create-namespace \
25+
clowdhaus/triton-inference-server
26+
```
27+
28+
## Values
29+
30+
| Key | Type | Default | Description |
31+
|-----|------|---------|-------------|
32+
| affinity | object | `{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"nvidia.com/gpu.present","operator":"In","values":["true"]}]},{"matchExpressions":[{"key":"aws.amazon.com/neuron.present","operator":"In","values":["true"]}]}]}}}` | Affinity rules for scheduling the pod. |
33+
| args | list | `["--model-store=/models","--model-control-mode=poll","--repository-poll-secs=30"]` | Arguments for the inference server pod. |
34+
| autoscaling.behavior.scaleDown.policies[0].periodSeconds | int | `60` | |
35+
| autoscaling.behavior.scaleDown.policies[0].type | string | `"Percent"` | |
36+
| autoscaling.behavior.scaleDown.policies[0].value | int | `50` | |
37+
| autoscaling.behavior.scaleDown.stabilizationWindowSeconds | int | `180` | |
38+
| autoscaling.behavior.scaleUp.policies[0].periodSeconds | int | `15` | |
39+
| autoscaling.behavior.scaleUp.policies[0].type | string | `"Percent"` | |
40+
| autoscaling.behavior.scaleUp.policies[0].value | int | `100` | |
41+
| autoscaling.behavior.scaleUp.stabilizationWindowSeconds | int | `60` | |
42+
| autoscaling.enabled | bool | `false` | |
43+
| autoscaling.maxReplicas | int | `3` | |
44+
| autoscaling.metrics | list | `[]` | |
45+
| autoscaling.minReplicas | int | `1` | |
46+
| env | list | `[]` | Additional environment variables for the inference server pod. |
47+
| envFrom | list | `[]` | |
48+
| fullnameOverride | string | `""` | Overrides the chart's computed fullname. |
49+
| image.pullPolicy | string | `"IfNotPresent"` | |
50+
| image.repository | string | `"nvcr.io/nvidia/tritonserver"` | |
51+
| image.tag | string | `"25.02-py3"` | |
52+
| imagePullSecrets | list | `[]` | Image pull secrets for Docker images. |
53+
| ingress.annotations | object | `{}` | |
54+
| ingress.className | string | `""` | |
55+
| ingress.enabled | bool | `false` | |
56+
| ingress.hosts[0].host | string | `"chart-example.local"` | |
57+
| ingress.hosts[0].paths[0].path | string | `"/"` | |
58+
| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | |
59+
| ingress.tls | list | `[]` | |
60+
| livenessProbe.httpGet.path | string | `"/v2/health/live"` | |
61+
| livenessProbe.httpGet.port | string | `"http"` | |
62+
| nameOverride | string | `""` | Overrides the chart's name. |
63+
| nodeSelector | object | `{}` | Node selectors to schedule the pod to nodes with labels. |
64+
| podAnnotations | object | `{}` | Additional annotations for the pod. |
65+
| podDisruptionBudget.create | bool | `false` | Specifies whether a pod disruption budget should be created |
66+
| podDisruptionBudget.maxUnavailable | int | `1` | |
67+
| podLabels | object | `{}` | Additional labels for the pod. |
68+
| podSecurityContext | object | `{"fsGroup":65532,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | SecurityContext for the pod. |
69+
| readinessProbe.httpGet.path | string | `"/v2/health/ready"` | |
70+
| readinessProbe.httpGet.port | string | `"http"` | |
71+
| readinessProbe.initialDelaySeconds | int | `5` | |
72+
| readinessProbe.periodSeconds | int | `5` | |
73+
| replicaCount | int | `1` | Number of replicas. |
74+
| resources.limits."nvidia.com/gpu" | int | `1` | |
75+
| securityContext.appArmorProfile | object | `{}` | AppArmor profile for the container. |
76+
| securityContext.seLinuxOptions | object | `{}` | SELinux options for the container. |
77+
| securityContext.seccompProfile | object | `{}` | Seccomp profile for the container. |
78+
| service.annotations | object | `{}` | Additional annotations to add to the service |
79+
| service.ports.grpc | int | `8001` | |
80+
| service.ports.http | int | `8000` | |
81+
| service.ports.metrics | int | `8002` | |
82+
| service.type | string | `"ClusterIP"` | |
83+
| serviceAccount.annotations | object | `{}` | Additional annotations to add to the service account |
84+
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
85+
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
86+
| tolerations[0].effect | string | `"NoSchedule"` | |
87+
| tolerations[0].key | string | `"nvidia.com/gpu"` | |
88+
| tolerations[0].operator | string | `"Exists"` | |
89+
| tolerations[1].effect | string | `"NoSchedule"` | |
90+
| tolerations[1].key | string | `"aws.amazon.com/neuron"` | |
91+
| tolerations[1].operator | string | `"Exists"` | |
92+
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
93+
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
94+
95+
----------------------------------------------
96+
97+
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{{ template "chart.header" . }}
2+
{{ template "chart.description" . }}
3+
4+
{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }}
5+
6+
## Documentation
7+
8+
For Triton Inference Server documentation, please see [the Triton Inference Server repository](https://github.com/triton-inference-server/server).
9+
10+
## Installing the Chart
11+
12+
First add the ClowdHaus repository to Helm:
13+
14+
```bash
15+
helm repo add clowdhaus https://clowdhaus.github.io/helm-charts
16+
```
17+
18+
To install the chart with the release name `triton-inference-server` in the `triton` namespace and default configuration:
19+
20+
```bash
21+
helm install triton-inference-server \
22+
--namespace triton \
23+
--create-namespace \
24+
clowdhaus/triton-inference-server
25+
```
26+
27+
{{ template "chart.requirementsSection" . }}
28+
29+
{{ template "chart.valuesSection" . }}
30+
31+
----------------------------------------------
32+
33+
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
1. Get the application URL by running these commands:
2+
{{- if .Values.ingress.enabled }}
3+
{{- range $host := .Values.ingress.hosts }}
4+
{{- range .paths }}
5+
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
6+
{{- end }}
7+
{{- end }}
8+
{{- else if contains "NodePort" .Values.service.type }}
9+
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "triton-inference-server.fullname" . }})
10+
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
11+
echo http://$NODE_IP:$NODE_PORT
12+
{{- else if contains "LoadBalancer" .Values.service.type }}
13+
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
14+
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "triton-inference-server.fullname" . }}'
15+
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "triton-inference-server.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16+
echo http://$SERVICE_IP:{{ .Values.service.port }}
17+
{{- else if contains "ClusterIP" .Values.service.type }}
18+
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "triton-inference-server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
19+
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
20+
echo "Visit http://127.0.0.1:8080 to use your application"
21+
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
22+
{{- end }}

0 commit comments

Comments
 (0)