# triton-inference-server

NVIDIA Triton Inference Server

## Documentation

For Triton Inference Server documentation, please see [triton-inference-server/server](https://github.com/triton-inference-server/server).

## Installing the Chart

First, add the ClowdHaus repository to Helm:

```bash
helm repo add clowdhaus https://clowdhaus.github.io/helm-charts
```
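
If the repository has been added previously, refresh the local chart index so the latest published version is available:

```bash
helm repo update
```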

To install the chart with the release name `triton-inference-server` in the `triton` namespace with the default configuration:

```bash
helm install triton-inference-server \
  --namespace triton \
  --create-namespace \
  clowdhaus/triton-inference-server
```
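
Any value listed under [Values](#values) can be overridden at install time. As a minimal sketch (the overrides shown here are illustrative examples, not recommendations), the same install with two replicas and a `NodePort` service would be:

```bash
helm install triton-inference-server \
  --namespace triton \
  --create-namespace \
  --set replicaCount=2 \
  --set service.type=NodePort \
  clowdhaus/triton-inference-server
```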

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"nvidia.com/gpu.present","operator":"In","values":["true"]}]},{"matchExpressions":[{"key":"aws.amazon.com/neuron.present","operator":"In","values":["true"]}]}]}}}` | Affinity rules for scheduling the pod. |
| args | list | `["--model-store=/models","--model-control-mode=poll","--repository-poll-secs=30"]` | Arguments for the inference server pod. |
| autoscaling.behavior.scaleDown.policies[0].periodSeconds | int | `60` |  |
| autoscaling.behavior.scaleDown.policies[0].type | string | `"Percent"` |  |
| autoscaling.behavior.scaleDown.policies[0].value | int | `50` |  |
| autoscaling.behavior.scaleDown.stabilizationWindowSeconds | int | `180` |  |
| autoscaling.behavior.scaleUp.policies[0].periodSeconds | int | `15` |  |
| autoscaling.behavior.scaleUp.policies[0].type | string | `"Percent"` |  |
| autoscaling.behavior.scaleUp.policies[0].value | int | `100` |  |
| autoscaling.behavior.scaleUp.stabilizationWindowSeconds | int | `60` |  |
| autoscaling.enabled | bool | `false` |  |
| autoscaling.maxReplicas | int | `3` |  |
| autoscaling.metrics | list | `[]` |  |
| autoscaling.minReplicas | int | `1` |  |
| env | list | `[]` | Additional environment variables for the inference server pod. |
| envFrom | list | `[]` |  |
| fullnameOverride | string | `""` | Overrides the chart's computed fullname. |
| image.pullPolicy | string | `"IfNotPresent"` |  |
| image.repository | string | `"nvcr.io/nvidia/tritonserver"` |  |
| image.tag | string | `"25.02-py3"` |  |
| imagePullSecrets | list | `[]` | Image pull secrets for Docker images. |
| ingress.annotations | object | `{}` |  |
| ingress.className | string | `""` |  |
| ingress.enabled | bool | `false` |  |
| ingress.hosts[0].host | string | `"chart-example.local"` |  |
| ingress.hosts[0].paths[0].path | string | `"/"` |  |
| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` |  |
| ingress.tls | list | `[]` |  |
| livenessProbe.httpGet.path | string | `"/v2/health/live"` |  |
| livenessProbe.httpGet.port | string | `"http"` |  |
| nameOverride | string | `""` | Overrides the chart's name. |
| nodeSelector | object | `{}` | Node selectors to schedule the pod to nodes with labels. |
| podAnnotations | object | `{}` | Additional annotations for the pod. |
| podDisruptionBudget.create | bool | `false` | Specifies whether a pod disruption budget should be created |
| podDisruptionBudget.maxUnavailable | int | `1` |  |
| podLabels | object | `{}` | Additional labels for the pod. |
| podSecurityContext | object | `{"fsGroup":65532,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | SecurityContext for the pod. |
| readinessProbe.httpGet.path | string | `"/v2/health/ready"` |  |
| readinessProbe.httpGet.port | string | `"http"` |  |
| readinessProbe.initialDelaySeconds | int | `5` |  |
| readinessProbe.periodSeconds | int | `5` |  |
| replicaCount | int | `1` | Number of replicas. |
| resources.limits."nvidia.com/gpu" | int | `1` |  |
| securityContext.appArmorProfile | object | `{}` | AppArmor profile for the container. |
| securityContext.seLinuxOptions | object | `{}` | SELinux options for the container. |
| securityContext.seccompProfile | object | `{}` | Seccomp profile for the container. |
| service.annotations | object | `{}` | Additional annotations to add to the service |
| service.ports.grpc | int | `8001` |  |
| service.ports.http | int | `8000` |  |
| service.ports.metrics | int | `8002` |  |
| service.type | string | `"ClusterIP"` |  |
| serviceAccount.annotations | object | `{}` | Additional annotations to add to the service account |
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
| tolerations[0].effect | string | `"NoSchedule"` |  |
| tolerations[0].key | string | `"nvidia.com/gpu"` |  |
| tolerations[0].operator | string | `"Exists"` |  |
| tolerations[1].effect | string | `"NoSchedule"` |  |
| tolerations[1].key | string | `"aws.amazon.com/neuron"` |  |
| tolerations[1].operator | string | `"Exists"` |  |
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |

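For larger changes, a values file is easier to maintain than `--set` flags. The sketch below assumes a hypothetical `my-values.yaml`; the keys mirror the table above, and the chosen values (poll interval, replica counts) are examples only. Note that `autoscaling.metrics` defaults to an empty list, so it would likely also need to be populated for the HorizontalPodAutoscaler to have a metric to scale on.

```bash
# Write an example override file; every key below appears in the Values table.
# The file name and values are illustrative, not chart defaults or recommendations.
cat <<'EOF' > my-values.yaml
replicaCount: 2

# Poll the model repository every 60s instead of the default 30s.
args:
  - --model-store=/models
  - --model-control-mode=poll
  - --repository-poll-secs=60

autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 5

resources:
  limits:
    nvidia.com/gpu: 1
EOF

# Apply the overrides (install-or-upgrade is idempotent).
helm upgrade --install triton-inference-server \
  --namespace triton \
  --create-namespace \
  -f my-values.yaml \
  clowdhaus/triton-inference-server
```
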
----------------------------------------------

Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).