Skip to content

Commit 76c6725

Browse files
committed
feat: add Helm chart for easy deployment
- Add complete Helm chart with configurable values - Include CRD, RBAC, Deployment, Service templates - Add ServiceMonitor support for Prometheus Operator - Add comprehensive README with installation instructions Signed-off-by: pmady <pavan4devops@gmail.com>
1 parent 24beaaa commit 76c6725

File tree

10 files changed

+560
-0
lines changed

10 files changed

+560
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
# Helm chart metadata for the KubeAI Autoscaler controller.
apiVersion: v2
name: kubeai-autoscaler
description: A Helm chart for KubeAI Autoscaler - Kubernetes-native AI inference workload scaling
type: application
# Version of the chart itself.
version: 0.1.0
# Version of the packaged application; the Deployment template falls back to
# this value as the image tag when image.tag is not set.
appVersion: "0.1.0"
keywords:
  - kubernetes
  - autoscaling
  - ai
  - inference
  - gpu
  - machine-learning
home: https://github.com/pmady/kubeai-autoscaler
sources:
  - https://github.com/pmady/kubeai-autoscaler
maintainers:
  - name: pmady
    email: pavan4devops@gmail.com

charts/kubeai-autoscaler/README.md

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# KubeAI Autoscaler Helm Chart
2+
3+
A Helm chart for deploying KubeAI Autoscaler - Kubernetes-native AI inference workload scaling.
4+
5+
## Prerequisites
6+
7+
- Kubernetes 1.24+
8+
- Helm 3.0+
9+
- Prometheus (for metrics collection)
10+
11+
## Installation
12+
```bash
# Add the repository (if published)
helm repo add kubeai https://pmady.github.io/kubeai-autoscaler

# Install the chart
helm install kubeai-autoscaler kubeai/kubeai-autoscaler -n kubeai-system --create-namespace
```
20+
21+
### Install from source
22+
23+
```bash
24+
helm install kubeai-autoscaler ./charts/kubeai-autoscaler -n kubeai-system --create-namespace
25+
```
26+
27+
## Configuration
28+
| Parameter | Description | Default |
|-----------|-------------|---------|
| `replicaCount` | Number of controller replicas | `1` |
| `image.repository` | Controller image repository | `ghcr.io/pmady/kubeai-autoscaler` |
| `image.tag` | Controller image tag | `""` (uses appVersion) |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `serviceAccount.create` | Create service account | `true` |
| `prometheus.address` | Prometheus server address | `http://prometheus.monitoring.svc.cluster.local:9090` |
| `controller.leaderElection` | Enable leader election | `true` |
| `serviceMonitor.enabled` | Enable ServiceMonitor for Prometheus Operator | `false` |
| `resources.limits.cpu` | CPU limit | `500m` |
| `resources.limits.memory` | Memory limit | `128Mi` |
| `resources.requests.cpu` | CPU request | `100m` |
| `resources.requests.memory` | Memory request | `64Mi` |
43+
44+
## Example
45+
```bash
helm install kubeai-autoscaler ./charts/kubeai-autoscaler \
  -n kubeai-system \
  --create-namespace \
  --set prometheus.address=http://prometheus.monitoring:9090 \
  --set serviceMonitor.enabled=true
```
53+
54+
## Uninstallation
55+
56+
```bash
57+
helm uninstall kubeai-autoscaler -n kubeai-system
58+
```
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when it is set, otherwise .Chart.Name. The result is
truncated to 63 characters and a trailing "-" (if any) is removed.
*/}}
{{- define "kubeai-autoscaler.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. .Release.Name alone, when it already contains the chart name
     (or .Values.nameOverride, when that is set).
  3. "<release name>-<chart name or nameOverride>" otherwise.
Every result is truncated to 63 characters and a trailing "-" is removed.
*/}}
{{- define "kubeai-autoscaler.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
Formats "<chart name>-<chart version>", replaces "+" with "_", truncates to
63 characters and removes a trailing "-".
*/}}
{{- define "kubeai-autoscaler.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Emits the helm.sh/chart label, the selector labels, the application version
(only when .Chart.AppVersion is set) and the managed-by label. The lines are
written at column 0; callers indent the block with nindent.
*/}}
{{- define "kubeai-autoscaler.labels" -}}
helm.sh/chart: {{ include "kubeai-autoscaler.chart" . }}
{{ include "kubeai-autoscaler.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
The name/instance label pair used for the Deployment selector and pod
template labels, and embedded in the common labels.
*/}}
{{- define "kubeai-autoscaler.selectorLabels" -}}
app.kubernetes.io/name: {{ include "kubeai-autoscaler.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
When .Values.serviceAccount.create is true: .Values.serviceAccount.name if
set, otherwise the chart's fully qualified name.
When it is false: .Values.serviceAccount.name if set, otherwise "default".
*/}}
{{- define "kubeai-autoscaler.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "kubeai-autoscaler.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
# CustomResourceDefinition for AIInferenceAutoscalerPolicy (group kubeai.io,
# version v1alpha1, namespaced). The status subresource is enabled.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: aiinferenceautoscalerpolicies.kubeai.io
  labels:
    {{- include "kubeai-autoscaler.labels" . | nindent 4 }}
  annotations:
    controller-gen.kubebuilder.io/version: v0.14.0
spec:
  group: kubeai.io
  names:
    kind: AIInferenceAutoscalerPolicy
    listKind: AIInferenceAutoscalerPolicyList
    plural: aiinferenceautoscalerpolicies
    singular: aiinferenceautoscalerpolicy
    shortNames:
      - aiap
      - aipolicy
  scope: Namespaced
  versions:
    - name: v1alpha1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          description: AIInferenceAutoscalerPolicy defines autoscaling rules for AI inference workloads
          properties:
            apiVersion:
              type: string
            kind:
              type: string
            metadata:
              type: object
            spec:
              type: object
              required:
                - targetRef
                - metrics
              properties:
                # Workload to scale; only Deployment and StatefulSet kinds
                # pass validation.
                targetRef:
                  type: object
                  description: Reference to the target Deployment or StatefulSet
                  required:
                    - apiVersion
                    - kind
                    - name
                  properties:
                    apiVersion:
                      type: string
                    kind:
                      type: string
                      enum:
                        - Deployment
                        - StatefulSet
                    name:
                      type: string
                minReplicas:
                  type: integer
                  minimum: 1
                  default: 1
                # NOTE(review): maxReplicas has no default and is not in the
                # required list, and nothing here enforces
                # maxReplicas >= minReplicas - confirm the controller handles
                # both cases.
                maxReplicas:
                  type: integer
                  minimum: 1
                # NOTE(review): unit is not stated in the schema (presumably
                # seconds) - confirm against the controller.
                cooldownPeriod:
                  type: integer
                  minimum: 0
                  default: 300
                # Per-signal scaling targets. Each signal has an enabled flag,
                # a target value and an optional prometheusQuery string.
                metrics:
                  type: object
                  properties:
                    latency:
                      type: object
                      properties:
                        enabled:
                          type: boolean
                          default: true
                        targetP99Ms:
                          type: integer
                        targetP95Ms:
                          type: integer
                        prometheusQuery:
                          type: string
                    gpuUtilization:
                      type: object
                      properties:
                        enabled:
                          type: boolean
                          default: true
                        targetPercentage:
                          type: integer
                          minimum: 1
                          maximum: 100
                        prometheusQuery:
                          type: string
                    requestQueueDepth:
                      type: object
                      properties:
                        enabled:
                          type: boolean
                          default: false
                        targetDepth:
                          type: integer
                          minimum: 0
                        prometheusQuery:
                          type: string
            status:
              type: object
              properties:
                currentReplicas:
                  type: integer
                desiredReplicas:
                  type: integer
                lastScaleTime:
                  type: string
                  format: date-time
                currentMetrics:
                  type: object
                  properties:
                    latencyP99Ms:
                      type: integer
                    latencyP95Ms:
                      type: integer
                    gpuUtilizationPercent:
                      type: integer
                    requestQueueDepth:
                      type: integer
                conditions:
                  type: array
                  items:
                    type: object
                    properties:
                      type:
                        type: string
                      status:
                        type: string
                      lastTransitionTime:
                        type: string
                        format: date-time
                      reason:
                        type: string
                      message:
                        type: string
      subresources:
        status: {}
      # Extra columns shown by "kubectl get" for this resource.
      additionalPrinterColumns:
        - name: Target
          type: string
          jsonPath: .spec.targetRef.name
        - name: Min
          type: integer
          jsonPath: .spec.minReplicas
        - name: Max
          type: integer
          jsonPath: .spec.maxReplicas
        - name: Current
          type: integer
          jsonPath: .status.currentReplicas
        - name: Age
          type: date
          jsonPath: .metadata.creationTimestamp
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
# Deployment for the KubeAI Autoscaler controller.
# Replica count, image, security contexts, resources and scheduling
# constraints all come from chart values.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "kubeai-autoscaler.fullname" . }}
  # Quoted so a digit-only namespace name is not parsed as a number.
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "kubeai-autoscaler.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "kubeai-autoscaler.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "kubeai-autoscaler.selectorLabels" . | nindent 8 }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "kubeai-autoscaler.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          # The image tag falls back to the chart appVersion when image.tag
          # is empty.
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
            {{- if .Values.controller.leaderElection }}
            - --leader-elect
            {{- end }}
            # Quoted so an address containing YAML-significant characters
            # still renders as a single string argument.
            - {{ printf "--prometheus-address=%s" .Values.prometheus.address | quote }}
          ports:
            - name: metrics
              containerPort: 8080
              protocol: TCP
            - name: health
              containerPort: 8081
              protocol: TCP
          # Both probes target the named "health" port (8081).
          livenessProbe:
            httpGet:
              path: /healthz
              port: health
            initialDelaySeconds: 15
            periodSeconds: 20
          readinessProbe:
            httpGet:
              path: /readyz
              port: health
            initialDelaySeconds: 5
            periodSeconds: 10
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

0 commit comments

Comments
 (0)