pmady
diff --git a/‎charts/kubeai-autoscaler/Chart.yaml‎
Lines changed: 19 additions & 0 deletions b/‎charts/kubeai-autoscaler/Chart.yaml‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎charts/kubeai-autoscaler/README.md‎
Lines changed: 58 additions & 0 deletions b/‎charts/kubeai-autoscaler/README.md‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎charts/kubeai-autoscaler/templates/_helpers.tpl‎
Lines changed: 60 additions & 0 deletions b/‎charts/kubeai-autoscaler/templates/_helpers.tpl‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎charts/kubeai-autoscaler/templates/crds.yaml‎
Lines changed: 161 additions & 0 deletions b/‎charts/kubeai-autoscaler/templates/crds.yaml‎
Lines changed: 161 additions & 0 deletions
diff --git a/‎charts/kubeai-autoscaler/templates/deployment.yaml‎
Lines changed: 72 additions & 0 deletions b/‎charts/kubeai-autoscaler/templates/deployment.yaml‎
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,19 @@
+# Chart metadata for the KubeAI Autoscaler Helm chart.
+# apiVersion v2 is the chart format introduced with Helm 3.
+apiVersion: v2
+name: kubeai-autoscaler
+description: A Helm chart for KubeAI Autoscaler - Kubernetes-native AI inference workload scaling
+type: application
+# Version of the chart itself.
+version: 0.1.0
+# Version of the application being deployed; quoted so it is always a string.
+# The Deployment template uses this as the image tag when image.tag is empty.
+appVersion: "0.1.0"
+# Search keywords for chart repositories.
+keywords:
+  - kubernetes
+  - autoscaling
+  - ai
+  - inference
+  - gpu
+  - machine-learning
+home: https://github.com/pmady/kubeai-autoscaler
+sources:
+  - https://github.com/pmady/kubeai-autoscaler
+maintainers:
+  - name: pmady
+    email: pavan4devops@gmail.com
@@ -0,0 +1,58 @@
+# KubeAI Autoscaler Helm Chart
+
+A Helm chart for deploying KubeAI Autoscaler - Kubernetes-native AI inference workload scaling.
+
+## Prerequisites
+
+- Kubernetes 1.24+
+- Helm 3.0+
+- Prometheus (for metrics collection)
+
+## Installation
+
+```bash
+# Add the repository (if published)
+helm repo add kubeai https://pmady.github.io/kubeai-autoscaler
+
+# Install the chart
+helm install kubeai-autoscaler kubeai/kubeai-autoscaler -n kubeai-system --create-namespace
+```
+
+### Install from source
+
+```bash
+helm install kubeai-autoscaler ./charts/kubeai-autoscaler -n kubeai-system --create-namespace
+```
+
+## Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `replicaCount` | Number of controller replicas | `1` |
+| `image.repository` | Controller image repository | `ghcr.io/pmady/kubeai-autoscaler` |
+| `image.tag` | Controller image tag | `""` (uses appVersion) |
+| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
+| `serviceAccount.create` | Create service account | `true` |
+| `prometheus.address` | Prometheus server address | `http://prometheus.monitoring.svc.cluster.local:9090` |
+| `controller.leaderElection` | Enable leader election | `true` |
+| `serviceMonitor.enabled` | Enable ServiceMonitor for Prometheus Operator | `false` |
+| `resources.limits.cpu` | CPU limit | `500m` |
+| `resources.limits.memory` | Memory limit | `128Mi` |
+| `resources.requests.cpu` | CPU request | `100m` |
+| `resources.requests.memory` | Memory request | `64Mi` |
+
+## Example
+
+```bash
+helm install kubeai-autoscaler ./charts/kubeai-autoscaler \
+  -n kubeai-system \
+  --create-namespace \
+  --set prometheus.address=http://prometheus.monitoring:9090 \
+  --set serviceMonitor.enabled=true
+```
+
+## Uninstallation
+
+```bash
+helm uninstall kubeai-autoscaler -n kubeai-system
+```
@@ -0,0 +1,60 @@
+{{/*
+Short name of the chart: .Values.nameOverride when it is set, otherwise
+.Chart.Name. The result is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "kubeai-autoscaler.name" -}}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- $base | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name, resolved in this order:
+  1. .Values.fullnameOverride, when set;
+  2. the release name alone, when it already contains the chart name
+     (or .Values.nameOverride);
+  3. "<release name>-<chart name>" otherwise.
+Every variant is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "kubeai-autoscaler.fullname" -}}
+{{- $override := .Values.fullnameOverride }}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- if $override }}
+{{- $override | trunc 63 | trimSuffix "-" }}
+{{- else if contains $base .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $base | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+
+{{/*
+Value for the helm.sh/chart label: "<chart name>-<chart version>", with every
+"+" replaced by "_", cut to 63 characters and without a trailing "-".
+*/}}
+{{- define "kubeai-autoscaler.chart" -}}
+{{- $label := printf "%s-%s" .Chart.Name .Chart.Version }}
+{{- $label = replace "+" "_" $label }}
+{{- trimSuffix "-" (trunc 63 $label) }}
+{{- end }}
+
+{{/*
+Common labels: the chart label, the selector labels, the app version label
+(only when the chart declares an appVersion) and the managed-by label.
+*/}}
+{{- define "kubeai-autoscaler.labels" -}}
+helm.sh/chart: {{ include "kubeai-autoscaler.chart" . }}
+{{ include "kubeai-autoscaler.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the name/instance pair used both as pod labels and as the
+Deployment's matchLabels.
+Both values are quoted so that a release name or nameOverride that looks like
+a number, boolean or null (for example "1234" or "true") is still rendered as
+a YAML string; label values must be strings.
+*/}}
+{{- define "kubeai-autoscaler.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "kubeai-autoscaler.name" . | quote }}
+app.kubernetes.io/instance: {{ .Release.Name | quote }}
+{{- end }}
+
+{{/*
+Name of the service account the pod runs as.
+.Values.serviceAccount.name wins when it is set; otherwise the fallback is the
+chart's full name if the chart creates the account, or "default" if it does not.
+*/}}
+{{- define "kubeai-autoscaler.serviceAccountName" -}}
+{{- $fallback := "default" }}
+{{- if .Values.serviceAccount.create }}
+{{- $fallback = include "kubeai-autoscaler.fullname" . }}
+{{- end }}
+{{- default $fallback .Values.serviceAccount.name }}
+{{- end }}
@@ -0,0 +1,161 @@
+# CustomResourceDefinition for AIInferenceAutoscalerPolicy.
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  # Must be "<plural>.<group>" as declared under spec.names / spec.group.
+  name: aiinferenceautoscalerpolicies.kubeai.io
+  labels:
+    {{- include "kubeai-autoscaler.labels" . | nindent 4 }}
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.14.0
+    # This CRD is rendered from templates/, so without this annotation
+    # "helm uninstall" would delete it, and deleting a CRD removes every
+    # AIInferenceAutoscalerPolicy object in the cluster. With "keep" the CRD
+    # is left in place on uninstall and has to be removed manually if desired.
+    helm.sh/resource-policy: keep
+# Definition of the namespaced AIInferenceAutoscalerPolicy resource in the
+# kubeai.io API group.
+spec:
+  group: kubeai.io
+  names:
+    kind: AIInferenceAutoscalerPolicy
+    listKind: AIInferenceAutoscalerPolicyList
+    plural: aiinferenceautoscalerpolicies
+    singular: aiinferenceautoscalerpolicy
+    # Short aliases accepted by kubectl in place of the full resource name.
+    shortNames:
+      - aiap
+      - aipolicy
+  scope: Namespaced
+  versions:
+    # v1alpha1 is the only version; it is both served and the storage version.
+    - name: v1alpha1
+      served: true
+      storage: true
+      schema:
+        openAPIV3Schema:
+          type: object
+          description: AIInferenceAutoscalerPolicy defines autoscaling rules for AI inference workloads
+          properties:
+            apiVersion:
+              type: string
+            kind:
+              type: string
+            metadata:
+              type: object
+            spec:
+              type: object
+              # targetRef and metrics are mandatory; all other spec fields are optional.
+              required:
+                - targetRef
+                - metrics
+              properties:
+                targetRef:
+                  type: object
+                  description: Reference to the target Deployment or StatefulSet
+                  required:
+                    - apiVersion
+                    - kind
+                    - name
+                  properties:
+                    apiVersion:
+                      type: string
+                    kind:
+                      type: string
+                      # Only these two workload kinds pass validation.
+                      enum:
+                        - Deployment
+                        - StatefulSet
+                    name:
+                      type: string
+                # Lower replica bound; defaults to 1 when omitted.
+                minReplicas:
+                  type: integer
+                  minimum: 1
+                  default: 1
+                # Upper replica bound; no default and not listed as required.
+                maxReplicas:
+                  type: integer
+                  minimum: 1
+                # NOTE(review): the unit is not stated in the schema
+                # (presumably seconds) - confirm against the controller.
+                cooldownPeriod:
+                  type: integer
+                  minimum: 0
+                  default: 300
+                # Per-metric settings. Each metric has an "enabled" flag, one or
+                # more integer targets and a prometheusQuery string; none of the
+                # sub-fields is marked required.
+                metrics:
+                  type: object
+                  properties:
+                    # Latency metric; enabled by default.
+                    latency:
+                      type: object
+                      properties:
+                        enabled:
+                          type: boolean
+                          default: true
+                        targetP99Ms:
+                          type: integer
+                        targetP95Ms:
+                          type: integer
+                        prometheusQuery:
+                          type: string
+                    # GPU utilization metric; enabled by default, target limited to 1-100.
+                    gpuUtilization:
+                      type: object
+                      properties:
+                        enabled:
+                          type: boolean
+                          default: true
+                        targetPercentage:
+                          type: integer
+                          minimum: 1
+                          maximum: 100
+                        prometheusQuery:
+                          type: string
+                    # Request queue depth metric; disabled by default.
+                    requestQueueDepth:
+                      type: object
+                      properties:
+                        enabled:
+                          type: boolean
+                          default: false
+                        targetDepth:
+                          type: integer
+                          minimum: 0
+                        prometheusQuery:
+                          type: string
+            # Observed state; exposed through the status subresource declared below.
+            status:
+              type: object
+              properties:
+                currentReplicas:
+                  type: integer
+                desiredReplicas:
+                  type: integer
+                lastScaleTime:
+                  type: string
+                  format: date-time
+                currentMetrics:
+                  type: object
+                  properties:
+                    latencyP99Ms:
+                      type: integer
+                    latencyP95Ms:
+                      type: integer
+                    gpuUtilizationPercent:
+                      type: integer
+                    requestQueueDepth:
+                      type: integer
+                conditions:
+                  type: array
+                  items:
+                    type: object
+                    properties:
+                      type:
+                        type: string
+                      status:
+                        type: string
+                      lastTransitionTime:
+                        type: string
+                        format: date-time
+                      reason:
+                        type: string
+                      message:
+                        type: string
+      # Enables the /status subresource for this version.
+      subresources:
+        status: {}
+      # Extra columns shown in "kubectl get" output for this resource.
+      additionalPrinterColumns:
+        - name: Target
+          type: string
+          jsonPath: .spec.targetRef.name
+        - name: Min
+          type: integer
+          jsonPath: .spec.minReplicas
+        - name: Max
+          type: integer
+          jsonPath: .spec.maxReplicas
+        - name: Current
+          type: integer
+          jsonPath: .status.currentReplicas
+        - name: Age
+          type: date
+          jsonPath: .metadata.creationTimestamp
@@ -0,0 +1,72 @@
+# Deployment that runs the KubeAI Autoscaler controller.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  # Templated scalars are quoted so that values consisting only of digits
+  # (for example a namespace called "123") are still rendered as YAML strings.
+  name: {{ include "kubeai-autoscaler.fullname" . | quote }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "kubeai-autoscaler.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  # The selector and the pod template labels both come from the same helper,
+  # so they always match.
+  selector:
+    matchLabels:
+      {{- include "kubeai-autoscaler.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "kubeai-autoscaler.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "kubeai-autoscaler.serviceAccountName" . | quote }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          # The image tag falls back to the chart's appVersion when image.tag is empty.
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          # --leader-elect is passed only when controller.leaderElection is set.
+          args:
+            {{- if .Values.controller.leaderElection }}
+            - --leader-elect
+            {{- end }}
+            - "--prometheus-address={{ .Values.prometheus.address }}"
+          ports:
+            - name: metrics
+              containerPort: 8080
+              protocol: TCP
+            - name: health
+              containerPort: 8081
+              protocol: TCP
+          # Both probes target the named "health" port (8081).
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: health
+            initialDelaySeconds: 15
+            periodSeconds: 20
+          readinessProbe:
+            httpGet:
+              path: /readyz
+              port: health
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}