Skip to content
Closed

Jb test #1803

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* @GoogleCloudPlatform/cloud-samples-infra @yoshi-approver

# Directory-specific owners
/adc/ @GoogleCloudPlatform/cloud-samples-infra @yoshi-approver
/ai-ml/ @alizaidis @moficodes @kenthua @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
/batch/ @alizaidis @moficodes @kenthua @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
/cost-optimization/gke-vpa-recommendations/ @aburhan @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
Expand All @@ -15,4 +16,4 @@
/quickstarts/whereami/ @theemadnes @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
/security/language-vulns/ @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
/streaming/ @aburhan @pwschuurman @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
/windows/ @ibabou @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
/windows/ @ibabou @yoshi-approver @GoogleCloudPlatform/dee-platform-ops
54 changes: 54 additions & 0 deletions .github/workflows/adc-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# CI for the Helm charts under adc/: each chart is linted, rendered with
# `helm template`, and installed in dry-run mode.
name: adc-ci
"on":
  push:
    branches:
      - main
      # NOTE(review): `jb-test` looks like a personal test branch — confirm it
      # should remain in the trigger list.
      - jb-test
    paths:
      - .github/workflows/adc-ci.yml
      - adc/**
  pull_request:
    paths:
      - .github/workflows/adc-ci.yml
      - adc/**
# Least privilege: the job only needs to read the repository contents.
permissions:
  contents: read
jobs:
  lint:
    # NOTE(review): this job also runs for pull_request events on a
    # self-hosted runner — confirm the runner is isolated from untrusted code.
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v4
      - name: Set up Helm
        uses: azure/setup-helm@v4
      - name: Lint, template, and dry-run adc/ai-inference
        run: |
          helm lint adc/ai-inference
          helm template adc/ai-inference
          helm install ai-inference-test adc/ai-inference --dry-run
      - name: Lint, template, and dry-run adc/enterprise-production
        run: |
          helm lint adc/enterprise-production
          helm template adc/enterprise-production
          helm install enterprise-production-test adc/enterprise-production --dry-run
      - name: Lint, template, and dry-run adc/simple-app-multi-region
        run: |
          helm lint adc/simple-app-multi-region
          helm template adc/simple-app-multi-region
          helm install simple-app-multi-region-test adc/simple-app-multi-region --dry-run
      - name: Lint, template, and dry-run adc/simple-app-single-region
        run: |
          helm lint adc/simple-app-single-region
          helm template adc/simple-app-single-region
          helm install simple-app-single-region-test adc/simple-app-single-region --dry-run
20 changes: 20 additions & 0 deletions adc/ai-inference/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Helm chart metadata for the "gemma" application chart.
apiVersion: v2
type: application
name: gemma
# NOTE(review): the description names Gemma 2 27B, while the chart's default
# values set model.id to google/gemma-7b-it — confirm which is intended.
description: >-
  A Helm chart for deploying the Gemma 2 27B model for inference
# Chart version and the version of the packaged application.
version: 0.1.0
appVersion: '1.0'
3 changes: 3 additions & 0 deletions adc/ai-inference/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# AI Inference Helm Chart

This sample chart shows how to deploy a Gemma 2 27B model for inference. Visit the [GKE documentation](https://cloud.google.com/kubernetes-engine/docs/) to follow the tutorial.
66 changes: 66 additions & 0 deletions adc/ai-inference/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{{/*
Copyright 2024 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{/*
Expand the name of the chart.
Resolves to .Values.nameOverride when it is set, otherwise to .Chart.Name.
The result is truncated to 63 characters and a trailing "-" is stripped.
*/}}
{{- define "gemma.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.

Resolution order:
  1. .Values.fullnameOverride, when set.
  2. .Release.Name alone, when it already contains the chart name (or .Values.nameOverride).
  3. "<release name>-<chart name or nameOverride>" otherwise.
Every result is truncated to 63 characters and a trailing "-" is stripped.
*/}}
{{- define "gemma.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create the chart name-and-version string used as the value of the helm.sh/chart label.
Formats "<chart name>-<chart version>", replaces "+" with "_", truncates to
63 characters and strips a trailing "-".
*/}}
{{- define "gemma.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Emits helm.sh/chart, the selector labels from "gemma.selectorLabels",
app.kubernetes.io/version (only when .Chart.AppVersion is set) and
app.kubernetes.io/managed-by (from .Release.Service).
*/}}
{{- define "gemma.labels" -}}
helm.sh/chart: {{ include "gemma.chart" . }}
{{ include "gemma.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
Emits app.kubernetes.io/name (from "gemma.name") and
app.kubernetes.io/instance (the release name).
*/}}
{{- define "gemma.selectorLabels" -}}
app.kubernetes.io/name: {{ include "gemma.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
74 changes: 74 additions & 0 deletions adc/ai-inference/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deployment that runs the vLLM OpenAI-compatible API server for the
# configured model.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "gemma.fullname" . }}
  labels:
    {{- include "gemma.labels" . | nindent 4 }}
spec:
  # Leave the replica count to the HorizontalPodAutoscaler when it is enabled;
  # rendering `replicas` as well would reset the autoscaler's choice on every
  # upgrade.
  {{- if not .Values.hpa.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "gemma.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "gemma.selectorLabels" . | nindent 8 }}
    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          command:
            - python3
            - -m
            - vllm.entrypoints.openai.api_server
          args:
            # $(MODEL_ID) refers to the MODEL_ID environment variable
            # declared below.
            - --model=$(MODEL_ID)
            - --disable-log-requests
            - --tensor-parallel-size={{ .Values.tensorParallelSize }}
            # Full flag name (`--max-num-seqs`) rather than an abbreviation.
            - --max-num-seqs=512
            - --gpu-memory-utilization=0.95
            - --num-scheduler-steps=8
            - --max-model-len={{ .Values.maxModelLen }}
          env:
            - name: MODEL_ID
              value: {{ .Values.model.id | quote }}
            # Token is read from key `hf_api_token` of the referenced Secret.
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  key: hf_api_token
                  name: {{ .Values.model.hfSecret | quote }}
          ports:
            - containerPort: 8000
              name: metrics
          # Tolerates up to 6000 consecutive failures at a 10 s period before
          # the pod is marked unready.
          readinessProbe:
            failureThreshold: 6000
            httpGet:
              path: /health
              port: 8000
            periodSeconds: 10
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
      # Render nodeSelector only when one is configured.
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      volumes:
        # Memory-backed emptyDir mounted at /dev/shm.
        - emptyDir:
            medium: Memory
          name: dshm
37 changes: 37 additions & 0 deletions adc/ai-inference/templates/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

{{- if .Values.hpa.enabled }}
# HorizontalPodAutoscaler for the chart's Deployment, rendered only when
# hpa.enabled is true. Scales on a per-pod metric with a fixed average target.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels:
    {{- include "gemma.labels" . | nindent 4 }}
  name: {{ include "gemma.fullname" . }}
spec:
  minReplicas: {{ .Values.hpa.minReplicas }}
  maxReplicas: {{ .Values.hpa.maxReplicas }}
  scaleTargetRef:
    kind: Deployment
    name: {{ include "gemma.fullname" . }}
    apiVersion: apps/v1
  metrics:
    - type: Pods
      pods:
        metric:
          name: "prometheus.googleapis.com|vllm:gpu_cache_usage_perc|gauge"
        target:
          type: AverageValue
          averageValue: 504m
{{- end }}
27 changes: 27 additions & 0 deletions adc/ai-inference/templates/pdb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

{{- if .Values.pdb.enabled }}
# PodDisruptionBudget for the chart's pods, rendered only when pdb.enabled is
# true. The `if` action above must not right-trim (`-}}`): doing so would join
# `apiVersion` onto the preceding comment line and drop it from the manifest.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: {{ include "gemma.fullname" . }}
  labels:
    {{- include "gemma.labels" . | nindent 4 }}
spec:
  minAvailable: {{ .Values.pdb.minAvailable }}
  selector:
    matchLabels:
      {{- include "gemma.selectorLabels" . | nindent 6 }}
{{- end }}
29 changes: 29 additions & 0 deletions adc/ai-inference/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Service in front of the inference pods selected by the chart's selector
# labels.
apiVersion: v1
kind: Service
metadata:
  labels:
    {{- include "gemma.labels" . | nindent 4 }}
  name: {{ include "gemma.fullname" . }}
spec:
  type: {{ .Values.service.type }}
  selector:
    {{- include "gemma.selectorLabels" . | nindent 4 }}
  ports:
    # The port is fixed at 8000 here; this template does not read a port
    # from the chart values.
    - name: http
      protocol: TCP
      port: 8000
      targetPort: 8000
57 changes: 57 additions & 0 deletions adc/ai-inference/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for gemma chart.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Replica count for the Deployment
replicaCount: 1

# -- Container image for the vLLM OpenAI-compatible server
image:
  repository: vllm/vllm-openai
  tag: v0.7.2
  pullPolicy: IfNotPresent

model:
  # -- Model identifier exposed to the container as MODEL_ID
  # NOTE(review): the chart description and README mention Gemma 2 27B, while
  # this default is gemma-7b-it — confirm which is intended.
  id: google/gemma-7b-it
  # -- Name of the Secret holding the Hugging Face token (key `hf_api_token`)
  hfSecret: hf-secret

# -- Container resource requests and limits
resources:
  limits:
    nvidia.com/gpu: "1"
  requests:
    nvidia.com/gpu: "1"

# -- Node selector applied to the pods
nodeSelector:
  cloud.google.com/gke-accelerator: nvidia-l4

hpa:
  enabled: true
  minReplicas: 1
  maxReplicas: 10
  # NOTE(review): the HPA template reviewed with this file scales on a vLLM
  # per-pod metric and does not appear to read this value — confirm and
  # remove if unused.
  targetCPUUtilizationPercentage: 80

# -- Number of GPUs to distribute the model across
tensorParallelSize: 1

# -- Maximum sequence length for the model
maxModelLen: 512

service:
  type: ClusterIP
  # Matches the port the Service template exposes (8000); the previous
  # default of 80 did not reflect the rendered Service.
  port: 8000

pdb:
  enabled: true
  # NOTE(review): with a single replica, minAvailable: 1 leaves no pod that
  # may be voluntarily evicted (for example during a node drain) — confirm
  # this is intended.
  minAvailable: 1
Loading
Loading