-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path cloudbuild.yaml
More file actions
56 lines (50 loc) · 1.63 KB
/
cloudbuild.yaml
File metadata and controls
56 lines (50 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Cloud Build pipeline: build the LLM inference Docker image, push it to
# Artifact Registry, then deploy it to Cloud Run with an NVIDIA L4 GPU.
# $PROJECT_ID is a built-in Cloud Build substitution.
---
steps:
  # Step 1: Build Docker image from docker/Dockerfile, tagged for
  # the Artifact Registry repo in asia-southeast1.
  - name: gcr.io/cloud-builders/docker
    id: build
    args:
      - build
      - -t
      - asia-southeast1-docker.pkg.dev/$PROJECT_ID/llm-cloud-inference-se1/llm-cloud-inference:latest
      - -f
      - docker/Dockerfile
      - .

  # Step 2: Push to Artifact Registry so Cloud Run can pull the image.
  - name: gcr.io/cloud-builders/docker
    id: push
    args:
      - push
      - asia-southeast1-docker.pkg.dev/$PROJECT_ID/llm-cloud-inference-se1/llm-cloud-inference:latest

  # Step 3: Deploy the pushed image to Cloud Run as service "llm-api".
  - name: gcr.io/cloud-builders/gcloud
    id: deploy
    args:
      - run
      - deploy
      - llm-api
      - --image=asia-southeast1-docker.pkg.dev/$PROJECT_ID/llm-cloud-inference-se1/llm-cloud-inference:latest
      - --region=asia-southeast1
      - --platform=managed
      # One NVIDIA L4 GPU per instance.
      - --gpu=1
      - --gpu-type=nvidia-l4
      - --cpu=8
      - --memory=32Gi
      # Request timeout: 3600 s = 1 hour, for long-running inference calls.
      - --timeout=3600
      # Scale to zero when idle; cap at a single GPU instance.
      - --min-instances=0
      - --max-instances=1
      - --port=8080
      # Extra CPU during startup, and CPU always allocated.
      # NOTE(review): Cloud Run GPU deployments require --no-cpu-throttling —
      # confirm against current gcloud run deploy docs before removing.
      - --cpu-boost
      - --no-cpu-throttling
      - --set-env-vars=MODEL_NAME=qwen3-8b-awq,GCS_BUCKET=$PROJECT_ID-models
      # Runtime identity of the Cloud Run service (e.g. for GCS model access).
      - --service-account=llm-inference@$PROJECT_ID.iam.gserviceaccount.com
      # Require IAM authentication on invocations — no public access.
      - --no-allow-unauthenticated

# Build timeout (2400 s = 40 mins for vLLM Docker build - large base image).
timeout: 2400s

# Store images in Artifact Registry (recorded in the build results).
images:
  - asia-southeast1-docker.pkg.dev/$PROJECT_ID/llm-cloud-inference-se1/llm-cloud-inference:latest

options:
  machineType: E2_HIGHCPU_8
  # NOTE(review): a user-specified build serviceAccount typically requires
  # CLOUD_LOGGING_ONLY (or an explicit logs bucket) — confirm if changing.
  logging: CLOUD_LOGGING_ONLY
  substitutionOption: 'ALLOW_LOOSE'

# Identity the build itself runs as (distinct from the Cloud Run runtime
# service account above, though the same account is reused here).
serviceAccount: 'projects/llm-inference-31155/serviceAccounts/llm-inference@llm-inference-31155.iam.gserviceaccount.com'