Skip to content

Commit 5ce1385

Browse files
authored
fix training server indentation bug and test yaml to build script (#1854)
* fix training server indentation bug and test yaml to build script
* add boilerplate header to build deploy script
1 parent 453c164 commit 5ce1385

File tree

2 files changed

+238
-42
lines changed

2 files changed

+238
-42
lines changed

latencypredictor/build-deploy.sh

100755 → 100644
Lines changed: 189 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#!/bin/bash
2-
31
# Copyright 2025 The Kubernetes Authors.
42
#
53
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,6 +11,7 @@
1311
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1412
# See the License for the specific language governing permissions and
1513
# limitations under the License.
14+
1615
set -e
1716

1817
# Configuration
@@ -21,6 +20,7 @@ REGION="your-gcp-region"
2120
REPOSITORY="your-artifact-registry-repo"
2221
TRAINING_IMAGE="latencypredictor-training-server"
2322
PREDICTION_IMAGE="latencypredictor-prediction-server"
23+
TEST_IMAGE="latencypredictor-test"
2424
TAG="latest"
2525

2626
# Colors for output
@@ -53,7 +53,18 @@ check_files() {
5353
fi
5454
done
5555

56-
echo_status "All required files found."
56+
# Check for test-specific files
57+
local test_files=("Dockerfile-test")
58+
for file in "${test_files[@]}"; do
59+
if [[ ! -f "$file" ]]; then
60+
echo_warning "Test file $file not found - test image will not be built"
61+
TEST_BUILD_ENABLED=false
62+
return
63+
fi
64+
done
65+
66+
TEST_BUILD_ENABLED=true
67+
echo_status "All required files found (including test files)."
5768
}
5869

5970
# Build Docker images
@@ -62,7 +73,7 @@ build_images() {
6273

6374
# Build training server image
6475
echo_status "Building training server image..."
65-
docker build -f Dockerfile-training -t ${TRAINING_IMAGE}:${TAG} .
76+
docker build -f Dockerfile-training -t ${TRAINING_IMAGE}:${TAG} .
6677

6778
# Tag for training server
6879
docker tag ${TRAINING_IMAGE}:${TAG} \
@@ -76,7 +87,19 @@ build_images() {
7687
docker tag ${PREDICTION_IMAGE}:${TAG} \
7788
us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${PREDICTION_IMAGE}:${TAG}
7889

79-
echo_status "Images built successfully."
90+
# Build test image if enabled
91+
if [[ "$TEST_BUILD_ENABLED" == "true" ]]; then
92+
echo_status "Building test image..."
93+
docker build -f Dockerfile-test -t ${TEST_IMAGE}:${TAG} .
94+
95+
# Tag for test image
96+
docker tag ${TEST_IMAGE}:${TAG} \
97+
us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${TEST_IMAGE}:${TAG}
98+
99+
echo_status "All images (including test) built successfully."
100+
else
101+
echo_status "Images built successfully (test image skipped)."
102+
fi
80103
}
81104

82105
# Push images to Artifact Registry
@@ -94,7 +117,14 @@ push_images() {
94117
echo_status "Pushing prediction server image..."
95118
docker push us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${PREDICTION_IMAGE}:${TAG}
96119

97-
echo_status "Images pushed successfully."
120+
# Push test image if enabled
121+
if [[ "$TEST_BUILD_ENABLED" == "true" ]]; then
122+
echo_status "Pushing test image..."
123+
docker push us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${TEST_IMAGE}:${TAG}
124+
echo_status "All images (including test) pushed successfully."
125+
else
126+
echo_status "Images pushed successfully (test image skipped)."
127+
fi
98128
}
99129

100130
# Deploy to GKE
@@ -114,6 +144,112 @@ deploy_to_gke() {
114144
echo_status "Deployment completed successfully."
115145
}
116146

147+
# Deploy the test job with kubectl.
#
# Does nothing (beyond a warning) when the test image is not available.
# If test-job.yaml is missing from the working directory, a basic manifest
# is generated via create_test_manifest before it is applied.
#
# Globals:
#   TEST_BUILD_ENABLED (read) - must be "true" for the job to be deployed.
# Calls:
#   echo_status, echo_warning, create_test_manifest, kubectl
deploy_test() {
    local manifest="test-job.yaml"
    local job_name="latency-predictor-test"

    echo_status "Deploying test job..."

    if [[ "$TEST_BUILD_ENABLED" != "true" ]]; then
        echo_warning "Test image not available. Skipping test deployment."
        return
    fi

    # Check if test manifest exists
    if [[ ! -f "$manifest" ]]; then
        echo_warning "${manifest} not found. Creating a basic test job..."
        create_test_manifest
    fi

    # Remove any job left over from a previous run before applying the manifest
    kubectl delete job "$job_name" --ignore-not-found=true

    # Apply test job
    kubectl apply -f "$manifest"

    echo_status "Test job deployed. Monitor with: kubectl logs -f job/${job_name}"
}
170+
171+
# Create a basic test manifest.
#
# Writes test-job.yaml into the current working directory (overwriting any
# existing file). The manifest describes a batch/v1 Job named
# latency-predictor-test that runs pytest from the test image.
#
# Globals:
#   PROJECT_ID, REPOSITORY, TEST_IMAGE, TAG (read) - expanded into the
#   container image reference at generation time.
# Calls:
#   echo_status
create_test_manifest() {
    # The heredoc delimiter is intentionally unquoted so the image variables
    # are expanded. YAML nesting is expressed by the leading spaces below, so
    # the heredoc body must keep its exact indentation.
    cat > test-job.yaml << EOF
apiVersion: batch/v1
kind: Job
metadata:
  name: latency-predictor-test
  namespace: default
  labels:
    app: latency-predictor-test
    component: test
spec:
  template:
    metadata:
      labels:
        app: latency-predictor-test
        component: test
    spec:
      nodeSelector:
        cloud.google.com/gke-nodepool: "pool-2"
      restartPolicy: Never
      containers:
      - name: test-runner
        image: us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${TEST_IMAGE}:${TAG}
        imagePullPolicy: Always
        command: ["pytest"]
        args: ["-v", "-s", "test_dual_server_client.py"]
        resources:
          requests:
            cpu: "500m"
            memory: "1Gi"
          limits:
            cpu: "1000m"
            memory: "2Gi"
        env:
        - name: TRAINING_SERVER_URL
          value: "http://training-service:8000"
        - name: PREDICTION_SERVER_URL
          value: "http://prediction-service:80"
        - name: TEST_TIMEOUT
          value: "300"
        volumeMounts:
        - name: test-results
          mountPath: /test-results
      volumes:
      - name: test-results
        emptyDir: {}
  backoffLimit: 3
EOF
    echo_status "Created basic test-job.yaml manifest."
}
222+
223+
# Print the status of one condition of a Job ("True" once the condition is
# met, empty while it is absent).
#
# Arguments:
#   $1 - job name
#   $2 - condition type, e.g. Complete or Failed
_test_job_condition() {
    # Lookup errors are deliberately swallowed: a transient API failure should
    # not abort the polling loop; the caller's timeout covers persistent ones.
    kubectl get job "$1" \
        -o "jsonpath={.status.conditions[?(@.type==\"$2\")].status}" \
        2>/dev/null || true
}

# Run the comprehensive tests as a Kubernetes Job and report the outcome.
#
# When the test image is unavailable, falls back to test_deployment and
# returns its status. Otherwise deploys the job via deploy_test and polls it
# until it reports Complete or Failed, or until the timeout (600s) expires.
# Polling both conditions avoids blocking for the whole timeout when the job
# has already failed.
#
# On success the job logs are printed and the job is deleted. On failure or
# timeout the job description and logs are printed and the job is left in
# place (deploy_test removes it before the next run).
#
# Globals:
#   TEST_BUILD_ENABLED (read) - "true" when the test image is available.
# Calls:
#   echo_status, echo_warning, echo_error, test_deployment, deploy_test, kubectl
# Returns:
#   0 on success; 1 when the job failed or did not finish in time.
run_tests() {
    local job_name="latency-predictor-test"
    local timeout_seconds=600
    local poll_interval=5
    local deadline
    local job_state="pending"

    echo_status "Running tests..."

    if [[ "$TEST_BUILD_ENABLED" != "true" ]]; then
        echo_warning "Test image not available. Running basic connectivity tests instead..."
        test_deployment
        return
    fi

    # Deploy and run test job
    deploy_test

    # Wait for job completion and show logs
    echo_status "Waiting for test job to complete..."
    deadline=$(( SECONDS + timeout_seconds ))
    while true; do
        if [[ "$(_test_job_condition "$job_name" Complete)" == "True" ]]; then
            job_state="complete"
            break
        fi
        if [[ "$(_test_job_condition "$job_name" Failed)" == "True" ]]; then
            job_state="failed"
            break
        fi
        (( SECONDS < deadline )) || break
        sleep "$poll_interval"
    done

    if [[ "$job_state" != "complete" ]]; then
        echo_error "Test job did not complete successfully"
        # Diagnostics are best-effort so the failure status below is always
        # what the caller sees.
        kubectl describe job "$job_name" || true
        kubectl logs "job/${job_name}" || true
        return 1
    fi

    echo_status "Test job completed. Showing logs:"
    kubectl logs "job/${job_name}"

    # Clean up test job
    echo_status "Cleaning up test job..."
    kubectl delete job "$job_name"
}
252+
117253
# Get service information
118254
get_service_info() {
119255
echo_status "Getting service information..."
@@ -143,7 +279,7 @@ get_service_info() {
143279
kubectl get services
144280
}
145281

146-
# Test the deployment
282+
# Test the deployment (basic connectivity tests)
147283
test_deployment() {
148284
echo_status "Testing deployment..."
149285

@@ -177,6 +313,18 @@ test_deployment() {
177313
fi
178314
}
179315

316+
# List built images, both local and in Artifact Registry.
#
# Local images are the `docker images` rows matching any of the three image
# names; remote images are listed with gcloud, filtered on the same names.
# A fallback message is printed when grep finds no local match or when the
# gcloud command exits non-zero.
#
# Globals:
#   TRAINING_IMAGE, PREDICTION_IMAGE, TEST_IMAGE, PROJECT_ID, REPOSITORY (read)
# Calls:
#   echo_status, docker, gcloud
list_images() {
    # Alternation shared by the local grep and the remote --filter expression.
    local image_pattern="${TRAINING_IMAGE}|${PREDICTION_IMAGE}|${TEST_IMAGE}"
    local registry="us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}"

    echo_status "Listing built images..."

    echo_status "Local images:"
    docker images | grep -E -- "$image_pattern" || echo "No local images found"

    echo_status "Remote images in Artifact Registry:"
    gcloud artifacts docker images list "$registry" \
        --include-tags --filter="package~(${image_pattern})" \
        || echo "No remote images found"
}
327+
180328
# Cleanup function
181329
cleanup() {
182330
echo_status "Cleaning up..."
@@ -196,15 +344,27 @@ main() {
196344
build_images
197345
;;
198346
"push")
347+
check_files
199348
push_images
200349
;;
201350
"deploy")
202351
deploy_to_gke
203352
;;
353+
"test-deploy")
354+
check_files
355+
deploy_test
356+
;;
357+
"test")
358+
check_files
359+
run_tests
360+
;;
204361
"info")
205362
get_service_info
206363
;;
207-
"test")
364+
"images")
365+
list_images
366+
;;
367+
"basic-test")
208368
test_deployment
209369
;;
210370
"all")
@@ -216,17 +376,30 @@ main() {
216376
test_deployment
217377
cleanup
218378
;;
379+
"full")
380+
check_files
381+
build_images
382+
push_images
383+
deploy_to_gke
384+
get_service_info
385+
run_tests
386+
cleanup
387+
;;
219388
*)
220-
echo "Usage: $0 {check|build|push|deploy|info|test|all}"
389+
echo "Usage: $0 {check|build|push|deploy|test-deploy|test|info|images|basic-test|all|full}"
221390
echo ""
222391
echo "Commands:"
223-
echo " check - Check if required files exist"
224-
echo " build - Build Docker images"
225-
echo " push - Push images to Artifact Registry"
226-
echo " deploy - Deploy to GKE"
227-
echo " info - Get service information"
228-
echo " test - Test the deployment"
229-
echo " all - Run complete build and deployment process"
392+
echo " check - Check if required files exist"
393+
echo " build - Build Docker images (including test if Dockerfile-test exists)"
394+
echo " push - Push images to Artifact Registry"
395+
echo " deploy - Deploy to GKE"
396+
echo " test-deploy- Deploy test job only"
397+
echo " test - Run comprehensive tests using test image"
398+
echo " info - Get service information"
399+
echo " images - List built images (local and remote)"
400+
echo " basic-test - Run basic connectivity tests"
401+
echo " all - Run complete build and deployment process (no tests)"
402+
echo " full - Run complete process including comprehensive tests"
230403
exit 1
231404
;;
232405
esac

0 commit comments

Comments
 (0)