Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions latencypredictor-v1/Dockerfile-prediction
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ WORKDIR /app

# Copy the requirements file and install dependencies
# (It's good practice to manage dependencies in a requirements.txt file)


RUN apt-get update && apt-get install -y \
libgomp1 \
&& rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

Expand Down
38 changes: 38 additions & 0 deletions latencypredictor-v1/Dockerfile-test
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Dockerfile-test
FROM python:3.9-slim

# Install system dependencies
RUN apt-get update && apt-get install -y \
curl \
wget \
jq \
&& rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install additional testing dependencies
RUN pip install --no-cache-dir \
pytest \
pytest-asyncio \
requests \
httpx \
aiohttp

# Copy test files
COPY test_dual_server_client.py .


# Create test results directory
RUN mkdir -p /test-results

# Set environment variables
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1

# Default command runs the specific test
CMD ["pytest", "-v", "-s", "test_dual_server_client.py"]
7 changes: 7 additions & 0 deletions latencypredictor-v1/Dockerfile-training
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ WORKDIR /app

# Copy the requirements file and install dependencies
# (It's good practice to manage dependencies in a requirements.txt file)


RUN apt-get update && apt-get install -y \
libgomp1 \
&& rm -rf /var/lib/apt/lists/*


COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

Expand Down
208 changes: 191 additions & 17 deletions latencypredictor-v1/build-deploy.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
#!/bin/bash
# Build and deploy script for both servers
# Build and deploy script for training, prediction, and test servers

set -e

# Configuration
PROJECT_ID="kaushikmitra-gke-dev"
REGION="asia-southeast1-c"
REPOSITORY="kaushikmitra-docker-repo"
TRAINING_IMAGE="latencypredictor-v1-training-server"
PREDICTION_IMAGE="latencypredictor-v1-prediction-server"
TRAINING_IMAGE="latencypredictor-v3-training-server"
PREDICTION_IMAGE="latencypredictor-v3-prediction-server"
TEST_IMAGE="latencypredictor-v3-test"
TAG="latest"

# Colors for output
Expand Down Expand Up @@ -41,7 +42,18 @@ check_files() {
fi
done

echo_status "All required files found."
# Check for test-specific files
local test_files=("Dockerfile-test")
for file in "${test_files[@]}"; do
if [[ ! -f "$file" ]]; then
echo_warning "Test file $file not found - test image will not be built"
TEST_BUILD_ENABLED=false
return
fi
done

TEST_BUILD_ENABLED=true
echo_status "All required files found (including test files)."
}

# Build Docker images
Expand All @@ -50,7 +62,7 @@ build_images() {

# Build training server image
echo_status "Building training server image..."
docker build -f Dockerfile-training -t ${TRAINING_IMAGE}:${TAG} .
docker build -f Dockerfile-training -t ${TRAINING_IMAGE}:${TAG} .

# Tag for training server
docker tag ${TRAINING_IMAGE}:${TAG} \
Expand All @@ -64,7 +76,19 @@ build_images() {
docker tag ${PREDICTION_IMAGE}:${TAG} \
us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${PREDICTION_IMAGE}:${TAG}

echo_status "Images built successfully."
# Build test image if enabled
if [[ "$TEST_BUILD_ENABLED" == "true" ]]; then
echo_status "Building test image..."
docker build -f Dockerfile-test -t ${TEST_IMAGE}:${TAG} .

# Tag for test image
docker tag ${TEST_IMAGE}:${TAG} \
us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${TEST_IMAGE}:${TAG}

echo_status "All images (including test) built successfully."
else
echo_status "Images built successfully (test image skipped)."
fi
}

# Push images to Artifact Registry
Expand All @@ -82,7 +106,14 @@ push_images() {
echo_status "Pushing prediction server image..."
docker push us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${PREDICTION_IMAGE}:${TAG}

echo_status "Images pushed successfully."
# Push test image if enabled
if [[ "$TEST_BUILD_ENABLED" == "true" ]]; then
echo_status "Pushing test image..."
docker push us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${TEST_IMAGE}:${TAG}
echo_status "All images (including test) pushed successfully."
else
echo_status "Images pushed successfully (test image skipped)."
fi
}

# Deploy to GKE
Expand All @@ -102,6 +133,112 @@ deploy_to_gke() {
echo_status "Deployment completed successfully."
}

# Deploy test job
deploy_test() {
echo_status "Deploying test job..."

if [[ "$TEST_BUILD_ENABLED" != "true" ]]; then
echo_warning "Test image not available. Skipping test deployment."
return
fi

# Check if test manifest exists
if [[ ! -f "test-job.yaml" ]]; then
echo_warning "test-job.yaml not found. Creating a basic test job..."
create_test_manifest
fi

# Delete existing test job if it exists
kubectl delete job latency-predictor-test --ignore-not-found=true

# Apply test job
kubectl apply -f test-job.yaml

echo_status "Test job deployed. Monitor with: kubectl logs -f job/latency-predictor-test"
}

# Create a basic test manifest
create_test_manifest() {
cat > test-job.yaml << EOF
apiVersion: batch/v1
kind: Job
metadata:
name: latency-predictor-test
namespace: default
labels:
app: latency-predictor-test
component: test
spec:
template:
metadata:
labels:
app: latency-predictor-test
component: test
spec:
nodeSelector:
cloud.google.com/gke-nodepool: "pool-2"
restartPolicy: Never
containers:
- name: test-runner
image: us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY}/${TEST_IMAGE}:${TAG}
imagePullPolicy: Always
command: ["pytest"]
args: ["-v", "-s", "test_dual_server_client.py"]
resources:
requests:
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1000m"
memory: "2Gi"
env:
- name: TRAINING_SERVER_URL
value: "http://training-service:8000"
- name: PREDICTION_SERVER_URL
value: "http://prediction-service:80"
- name: TEST_TIMEOUT
value: "300"
volumeMounts:
- name: test-results
mountPath: /test-results
volumes:
- name: test-results
emptyDir: {}
backoffLimit: 3
EOF
echo_status "Created basic test-job.yaml manifest."
}

# Run tests
run_tests() {
echo_status "Running tests..."

if [[ "$TEST_BUILD_ENABLED" != "true" ]]; then
echo_warning "Test image not available. Running basic connectivity tests instead..."
test_deployment
return
fi

# Deploy and run test job
deploy_test

# Wait for job completion and show logs
echo_status "Waiting for test job to complete..."
kubectl wait --for=condition=complete job/latency-predictor-test --timeout=600s || {
echo_error "Test job did not complete successfully"
kubectl describe job latency-predictor-test
kubectl logs job/latency-predictor-test
return 1
}

echo_status "Test job completed. Showing logs:"
kubectl logs job/latency-predictor-test

# Clean up test job
echo_status "Cleaning up test job..."
kubectl delete job latency-predictor-test
}

# Get service information
get_service_info() {
echo_status "Getting service information..."
Expand Down Expand Up @@ -131,7 +268,7 @@ get_service_info() {
kubectl get services
}

# Test the deployment
# Test the deployment (basic connectivity tests)
test_deployment() {
echo_status "Testing deployment..."

Expand Down Expand Up @@ -165,6 +302,18 @@ test_deployment() {
fi
}

# List built images
list_images() {
echo_status "Listing built images..."

echo_status "Local images:"
docker images | grep -E "${TRAINING_IMAGE}|${PREDICTION_IMAGE}|${TEST_IMAGE}" || echo "No local images found"

echo_status "Remote images in Artifact Registry:"
gcloud artifacts docker images list us-docker.pkg.dev/${PROJECT_ID}/${REPOSITORY} \
--include-tags --filter="package~(${TRAINING_IMAGE}|${PREDICTION_IMAGE}|${TEST_IMAGE})" || echo "No remote images found"
}

# Cleanup function
cleanup() {
echo_status "Cleaning up..."
Expand All @@ -184,15 +333,27 @@ main() {
build_images
;;
"push")
check_files
push_images
;;
"deploy")
deploy_to_gke
;;
"test-deploy")
check_files
deploy_test
;;
"test")
check_files
run_tests
;;
"info")
get_service_info
;;
"test")
"images")
list_images
;;
"basic-test")
test_deployment
;;
"all")
Expand All @@ -204,17 +365,30 @@ main() {
test_deployment
cleanup
;;
"full")
check_files
build_images
push_images
deploy_to_gke
get_service_info
run_tests
cleanup
;;
*)
echo "Usage: $0 {check|build|push|deploy|info|test|all}"
echo "Usage: $0 {check|build|push|deploy|test-deploy|test|info|images|basic-test|all|full}"
echo ""
echo "Commands:"
echo " check - Check if required files exist"
echo " build - Build Docker images"
echo " push - Push images to Artifact Registry"
echo " deploy - Deploy to GKE"
echo " info - Get service information"
echo " test - Test the deployment"
echo " all - Run complete build and deployment process"
echo " check - Check if required files exist"
echo " build - Build Docker images (including test if Dockerfile-test exists)"
echo " push - Push images to Artifact Registry"
echo " deploy - Deploy to GKE"
echo " test-deploy- Deploy test job only"
echo " test - Run comprehensive tests using test image"
echo " info - Get service information"
echo " images - List built images (local and remote)"
echo " basic-test - Run basic connectivity tests"
echo " all - Run complete build and deployment process (no tests)"
echo " full - Run complete process including comprehensive tests"
exit 1
;;
esac
Expand Down
12 changes: 6 additions & 6 deletions latencypredictor-v1/manifests/dual-server-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ spec:
component: training
spec:
nodeSelector:
cloud.google.com/gke-nodepool: "pool-1"
cloud.google.com/gke-nodepool: "pool-2"
containers:
- name: training-server
image: us-docker.pkg.dev/kaushikmitra-gke-dev/kaushikmitra-docker-repo/latencypredictor-v1-training-server:latest
image: us-docker.pkg.dev/kaushikmitra-gke-dev/kaushikmitra-docker-repo/latencypredictor-v3-training-server:latest

imagePullPolicy: Always
ports:
- containerPort: 8000
Expand Down Expand Up @@ -145,7 +145,7 @@ metadata:
app: prediction-server
component: prediction
spec:
replicas: 5
replicas: 10
selector:
matchLabels:
app: prediction-server
Expand All @@ -157,10 +157,10 @@ spec:
component: prediction
spec:
nodeSelector:
cloud.google.com/gke-nodepool: "pool-1"
cloud.google.com/gke-nodepool: "pool-2"
containers:
- name: prediction-server
image: us-docker.pkg.dev/kaushikmitra-gke-dev/kaushikmitra-docker-repo/latencypredictor-v1-prediction-server:latest
image: us-docker.pkg.dev/kaushikmitra-gke-dev/kaushikmitra-docker-repo/latencypredictor-v3-prediction-server:latest
imagePullPolicy: Always
ports:
- containerPort: 8001
Expand Down
Loading