Skip to content

Smoke Tests for SageMaker #1

Smoke Tests for SageMaker

Smoke Tests for SageMaker #1

# Manually dispatched workflow that smoke-tests the SageMaker build of the
# code editor.
name: Smoke Tests for SageMaker

on:
  workflow_dispatch:
    inputs:
      # Optional. When left empty the job falls back to the commit that the
      # workflow was dispatched on (`inputs.commit_sha || github.sha`).
      commit_sha:
        description: 'Commit SHA to use for artifacts'
        required: false
        type: string
jobs:
  # Takes a previously uploaded SageMaker server build artifact, bakes it into
  # a Docker image, pushes the image to ECR, registers it as a SageMaker custom
  # image, attaches that image to the project's SageMaker domain, and finally
  # creates and starts a Code Editor space that uses it.
  #
  # Values are handed from step to step through $GITHUB_ENV:
  #   ECR_IMAGE_URI, SAGEMAKER_DOMAIN_ID, PROJECT_ID, SAGEMAKER_IMAGE_NAME,
  #   SAGEMAKER_SPACE_NAME.
  smoke-tests-sagemaker:
    name: Run Smoke Tests for SageMaker
    runs-on: ubuntu-latest
    environment: sagemaker-e2e-tests-workflow-env
    permissions:
      # Needed to request an OIDC token for the role assumptions below.
      id-token: write
      # Needed by actions/checkout.
      contents: read
      # Needed by `gh run download`; scopes that are not listed here are
      # set to `none` once a `permissions` block is present.
      actions: read
    defaults:
      run:
        # An explicit `shell: bash` makes GitHub run scripts with
        # `bash -eo pipefail`, so a failure on the left side of a pipe
        # (e.g. `docker inspect ... | cut ...`) fails the step instead of
        # being silently ignored.
        shell: bash
    env:
      COMMIT_SHA: ${{ inputs.commit_sha || github.sha }}
      GH_REF_NAME: ${{ github.ref_name }}
      SAGEMAKER_ARTIFACT_PREFIX: "code-editor-sagemaker-server"
      GH_TOKEN: ${{ github.token }}
      AWS_REGION: us-east-2
      AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
      ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }}
      PROJECT_NAME: ${{ secrets.PROJECT_NAME }}
      DATAZONE_DOMAIN_ID: ${{ secrets.DATAZONE_DOMAIN_ID }}
      TEST_SAGEMAKER_ROLE: ${{ secrets.TEST_SAGEMAKER_ROLE }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup environment
        run: |
          echo "Installing required dependencies"
          sudo apt-get update
          sudo apt-get install -y quilt libxml2-utils jq libx11-dev libxkbfile-dev

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Download sagemaker build artifact
        run: |
          # Artifact name is "<commit>-<prefix>-build"; reuse the shared prefix
          # so this stays in sync with the existence check in the next step.
          gh run download --name "$COMMIT_SHA-$SAGEMAKER_ARTIFACT_PREFIX-build"

      - name: Check build artifacts exist
        run: |
          ls -la
          FILES=(
            "$SAGEMAKER_ARTIFACT_PREFIX-build.tar.gz"
          )
          # Check build artifact exists
          for file in "${FILES[@]}"; do
            if [ ! -f "$file" ]; then
              echo "Error: $file not found for commit $COMMIT_SHA"
              exit 1
            fi
          done

      - name: Configure ECR role AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.TEST_ECR_ROLE }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Extract artifacts
        run: |
          tar -xzf "$SAGEMAKER_ARTIFACT_PREFIX-build.tar.gz"

      - name: Build and push Docker image
        run: |
          # Login to ECR
          ECR_REGISTRY="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com"
          aws ecr get-login-password --region "$AWS_REGION" \
            | docker login --username AWS --password-stdin "$ECR_REGISTRY"

          # Create image tag with branch-commit format; every character that
          # is not alphanumeric or '-' in the ref name is replaced by '-'.
          BRANCH_NAME=$(echo "$GH_REF_NAME" | sed 's/[^a-zA-Z0-9-]/-/g')
          IMAGE_TAG="$BRANCH_NAME-$COMMIT_SHA"
          LOCAL_IMAGE="$ECR_REPOSITORY:$IMAGE_TAG"
          REMOTE_IMAGE="$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"

          # Build image
          docker build -f .github/workflows/dockerfiles/Dockerfile.sagemaker -t "$LOCAL_IMAGE" .
          # Tag image
          docker tag "$LOCAL_IMAGE" "$REMOTE_IMAGE"
          # Push image (push output is discarded)
          docker push "$REMOTE_IMAGE" > /dev/null
          echo "Docker image pushed successfully"

          # Get and store the image SHA digest so later steps reference the
          # image by digest rather than by tag.
          IMAGE_SHA=$(docker inspect --format='{{index .RepoDigests 0}}' "$REMOTE_IMAGE" | cut -d'@' -f2)
          if [ -z "$IMAGE_SHA" ]; then
            echo "Error: could not determine digest of pushed image"
            exit 1
          fi
          IMAGE_URI="$ECR_REGISTRY/$ECR_REPOSITORY@$IMAGE_SHA"
          echo "ECR_IMAGE_URI=$IMAGE_URI" >> "$GITHUB_ENV"
          echo "Image pushed successfully with SHA: ${IMAGE_SHA:0:12}..."

          # Clean up local Docker images and build artifacts to free disk space
          docker rmi "$LOCAL_IMAGE" "$REMOTE_IMAGE"
          echo "Local Docker images cleaned up"
          rm -rf vscode-reh-web-linux-x64
          rm -rf "$SAGEMAKER_ARTIFACT_PREFIX-build"
          echo "Local build artifacts cleaned up"

      - name: Configure SageMaker role AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.TEST_SAGEMAKER_ROLE }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Create SageMaker code editor image
        run: |
          # Fetch DataZone project ID
          PROJECT_ID=$(aws datazone list-projects --domain-identifier "$DATAZONE_DOMAIN_ID" --name "$PROJECT_NAME" --query 'items[0].id' --output text)
          if [ -z "$PROJECT_ID" ] || [ "$PROJECT_ID" = "None" ]; then
            echo "Error: DataZone project not found"
            exit 1
          fi
          echo "DataZone project found successfully"
          echo "::add-mask::$PROJECT_ID"

          # Find SageMaker domain by project ID (domain name contains project ID)
          DOMAIN_ID=$(aws sagemaker list-domains --query "Domains[?contains(DomainName, '$PROJECT_ID')].DomainId" --output text)
          if [ -z "$DOMAIN_ID" ] || [ "$DOMAIN_ID" = "None" ]; then
            echo "Error: SageMaker domain not found for project ID"
            exit 1
          fi
          echo "SageMaker domain found successfully"
          echo "::add-mask::$DOMAIN_ID"
          echo "SAGEMAKER_DOMAIN_ID=$DOMAIN_ID" >> "$GITHUB_ENV"
          echo "PROJECT_ID=$PROJECT_ID" >> "$GITHUB_ENV"

          # Create SageMaker image (same branch-commit naming as the ECR tag)
          BRANCH_NAME=$(echo "$GH_REF_NAME" | sed 's/[^a-zA-Z0-9-]/-/g')
          IMAGE_NAME="$BRANCH_NAME-${COMMIT_SHA}"
          aws sagemaker create-image \
            --image-name "$IMAGE_NAME" \
            --role-arn "$TEST_SAGEMAKER_ROLE"

          # Wait for image to be ready (max 30 retries = 5 minutes)
          echo "Waiting for SageMaker image to be ready..."
          RETRY_COUNT=0
          MAX_RETRIES=30
          while [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; do
            STATUS=$(aws sagemaker describe-image --image-name "$IMAGE_NAME" --query 'ImageStatus' --output text)
            echo "Image status: $STATUS (attempt $((RETRY_COUNT + 1))/$MAX_RETRIES)"
            if [ "$STATUS" = "CREATED" ]; then
              break
            elif [ "$STATUS" = "CREATE_FAILED" ]; then
              echo "Image creation failed"
              exit 1
            fi
            RETRY_COUNT=$((RETRY_COUNT + 1))
            sleep 10
          done
          # The counter only reaches MAX_RETRIES when the loop never hit `break`.
          if [ "$RETRY_COUNT" -eq "$MAX_RETRIES" ]; then
            echo "Timeout waiting for image to be ready"
            exit 1
          fi

          # Create image version
          aws sagemaker create-image-version \
            --image-name "$IMAGE_NAME" \
            --base-image "$ECR_IMAGE_URI"
          echo "SAGEMAKER_IMAGE_NAME=$IMAGE_NAME" >> "$GITHUB_ENV"

      - name: Attach image to domain
        run: |
          # Wait for image version to be ready (100 seconds)
          echo "Waiting for image version to be ready..."
          RETRY_COUNT=0
          MAX_RETRIES=10
          while [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; do
            VERSION_STATUS=$(aws sagemaker describe-image-version --image-name "$SAGEMAKER_IMAGE_NAME" --query 'ImageVersionStatus' --output text)
            echo "Image version status: $VERSION_STATUS (attempt $((RETRY_COUNT + 1))/$MAX_RETRIES)"
            if [ "$VERSION_STATUS" = "CREATED" ]; then
              break
            elif [ "$VERSION_STATUS" = "CREATE_FAILED" ]; then
              echo "Image version creation failed"
              exit 1
            fi
            RETRY_COUNT=$((RETRY_COUNT + 1))
            sleep 10
          done
          if [ "$RETRY_COUNT" -eq "$MAX_RETRIES" ]; then
            echo "Timeout waiting for image version to be ready"
            exit 1
          fi

          # Create universal AppImageConfig (if it doesn't exist)
          APP_IMAGE_CONFIG_NAME="code-editor-app-config"
          if ! aws sagemaker describe-app-image-config --app-image-config-name "$APP_IMAGE_CONFIG_NAME" >/dev/null 2>&1; then
            aws sagemaker create-app-image-config \
              --app-image-config-name "$APP_IMAGE_CONFIG_NAME" \
              --code-editor-app-image-config '{}'
            echo "Created universal AppImageConfig: $APP_IMAGE_CONFIG_NAME"
          else
            echo "Universal AppImageConfig already exists: $APP_IMAGE_CONFIG_NAME"
          fi

          # Get existing custom images. A failure here must abort the step:
          # falling back to an empty list would make the update below replace
          # the domain's current custom images with only the new one.
          EXISTING_IMAGES=$(aws sagemaker describe-domain --domain-id "$SAGEMAKER_DOMAIN_ID" --query 'DefaultUserSettings.CodeEditorAppSettings.CustomImages' --output json)

          # Create new custom images array with existing + new image.
          # `// []` covers a domain that has no custom images yet (query
          # result is null); unique_by drops a duplicate entry for this image.
          NEW_IMAGES=$(echo "$EXISTING_IMAGES" | jq -c \
            --arg imageName "$SAGEMAKER_IMAGE_NAME" \
            --arg configName "$APP_IMAGE_CONFIG_NAME" \
            '(. // []) + [{"ImageName": $imageName, "ImageVersionNumber": 1, "AppImageConfigName": $configName}] | unique_by(.ImageName)')

          # Update domain with all custom images
          DEFAULT_USER_SETTINGS=$(jq -cn --argjson images "$NEW_IMAGES" \
            '{CodeEditorAppSettings: {CustomImages: $images}}')
          aws sagemaker update-domain \
            --domain-id "$SAGEMAKER_DOMAIN_ID" \
            --default-user-settings "$DEFAULT_USER_SETTINGS"
          echo "Image attached to domain successfully"

      - name: Create SageMaker code editor space
        run: |
          # Create space name using branch-commit format
          BRANCH_NAME=$(echo "$GH_REF_NAME" | sed 's/[^a-zA-Z0-9-]/-/g')
          SPACE_NAME="$BRANCH_NAME-${COMMIT_SHA}"

          # The space is owned by the first user profile listed for the domain.
          # Resolve it up front so a failed or empty lookup stops the step
          # instead of being spliced into the request as-is.
          OWNER_PROFILE=$(aws sagemaker list-user-profiles --domain-id "$SAGEMAKER_DOMAIN_ID" --query 'UserProfiles[0].UserProfileName' --output text)
          if [ -z "$OWNER_PROFILE" ] || [ "$OWNER_PROFILE" = "None" ]; then
            echo "Error: no user profile found in SageMaker domain"
            exit 1
          fi

          # Build the JSON arguments with jq so values are escaped correctly.
          OWNERSHIP_SETTINGS=$(jq -cn --arg owner "$OWNER_PROFILE" \
            '{OwnerUserProfileName: $owner}')
          SPACE_SETTINGS=$(jq -cn \
            --arg imageArn "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:image/${SAGEMAKER_IMAGE_NAME}" \
            '{
              AppType: "CodeEditor",
              RemoteAccess: "DISABLED",
              SpaceStorageSettings: {
                EbsStorageSettings: {
                  EbsVolumeSizeInGb: 16
                }
              },
              CodeEditorAppSettings: {
                DefaultResourceSpec: {
                  SageMakerImageArn: $imageArn,
                  InstanceType: "ml.t3.medium"
                }
              }
            }')

          # Create the space with project ownership
          aws sagemaker create-space \
            --domain-id "$SAGEMAKER_DOMAIN_ID" \
            --space-name "$SPACE_NAME" \
            --ownership-settings "$OWNERSHIP_SETTINGS" \
            --space-sharing-settings '{"SharingType": "Private"}' \
            --space-settings "$SPACE_SETTINGS"
          echo "Created SageMaker space: $SPACE_NAME"
          echo "SAGEMAKER_SPACE_NAME=$SPACE_NAME" >> "$GITHUB_ENV"

      - name: Start SageMaker code editor space
        run: |
          # Wait for space to be ready (200 seconds)
          echo "Waiting for space to be ready..."
          RETRY_COUNT=0
          MAX_RETRIES=20
          while [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; do
            SPACE_STATUS=$(aws sagemaker describe-space --domain-id "$SAGEMAKER_DOMAIN_ID" --space-name "$SAGEMAKER_SPACE_NAME" --query 'Status' --output text)
            echo "Space status: $SPACE_STATUS (attempt $((RETRY_COUNT + 1))/$MAX_RETRIES)"
            if [ "$SPACE_STATUS" = "InService" ]; then
              break
            elif [ "$SPACE_STATUS" = "Failed" ]; then
              echo "Space creation failed"
              exit 1
            fi
            RETRY_COUNT=$((RETRY_COUNT + 1))
            sleep 10
          done
          if [ "$RETRY_COUNT" -eq "$MAX_RETRIES" ]; then
            echo "Timeout waiting for space to be ready"
            exit 1
          fi

          # Create app to start the space
          aws sagemaker create-app \
            --domain-id "$SAGEMAKER_DOMAIN_ID" \
            --space-name "$SAGEMAKER_SPACE_NAME" \
            --app-type "CodeEditor" \
            --app-name "default"
          echo "Started SageMaker space: $SAGEMAKER_SPACE_NAME"