# Workflow run page chrome ("Skip to content", "Nightly Docker Test #188")
# was accidentally captured into this file; it is not part of the workflow.
name: Nightly Docker Test
# Tests against the latest head/master Docker images from DockerHub and,
# when the tests pass, tags them as nightly and daily for publishing.
on:
  workflow_dispatch:
  schedule:
    - cron: "0 8 * * *" # Run at midnight Pacific time (8 AM UTC) every day
concurrency:
  # One nightly run at a time; a newer run cancels an in-flight one.
  group: ${{ github.workflow }}
  cancel-in-progress: true
env:
  # Docker registry configuration
  # DOCKER_REGISTRY: Set to 'docker.io' for DockerHub, or your custom registry domain
  # DOCKER_REPOSITORY: Set to your organization/username (e.g., 'acryldata' for DockerHub)
  DOCKER_REGISTRY: "docker.io"
  DOCKER_REPOSITORY: "acryldata"
  DOCKER_CACHE: "DEPOT"
  DEPOT_PROJECT_ID: "${{ vars.DEPOT_PROJECT_ID }}"
  DEPOT_TOKEN: "${{ secrets.DEPOT_TOKEN }}"
permissions:
  contents: read
  id-token: write
jobs:
  # Computes tags, runner type, and cache keys; discovers the head images
  # published by the latest successful docker-unified.yml master build.
  setup:
    runs-on: depot-ubuntu-24.04-small
    outputs:
      tag: ${{ steps.tag.outputs.tag }}
      date_tag: ${{ steps.tag.outputs.date_tag }}
      docker-login: ${{ steps.docker-login.outputs.docker-login }}
      publish: ${{ steps.publish.outputs.publish }}
      python_release_version: ${{ steps.tag.outputs.python_release_version }}
      branch_name: ${{ steps.tag.outputs.branch_name }}
      repository_name: ${{ steps.tag.outputs.repository_name }}
      test_runner_type: ${{ steps.set-runner.outputs.test_runner_type }}
      test_runner_type_small: ${{ steps.set-runner.outputs.test_runner_type_small }}
      use_depot_cache: ${{ steps.set-runner.outputs.use_depot_cache }}
      uv_cache_key: ${{ steps.uv-cache-key.outputs.uv_cache_key }}
      uv_cache_key_prefix: ${{ steps.uv-cache-key.outputs.uv_cache_key_prefix }}
      yarn_cache_key: ${{ steps.yarn-cache-key.outputs.yarn_cache_key }}
      yarn_cache_key_prefix: ${{ steps.yarn-cache-key.outputs.yarn_cache_key_prefix }}
      datahub_images: ${{ steps.collect-images.outputs.datahub_images }}
    steps:
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
      - name: Compute Tag
        id: tag
        env:
          GITHUB_REF_FALLBACK: ${{ github.ref }}
          GITHUB_EVENT_NAME: ${{ github.event_name }}
        run: |
          source .github/scripts/docker_helpers.sh
          # FIX: GITHUB_REF_FALLBACK was declared in env but never consumed;
          # use it when GITHUB_REF is unset (no-op on GitHub-hosted runners,
          # where GITHUB_REF is always provided with the same value).
          GITHUB_REF="${GITHUB_REF:-$GITHUB_REF_FALLBACK}"
          # Get current date in YYYY-MM-DD format
          CURRENT_DATE=$(date +%Y-%m-%d)
          {
            echo "tag=nightly"
            echo "date_tag=nightly-${CURRENT_DATE}"
            echo "python_release_version=$(get_python_docker_release_v)"
            echo "branch_name=${GITHUB_REF#refs/heads/}"
            echo "repository_name=${GITHUB_REPOSITORY#*/}"
          } >> "$GITHUB_OUTPUT"
      - name: Check whether docker login is possible
        id: docker-login
        env:
          ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_USERNAME != '' && secrets.ACRYL_DOCKER_PASSWORD != '' }}
        run: |
          echo "Enable Docker Login: ${{ env.ENABLE_DOCKER_LOGIN }}"
          echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> "$GITHUB_OUTPUT"
      - name: Check whether publishing enabled
        id: publish
        env:
          ENABLE_PUBLISH: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
        run: |
          echo "Enable publish: ${{ env.ENABLE_PUBLISH }}"
          echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
      - name: Determine runner type
        id: set-runner
        run: |
          # Depot runners (and their cache) are only used when both the cache
          # mode and a depot project id are configured.
          if [[ "${{ env.DOCKER_CACHE }}" == "DEPOT" && "${{ env.DEPOT_PROJECT_ID }}" != "" ]]; then
            echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
            echo "test_runner_type_small=depot-ubuntu-24.04-small" >> "$GITHUB_OUTPUT"
            echo "use_depot_cache=true" >> "$GITHUB_OUTPUT"
          else
            echo "test_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
            echo "test_runner_type_small=ubuntu-latest" >> "$GITHUB_OUTPUT"
            echo "use_depot_cache=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Compute UV Cache Key
        id: uv-cache-key
        run: |
          echo "uv_cache_key=docker-unified-nightly-${{ runner.os }}-uv-${{ hashFiles(
            './datahub-actions/pyproject.toml',
            './datahub-actions/setup.py',
            './smoke-test/requirements.txt',
            './smoke-test/pyproject.toml',
            './metadata-ingestion/pyproject.toml',
            './metadata-ingestion/setup.py') }}" >> "$GITHUB_OUTPUT"
          echo "uv_cache_key_prefix=docker-unified-nightly-${{ runner.os }}-uv-" >> "$GITHUB_OUTPUT"
      - name: Compute Yarn Cache Key
        id: yarn-cache-key
        run: |
          echo "yarn_cache_key=docker-unified-nightly-${{ runner.os }}-yarn-${{ hashFiles('./smoke-test/tests/cypress/yarn.lock', './datahub-web-react/yarn.lock') }}" >> "$GITHUB_OUTPUT"
          echo "yarn_cache_key_prefix=docker-unified-nightly-${{ runner.os }}-yarn-" >> "$GITHUB_OUTPUT"
      - name: Download build Metadata for latest head build
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          run_id=$(gh run list --workflow="docker-unified.yml" --branch master --event push --status success --limit 1 --json=databaseId | jq '.[].databaseId')
          mkdir -p "${{ github.workspace }}/build"
          gh api "repos/datahub-project/datahub/actions/runs/$run_id/artifacts" --jq '.artifacts[] | select(.name == "build-metadata-head") | .id' | xargs -I {} gh api repos/datahub-project/datahub/actions/artifacts/{}/zip >"${{ github.workspace }}/build/build-metadata.zip"
          unzip "${{ github.workspace }}/build/build-metadata.zip" -d "${{ github.workspace }}/build/"
          ls -l "${{ github.workspace }}/build/"
      - name: Collect image:tag from build log
        id: collect-images
        run: |
          # contains full repo/image:tag for all published images (includes all variants)
          images="$(depot bake -f "${{ github.workspace }}/build/bake-spec-allImages.json" --print | jq -cr '.target[].tags[]' | grep 'head' | tr '\n' ' ')"
          echo "datahub_images=$images" >> "$GITHUB_OUTPUT"
      - name: Show collected images
        run: |
          echo "${{ steps.collect-images.outputs.datahub_images }}"
smoke_test:
name: Run Smoke Tests (${{ matrix.profile }}, ${{ matrix.test_strategy }}, ${{ matrix.architecture }})
runs-on: ${{ contains(needs.setup.outputs.test_runner_type, 'depot') && format('depot-ubuntu-24.04{0}-4', matrix.architecture == 'arm' && '-arm' || '') || needs.setup.outputs.test_runner_type }}
needs: [setup]
strategy:
fail-fast: false
matrix:
profile:
[
quickstart-consumers,
quickstart-postgres,
quickstart-consumers-cdc,
quickstart-postgres-cdc,
]
test_strategy: [pytests, cypress]
architecture: [x64, arm]
env:
MIXPANEL_API_SECRET: ${{ secrets.MIXPANEL_API_SECRET }}
MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
steps:
- name: Free up disk space
if: ${{ !contains(needs.setup.outputs.test_runner_type, 'depot') }}
run: |
sudo apt-get remove 'dotnet-*' azure-cli || true
sudo rm -rf /usr/local/lib/android/ || true
sudo docker image prune -a -f || true
- uses: actions/cache/restore@v4
with:
path: |
~/.cache/uv
key: ${{ needs.setup.outputs.uv_cache_key }}
restore-keys: |
${{ needs.setup.outputs.uv_cache_key_prefix }}
- uses: actions/cache/restore@v4
with:
path: |
~/.cache/yarn
key: ${{ needs.setup.outputs.yarn_cache_key }}
restore-keys: |
${{ needs.setup.outputs.yarn_cache_key_prefix }}
- name: Check out the repo
uses: acryldata/sane-checkout-action@v4
with:
checkout-head-only: false
- name: Install Cypress dependencies on ARM
if: ${{ matrix.architecture == 'arm' && matrix.test_strategy == 'cypress' }}
run: |
sudo apt-get update
sudo apt-get install -y \
libgtk2.0-0 \
libgtk-3-0 \
libgbm-dev \
libnotify-dev \
libnss3 \
libxss1 \
libasound2t64 \
libxtst6 \
xauth \
xvfb
- name: Set up Depot CLI
if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
uses: depot/setup-action@v1
- uses: actions/setup-python@v6
with:
python-version: "3.11"
cache: "pip"
- name: Detect workflow retry
id: retry-detection
run: |
RUN_ATTEMPT="${{ github.run_attempt }}"
if [[ "$RUN_ATTEMPT" -gt 1 ]]; then
echo "This is retry attempt $RUN_ATTEMPT"
echo "is_retry=true" >> "$GITHUB_OUTPUT"
PREVIOUS_ATTEMPT=$((RUN_ATTEMPT - 1))
echo "previous_attempt=${PREVIOUS_ATTEMPT}" >> "$GITHUB_OUTPUT"
else
echo "This is the first attempt"
echo "is_retry=false" >> "$GITHUB_OUTPUT"
fi
- name: Download previous test results
if: steps.retry-detection.outputs.is_retry == 'true'
id: download-artifacts
continue-on-error: true
env:
GH_TOKEN: ${{ github.token }}
run: |
set +e
echo "Downloading artifacts from run ${{ github.run_id }}, attempt ${{ steps.retry-detection.outputs.previous_attempt }}"
# Create directory for previous results
mkdir -p "${{ github.workspace }}/previous-test-results"
# Get artifact ID for this profile and test strategy's test results
ARTIFACT_NAME="Test Results (smoke tests) ${{ matrix.profile }} ${{ matrix.test_strategy }} ${{ matrix.architecture }}"
echo "Looking for artifact: ${ARTIFACT_NAME}"
# Query artifacts for this workflow run
ARTIFACT_ID=$(gh api "repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts" \
--jq ".artifacts[] | select(.name == \"${ARTIFACT_NAME}\") | .id" | head -1)
if [[ -z "$ARTIFACT_ID" ]]; then
echo "No artifact found for ${{ matrix.profile }} ${{ matrix.test_strategy }} ${{ matrix.architecture }}"
echo "download_success=false" >> "$GITHUB_OUTPUT"
exit 0
fi
echo "Found artifact ID: ${ARTIFACT_ID}"
# Download and extract artifact
cd "${{ github.workspace }}/previous-test-results"
gh api "repos/${{ github.repository }}/actions/artifacts/${ARTIFACT_ID}/zip" > artifact.zip
unzip -q artifact.zip
# Verify we got XML files
if [[ "${{ matrix.test_strategy }}" == "cypress" ]]; then
# Cypress XMLs are in smoke-test/tests/cypress/build/smoke-test-results/
if find . -path "*/smoke-test-results/cypress-test-*.xml" -print -quit | grep -q .; then
echo "Successfully downloaded cypress test results"
echo "download_success=true" >> "$GITHUB_OUTPUT"
else
echo "No cypress test XML files found in artifact"
echo "download_success=false" >> "$GITHUB_OUTPUT"
fi
else
# Pytest XMLs are in smoke-test/junit.*.xml
if find . -path "*/junit*.xml" -print -quit | grep -q .; then
echo "Successfully downloaded pytest test results"
echo "download_success=true" >> "$GITHUB_OUTPUT"
else
echo "No pytest XML files found in artifact"
echo "download_success=false" >> "$GITHUB_OUTPUT"
fi
fi
- name: Parse failed Cypress tests
if: |
steps.retry-detection.outputs.is_retry == 'true' &&
matrix.test_strategy == 'cypress' &&
steps.download-artifacts.outputs.download_success == 'true'
id: parse-cypress-failures
run: |
set +e
OUTPUT_FILE="${{ github.workspace }}/failed-tests-${{ matrix.profile }}-${{ matrix.test_strategy }}.txt"
python3 .github/scripts/parse_failed_cypress_tests.py \
--input-dir "${{ github.workspace }}/previous-test-results" \
--output "${OUTPUT_FILE}"
EXIT_CODE=$?
case $EXIT_CODE in
0)
echo "parse_result=has_failures" >> "$GITHUB_OUTPUT"
echo "filtered_tests_file=${OUTPUT_FILE}" >> "$GITHUB_OUTPUT"
echo "Will retry $(wc -l < ${OUTPUT_FILE}) failed test(s)"
;;
2)
echo "parse_result=all_passed" >> "$GITHUB_OUTPUT"
echo "All tests passed in previous attempt - will skip"
;;
3)
echo "parse_result=no_artifacts" >> "$GITHUB_OUTPUT"
echo "No test results found - will run all tests"
;;
*)
echo "parse_result=error" >> "$GITHUB_OUTPUT"
echo "Error parsing test results - will run all tests"
;;
esac
- name: Parse failed pytest modules
if: |
steps.retry-detection.outputs.is_retry == 'true' &&
matrix.test_strategy == 'pytests' &&
steps.download-artifacts.outputs.download_success == 'true'
id: parse-pytest-failures
run: |
set +e
OUTPUT_FILE="${{ github.workspace }}/failed-modules-${{ matrix.profile }}-${{ matrix.test_strategy }}.txt"
python3 .github/scripts/parse_failed_pytest_tests.py \
--input-dir "${{ github.workspace }}/previous-test-results" \
--output "${OUTPUT_FILE}"
EXIT_CODE=$?
case $EXIT_CODE in
0)
echo "parse_result=has_failures" >> "$GITHUB_OUTPUT"
echo "filtered_tests_file=${OUTPUT_FILE}" >> "$GITHUB_OUTPUT"
echo "Will retry $(wc -l < ${OUTPUT_FILE}) failed module(s)"
;;
2)
echo "parse_result=all_passed" >> "$GITHUB_OUTPUT"
echo "All tests passed in previous attempt - will skip"
;;
3)
echo "parse_result=no_artifacts" >> "$GITHUB_OUTPUT"
echo "No test results found - will run all tests"
;;
*)
echo "parse_result=error" >> "$GITHUB_OUTPUT"
echo "Error parsing test results - will run all tests"
;;
esac
- name: Clean up downloaded artifacts
if: steps.retry-detection.outputs.is_retry == 'true'
run: |
# Cleaning up downloaded test results to prevent contamination of current run
rm -rf "${{ github.workspace }}/previous-test-results"
- name: Skip if all tests passed
if: |
steps.parse-cypress-failures.outputs.parse_result == 'all_passed' ||
steps.parse-pytest-failures.outputs.parse_result == 'all_passed'
run: |
echo "✓ All tests passed in previous attempt for ${{ matrix.test_strategy }} on ${{ matrix.profile }} (${{ matrix.architecture }})"
echo "Skipping this run to optimize CI time (Docker images, quickstart, and tests)"
exit 0
- uses: gradle/actions/setup-gradle@v4
if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
- name: Login to registry
uses: docker/login-action@v3
if: ${{ needs.setup.outputs.docker-login == 'true' && env.DOCKER_REGISTRY == 'docker.io' }}
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
- name: Download build Metadata for latest head build
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
run_id=$(gh run list --workflow="docker-unified.yml" --branch master --event push --status success --limit 1 --json=databaseId | jq '.[].databaseId')
mkdir -p "${{ github.workspace }}/build"
gh api "repos/datahub-project/datahub/actions/runs/$run_id/artifacts" --jq '.artifacts[] | select(.name == "build-metadata-head") | .id' | xargs -I {} gh api repos/datahub-project/datahub/actions/artifacts/{}/zip >"${{ github.workspace }}/build/build-metadata.zip"
unzip "${{ github.workspace }}/build/build-metadata.zip" -d "${{ github.workspace }}/build/"
ls -l "${{ github.workspace }}/build/"
- name: Collect image:tag from build log
id: collect-images
run: |
# contains full repo/image:tag for all published images (includes all variants)
images="$(depot bake -f "${{ github.workspace }}/build/bake-spec-allImages.json" --print | jq -cr '.target[].tags[]' | grep 'head' | tr '\n' ' ')"
echo "datahub_images=$images" >> "$GITHUB_OUTPUT"
- name: Show collected images
run: |
echo "${{ steps.collect-images.outputs.datahub_images }}"
- name: Pull head images from registry
if: |
steps.parse-cypress-failures.outputs.parse_result != 'all_passed' &&
steps.parse-pytest-failures.outputs.parse_result != 'all_passed'
run: |
echo collected images "${{ steps.collect-images.outputs.datahub_images }}"
# Pull the latest head/master images from registry
failed_pulls=0
eval "set -- ${{ steps.collect-images.outputs.datahub_images }}"
for image do
if [ -n "$image" ]; then
echo "Pulling $image"
if ! docker pull "$image"; then
echo "Failed to pull $image"
failed_pulls=$((failed_pulls + 1))
else
# Re-tag the head image with the nightly tag for smoke tests
newImage=${image/\:head/\:${{ needs.setup.outputs.tag }}}
echo "Tagging $image as $newImage"
docker tag "$image" "$newImage"
fi
fi
done
if [ $failed_pulls -gt 0 ]; then
echo "Warning: $failed_pulls images failed to pull"
fi
docker images
- name: run quickstart
if: |
steps.parse-cypress-failures.outputs.parse_result != 'all_passed' &&
steps.parse-pytest-failures.outputs.parse_result != 'all_passed'
env:
DATAHUB_TELEMETRY_ENABLED: false
DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
DATAHUB_ACTIONS_IMAGE: ${{ env.DOCKER_REPOSITORY }}/datahub-actions:head
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
PROFILE_NAME: ${{ matrix.profile }}
run: |
./smoke-test/run-quickstart.sh
- name: Disk Check
run: df -h . && docker images
- name: Disable ES Disk Threshold
if: |
steps.parse-cypress-failures.outputs.parse_result != 'all_passed' &&
steps.parse-pytest-failures.outputs.parse_result != 'all_passed'
run: |
curl -XPUT "http://localhost:9200/_cluster/settings" \
-H 'Content-Type: application/json' -d'{
"persistent": {
"cluster": {
"routing": {
"allocation.disk.threshold_enabled": false
}
}
}
}'
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Build datahub cli
run: |
./gradlew :metadata-ingestion:install
- name: Smoke test
if: |
steps.parse-cypress-failures.outputs.parse_result != 'all_passed' &&
steps.parse-pytest-failures.outputs.parse_result != 'all_passed'
env:
RUN_QUICKSTART: false
DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }}
CLEANUP_DATA: "false"
TEST_STRATEGY: ${{ matrix.test_strategy }}
BATCH_COUNT: "1" # since this workflow runs only on schedule trigger, batching isn't really needed.
BATCH_NUMBER: "0"
FILTERED_TESTS: ${{ steps.parse-cypress-failures.outputs.filtered_tests_file || steps.parse-pytest-failures.outputs.filtered_tests_file || '' }}
DATAHUB_SMOKETEST_EXECUTOR_ID: "${{ github.sha }}-${{ matrix.test_strategy }}-${{ matrix.architecture }}"
PROFILE_NAME: ${{ matrix.profile }}
run: |
if [[ -n "$FILTERED_TESTS" && -f "$FILTERED_TESTS" ]]; then
echo "=========================================="
if [[ "${{ matrix.test_strategy }}" == "cypress" ]]; then
echo "RETRY MODE: Running only failed Cypress tests"
else
echo "RETRY MODE: Running only failed pytest modules"
fi
echo "=========================================="
echo "Failed items to retry:"
cat "$FILTERED_TESTS"
echo "=========================================="
elif [[ "${{ steps.retry-detection.outputs.is_retry }}" == "true" ]]; then
echo "RETRY MODE: Running all tests (fallback)"
fi
echo "$DATAHUB_VERSION"
./gradlew --stop
./smoke-test/smoke.sh
- name: Disk Check
run: df -h . && docker images
- name: store logs
if: failure()
run: |
docker ps -a
TEST_STRATEGY="-${{ matrix.test_strategy }}"
source .github/scripts/docker_logs.sh
- name: Upload logs
uses: actions/upload-artifact@v4
if: failure()
with:
name: docker-logs-${{ matrix.profile }}-${{ matrix.test_strategy }}-${{ matrix.architecture }}
path: "docker_logs/*.log"
retention-days: 5
- name: Upload screenshots
uses: actions/upload-artifact@v4
if: failure()
with:
name: cypress-snapshots-${{ matrix.profile }}-${{ matrix.test_strategy }}-${{ matrix.architecture }}
path: smoke-test/tests/cypress/cypress/screenshots/
- uses: actions/upload-artifact@v4
if: always()
with:
name: Test Results (smoke tests) ${{ matrix.profile }} ${{ matrix.test_strategy }} ${{ matrix.architecture }}
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/smoke-test-results/cypress-test-*.xml
**/junit.*.xml
!**/binary/**
retention-days: 5
- name: Send failed test metrics to PostHog
if: failure()
continue-on-error: true
env:
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
run: |
if [ -z "$POSTHOG_API_KEY" ]; then
echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
exit 0
fi
TEMP_DIR=$(mktemp -d)
mkdir -p "$TEMP_DIR/test-results"
find . -name "*.xml" -path "*/build/test-results/*" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
find . -name "cypress-test-*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
find . -name "junit.*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
python3 .github/scripts/send_failed_tests_to_posthog.py \
--input-dir "$TEMP_DIR/test-results" \
--posthog-api-key "$POSTHOG_API_KEY" \
--posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
--repository "${{ github.repository }}" \
--workflow-name "${{ github.workflow }}" \
--branch "${{ github.head_ref || github.ref_name }}" \
--run-id "${{ github.run_id }}" \
--run-attempt "${{ github.run_attempt }}" \
--batch "${{ matrix.profile }}-${{ matrix.architecture }}" \
--batch-count "${{ strategy.job-total }}" \
--test-strategy "${{ matrix.test_strategy }}"
rm -rf "$TEMP_DIR"
- name: Upload test results to Codecov
if: ${{ !cancelled() }}
uses: codecov/test-results-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
override_branch: ${{ github.head_ref || github.ref_name }}
- uses: actions/cache/save@v4
if: ${{ matrix.profile == 'quickstart-consumers' && matrix.test_strategy == 'pytests' && matrix.architecture == 'x64' }}
with:
path: |
~/.cache/uv
key: ${{ needs.setup.outputs.uv_cache_key }}
- uses: actions/cache/save@v4
if: ${{ matrix.profile == 'quickstart-consumers' && matrix.test_strategy == 'pytests' && matrix.architecture == 'x64' }}
with:
path: |
~/.cache/yarn
key: ${{ needs.setup.outputs.yarn_cache_key }}
# Tag and push head images as nightly after smoke tests pass
tag_and_push_images:
name: Tag and push head images as nightly
runs-on: ${{ needs.setup.outputs.test_runner_type_small }}
needs: [setup, smoke_test]
if: ${{ needs.setup.outputs.publish == 'true' && always() && !failure() && !cancelled() }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v4
- name: Check if smoke tests passed
run: |
# Check the overall result of the matrix job
# Matrix jobs can have mixed results, so we check for any failures
if [[ "${{ needs.smoke_test.result }}" == "failure" ]]; then
echo "Smoke tests failed, skipping image tagging and pushing"
exit 1
elif [[ "${{ needs.smoke_test.result }}" == "cancelled" ]]; then
echo "Smoke tests were cancelled, skipping image tagging and pushing"
exit 1
else
echo "Smoke tests completed successfully, proceeding with image tagging and pushing"
fi
- name: Validate registry configuration
run: |
if [[ -z "${{ env.DOCKER_REGISTRY }}" || -z "${{ env.DOCKER_REPOSITORY }}" ]]; then
echo "Error: DOCKER_REGISTRY and DOCKER_REPOSITORY must be set"
exit 1
fi
# Validate that required secrets are available for the target registry
if [[ "${{ env.DOCKER_REGISTRY }}" == "docker.io" ]]; then
if [[ -z "${{ secrets.ACRYL_DOCKER_USERNAME }}" || -z "${{ secrets.ACRYL_DOCKER_PASSWORD }}" ]]; then
echo "Error: DockerHub credentials required but not provided"
exit 1
fi
fi
echo "Using registry: ${{ env.DOCKER_REGISTRY }}"
echo "Using repository: ${{ env.DOCKER_REPOSITORY }}"
- name: Login to registry
uses: docker/login-action@v3
if: ${{ env.DOCKER_REGISTRY == 'docker.io' }}
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
- name: Download build Metadata for latest head build
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
run_id=$(gh run list --workflow="docker-unified.yml" --branch master --event push --status success --limit 1 --json=databaseId | jq '.[].databaseId')
mkdir -p "${{ github.workspace }}/build"
gh api "repos/datahub-project/datahub/actions/runs/$run_id/artifacts" --jq '.artifacts[] | select(.name == "build-metadata-head") | .id' | xargs -I {} gh api repos/datahub-project/datahub/actions/artifacts/{}/zip >"${{ github.workspace }}/build/build-metadata.zip"
unzip "${{ github.workspace }}/build/build-metadata.zip" -d "${{ github.workspace }}/build/"
ls -l "${{ github.workspace }}/build/"
- name: Collect image:tag from build log
id: collect-images
run: |
# contains full repo/image:tag for all published images (includes all variants)
images="$(depot bake -f "${{ github.workspace }}/build/bake-spec-allImages.json" --print | jq -cr '.target[].tags[]' | grep 'head' | tr '\n' ' ')"
echo "datahub_images=$images" >> "$GITHUB_OUTPUT"
- name: Show collected images
run: |
echo "${{ steps.collect-images.outputs.datahub_images }}"
- name: Pull head images
run: |
failed_pulls=0
eval "set -- ${{ steps.collect-images.outputs.datahub_images }}"
for image do
if [ -n "$image" ]; then
echo "Pulling $image"
if ! docker pull "$image"; then
echo "Failed to pull $image"
failed_pulls=$((failed_pulls + 1))
fi
fi
done
if [ $failed_pulls -gt 0 ]; then
echo "Warning: $failed_pulls images failed to pull"
fi
- name: Tag images with nightly and daily tags
run: |
# Tag all images with nightly and daily tags
failed_tags=0
failed_pushes=0
eval "set -- ${{ steps.collect-images.outputs.datahub_images }}"
for image do
if [ -n "$image" ]; then
imageWithNightlyTag=${image/\:head/\:${{ needs.setup.outputs.tag }}}
echo "Tagging $image as $imageWithNightlyTag"
if ! docker tag "$image" "$imageWithNightlyTag"; then
echo "Failed to tag $imageWithNightlyTag"
failed_tags=$((failed_tags + 1))
fi
if ! docker push "$imageWithNightlyTag"; then
echo "Failed to push $imageWithNightlyTag"
failed_pushes=$((failed_pushes + 1))
fi
imageWithDateTag=${image/\:head/\:${{ needs.setup.outputs.date_tag }}}
echo "Tagging $image as imageWithDateTag"
if ! docker tag "$image" "$imageWithDateTag"; then
echo "Failed to tag imageWithDateTag"
failed_tags=$((failed_tags + 1))
fi
if ! docker push "$imageWithDateTag"; then
echo "Failed to push $imageWithDateTag"
failed_pushes=$((failed_pushes + 1))
fi
fi
done
if [ $failed_tags -gt 0 ]; then
echo "Warning: $failed_tags tags failed to create"
fi
if [ $failed_pushes -gt 0 ]; then
echo "Warning: $failed_pushes tags failed to push"
fi
- name: Show pushed images
run: |
echo "Successfully tagged and pushed images:"
docker images | grep ${{ env.DOCKER_REPOSITORY }}