From f7e4e2ba8ee8de274983724456844cd7ff69eb2b Mon Sep 17 00:00:00 2001 From: sophia Date: Tue, 4 Mar 2025 10:09:22 -0800 Subject: [PATCH 01/13] Run e2e and deployment tests against aws --- .github/actions/init-cloud-aws/action.yml | 90 +++++++++++++ .github/workflows/test_aws_integration.yaml | 138 +++++++++++++++----- 2 files changed, 195 insertions(+), 33 deletions(-) create mode 100644 .github/actions/init-cloud-aws/action.yml diff --git a/.github/actions/init-cloud-aws/action.yml b/.github/actions/init-cloud-aws/action.yml new file mode 100644 index 0000000000..59c8cf0d30 --- /dev/null +++ b/.github/actions/init-cloud-aws/action.yml @@ -0,0 +1,90 @@ +name: init-cloud-aws +description: "Initialize Nebari config for cloud deployment" + +inputs: + directory: + description: "Path to directory to initialize in" + required: false + default: './local-deployment' + +outputs: + directory: + description: "Path to config directory" + value: ${{ steps.metadata.outputs.directory }} + config: + description: "Path to Nebari config" + value: ${{ steps.metadata.outputs.config }} + project: + description: "Project name" + value: ${{ steps.metadata.outputs.project }} + domain: + description: "Domain name" + value: ${{ steps.metadata.outputs.domain }} + +runs: + using: composite + + steps: + - shell: bash + id: metadata + run: | + # Setup metadata + DIRECTORY=$(realpath '${{ inputs.directory }}') + mkdir --parents "${DIRECTORY}" + echo "directory=${DIRECTORY}" | tee --append "${GITHUB_OUTPUT}" + + CONFIG="${DIRECTORY}/nebari-config.yaml" + echo "config=${CONFIG}" | tee --append "${GITHUB_OUTPUT}" + + PROJECT='github-actions' + echo "project=${PROJECT}" | tee --append "${GITHUB_OUTPUT}" + + DOMAIN='github-actions-aws.nebari.dev' + nslookup "${DOMAIN}" + echo "domain=${DOMAIN}" | tee --append "${GITHUB_OUTPUT}" + + - name: Authenticate to AWS + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + role-session-name: github-action + 
aws-region: ${{ env.AWS_DEFAULT_REGION }} + + - shell: bash -l {0} + id: init + working-directory: ${{ steps.metadata.outputs.directory }} + run: | + nebari init aws \ + --project-name '${{ steps.metadata.outputs.project }}' \ + --domain-name '${{ steps.metadata.outputs.domain }}' \ + --auth-provider password \ + --output '${{ steps.metadata.outputs.config }}' + env: + CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + + - shell: bash + run: | + # Update nebari config for CI + + # Change default JupyterLab theme + cat >> '${{ steps.metadata.outputs.config }}' <<- EOM + jupyterlab: + default_settings: + "@jupyterlab/apputils-extension:themes": + theme: JupyterLab Dark + EOM + + # Change default value for minio persistence size + cat >> '${{ steps.metadata.outputs.config }}' <<- EOM + monitoring: + enabled: true + overrides: + minio: + persistence: + size: 1Gi + EOM + + - shell: bash + run: | + # Display Nebari config + cat '${{ steps.metadata.outputs.config }}' diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index 59075ea7de..a8c1df4111 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -1,37 +1,40 @@ name: AWS Deployment +env: + TEST_USERNAME: "test-user" + TEST_PASSWORD: "P@sswo3d" + NEBARI_IMAGE_TAG: "main" + AWS_DEFAULT_REGION: "us-west-2" + on: + pull_request: + paths: + - ".github/workflows/test_aws_integration.yaml" + - "tests/**" + - "scripts/**" + - "src/**" + - "pyproject.toml" + - "pytest.ini" + - ".cirun.yml" + push: + branches: + - main + - release/\d{4}.\d{1,2}.\d{1,2} + paths: + - ".github/workflows/test_aws_integration.yaml" + - "tests/**" + - "scripts/**" + - "src/**" + - "pyproject.toml" + - "pytest.ini" + - ".cirun.yml" schedule: - cron: "0 0 * * MON" workflow_dispatch: - inputs: - image-tag: - description: 'Nebari image tag created by the nebari-docker-images repo' - required: true - default: main - type: string - tf-log-level: - 
description: 'Change Terraform log levels' - required: false - default: info - type: choice - options: - - info - - warn - - debug - - trace - - error - - -env: - AWS_DEFAULT_REGION: "us-west-2" - NEBARI_IMAGE_TAG: ${{ github.event.inputs.image-tag || 'main' }} - TF_LOG: ${{ github.event.inputs.tf-log-level || 'info' }} jobs: test-aws-integration: runs-on: ubuntu-latest - if: ${{ vars.SKIP_AWS_INTEGRATION_TEST != 'true' }} permissions: id-token: write contents: read @@ -42,9 +45,18 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v5 + uses: conda-incubator/setup-miniconda@v3 + env: + CONDA: /home/runnerx/miniconda3 + with: + auto-update-conda: true + python-version: "3.11" + miniconda-version: "latest" + + - name: Install kubectl + uses: azure/setup-kubectl@v4 with: - python-version: 3.11 + version: v1.19.16 - name: Install Nebari run: | @@ -58,12 +70,72 @@ jobs: role-session-name: github-action aws-region: ${{ env.AWS_DEFAULT_REGION }} - - name: Integration Tests + - name: Initialize Nebari config for aws deployment + id: init + uses: ./.github/actions/init-cloud-aws + + - name: Deploy Nebari + working-directory: ${{ steps.init.outputs.directory }} + run: nebari deploy --config ${{ steps.init.outputs.config }} --disable-prompt + + - name: Health check + uses: ./.github/actions/health-check + with: + domain: ${{ steps.init.outputs.domain }} + + - name: Create example-user + working-directory: ${{ steps.init.outputs.directory }} run: | - pytest --version - pytest tests/tests_integration/ -vvv -s --cloud aws + nebari keycloak adduser --user "${TEST_USERNAME}" "${TEST_PASSWORD}" --config ${{ steps.init.outputs.config }} + nebari keycloak listusers --config ${{ steps.init.outputs.config }} + + - name: Await Workloads + uses: jupyterhub/action-k8s-await-workloads@v3 + with: + workloads: "" # all + namespace: "dev" + timeout: 300 + max-restarts: 3 + + ### DEPLOYMENT TESTS + - name: Deployment Pytests env: - 
NEBARI_SECRET__default_images__jupyterhub: "quay.io/nebari/nebari-jupyterhub:${{ env.NEBARI_IMAGE_TAG }}" - NEBARI_SECRET__default_images__jupyterlab: "quay.io/nebari/nebari-jupyterlab:${{ env.NEBARI_IMAGE_TAG }}" - NEBARI_SECRET__default_images__dask_worker: "quay.io/nebari/nebari-dask-worker:${{ env.NEBARI_IMAGE_TAG }}" - CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + NEBARI_CONFIG_PATH: ${{ steps.init.outputs.config }} + KEYCLOAK_USERNAME: ${{ env.TEST_USERNAME }} + KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }} + run: | + pytest tests/tests_deployment/ -v -s + + ### USER-JOURNEY TESTS + - uses: actions/setup-node@v4 + with: + node-version: 20 + + - name: Playwright Tests + env: + KEYCLOAK_USERNAME: ${{ env.TEST_USERNAME }} + KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }} + NEBARI_FULL_URL: "https://${{ steps.init.outputs.domain }}/" + working-directory: tests/tests_e2e/playwright + run: | + # create environment file + envsubst < .env.tpl > .env + # run playwright pytest tests in headed mode with the chromium browser + xvfb-run pytest --browser chromium --slowmo 300 --headed + + - name: Save Playwright recording artifacts + if: always() + uses: actions/upload-artifact@v4.3.1 + with: + name: e2e-playwright + path: | + ./tests/tests_e2e/playwright/videos/ + + ### CLEANUP AFTER TESTS + - name: Cleanup nebari deployment + # Since this is not critical for most pull requests and takes more than half of the time + # in the CI, it makes sense to only run on merge to main or workflow_dispatch to speed + # up feedback cycle + if: github.ref_name == 'main' || github.event_name == 'workflow_dispatch' + working-directory: ${{ steps.init.outputs.directory }} + run: nebari destroy --config ${{ steps.init.outputs.config }} --disable-prompt From 347b24dca4ad746ba1be1f48c92c5e06d27f2297 Mon Sep 17 00:00:00 2001 From: sophia Date: Wed, 5 Mar 2025 10:22:35 -0800 Subject: [PATCH 02/13] Explicitly pass secrets and env info to init-cloud-aws action --- 
.github/actions/init-cloud-aws/action.yml | 16 +++++++++++++--- .github/workflows/test_aws_integration.yaml | 11 ++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/.github/actions/init-cloud-aws/action.yml b/.github/actions/init-cloud-aws/action.yml index 59c8cf0d30..4723422af5 100644 --- a/.github/actions/init-cloud-aws/action.yml +++ b/.github/actions/init-cloud-aws/action.yml @@ -6,6 +6,16 @@ inputs: description: "Path to directory to initialize in" required: false default: './local-deployment' + aws_region: + description: "AWS region to use for deployment" + required: false + default: 'us-west-2' + aws_role_to_assume: + description: "ARN of AWS role to assume for deployment" + required: true + cloudflare_token: + description: "Token for Cloudflare API" + required: true outputs: directory: @@ -46,9 +56,9 @@ runs: - name: Authenticate to AWS uses: aws-actions/configure-aws-credentials@v1 with: - role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + role-to-assume: ${{ inputs.aws_role_to_assume }} role-session-name: github-action - aws-region: ${{ env.AWS_DEFAULT_REGION }} + aws-region: ${{ inputs.aws_region }} - shell: bash -l {0} id: init @@ -60,7 +70,7 @@ runs: --auth-provider password \ --output '${{ steps.metadata.outputs.config }}' env: - CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + CLOUDFLARE_TOKEN: ${{ inputs.cloudflare_token }} - shell: bash run: | diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index a8c1df4111..9cdd860f90 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -63,16 +63,13 @@ jobs: pip install .[dev] playwright install - - name: Authenticate to AWS - uses: aws-actions/configure-aws-credentials@v1 - with: - role-to-assume: ${{ secrets.AWS_ROLE_ARN }} - role-session-name: github-action - aws-region: ${{ env.AWS_DEFAULT_REGION }} - - name: Initialize Nebari config for aws deployment id: init uses: 
./.github/actions/init-cloud-aws + with: + aws_region: ${{ env.AWS_DEFAULT_REGION }} + aws_role_to_assume: ${{ secrets.AWS_ROLE_ARN }} + cloudflare_token: ${{ secrets.CLOUDFLARE_TOKEN }} - name: Deploy Nebari working-directory: ${{ steps.init.outputs.directory }} From ab9766aea91c00fe20cfd14be5a96f863acce19e Mon Sep 17 00:00:00 2001 From: sophia Date: Wed, 5 Mar 2025 12:08:54 -0800 Subject: [PATCH 03/13] Update domain --- .github/actions/init-cloud-aws/action.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/actions/init-cloud-aws/action.yml b/.github/actions/init-cloud-aws/action.yml index 4723422af5..4e23bfaf8f 100644 --- a/.github/actions/init-cloud-aws/action.yml +++ b/.github/actions/init-cloud-aws/action.yml @@ -5,7 +5,7 @@ inputs: directory: description: "Path to directory to initialize in" required: false - default: './local-deployment' + default: './aws-deployment' aws_region: description: "AWS region to use for deployment" required: false @@ -49,8 +49,7 @@ runs: PROJECT='github-actions' echo "project=${PROJECT}" | tee --append "${GITHUB_OUTPUT}" - DOMAIN='github-actions-aws.nebari.dev' - nslookup "${DOMAIN}" + DOMAIN='ci-aws.nebari.dev' echo "domain=${DOMAIN}" | tee --append "${GITHUB_OUTPUT}" - name: Authenticate to AWS @@ -94,6 +93,21 @@ runs: size: 1Gi EOM + # Update certificate info + cat >> '${{ steps.metadata.outputs.config }}' <<- EOM + certificate: + type: lets-encrypt + acme_email: internal-devops@quansight.com + acme_server: https://acme-v02.api.letsencrypt.org/directory + EOM + + # Update dns config + cat >> '${{ steps.metadata.outputs.config }}' <<- EOM + dns: + provider: cloudflare + auto_provision: true + EOM + - shell: bash run: | # Display Nebari config From e6ead079aebcb70eade2f2034e360bb9d862cbe4 Mon Sep 17 00:00:00 2001 From: sophia Date: Wed, 5 Mar 2025 14:36:51 -0800 Subject: [PATCH 04/13] Always cleanup deployment --- .github/workflows/test_aws_integration.yaml | 5 +---- 1 file 
changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index 9cdd860f90..9a50bf9bfb 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -130,9 +130,6 @@ jobs: ### CLEANUP AFTER TESTS - name: Cleanup nebari deployment - # Since this is not critical for most pull requests and takes more than half of the time - # in the CI, it makes sense to only run on merge to main or workflow_dispatch to speed - # up feedback cycle - if: github.ref_name == 'main' || github.event_name == 'workflow_dispatch' + if: always() working-directory: ${{ steps.init.outputs.directory }} run: nebari destroy --config ${{ steps.init.outputs.config }} --disable-prompt From 9ddbd33a88b503d2967212d256bdc03396547a69 Mon Sep 17 00:00:00 2001 From: sophia Date: Wed, 5 Mar 2025 15:18:38 -0800 Subject: [PATCH 05/13] Specify cloudflare token when deploying --- .github/actions/init-cloud-aws/action.yml | 5 ----- .github/workflows/test_aws_integration.yaml | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/actions/init-cloud-aws/action.yml b/.github/actions/init-cloud-aws/action.yml index 4e23bfaf8f..862eb193d4 100644 --- a/.github/actions/init-cloud-aws/action.yml +++ b/.github/actions/init-cloud-aws/action.yml @@ -13,9 +13,6 @@ inputs: aws_role_to_assume: description: "ARN of AWS role to assume for deployment" required: true - cloudflare_token: - description: "Token for Cloudflare API" - required: true outputs: directory: @@ -68,8 +65,6 @@ runs: --domain-name '${{ steps.metadata.outputs.domain }}' \ --auth-provider password \ --output '${{ steps.metadata.outputs.config }}' - env: - CLOUDFLARE_TOKEN: ${{ inputs.cloudflare_token }} - shell: bash run: | diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index 9a50bf9bfb..f3933a633f 100644 --- a/.github/workflows/test_aws_integration.yaml 
+++ b/.github/workflows/test_aws_integration.yaml @@ -69,11 +69,12 @@ jobs: with: aws_region: ${{ env.AWS_DEFAULT_REGION }} aws_role_to_assume: ${{ secrets.AWS_ROLE_ARN }} - cloudflare_token: ${{ secrets.CLOUDFLARE_TOKEN }} - name: Deploy Nebari working-directory: ${{ steps.init.outputs.directory }} run: nebari deploy --config ${{ steps.init.outputs.config }} --disable-prompt + env: + CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} - name: Health check uses: ./.github/actions/health-check From fcff0ebfe63ca6b344e0ad349fc29655b343312e Mon Sep 17 00:00:00 2001 From: sophia Date: Thu, 6 Mar 2025 11:59:27 -0800 Subject: [PATCH 06/13] Update kubeconfig for eks --- .github/workflows/test_aws_integration.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index f3933a633f..723d951ed0 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -76,6 +76,16 @@ jobs: env: CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + - name: Authenticate to AWS + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + role-session-name: github-action + aws-region: ${{ env.AWS_DEFAULT_REGION }} + + - name: Update kube config + run: aws eks update-kubeconfig --name github-actions-dev --region ${{ env.AWS_DEFAULT_REGION }} + - name: Health check uses: ./.github/actions/health-check with: From 6bfabea5adc218fbe031cb89c0be2e0df7004110 Mon Sep 17 00:00:00 2001 From: sophia Date: Thu, 6 Mar 2025 14:33:39 -0800 Subject: [PATCH 07/13] Refactor init-cloud-aws to be cloud agnostic --- .../{init-cloud-aws => init-cloud}/action.yml | 23 +++++-------------- .github/workflows/test_aws_integration.yaml | 19 ++++++++------- 2 files changed, 15 insertions(+), 27 deletions(-) rename .github/actions/{init-cloud-aws => init-cloud}/action.yml (82%) diff --git a/.github/actions/init-cloud-aws/action.yml 
b/.github/actions/init-cloud/action.yml similarity index 82% rename from .github/actions/init-cloud-aws/action.yml rename to .github/actions/init-cloud/action.yml index 862eb193d4..486005bbea 100644 --- a/.github/actions/init-cloud-aws/action.yml +++ b/.github/actions/init-cloud/action.yml @@ -1,17 +1,13 @@ -name: init-cloud-aws +name: init-cloud description: "Initialize Nebari config for cloud deployment" inputs: directory: description: "Path to directory to initialize in" required: false - default: './aws-deployment' - aws_region: - description: "AWS region to use for deployment" - required: false - default: 'us-west-2' - aws_role_to_assume: - description: "ARN of AWS role to assume for deployment" + default: './cloud-deployment' + cloud: + description: "Cloud provider to use for deployment" required: true outputs: @@ -46,21 +42,14 @@ runs: PROJECT='github-actions' echo "project=${PROJECT}" | tee --append "${GITHUB_OUTPUT}" - DOMAIN='ci-aws.nebari.dev' + DOMAIN='ci-${{ inputs.cloud }}.nebari.dev' echo "domain=${DOMAIN}" | tee --append "${GITHUB_OUTPUT}" - - name: Authenticate to AWS - uses: aws-actions/configure-aws-credentials@v1 - with: - role-to-assume: ${{ inputs.aws_role_to_assume }} - role-session-name: github-action - aws-region: ${{ inputs.aws_region }} - - shell: bash -l {0} id: init working-directory: ${{ steps.metadata.outputs.directory }} run: | - nebari init aws \ + nebari init ${{ inputs.cloud }} \ --project-name '${{ steps.metadata.outputs.project }}' \ --domain-name '${{ steps.metadata.outputs.domain }}' \ --auth-provider password \ diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index 723d951ed0..c2cd0b04e5 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -63,12 +63,18 @@ jobs: pip install .[dev] playwright install + - name: Authenticate to AWS + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: ${{ 
secrets.AWS_ROLE_ARN }} + role-session-name: github-action + aws-region: ${{ env.AWS_DEFAULT_REGION }} + - name: Initialize Nebari config for aws deployment id: init - uses: ./.github/actions/init-cloud-aws + uses: ./.github/actions/init-cloud with: - aws_region: ${{ env.AWS_DEFAULT_REGION }} - aws_role_to_assume: ${{ secrets.AWS_ROLE_ARN }} + cloud: aws - name: Deploy Nebari working-directory: ${{ steps.init.outputs.directory }} @@ -76,13 +82,6 @@ jobs: env: CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} - - name: Authenticate to AWS - uses: aws-actions/configure-aws-credentials@v1 - with: - role-to-assume: ${{ secrets.AWS_ROLE_ARN }} - role-session-name: github-action - aws-region: ${{ env.AWS_DEFAULT_REGION }} - - name: Update kube config run: aws eks update-kubeconfig --name github-actions-dev --region ${{ env.AWS_DEFAULT_REGION }} From 33e7fdfc2386af48f08246d2257e70a6bfa204a5 Mon Sep 17 00:00:00 2001 From: sophia Date: Thu, 6 Mar 2025 15:04:03 -0800 Subject: [PATCH 08/13] Set hostname for deployment tests --- .github/workflows/test_aws_integration.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index c2cd0b04e5..7060185cad 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -110,8 +110,9 @@ jobs: NEBARI_CONFIG_PATH: ${{ steps.init.outputs.config }} KEYCLOAK_USERNAME: ${{ env.TEST_USERNAME }} KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }} + NEBARI_HOSTNAME: ${{ steps.init.outputs.domain }} run: | - pytest tests/tests_deployment/ -v -s + pytest tests/tests_deployment/ -v ### USER-JOURNEY TESTS - uses: actions/setup-node@v4 From 3b3c23d648ec29433e9774d62ea3aa31f3621f95 Mon Sep 17 00:00:00 2001 From: sophia Date: Sat, 8 Mar 2025 18:13:59 -0800 Subject: [PATCH 09/13] Bump boto to avoid test errors --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml 
b/pyproject.toml index 177df4dfce..71247a9194 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ dependencies = [ "azure-mgmt-containerservice==26.0.0", "azure-mgmt-resource==23.0.1", "bcrypt==4.0.1", - "boto3==1.34.63", + "boto3==1.37.9", "cloudflare==2.11.7", "google-auth>=2.31.0,<3.0.0", "google-cloud-compute==1.19.1", From a4914ea2c82389367b0c2efc2587e911aad394e5 Mon Sep 17 00:00:00 2001 From: sophia Date: Sat, 8 Mar 2025 19:00:47 -0800 Subject: [PATCH 10/13] Rename project --- .github/actions/init-cloud/action.yml | 2 +- .github/workflows/test_aws_integration.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/init-cloud/action.yml b/.github/actions/init-cloud/action.yml index 486005bbea..7519060b8d 100644 --- a/.github/actions/init-cloud/action.yml +++ b/.github/actions/init-cloud/action.yml @@ -39,7 +39,7 @@ runs: CONFIG="${DIRECTORY}/nebari-config.yaml" echo "config=${CONFIG}" | tee --append "${GITHUB_OUTPUT}" - PROJECT='github-actions' + PROJECT='gha-cloud-test' echo "project=${PROJECT}" | tee --append "${GITHUB_OUTPUT}" DOMAIN='ci-${{ inputs.cloud }}.nebari.dev' diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index 7060185cad..b8b32598ec 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -83,7 +83,7 @@ jobs: CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} - name: Update kube config - run: aws eks update-kubeconfig --name github-actions-dev --region ${{ env.AWS_DEFAULT_REGION }} + run: aws eks update-kubeconfig --name gha-cloud-test-dev --region ${{ env.AWS_DEFAULT_REGION }} - name: Health check uses: ./.github/actions/health-check From b55a46439b6d0f1069530538787ad082e25b31ec Mon Sep 17 00:00:00 2001 From: sophia Date: Wed, 12 Mar 2025 11:50:38 -0700 Subject: [PATCH 11/13] Mark deployment tests that require access to k8 api --- .github/workflows/test_aws_integration.yaml | 2 +- pytest.ini 
| 1 + tests/tests_deployment/test_loki_deployment.py | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index b8b32598ec..596b3613fe 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -112,7 +112,7 @@ jobs: KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }} NEBARI_HOSTNAME: ${{ steps.init.outputs.domain }} run: | - pytest tests/tests_deployment/ -v + pytest tests/tests_deployment/ -v -W ignore::DeprecationWarning -m "not requires_kubeconfig" ### USER-JOURNEY TESTS - uses: actions/setup-node@v4 diff --git a/pytest.ini b/pytest.ini index d299f154a8..48a20f632c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,6 +9,7 @@ addopts = markers = gpu: test gpu working properly preemptible: test preemptible instances + requires_kubeconfig: test that requires to interact with the kubernetes api testpaths = tests xfail_strict = True diff --git a/tests/tests_deployment/test_loki_deployment.py b/tests/tests_deployment/test_loki_deployment.py index 59210a8fc3..72966ccbda 100644 --- a/tests/tests_deployment/test_loki_deployment.py +++ b/tests/tests_deployment/test_loki_deployment.py @@ -26,6 +26,7 @@ } +@pytest.mark.requires_kubeconfig @pytest.fixture(scope="module") def port_forward_fixture(request): """Pytest fixture to port forward loki backend pod to make it accessible @@ -41,6 +42,7 @@ def port_forward(labels, port): return pytest.mark.parametrize("port_forward_fixture", [params], indirect=True) +@pytest.mark.requires_kubeconfig @pytest.mark.parametrize( "endpoint_path", ( @@ -67,6 +69,7 @@ def test_loki_endpoint(endpoint_path: str, port_forward_fixture: V1Pod): response.close() +@pytest.mark.requires_kubeconfig @port_forward(labels=MINIO_POD_LABELS, port=MINIO_PORT) def test_minio_accessible(port_forward_fixture: V1Pod): """This will hit liveness endpoint of minio API and verify that we @@ -82,6 +85,7 @@ def 
test_minio_accessible(port_forward_fixture: V1Pod): response.close() +@pytest.mark.requires_kubeconfig @port_forward(labels=LOKI_GATEWAY_POD_LABELS, port=LOKI_GATEWAY_PORT) def test_loki_gateway(port_forward_fixture: V1Pod): """This will hit an endpoint of loki gateway API and verify that we @@ -99,6 +103,7 @@ def test_loki_gateway(port_forward_fixture: V1Pod): response.close() +@pytest.mark.requires_kubeconfig @port_forward(labels=LOKI_GATEWAY_POD_LABELS, port=LOKI_GATEWAY_PORT) def test_loki_gateway_fetch_logs(port_forward_fixture: V1Pod): """This will hit an endpoint of loki gateway API to fetch some logs From f6d8220454be03a9527b4c4359dbcb5cae05799a Mon Sep 17 00:00:00 2001 From: sophia Date: Thu, 13 Mar 2025 08:51:58 -0700 Subject: [PATCH 12/13] Force destroy aws resources --- .github/workflows/test_aws_integration.yaml | 6 ++++-- scripts/aws-force-destroy.py | 4 +--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml index 596b3613fe..3fded6f99f 100644 --- a/.github/workflows/test_aws_integration.yaml +++ b/.github/workflows/test_aws_integration.yaml @@ -142,5 +142,7 @@ jobs: ### CLEANUP AFTER TESTS - name: Cleanup nebari deployment if: always() - working-directory: ${{ steps.init.outputs.directory }} - run: nebari destroy --config ${{ steps.init.outputs.config }} --disable-prompt + run: | + ls + ls scripts + python scripts/aws-force-destroy.py --config ${{ steps.init.outputs.config }} diff --git a/scripts/aws-force-destroy.py b/scripts/aws-force-destroy.py index f58d292487..5f4f3a326e 100644 --- a/scripts/aws-force-destroy.py +++ b/scripts/aws-force-destroy.py @@ -3,7 +3,7 @@ import time from pathlib import Path -from _nebari.utils import check_cloud_credentials, load_yaml, timer +from _nebari.utils import load_yaml, timer logging.basicConfig(level=logging.INFO) @@ -55,8 +55,6 @@ def force_destroy_configuration(config): with timer(logging, "destroying 
nebari"): # 01 Check we have cloud details we need - check_cloud_credentials(config) - if config.get("provider", "") != "aws": raise ValueError("force-destroy currently only available for AWS") From 7fd8ead6c60f6f5f12b4f8abe4f831e04864a6b8 Mon Sep 17 00:00:00 2001 From: sophia Date: Thu, 13 Mar 2025 10:28:12 -0700 Subject: [PATCH 13/13] Retry destroying aws infrastructure --- scripts/aws-force-destroy.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/scripts/aws-force-destroy.py b/scripts/aws-force-destroy.py index 5f4f3a326e..ed0629b98d 100644 --- a/scripts/aws-force-destroy.py +++ b/scripts/aws-force-destroy.py @@ -7,16 +7,34 @@ logging.basicConfig(level=logging.INFO) +RETRY_TIMES = 7 + def main(): parser = argparse.ArgumentParser(description="Force Destroy AWS environment.") parser.add_argument("-c", "--config", help="nebari configuration", required=True) args = parser.parse_args() - handle_force_destroy(args) + success = False + retries = 0 + + # sometimes just need to retry + while retries < RETRY_TIMES and not success: + success = handle_force_destroy(args) + if not success: + logging.info(f"Attempt {retries+1} failed!") + time.sleep(7) + retries += 1 + + +def handle_force_destroy(args) -> bool: + """Force Destroy AWS environment. + If the environment is successfully destroyed, return True. + If the environment is not successfully destroyed, return False. 
-def handle_force_destroy(args): + :rtype: bool + """ config_filename = Path(args.config) if not config_filename.is_file(): raise ValueError( @@ -25,9 +43,14 @@ def handle_force_destroy(args): config = load_yaml(config_filename) - # Don't verify(config) in case the schema has changed - just pick out the important bits and tear down + # Try to destroy the AWS environment + try: + force_destroy_configuration(config) + except Exception as e: + logging.error(f"Failed to destroy AWS environment: {e}") + return False - force_destroy_configuration(config) + return True def parse_arn(arn):