diff --git a/.github/actions/init-cloud/action.yml b/.github/actions/init-cloud/action.yml
new file mode 100644
index 0000000000..7519060b8d
--- /dev/null
+++ b/.github/actions/init-cloud/action.yml
@@ -0,0 +1,98 @@
+name: init-cloud
+description: "Initialize Nebari config for cloud deployment"
+
+inputs:
+  directory:
+    description: "Path to directory to initialize in"
+    required: false
+    default: './cloud-deployment'
+  cloud:
+    description: "Cloud provider to use for deployment"
+    required: true
+
+outputs:
+  directory:
+    description: "Path to config directory"
+    value: ${{ steps.metadata.outputs.directory }}
+  config:
+    description: "Path to Nebari config"
+    value: ${{ steps.metadata.outputs.config }}
+  project:
+    description: "Project name"
+    value: ${{ steps.metadata.outputs.project }}
+  domain:
+    description: "Domain name"
+    value: ${{ steps.metadata.outputs.domain }}
+
+runs:
+  using: composite
+
+  steps:
+    - shell: bash
+      id: metadata
+      run: |
+        # Setup metadata
+        DIRECTORY=$(realpath '${{ inputs.directory }}')
+        mkdir --parents "${DIRECTORY}"
+        echo "directory=${DIRECTORY}" | tee --append "${GITHUB_OUTPUT}"
+
+        CONFIG="${DIRECTORY}/nebari-config.yaml"
+        echo "config=${CONFIG}" | tee --append "${GITHUB_OUTPUT}"
+
+        PROJECT='gha-cloud-test'
+        echo "project=${PROJECT}" | tee --append "${GITHUB_OUTPUT}"
+
+        DOMAIN='ci-${{ inputs.cloud }}.nebari.dev'
+        echo "domain=${DOMAIN}" | tee --append "${GITHUB_OUTPUT}"
+
+    - shell: bash -l {0}
+      id: init
+      working-directory: ${{ steps.metadata.outputs.directory }}
+      run: |
+        nebari init ${{ inputs.cloud }} \
+          --project-name '${{ steps.metadata.outputs.project }}' \
+          --domain-name '${{ steps.metadata.outputs.domain }}' \
+          --auth-provider password \
+          --output '${{ steps.metadata.outputs.config }}'
+
+    - shell: bash
+      run: |
+        # Update nebari config for CI
+
+        # Change default JupyterLab theme
+        cat >> '${{ steps.metadata.outputs.config }}' <<- EOM
+        jupyterlab:
+          default_settings:
+            "@jupyterlab/apputils-extension:themes":
+              theme: JupyterLab Dark
+        EOM
+
+        # Change default value for minio persistence size
+        cat >> '${{ steps.metadata.outputs.config }}' <<- EOM
+        monitoring:
+          enabled: true
+          overrides:
+            minio:
+              persistence:
+                size: 1Gi
+        EOM
+
+        # Update certificate info
+        cat >> '${{ steps.metadata.outputs.config }}' <<- EOM
+        certificate:
+          type: lets-encrypt
+          acme_email: internal-devops@quansight.com
+          acme_server: https://acme-v02.api.letsencrypt.org/directory
+        EOM
+
+        # Update dns config
+        cat >> '${{ steps.metadata.outputs.config }}' <<- EOM
+        dns:
+          provider: cloudflare
+          auto_provision: true
+        EOM
+
+    - shell: bash
+      run: |
+        # Display Nebari config
+        cat '${{ steps.metadata.outputs.config }}'
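A note on the append strategy above: plain `cat >>` only yields a usable config because each heredoc introduces top-level keys (`jupyterlab`, `monitoring`, `certificate`, `dns`) that `nebari init` has not already emitted; YAML loaders keep the last occurrence of a duplicated key, so a collision would silently shadow the generated values. A quick sanity check along these lines could catch that early — a sketch only, not part of this change; it assumes PyYAML is available and the default `./cloud-deployment` path:

```python
# Sketch: verify the appended CI overrides survived the merge (not part of
# this change; assumes PyYAML and the action's default output path).
import yaml

with open("cloud-deployment/nebari-config.yaml") as f:
    config = yaml.safe_load(f)

# If `nebari init` ever starts writing these keys itself, the duplicated
# top-level keys would shadow each other instead of merging.
for key in ("jupyterlab", "monitoring", "certificate", "dns"):
    assert key in config, f"CI override section missing: {key}"

assert config["monitoring"]["overrides"]["minio"]["persistence"]["size"] == "1Gi"
```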
diff --git a/.github/workflows/test_aws_integration.yaml b/.github/workflows/test_aws_integration.yaml
index 59075ea7de..3fded6f99f 100644
--- a/.github/workflows/test_aws_integration.yaml
+++ b/.github/workflows/test_aws_integration.yaml
@@ -1,37 +1,40 @@
 name: AWS Deployment
 
+env:
+  TEST_USERNAME: "test-user"
+  TEST_PASSWORD: "P@sswo3d"
+  NEBARI_IMAGE_TAG: "main"
+  AWS_DEFAULT_REGION: "us-west-2"
+
 on:
+  pull_request:
+    paths:
+      - ".github/workflows/test_aws_integration.yaml"
+      - "tests/**"
+      - "scripts/**"
+      - "src/**"
+      - "pyproject.toml"
+      - "pytest.ini"
+      - ".cirun.yml"
+  push:
+    branches:
+      - main
+      - release/\d{4}.\d{1,2}.\d{1,2}
+    paths:
+      - ".github/workflows/test_aws_integration.yaml"
+      - "tests/**"
+      - "scripts/**"
+      - "src/**"
+      - "pyproject.toml"
+      - "pytest.ini"
+      - ".cirun.yml"
   schedule:
     - cron: "0 0 * * MON"
   workflow_dispatch:
-    inputs:
-      image-tag:
-        description: 'Nebari image tag created by the nebari-docker-images repo'
-        required: true
-        default: main
-        type: string
-      tf-log-level:
-        description: 'Change Terraform log levels'
-        required: false
-        default: info
-        type: choice
-        options:
-          - info
-          - warn
-          - debug
-          - trace
-          - error
-
-env:
-  AWS_DEFAULT_REGION: "us-west-2"
-  NEBARI_IMAGE_TAG: ${{ github.event.inputs.image-tag || 'main' }}
-  TF_LOG: ${{ github.event.inputs.tf-log-level || 'info' }}
 
 jobs:
   test-aws-integration:
     runs-on: ubuntu-latest
-    if: ${{ vars.SKIP_AWS_INTEGRATION_TEST != 'true' }}
     permissions:
       id-token: write
       contents: read
@@ -42,9 +45,18 @@
         fetch-depth: 0
 
     - name: Set up Python
-      uses: actions/setup-python@v5
+      uses: conda-incubator/setup-miniconda@v3
+      env:
+        CONDA: /home/runner/miniconda3
       with:
-        python-version: 3.11
+        auto-update-conda: true
+        python-version: "3.11"
+        miniconda-version: "latest"
+
+    - name: Install kubectl
+      uses: azure/setup-kubectl@v4
+      with:
+        version: v1.19.16
 
     - name: Install Nebari
       run: |
@@ -58,12 +70,77 @@
         role-session-name: github-action
         aws-region: ${{ env.AWS_DEFAULT_REGION }}
 
-    - name: Integration Tests
-      run: |
-        pytest --version
-        pytest tests/tests_integration/ -vvv -s --cloud aws
+    - name: Initialize Nebari config for aws deployment
+      id: init
+      uses: ./.github/actions/init-cloud
+      with:
+        cloud: aws
+
+    - name: Deploy Nebari
+      working-directory: ${{ steps.init.outputs.directory }}
+      run: nebari deploy --config ${{ steps.init.outputs.config }} --disable-prompt
       env:
-        NEBARI_SECRET__default_images__jupyterhub: "quay.io/nebari/nebari-jupyterhub:${{ env.NEBARI_IMAGE_TAG }}"
-        NEBARI_SECRET__default_images__jupyterlab: "quay.io/nebari/nebari-jupyterlab:${{ env.NEBARI_IMAGE_TAG }}"
-        NEBARI_SECRET__default_images__dask_worker: "quay.io/nebari/nebari-dask-worker:${{ env.NEBARI_IMAGE_TAG }}"
         CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }}
+
+    - name: Update kube config
+      run: aws eks update-kubeconfig --name gha-cloud-test-dev --region ${{ env.AWS_DEFAULT_REGION }}
+
+    - name: Health check
+      uses: ./.github/actions/health-check
+      with:
+        domain: ${{ steps.init.outputs.domain }}
+
+    - name: Create example-user
+      working-directory: ${{ steps.init.outputs.directory }}
+      run: |
+        nebari keycloak adduser --user "${TEST_USERNAME}" "${TEST_PASSWORD}" --config ${{ steps.init.outputs.config }}
+        nebari keycloak listusers --config ${{ steps.init.outputs.config }}
+
+    - name: Await Workloads
+      uses: jupyterhub/action-k8s-await-workloads@v3
+      with:
+        workloads: "" # all
+        namespace: "dev"
+        timeout: 300
+        max-restarts: 3
+
+    ### DEPLOYMENT TESTS
+    - name: Deployment Pytests
+      env:
+        NEBARI_CONFIG_PATH: ${{ steps.init.outputs.config }}
+        KEYCLOAK_USERNAME: ${{ env.TEST_USERNAME }}
+        KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }}
+        NEBARI_HOSTNAME: ${{ steps.init.outputs.domain }}
+      run: |
+        pytest tests/tests_deployment/ -v -W ignore::DeprecationWarning -m "not requires_kubeconfig"
+
+    ### USER-JOURNEY TESTS
+    - uses: actions/setup-node@v4
+      with:
+        node-version: 20
+
+    - name: Playwright Tests
+      env:
+        KEYCLOAK_USERNAME: ${{ env.TEST_USERNAME }}
+        KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }}
+        NEBARI_FULL_URL: "https://${{ steps.init.outputs.domain }}/"
+      working-directory: tests/tests_e2e/playwright
+      run: |
+        # create environment file
+        envsubst < .env.tpl > .env
+        # run playwright pytest tests in headed mode with the chromium browser
+        xvfb-run pytest --browser chromium --slowmo 300 --headed
+
+    - name: Save Playwright recording artifacts
+      if: always()
+      uses: actions/upload-artifact@v4.3.1
+      with:
+        name: e2e-playwright
+        path: |
+          ./tests/tests_e2e/playwright/videos/
+
+    ### CLEANUP AFTER TESTS
+    - name: Cleanup nebari deployment
+      if: always()
+      run: |
+        python scripts/aws-force-destroy.py --config ${{ steps.init.outputs.config }}
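The `./.github/actions/health-check` composite action referenced above is not part of this diff, so its exact behavior is an assumption; presumably it polls the freshly deployed domain until the ingress answers. A standalone sketch of that kind of readiness check, where the endpoint, timeout, and interval are all made up here:

```python
# Sketch of a readiness poll like the one the health-check action presumably
# performs (the action itself is not in this diff; values are assumptions).
import time

import requests


def wait_for_nebari(domain: str, timeout: int = 600, interval: int = 10) -> None:
    url = f"https://{domain}/"
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Default certificate verification also exercises the Let's Encrypt
            # cert provisioned via the certificate/dns config sections.
            if requests.get(url, timeout=30).status_code == 200:
                return
        except requests.RequestException:
            pass  # DNS may not have propagated yet; keep polling
        time.sleep(interval)
    raise TimeoutError(f"{url} did not become healthy within {timeout}s")


if __name__ == "__main__":
    wait_for_nebari("ci-aws.nebari.dev")
```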
diff --git a/pyproject.toml b/pyproject.toml
index 177df4dfce..71247a9194 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,7 +60,7 @@ dependencies = [
     "azure-mgmt-containerservice==26.0.0",
     "azure-mgmt-resource==23.0.1",
     "bcrypt==4.0.1",
-    "boto3==1.34.63",
+    "boto3==1.37.9",
     "cloudflare==2.11.7",
     "google-auth>=2.31.0,<3.0.0",
     "google-cloud-compute==1.19.1",
diff --git a/pytest.ini b/pytest.ini
index d299f154a8..48a20f632c 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,6 +9,7 @@ addopts =
 markers =
     gpu: test gpu working properly
    preemptible: test preemptible instances
+    requires_kubeconfig: test that requires interacting with the Kubernetes API
 testpaths =
     tests
 xfail_strict = True
diff --git a/scripts/aws-force-destroy.py b/scripts/aws-force-destroy.py
index f58d292487..ed0629b98d 100644
--- a/scripts/aws-force-destroy.py
+++ b/scripts/aws-force-destroy.py
@@ -3,20 +3,38 @@
 import time
 from pathlib import Path
 
-from _nebari.utils import check_cloud_credentials, load_yaml, timer
+from _nebari.utils import load_yaml, timer
 
 logging.basicConfig(level=logging.INFO)
 
+RETRY_TIMES = 7
+
 
 def main():
     parser = argparse.ArgumentParser(description="Force Destroy AWS environment.")
     parser.add_argument("-c", "--config", help="nebari configuration", required=True)
     args = parser.parse_args()
 
-    handle_force_destroy(args)
+    success = False
+    retries = 0
+
+    # AWS teardown is flaky; sometimes the destroy just needs a retry
+    while retries < RETRY_TIMES and not success:
+        success = handle_force_destroy(args)
+        if not success:
+            logging.info(f"Attempt {retries+1} failed!")
+            time.sleep(7)
+        retries += 1
 
 
-def handle_force_destroy(args):
+def handle_force_destroy(args) -> bool:
+    """Force destroy the AWS environment.
+
+    Return True if the environment was destroyed successfully,
+    False otherwise.
+
+    :rtype: bool
+    """
     config_filename = Path(args.config)
     if not config_filename.is_file():
         raise ValueError(
@@ -25,9 +43,14 @@ def handle_force_destroy(args):
 
     config = load_yaml(config_filename)
 
-    # Don't verify(config) in case the schema has changed - just pick out the important bits and tear down
+    # Try to destroy the AWS environment
+    try:
+        force_destroy_configuration(config)
+    except Exception as e:
+        logging.error(f"Failed to destroy AWS environment: {e}")
+        return False
 
-    force_destroy_configuration(config)
+    return True
 
 
 def parse_arn(arn):
@@ -55,8 +78,6 @@ def force_destroy_configuration(config):
     with timer(logging, "destroying nebari"):
         # 01 Check we have cloud details we need
-        check_cloud_credentials(config)
-
         if config.get("provider", "") != "aws":
             raise ValueError("force-destroy currently only available for AWS")
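The retry loop above is simple by design: seven attempts with a flat seven-second pause, and the script still exits zero when every attempt fails, so the cleanup step cannot fail the job even if resources are left behind. If teardown flakiness ever warrants it, the same shape extends naturally to exponential backoff — a sketch of an alternative, not what the script currently implements:

```python
# Sketch: the same retry shape with exponential backoff (an alternative to
# the script's flat 7-second sleep, not what it currently does).
import logging
import time
from typing import Callable


def retry(func: Callable[[], bool], attempts: int = 7, base_delay: float = 7.0) -> bool:
    for attempt in range(1, attempts + 1):
        if func():
            return True
        if attempt < attempts:
            # 7s, 14s, 28s, ... doubles the wait after each failed attempt
            delay = base_delay * 2 ** (attempt - 1)
            logging.info(f"Attempt {attempt} failed; retrying in {delay:.0f}s")
            time.sleep(delay)
    return False
```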
diff --git a/tests/tests_deployment/test_loki_deployment.py b/tests/tests_deployment/test_loki_deployment.py
index 59210a8fc3..72966ccbda 100644
--- a/tests/tests_deployment/test_loki_deployment.py
+++ b/tests/tests_deployment/test_loki_deployment.py
@@ -41,6 +41,7 @@ def port_forward(labels, port):
     return pytest.mark.parametrize("port_forward_fixture", [params], indirect=True)
 
 
+@pytest.mark.requires_kubeconfig
 @pytest.mark.parametrize(
     "endpoint_path",
     (
@@ -67,6 +68,7 @@ def test_loki_endpoint(endpoint_path: str, port_forward_fixture: V1Pod):
     response.close()
 
 
+@pytest.mark.requires_kubeconfig
 @port_forward(labels=MINIO_POD_LABELS, port=MINIO_PORT)
 def test_minio_accessible(port_forward_fixture: V1Pod):
     """This will hit liveness endpoint of minio API and verify that we
@@ -82,6 +84,7 @@
     response.close()
 
 
+@pytest.mark.requires_kubeconfig
 @port_forward(labels=LOKI_GATEWAY_POD_LABELS, port=LOKI_GATEWAY_PORT)
 def test_loki_gateway(port_forward_fixture: V1Pod):
     """This will hit an endpoint of loki gateway API and verify that we
@@ -99,6 +102,7 @@
     response.close()
 
 
+@pytest.mark.requires_kubeconfig
 @port_forward(labels=LOKI_GATEWAY_POD_LABELS, port=LOKI_GATEWAY_PORT)
 def test_loki_gateway_fetch_logs(port_forward_fixture: V1Pod):
     """This will hit an endpoint of loki gateway API to fetch some logs
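Note the marker is applied only to the test functions, not to `port_forward_fixture`: pytest ignores marks on fixture functions (and warns about them), so the tests themselves carry the mark and the workflow deselects them with `-m "not requires_kubeconfig"`. If automatic skipping were ever preferred over explicit deselection, a small collection hook could gate the marked tests on an actual kubeconfig — a hypothetical `conftest.py` addition, not part of this change, and the path heuristic is an assumption:

```python
# Hypothetical conftest.py hook (not in this change): skip marked tests
# automatically when no kubeconfig can be found, instead of relying on
# explicit -m "not requires_kubeconfig" deselection.
import os
from pathlib import Path

import pytest


def _kubeconfig_available() -> bool:
    # KUBECONFIG may hold a colon-separated list of paths; checking each
    # entry plus the conventional default location is a simple heuristic.
    candidates = [p for p in os.environ.get("KUBECONFIG", "").split(":") if p]
    candidates.append(str(Path.home() / ".kube" / "config"))
    return any(Path(p).is_file() for p in candidates)


def pytest_collection_modifyitems(config, items):
    if _kubeconfig_available():
        return
    skip = pytest.mark.skip(reason="no kubeconfig found")
    for item in items:
        if "requires_kubeconfig" in item.keywords:
            item.add_marker(skip)
```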