diff --git a/.github/actions/download-load-docker-images/action.yml b/.github/actions/download-load-docker-images/action.yml
new file mode 100644
index 00000000000..5f9f03be3c3
--- /dev/null
+++ b/.github/actions/download-load-docker-images/action.yml
@@ -0,0 +1,31 @@
+name: 'Download and Load Docker Images'
+description: 'Downloads and loads Docker images for integration/system tests'
+
+inputs:
+  artifact-name-pattern:
+    description: 'Artifact name pattern for docker images (e.g., backend, or * for all)'
+    required: false
+    default: 'backend'
+
+  download-path:
+    description: 'Path to download artifacts to'
+    required: false
+    default: '/${{ runner.temp }}/build'
+
+runs:
+  using: 'composite'
+  steps:
+    # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249
+    - name: download docker images with retry
+      uses: Wandalen/wretry.action@master
+      with:
+        action: actions/download-artifact@v4
+        with: |
+          ${{ inputs.artifact-name-pattern == '*' && 'pattern' || 'name' }}: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-${{ inputs.artifact-name-pattern }}
+          path: ${{ inputs.download-path }}
+        attempt_limit: 5
+        attempt_delay: 1000
+
+    - name: load docker images
+      shell: bash
+      run: make load-images local-src=${{ inputs.download-path }}
diff --git a/.github/actions/setup-simcore-env/action.yml b/.github/actions/setup-simcore-env/action.yml
new file mode 100644
index 00000000000..028cacd4830
--- /dev/null
+++ b/.github/actions/setup-simcore-env/action.yml
@@ -0,0 +1,64 @@
+name: 'Setup SimCore Environment'
+description: 'Sets up the common environment for SimCore CI jobs'
+
+inputs:
+  python-version:
+    description: 'Python version to use'
+    required: false
+    default: '3.11'
+
+  uv-version:
+    description: 'UV version to use'
+    required: false
+    default: '0.6.x'
+
+  cache-dependency-glob:
+    description: 'Glob pattern for cache dependency files'
+    required: false
+    default: ''
+
+  setup-docker:
+    description: 'Whether to setup Docker BuildX'
+    required: false
+    default: 'true'
+
+  show-system-versions:
+    description: 'Whether to show system versions'
+    required: false
+    default: 'true'
+
+  expose-github-runtime:
+    description: 'Whether to expose GitHub runtime for buildx (needed for some integration tests)'
+    required: false
+    default: 'true'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Setup Docker BuildX
+      if: inputs.setup-docker == 'true'
+      id: buildx
+      uses: docker/setup-buildx-action@v3
+      with:
+        driver: docker-container
+
+    - name: Expose GitHub runtime for BuildX
+      if: inputs.expose-github-runtime == 'true'
+      uses: crazy-max/ghaction-github-runtime@v3
+
+    - name: Setup Python environment
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ inputs.python-version }}
+
+    - name: Install UV
+      uses: astral-sh/setup-uv@v6
+      with:
+        version: ${{ inputs.uv-version }}
+        enable-cache: false
+        cache-dependency-glob: ${{ inputs.cache-dependency-glob }}
+
+    - name: Show system versions
+      if: inputs.show-system-versions == 'true'
+      shell: bash
+      run: ./ci/helpers/show_system_versions.bash
diff --git a/.github/workflows/_reusable-build-images.yml b/.github/workflows/_reusable-build-images.yml index f634e1c2d78..2276d1d3d0b 100644 --- a/.github/workflows/_reusable-build-images.yml +++ b/.github/workflows/_reusable-build-images.yml @@ -26,15 +26,11 @@ jobs: runs-on: ${{ inputs.os }} steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 + - name: Setup SimCore environment + uses: 
./.github/actions/setup-simcore-env with: - driver: docker-container - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - - name: show system environs - run: ./ci/helpers/show_system_versions.bash + python-version: ${{ inputs.python-version }} + expose-github-runtime: 'true' - name: build backend images if: ${{ inputs.build-backend }} run: | diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index 05a060c7709..d626dc30941 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -138,6 +138,7 @@ jobs: - 'packages/pytest-simcore/**' - 'packages/service-integration/**' - 'services/docker-compose*' + - 'mypy.ini' service-library: - 'packages/pytest-simcore/**' - 'packages/service-library/**' @@ -148,9 +149,11 @@ jobs: - 'packages/pytest-simcore/**' - 'packages/settings-library/**' - 'services/docker-compose*' + - 'mypy.ini' simcore-sdk: - 'packages/**' - 'services/docker-compose*' + - 'mypy.ini' agent: - 'packages/**' - 'services/agent/**' @@ -165,6 +168,7 @@ jobs: - 'mypy.ini' api: - 'api/**' + - 'mypy.ini' api-server: - 'packages/**' - 'services/api-server/**' @@ -235,6 +239,7 @@ jobs: - 'packages/**' - 'services/migration/**' - 'services/docker-compose*' + - 'mypy.ini' payments: - 'packages/**' - 'services/payments/**' @@ -250,6 +255,7 @@ jobs: docker-api-proxy: - 'packages/**' - 'services/docker-api-proxy/**' + - 'mypy.ini' resource-usage-tracker: - 'packages/**' - 'services/resource-usage-tracker/**' @@ -269,12 +275,15 @@ jobs: - 'packages/**' - 'services/web/**' - 'services/docker-compose*' + - 'mypy.ini' anything: - 'packages/**' - 'services/**' - 'tests/**' + - 'mypy.ini' anything-py: - '**/*.py' + - 'mypy.ini' anything-js: - '**/*.js' build-test-images: @@ -303,24 +312,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/web/server/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install webserver run: ./ci/github/unit-testing/webserver.bash install - name: typecheck @@ -351,24 +349,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/web/server/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install webserver run: ./ci/github/unit-testing/webserver.bash install - name: test @@ -393,24 +380,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: 
docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/web/server/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install webserver run: ./ci/github/unit-testing/webserver.bash install - name: test @@ -435,24 +411,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/web/server/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install webserver run: ./ci/github/unit-testing/webserver.bash install - name: test @@ -462,11 +427,7 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: flags: unittests #optional - - name: Upload test results to Codecov - if: ${{ !cancelled() }} - uses: codecov/test-results-action@v1 - with: - token: ${{ secrets.CODECOV_TOKEN }} + unit-test-storage: needs: changes @@ -480,24 +441,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/storage/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/storage.bash install - name: typecheck @@ -525,26 +475,15 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} + cache-dependency-glob: "**/agent/requirements/ci.txt" - name: install rclone run: sudo ./ci/github/helpers/install_rclone.bash - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - cache-dependency-glob: "**/agent/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/agent.bash install - name: typecheck @@ -571,24 +510,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - 
driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/notifications/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/notifications.bash install - name: typecheck @@ -615,24 +543,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - cache-dependency-glob: "**/api/tests/requirements.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash + cache-dependency-glob: "**/api/requirements/ci.txt" - name: install api run: ./ci/github/unit-testing/api.bash install - name: test @@ -657,24 +574,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/api-server/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/api-server.bash install - name: typecheck @@ -705,24 +611,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/autoscaling/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/autoscaling.bash install - name: typecheck @@ -750,24 +645,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: 
"**/catalog/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/catalog.bash install - name: typecheck @@ -801,24 +685,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/clusters-keeper/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: | make devenv @@ -857,24 +730,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/datcore-adapter/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/datcore-adapter.bash install - name: typecheck @@ -908,24 +770,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/director/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/director.bash install - name: typecheck @@ -959,24 +810,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/director-v2/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/director-v2.bash install - name: typecheck @@ -1010,24 +850,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout 
code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/aws-library/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/aws-library.bash install - name: typecheck @@ -1098,24 +927,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/dask-task-models-library/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/dask-task-models-library.bash install - name: typecheck @@ -1143,24 +961,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/dask-sidecar/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/dask-sidecar.bash install - name: typecheck @@ -1188,24 +995,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/payments/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/payments.bash install - name: typecheck @@ -1233,24 +1029,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/dynamic-scheduler/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: 
./ci/github/unit-testing/dynamic-scheduler.bash install - name: typecheck @@ -1278,24 +1063,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/resource-usage-tracker/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: | make devenv @@ -1333,24 +1107,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/dynamic-sidecar/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/dynamic-sidecar.bash install - name: typecheck @@ -1378,24 +1141,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/efs-guardian/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: | make devenv @@ -1434,24 +1186,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - cache-dependency-glob: "**/ci/helpers/requirements.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash + cache-dependency-glob: "**/pylint/requirements/ci.txt" - name: install run: ./ci/github/unit-testing/python-linting.bash install - name: test @@ -1469,24 +1210,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: 
./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/postgres-database/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/postgres-database.bash install - name: typecheck @@ -1514,24 +1244,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - cache-dependency-glob: "**/notifications-library/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash + cache-dependency-glob: "**/invitations/requirements/ci.txt" - name: install run: ./ci/github/unit-testing/invitations.bash install - name: typecheck @@ -1559,24 +1278,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/service-integration/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/service-integration.bash install - name: typecheck @@ -1604,24 +1312,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - cache-dependency-glob: "**/service-library/requirements/ci*.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash + cache-dependency-glob: "**/service-library/requirements/ci.txt" - name: install run: ./ci/github/unit-testing/service-library.bash install_all - name: typecheck @@ -1649,24 +1346,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/settings-library/requirements/ci.txt" - - name: show 
system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/settings-library.bash install - name: typecheck @@ -1694,24 +1380,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/models-library/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/models-library.bash install - name: typecheck @@ -1738,24 +1413,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/common-library/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/common-library.bash install - name: typecheck @@ -1781,24 +1445,13 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/notifications-library/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/unit-testing/notifications-library.bash install - name: typecheck @@ -1826,26 +1479,15 @@ jobs: os: [ubuntu-24.04] fail-fast: false steps: - - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: install rclone - run: sudo ./ci/github/helpers/install_rclone.bash - - name: setup python environment - uses: actions/setup-python@v5 + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/simcore-sdk/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: install rclone + run: sudo ./ci/github/helpers/install_rclone.bash - name: install run: ./ci/github/unit-testing/simcore-sdk.bash install - name: typecheck @@ -1925,35 +1567,15 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup 
docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - name: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-backend - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/web/server/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: 'backend' - name: install run: ./ci/github/integration-testing/webserver.bash install - name: test @@ -1987,37 +1609,15 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - name: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-backend - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/web/server/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: 'backend' - name: install run: ./ci/github/integration-testing/webserver.bash install - name: test @@ -2051,37 +1651,15 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - name: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-backend - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false 
cache-dependency-glob: "**/director-v2/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: 'backend' - name: install run: ./ci/github/integration-testing/director-v2.bash install - name: test @@ -2117,44 +1695,21 @@ jobs: # NOTE: DIRECTOR_DEFAULT_MAX_* used for integration-tests that include `director` service DIRECTOR_DEFAULT_MAX_MEMORY: 268435456 DIRECTOR_DEFAULT_MAX_NANO_CPUS: 10000000 - DIRECTOR_TRACING: null steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: - driver: docker-container + python-version: ${{ matrix.python }} + cache-dependency-glob: "**/director-v2/requirements/ci.txt" - name: setup rclone docker volume plugin run: sudo ./ci/github/helpers/install_rclone_docker_volume_plugin.bash - - name: setup python environment - uses: actions/setup-python@v5 + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images with: - python-version: ${{ matrix.python }} - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - name: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-backend - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 + artifact-name-pattern: 'backend' - name: install rclone run: sudo ./ci/github/helpers/install_rclone.bash - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - cache-dependency-glob: "**/director-v2/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/integration-testing/director-v2.bash install - name: test @@ -2188,39 +1743,17 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - name: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-backend - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 + cache-dependency-glob: "**/dynamic-sidecar/requirements/ci.txt" + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: 'backend' - name: install rclone run: sudo ./ci/github/helpers/install_rclone.bash - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - 
cache-dependency-glob: "**/dynamic-sidecar/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/integration-testing/dynamic-sidecar.bash install - name: test @@ -2255,37 +1788,15 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - name: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-backend - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/docker-api-proxy/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: 'backend' - name: install run: ./ci/github/integration-testing/docker-api-proxy.bash install - name: test @@ -2319,37 +1830,17 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: - driver: docker-container + python-version: ${{ matrix.python }} + cache-dependency-glob: "**/simcore-sdk/requirements/ci.txt" - name: install rclone run: sudo ./ci/github/helpers/install_rclone.bash - - name: setup python environment - uses: actions/setup-python@v5 + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images with: - python-version: ${{ matrix.python }} - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - name: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-backend - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false - cache-dependency-glob: "**/simcore-sdk/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + artifact-name-pattern: 'backend' - name: install run: ./ci/github/integration-testing/simcore-sdk.bash install - name: test @@ -2407,37 +1898,15 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: 
./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - pattern: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-* - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/public-api/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: '*' - name: install env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -2467,37 +1936,15 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - pattern: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-* - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/swarm-deploy/requirements/ci.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: '*' - name: install run: ./ci/github/system-testing/swarm-deploy.bash install - name: test @@ -2533,42 +1980,20 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} + cache-dependency-glob: "**/e2e/requirements/requirements.txt" - uses: actions/setup-node@v4.1.0 with: node-version: ${{ matrix.node }} cache: "npm" cache-dependency-path: "tests/e2e/package-lock.json" - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - pattern: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-* - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: 
"0.6.x" - enable-cache: false - cache-dependency-glob: "**/e2e/requirements/requirements.txt" - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build - - name: show system version - run: ./ci/helpers/show_system_versions.bash + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: '*' - name: setup run: ./ci/github/system-testing/e2e.bash install - name: test @@ -2615,34 +2040,17 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/e2e-playwright/requirements/ci.txt" - - name: expose github runtime for buildx - uses: crazy-max/ghaction-github-runtime@v3 - - name: download docker images - uses: actions/download-artifact@v4 - with: - pattern: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-* - path: /${{ runner.temp }}/build - - name: load docker images - run: make load-images local-src=/${{ runner.temp }}/build + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: '*' - name: prepare devenv run: make devenv - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: setup run: | ./ci/github/system-testing/e2e-playwright.bash install @@ -2677,23 +2085,11 @@ jobs: fail-fast: false steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: setup python environment - uses: actions/setup-python@v5 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: python-version: ${{ matrix.python }} - - name: install uv - uses: astral-sh/setup-uv@v6 - with: - version: "0.6.x" - enable-cache: false cache-dependency-glob: "**/environment-setup/requirements/ci.txt" - - name: show system version - run: ./ci/helpers/show_system_versions.bash - name: install run: ./ci/github/system-testing/environment-setup.bash install - name: test @@ -2741,30 +2137,16 @@ jobs: DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} steps: - uses: actions/checkout@v4 - - name: setup docker buildx - id: buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - - name: install uv - uses: astral-sh/setup-uv@v6 + - name: Setup SimCore environment + uses: ./.github/actions/setup-simcore-env with: - version: "0.6.x" - enable-cache: false + python-version: ${{ matrix.python }} cache-dependency-glob: "**/e2e-playwright/requirements/ci.txt" - # FIXME: Workaround for https://github.com/actions/download-artifact/issues/249 - - name: download docker images with retry - uses: Wandalen/wretry.action@master - with: - action: actions/download-artifact@v4 - with: | - pattern: docker-buildx-images-${{ runner.os }}-${{ github.sha }}-* - path: /${{ runner.temp }}/build - attempt_limit: 5 - attempt_delay: 1000 - - name: load docker images - run: | - make load-images local-src=/${{ runner.temp }}/build + show-system-versions: false + - name: Download and load Docker images + uses: ./.github/actions/download-load-docker-images + with: + artifact-name-pattern: '*' 
- name: set owner variable run: echo "OWNER=${GITHUB_REPOSITORY%/*}" >> $GITHUB_ENV - if: github.ref == 'refs/heads/master' diff --git a/mypy.ini b/mypy.ini index 9df50ed1d52..59dca99ec8c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -11,13 +11,20 @@ namespace_packages = True no_implicit_reexport = True # NOTE: this crashes mypy when declared here, therefore it is declared in the setup.cfg files # plugins = pydantic.mypy +cache_dir = .mypy_cache +error_summary = True +incremental = True +sqlite_cache = True +local_partial_types = True +no_implicit_optional = True +pretty = True python_version = 3.11 show_column_numbers = True show_error_context = False strict_optional = True -; no_implicit_optional = True warn_redundant_casts = True warn_return_any = True +warn_unreachable = True warn_unused_configs = True warn_unused_ignores = True @@ -32,3 +39,7 @@ warn_untyped_fields = True # Per-module options (one is kept as example): ; [mypy-aio-pika.*] ; ignore_missing_imports = True + +# Ignore specific error codes in alembic migration files +[mypy-*.migration.versions.*] +disable_error_code = var-annotated diff --git a/packages/aws-library/src/aws_library/s3/_client.py b/packages/aws-library/src/aws_library/s3/_client.py index 69dba4fa343..ca4df85f03c 100644 --- a/packages/aws-library/src/aws_library/s3/_client.py +++ b/packages/aws-library/src/aws_library/s3/_client.py @@ -124,7 +124,7 @@ async def close(self) -> None: async def http_check_bucket_connected(self, *, bucket: S3BucketName) -> bool: with log_catch(_logger, reraise=False): return await self.bucket_exists(bucket=bucket) - return False + return False # type: ignore[unreachable] @s3_exception_handler(_logger) async def create_bucket( diff --git a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py index ea9292d483c..3785b9790c6 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py @@ -21,8 +21,8 @@ def topic_name() -> str: def _dask_key_to_dask_task_id(key: dask.typing.Key) -> str: - if isinstance(key, bytes): - return key.decode("utf-8") + if isinstance(key, bytes): # type: ignore[unreachable] + return key.decode("utf-8") # type: ignore[unreachable] if isinstance(key, tuple): return "(" + ", ".join(_dask_key_to_dask_task_id(k) for k in key) + ")" return f"{key}" diff --git a/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_scheduler_plugin.py b/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_scheduler_plugin.py index 69fbc35e21e..be3c8d730a4 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_scheduler_plugin.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_scheduler_plugin.py @@ -14,11 +14,11 @@ class TaskLifecycleSchedulerPlugin(SchedulerPlugin): def __init__(self) -> None: - self.scheduler = None + self.scheduler: Scheduler | None = None _logger.info("initialized TaskLifecycleSchedulerPlugin") async def start(self, scheduler: Scheduler) -> None: - self.scheduler = scheduler # type: ignore[assignment] + self.scheduler = scheduler _logger.info("started TaskLifecycleSchedulerPlugin") def transition( diff --git 
a/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_worker_plugin.py b/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_worker_plugin.py index ebc6aabcad8..958e77720b1 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_worker_plugin.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/plugins/task_life_cycle_worker_plugin.py @@ -15,12 +15,12 @@ class TaskLifecycleWorkerPlugin(WorkerPlugin): def __init__(self) -> None: - self._worker = None + self._worker: Worker | None = None _logger.info("TaskLifecycleWorkerPlugin initialized") def setup(self, worker: Worker) -> Awaitable[None]: async def _() -> None: - self._worker = worker # type: ignore[assignment] + self._worker = worker _logger.info("TaskLifecycleWorkerPlugin setup completed") return _() diff --git a/packages/models-library/src/models_library/function_services_catalog/_utils.py b/packages/models-library/src/models_library/function_services_catalog/_utils.py index a58a524d094..af7e8a04188 100644 --- a/packages/models-library/src/models_library/function_services_catalog/_utils.py +++ b/packages/models-library/src/models_library/function_services_catalog/_utils.py @@ -52,7 +52,7 @@ def add( raises ValueError """ if not isinstance(meta, ServiceMetaDataPublished): - msg = f"Expected ServiceDockerData, got {type(meta)}" + msg = f"Expected ServiceDockerData, got {type(meta)}" # type: ignore[unreachable] raise ValueError(msg) # ensure unique diff --git a/packages/models-library/src/models_library/progress_bar.py b/packages/models-library/src/models_library/progress_bar.py index ad8130570e5..13eb94152dc 100644 --- a/packages/models-library/src/models_library/progress_bar.py +++ b/packages/models-library/src/models_library/progress_bar.py @@ -1,4 +1,4 @@ -from typing import Literal, TypeAlias +from typing import Final, Literal, TypeAlias from pydantic import BaseModel, ConfigDict @@ -43,7 +43,7 @@ class ProgressStructuredMessage(BaseModel): ) -UNITLESS = None +UNITLESS: Final[None] = None class ProgressReport(BaseModel): @@ -96,7 +96,13 @@ def composed_message(self) -> str: { "actual_value": 0.3, "total": 1.0, - "message": ProgressStructuredMessage.model_config["json_schema_extra"]["examples"][2], # type: ignore [index] + "message": ProgressStructuredMessage.model_config[ # type: ignore [index] + "json_schema_extra" + ][ + "examples" + ][ + 2 # type: ignore [index] + ], }, ] }, diff --git a/packages/models-library/src/models_library/service_settings_labels.py b/packages/models-library/src/models_library/service_settings_labels.py index b3e1956caba..1d7c5328656 100644 --- a/packages/models-library/src/models_library/service_settings_labels.py +++ b/packages/models-library/src/models_library/service_settings_labels.py @@ -518,7 +518,7 @@ def _ensure_callbacks_mapping_container_names_defined_in_compose_spec( cls, v: CallbacksMapping, info: ValidationInfo ): if v is None: - return {} + return {} # type: ignore[unreachable] defined_services: set[str] = {x.service for x in v.before_shutdown} if v.metrics: diff --git a/packages/models-library/src/models_library/services_types.py b/packages/models-library/src/models_library/services_types.py index b6689fdf888..faa883460dc 100644 --- a/packages/models-library/src/models_library/services_types.py +++ b/packages/models-library/src/models_library/services_types.py @@ -97,5 +97,5 @@ def validate(cls, v: "ServiceRunID | str", _: ValidationInfo) -> "ServiceRunID": 
return v if isinstance(v, str): return cls(v) - msg = f"Invalid value for RunID: {v}" + msg = f"Invalid value for RunID: {v}" # type: ignore[unreachable] raise TypeError(msg) diff --git a/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py b/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py index 5eac7c1b2f1..d5f508a2d28 100644 --- a/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py +++ b/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py @@ -5,15 +5,16 @@ # import dataclasses from collections import defaultdict, deque +from collections.abc import Callable from enum import Enum from pathlib import PurePath from types import GeneratorType -from typing import Any, Callable, Union, get_origin +from typing import Annotated, Any, Union, get_origin from common_library.json_serialization import ENCODERS_BY_TYPE from pydantic import BaseModel from pydantic_core import PydanticUndefined, PydanticUndefinedType -from typing_extensions import Annotated, Doc +from typing_extensions import Doc Undefined = PydanticUndefined UndefinedType = PydanticUndefinedType @@ -142,14 +143,13 @@ def jsonable_encoder( if custom_encoder: if type(obj) in custom_encoder: return custom_encoder[type(obj)](obj) - else: - for encoder_type, encoder_instance in custom_encoder.items(): - if isinstance(obj, encoder_type): - return encoder_instance(obj) - if include is not None and not isinstance(include, (set, dict)): - include = set(include) - if exclude is not None and not isinstance(exclude, (set, dict)): - exclude = set(exclude) + for encoder_type, encoder_instance in custom_encoder.items(): + if isinstance(obj, encoder_type): + return encoder_instance(obj) + if include is not None and not isinstance(include, set | dict): + include = set(include) # type: ignore[unreachable] + if exclude is not None and not isinstance(exclude, set | dict): + exclude = set(exclude) # type: ignore[unreachable] if isinstance(obj, BaseModel): obj_dict = BaseModel.model_dump( obj, diff --git a/packages/models-library/src/models_library/utils/common_validators.py b/packages/models-library/src/models_library/utils/common_validators.py index c55db09c5f5..b1548bd12dd 100644 --- a/packages/models-library/src/models_library/utils/common_validators.py +++ b/packages/models-library/src/models_library/utils/common_validators.py @@ -27,7 +27,7 @@ class MyModel(BaseModel): def trim_string_before(max_length: int) -> BeforeValidator: - def _trim(value: str): + def _trim(value: str | Any) -> str | Any: if isinstance(value, str): return value[:max_length] return value @@ -125,14 +125,16 @@ def _validator(cls: type[BaseModel], values): } if not functools.reduce(operator.xor, (v is not None for v in got.values())): - msg = f"Either { ' or '.join(got.keys()) } must be set, but not both. Got {got}" + msg = ( + f"Either {' or '.join(got.keys())} must be set, but not both. 
Got {got}" + ) raise ValueError(msg) return values return _validator -def to_camel_recursive(data: dict[str, Any]) -> dict[str, Any]: +def to_camel_recursive(data: dict[str, Any] | Any) -> dict[str, Any] | Any: """Recursively convert dictionary keys to camelCase""" if not isinstance(data, dict): return data # Return as-is if it's not a dictionary diff --git a/packages/service-library/src/servicelib/aiohttp/application_setup.py b/packages/service-library/src/servicelib/aiohttp/application_setup.py index 0d52603f965..7f3405e4e70 100644 --- a/packages/service-library/src/servicelib/aiohttp/application_setup.py +++ b/packages/service-library/src/servicelib/aiohttp/application_setup.py @@ -17,19 +17,17 @@ log = logging.getLogger(__name__) -APP_SETUP_COMPLETED_KEY = f"{__name__ }.setup" +APP_SETUP_COMPLETED_KEY = f"{__name__}.setup" class _SetupFunc(Protocol): __name__: str - def __call__(self, app: web.Application, *args: Any, **kwds: Any) -> bool: - ... + def __call__(self, app: web.Application, *args: Any, **kwds: Any) -> bool: ... class _ApplicationSettings(Protocol): - def is_enabled(self, field_name: str) -> bool: - ... + def is_enabled(self, field_name: str) -> bool: ... class ModuleCategory(Enum): @@ -46,12 +44,10 @@ def __init__(self, *, reason) -> None: super().__init__(reason) -class ApplicationSetupError(Exception): - ... +class ApplicationSetupError(Exception): ... -class DependencyError(ApplicationSetupError): - ... +class DependencyError(ApplicationSetupError): ... class SetupMetadataDict(TypedDict): @@ -91,9 +87,9 @@ def _is_addon_enabled_from_config( cfg: dict[str, Any], dotted_section: str, section ) -> bool: try: - parts: list[str] = dotted_section.split(".") + parts = dotted_section.split(".") # navigates app_config (cfg) searching for section - searched_config = deepcopy(cfg) + searched_config: Any = deepcopy(cfg) for part in parts: if section and part == "enabled": # if section exists, no need to explicitly enable it @@ -278,7 +274,7 @@ def _wrapper(app: web.Application, *args, **kargs) -> bool: # post-setup if completed is None: - completed = True + completed = True # type: ignore[unreachable] if completed: # registers completed setup app[APP_SETUP_COMPLETED_KEY].append(module_name) diff --git a/packages/service-library/src/servicelib/aiohttp/docker_utils.py b/packages/service-library/src/servicelib/aiohttp/docker_utils.py index 8e9393e1e69..94b39a89167 100644 --- a/packages/service-library/src/servicelib/aiohttp/docker_utils.py +++ b/packages/service-library/src/servicelib/aiohttp/docker_utils.py @@ -97,4 +97,4 @@ async def retrieve_image_layer_information( return TypeAdapter(DockerImageManifestsV2).validate_python( json_response ) - return None + return None # type: ignore[unreachable] diff --git a/packages/service-library/src/servicelib/deferred_tasks/_deferred_manager.py b/packages/service-library/src/servicelib/deferred_tasks/_deferred_manager.py index b49990a7834..19c8dde7fc5 100644 --- a/packages/service-library/src/servicelib/deferred_tasks/_deferred_manager.py +++ b/packages/service-library/src/servicelib/deferred_tasks/_deferred_manager.py @@ -122,7 +122,6 @@ def __init__( max_workers: NonNegativeInt = _DEFAULT_DEFERRED_MANAGER_WORKER_SLOTS, delay_when_requeuing_message: timedelta = _DEFAULT_DELAY_BEFORE_NACK, ) -> None: - self._task_tracker: BaseTaskTracker = RedisTaskTracker(scheduler_redis_sdk) self._worker_tracker = WorkerTracker(max_workers) @@ -216,7 +215,7 @@ def un_patch_base_deferred_handlers(cls) -> None: ) if isinstance(subclass.start, 
_PatchStartDeferred): - with log_context( + with log_context( # type: ignore[unreachable] _logger, logging.DEBUG, f"Remove `start` patch for {class_unique_reference}", @@ -345,7 +344,6 @@ async def __get_task_schedule( async def _fs_handle_scheduled( # pylint:disable=method-hidden self, task_uid: TaskUID ) -> None: - _log_state(TaskState.SCHEDULED, task_uid) task_schedule = await self.__get_task_schedule( @@ -425,7 +423,7 @@ async def _fs_handle_worker( # pylint:disable=method-hidden await self.__publish_to_queue(task_uid, _FastStreamRabbitQueue.ERROR_RESULT) return - msg = ( + msg = ( # type: ignore[unreachable] f"Unexpected state, result type={type(task_schedule.result)} should be an instance " f"of {TaskResultSuccess.__name__}, {TaskResultError.__name__} or {TaskResultCancelledError.__name__}" ) diff --git a/packages/service-library/src/servicelib/fastapi/docker_utils.py b/packages/service-library/src/servicelib/fastapi/docker_utils.py index 20900916963..093cb136a08 100644 --- a/packages/service-library/src/servicelib/fastapi/docker_utils.py +++ b/packages/service-library/src/servicelib/fastapi/docker_utils.py @@ -104,7 +104,7 @@ async def retrieve_image_layer_information( return TypeAdapter(DockerImageManifestsV2).validate_python( json_response ) - return None + return None # type: ignore[unreachable] async def pull_images( @@ -130,7 +130,6 @@ async def pull_images( progress_unit="Byte", description=f"pulling {len(images)} images", ) as pbar: - await asyncio.gather( *[ pull_image( diff --git a/packages/service-library/src/servicelib/fastapi/lifespan_utils.py b/packages/service-library/src/servicelib/fastapi/lifespan_utils.py index 4ccf0410930..348342bce19 100644 --- a/packages/service-library/src/servicelib/fastapi/lifespan_utils.py +++ b/packages/service-library/src/servicelib/fastapi/lifespan_utils.py @@ -37,7 +37,8 @@ def is_lifespan_called(state: State, lifespan_name: str) -> bool: # Valid signatures include: `()`, `(app)`, `(app, state)`, or even `(_, state)`. # It's easy to accidentally swap or misplace these arguments. assert not isinstance( # nosec - state, FastAPI + state, # type: ignore[unreachable] + FastAPI, ), "Did you swap arguments? `lifespan(app, state)` expects (app: FastAPI, state: State)" called_lifespans = state.get(_CALLED_LIFESPANS_KEY, set()) diff --git a/packages/service-library/src/servicelib/logging_utils.py b/packages/service-library/src/servicelib/logging_utils.py index 7ef3bc28e94..00f84a6b060 100644 --- a/packages/service-library/src/servicelib/logging_utils.py +++ b/packages/service-library/src/servicelib/logging_utils.py @@ -9,7 +9,7 @@ import functools import logging from asyncio import iscoroutinefunction -from collections.abc import Callable, Iterator +from collections.abc import Callable, Generator, Iterator from contextlib import contextmanager from datetime import datetime from inspect import getframeinfo, stack @@ -339,7 +339,6 @@ def log_decorator( logger_obj = logger or _logger def _decorator(func_or_coro: F) -> F: - _log_exc_kwargs = LogExceptionsKwargsDict( logger=logger_obj, level=level, @@ -378,7 +377,14 @@ def _sync_wrapper(*args: Any, **kwargs: Any) -> Any: @contextmanager -def log_catch(logger: logging.Logger, *, reraise: bool = True) -> Iterator[None]: +def log_catch( + logger: logging.Logger, *, reraise: bool = True +) -> Generator[None, None, None]: + """Context manager that catches and logs exceptions. + + When reraise=False, exceptions are caught and suppressed, allowing + execution to continue after the with block. 
+ """ try: yield except asyncio.CancelledError: @@ -388,6 +394,7 @@ def log_catch(logger: logging.Logger, *, reraise: bool = True) -> Iterator[None] logger.exception("Unhandled exception:") if reraise: raise exc from exc + # When reraise=False, exceptions are suppressed and execution continues LogLevelInt: TypeAlias = int @@ -420,7 +427,7 @@ def log_context( logger.log(level, log_msg, *args, **kwargs, stacklevel=stackelvel) yield duration = ( - f" in {(datetime.now() - start ).total_seconds()}s" # noqa: DTZ005 + f" in {(datetime.now() - start).total_seconds()}s" # noqa: DTZ005 if log_duration else "" ) diff --git a/packages/service-library/src/servicelib/pools.py b/packages/service-library/src/servicelib/pools.py index 6899950055a..7d3b946c060 100644 --- a/packages/service-library/src/servicelib/pools.py +++ b/packages/service-library/src/servicelib/pools.py @@ -1,11 +1,11 @@ +from collections.abc import Iterator from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor from contextlib import contextmanager -from typing import Iterator # only gets created on use and is guaranteed to be the s # ame for the entire lifetime of the application -__shared_process_pool_executor = {} -__shared_thread_pool_executor = {} +__shared_process_pool_executor: dict[str, ProcessPoolExecutor] = {} +__shared_thread_pool_executor: dict[str, ThreadPoolExecutor] = {} def _get_shared_process_pool_executor(**kwargs) -> ProcessPoolExecutor: diff --git a/packages/service-library/src/servicelib/rabbitmq/_client_base.py b/packages/service-library/src/servicelib/rabbitmq/_client_base.py index 69720659e50..cbc0352812b 100644 --- a/packages/service-library/src/servicelib/rabbitmq/_client_base.py +++ b/packages/service-library/src/servicelib/rabbitmq/_client_base.py @@ -67,7 +67,7 @@ async def ping(self) -> bool: async with await aio_pika.connect(self.settings.dsn, timeout=1): ... return True - return False + return False # type: ignore[unreachable] @abstractmethod async def close(self) -> None: ... 
diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/resource_usage_tracker/pricing_plans.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/resource_usage_tracker/pricing_plans.py index 4faa6fa3f0c..17f259f750e 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/resource_usage_tracker/pricing_plans.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/resource_usage_tracker/pricing_plans.py @@ -114,7 +114,7 @@ async def list_connected_services_to_pricing_plan_by_pricing_plan( product_name: ProductName, pricing_plan_id: PricingPlanId, ) -> list[PricingPlanToServiceGet]: - result: RutPricingPlanGet = await rabbitmq_rpc_client.request( + result = await rabbitmq_rpc_client.request( RESOURCE_USAGE_TRACKER_RPC_NAMESPACE, _RPC_METHOD_NAME_ADAPTER.validate_python( "list_connected_services_to_pricing_plan_by_pricing_plan" diff --git a/packages/service-library/src/servicelib/redis/_client.py b/packages/service-library/src/servicelib/redis/_client.py index c2a08154110..b9833fb5287 100644 --- a/packages/service-library/src/servicelib/redis/_client.py +++ b/packages/service-library/src/servicelib/redis/_client.py @@ -99,7 +99,7 @@ async def ping(self) -> bool: # NOTE: retry_* input parameters from aioredis.from_url do not apply for the ping call await self._client.ping() return True - return False + return False # type: ignore[unreachable] @property def is_healthy(self) -> bool: diff --git a/packages/service-library/src/servicelib/rest_responses.py b/packages/service-library/src/servicelib/rest_responses.py index 9dc32ed9e5a..6ff642175bc 100644 --- a/packages/service-library/src/servicelib/rest_responses.py +++ b/packages/service-library/src/servicelib/rest_responses.py @@ -25,7 +25,7 @@ def is_enveloped(payload: Mapping | str) -> bool: return is_enveloped_from_map(payload) if isinstance(payload, str): return is_enveloped_from_text(text=payload) - return False + return False # type: ignore[unreachable] def unwrap_envelope(payload: Mapping[str, Any]) -> tuple: diff --git a/packages/settings-library/src/settings_library/utils_service.py b/packages/settings-library/src/settings_library/utils_service.py index 8c23df45a55..3b1116a5cc1 100644 --- a/packages/settings-library/src/settings_library/utils_service.py +++ b/packages/settings-library/src/settings_library/utils_service.py @@ -1,8 +1,7 @@ -""" Helpers to build settings for services with http API +"""Helpers to build settings for services with http API""" - -""" from enum import Enum, auto +from typing import Any from pydantic.networks import AnyUrl from pydantic.types import SecretStr @@ -97,7 +96,7 @@ def _compose_url( port_value = self._safe_getattr(f"{prefix}_PORT", port) - parts = { + parts: dict[str, Any] = { "scheme": ( "https" if self._safe_getattr(f"{prefix}_SECURE", URLPart.OPTIONAL) @@ -114,21 +113,16 @@ def _compose_url( parts["path"] = f"{v}" # post process parts dict - kwargs = {} - for k, v in parts.items(): # type: ignore[assignment] - if isinstance(v, SecretStr): - value = v.get_secret_value() - else: - value = v + kwargs: dict[str, Any] = {} + for k, v in parts.items(): + value = v.get_secret_value() if isinstance(v, SecretStr) else v if value is not None: kwargs[k] = value - assert all( - isinstance(v, (str, int)) or v is None for v in kwargs.values() - ) # nosec + assert all(isinstance(v, str | int) or v is None for v in kwargs.values()) # nosec - composed_url: str = str(AnyUrl.build(**kwargs)) # type: ignore[arg-type] # pylint: 
disable=missing-kwoa + composed_url = str(AnyUrl.build(**kwargs)) # pylint: disable=missing-kwoa return composed_url.rstrip("/") def _build_api_base_url(self, *, prefix: str) -> str: diff --git a/scripts/common.Makefile b/scripts/common.Makefile index 0dc78b889dd..dbaddbfce93 100644 --- a/scripts/common.Makefile +++ b/scripts/common.Makefile @@ -173,6 +173,21 @@ mypy: $(REPO_BASE_DIR)/mypy.ini ## runs mypy python static type-checker on this $(CURDIR)/src +.PHONY: mypy-debug +mypy-debug: $(REPO_BASE_DIR)/mypy.ini ## runs mypy with profiling and reporting enabled + @rm -rf $(CURDIR)/.mypy-report + @mkdir -p $(CURDIR)/.mypy-report + @mypy \ + --config-file=$(REPO_BASE_DIR)/mypy.ini \ + --show-error-context \ + --show-traceback \ + --verbose \ + --linecount-report $(CURDIR)/.mypy-report \ + --any-exprs-report $(CURDIR)/.mypy-report \ + $(CURDIR)/src \ + | tee $(CURDIR)/.mypy-report/mypy.logs + + .PHONY: codestyle codestyle codestyle-ci: ## enforces codestyle (isort & black) finally runs pylint & mypy @$(SCRIPTS_DIR)/codestyle.bash $(if $(findstring -ci,$@),ci,development) $(shell basename "${SRC_DIR}") diff --git a/services/api-server/src/simcore_service_api_server/api/routes/files.py b/services/api-server/src/simcore_service_api_server/api/routes/files.py index a220edacb23..3aa09796e63 100644 --- a/services/api-server/src/simcore_service_api_server/api/routes/files.py +++ b/services/api-server/src/simcore_service_api_server/api/routes/files.py @@ -84,10 +84,10 @@ async def _get_file( """Gets metadata for a given file resource""" try: - stored_files: list[StorageFileMetaData] = ( - await storage_client.search_owned_files( - user_id=user_id, file_id=file_id, limit=1 - ) + stored_files: list[ + StorageFileMetaData + ] = await storage_client.search_owned_files( + user_id=user_id, file_id=file_id, limit=1 ) if not stored_files: msg = "Not found in storage" @@ -128,7 +128,7 @@ async def _create_domain_file( project_id=project.uuid, node_id=NodeID(node_id) ) else: - err_msg = f"Invalid client_file type passed: {type(client_file)=}" + err_msg = f"Invalid client_file type passed: {type(client_file)=}" # type: ignore[unreachable] raise TypeError(err_msg) return file diff --git a/services/api-server/src/simcore_service_api_server/api/routes/functions_routes.py b/services/api-server/src/simcore_service_api_server/api/routes/functions_routes.py index bbfeefc4efc..7153112e64a 100644 --- a/services/api-server/src/simcore_service_api_server/api/routes/functions_routes.py +++ b/services/api-server/src/simcore_service_api_server/api/routes/functions_routes.py @@ -223,9 +223,9 @@ async def update_function_title( returned_function = await wb_api_rpc.update_function_title( function_id=function_id, title=title, user_id=user_id, product_name=product_name ) - assert ( - returned_function.title == title - ), f"Function title was not updated. Expected {title} but got {returned_function.title}" # nosec + assert returned_function.title == title, ( + f"Function title was not updated. Expected {title} but got {returned_function.title}" + ) # nosec return returned_function @@ -251,9 +251,9 @@ async def update_function_description( user_id=user_id, product_name=product_name, ) - assert ( - returned_function.description == description - ), f"Function description was not updated. Expected {description} but got {returned_function.description}" # nosec + assert returned_function.description == description, ( + f"Function description was not updated. 
Expected {description} but got {returned_function.description}" + ) # nosec return returned_function @@ -337,7 +337,7 @@ async def validate_function_inputs( ) if function.input_schema is None or function.input_schema.schema_content is None: - return True, "No input schema defined for this function" + return True, "No input schema defined for this function" # type: ignore[unreachable] if function.input_schema.schema_class == FunctionSchemaClass.json_schema: try: @@ -348,7 +348,7 @@ async def validate_function_inputs( return False, str(err) return True, "Inputs are valid" - return ( + return ( # type: ignore[unreachable] False, f"Unsupported function schema class {function.input_schema.schema_class}", ) @@ -378,7 +378,6 @@ async def run_function( # noqa: PLR0913 x_simcore_parent_project_uuid: Annotated[ProjectID | Literal["null"], Header()], x_simcore_parent_node_id: Annotated[NodeID | Literal["null"], Header()], ) -> RegisteredFunctionJob: - parent_project_uuid = ( x_simcore_parent_project_uuid if isinstance(x_simcore_parent_project_uuid, ProjectID) diff --git a/services/api-server/src/simcore_service_api_server/exceptions/service_errors_utils.py b/services/api-server/src/simcore_service_api_server/exceptions/service_errors_utils.py index 90bdc27bad4..56456daca4a 100644 --- a/services/api-server/src/simcore_service_api_server/exceptions/service_errors_utils.py +++ b/services/api-server/src/simcore_service_api_server/exceptions/service_errors_utils.py @@ -113,7 +113,6 @@ def service_exception_handler( headers: dict[str, str] = {} try: - yield except ValidationError as exc: @@ -126,7 +125,6 @@ def service_exception_handler( ) from exc except httpx.HTTPStatusError as exc: - status_code, detail, headers = _get_http_exception_kwargs( service_name, exc, http_status_map=http_status_map, **context ) @@ -142,11 +140,14 @@ def service_exception_handler( status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="Request to backend timed out", ) from exc - if type(exc) in { - asyncio.exceptions.CancelledError, - RuntimeError, - RemoteMethodNotRegisteredError, - }: # https://github.com/ITISFoundation/osparc-simcore/blob/master/packages/service-library/src/servicelib/rabbitmq/_client_rpc.py#L76 + if ( + type(exc) + in { + asyncio.exceptions.CancelledError, + RuntimeError, + RemoteMethodNotRegisteredError, + } + ): # https://github.com/ITISFoundation/osparc-simcore/blob/master/packages/service-library/src/servicelib/rabbitmq/_client_rpc.py#L76 raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, detail="Request to backend failed", @@ -193,7 +194,7 @@ def _assert_correct_kwargs(func: Callable, exception_types: set[BackEndErrorType } for exc_type in exception_types: assert isinstance(exc_type, type) # nosec - _exception_inputs = exc_type.named_fields() - assert _exception_inputs.issubset( - _required_kwargs - ), f"{_exception_inputs - _required_kwargs} are inputs to `{exc_type.__name__}.msg_template` but not a kwarg in the decorated coroutine `{func.__module__}.{func.__name__}`" # nosec + _exception_inputs = exc_type.named_fields() # type: ignore[unreachable] + assert _exception_inputs.issubset(_required_kwargs), ( + f"{_exception_inputs - _required_kwargs} are inputs to `{exc_type.__name__}.msg_template` but not a kwarg in the decorated coroutine `{func.__module__}.{func.__name__}`" + ) # nosec diff --git a/services/api-server/src/simcore_service_api_server/services_http/log_streaming.py b/services/api-server/src/simcore_service_api_server/services_http/log_streaming.py index 5781659d1c7..3c26d13d2d6 
100644 --- a/services/api-server/src/simcore_service_api_server/services_http/log_streaming.py +++ b/services/api-server/src/simcore_service_api_server/services_http/log_streaming.py @@ -66,7 +66,7 @@ async def _distribute_logs(self, data: bytes): raise LogStreamerNotRegisteredError(job_id=item.job_id, details=msg) await queue.put(item) return True - return False + return False # type: ignore[unreachable] async def register(self, job_id: JobID, queue: Queue[JobLog]): _logger.debug("Registering log streamer for job_id=%s", job_id) diff --git a/services/api-server/src/simcore_service_api_server/services_http/study_job_models_converters.py b/services/api-server/src/simcore_service_api_server/services_http/study_job_models_converters.py index 99bb5a59ae9..cfd3a9c8df5 100644 --- a/services/api-server/src/simcore_service_api_server/services_http/study_job_models_converters.py +++ b/services/api-server/src/simcore_service_api_server/services_http/study_job_models_converters.py @@ -40,7 +40,7 @@ def get_project_and_file_inputs_from_job_inputs( for name, value in job_inputs.values.items(): if isinstance(value, File): # FIXME: ensure this aligns with storage policy - file_inputs[InputID(name)] = SimCoreFileLink( + file_inputs[InputID(name)] = SimCoreFileLink( # type: ignore[unreachable] store=0, path=SimcoreS3FileID(f"api/{value.id}/{value.filename}"), label=value.filename, diff --git a/services/autoscaling/src/simcore_service_autoscaling/core/application.py b/services/autoscaling/src/simcore_service_autoscaling/core/application.py index 95fcff3b4b7..22548e7f8a3 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/core/application.py +++ b/services/autoscaling/src/simcore_service_autoscaling/core/application.py @@ -17,8 +17,12 @@ APP_STARTED_DYNAMIC_BANNER_MSG, ) from ..api.routes import setup_api_routes -from ..modules.auto_scaling_task import setup as setup_auto_scaler_background_task -from ..modules.buffer_machines_pool_task import setup as setup_buffer_machines_pool_task +from ..modules.cluster_scaling.auto_scaling_task import ( + setup as setup_auto_scaler_background_task, +) +from ..modules.cluster_scaling.buffer_machines_pool_task import ( + setup as setup_buffer_machines_pool_task, +) from ..modules.docker import setup as setup_docker from ..modules.ec2 import setup as setup_ec2 from ..modules.instrumentation import setup as setup_instrumentation diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py deleted file mode 100644 index b9df042c622..00000000000 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py +++ /dev/null @@ -1,80 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass - -from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources -from fastapi import FastAPI -from models_library.docker import DockerLabelKey -from models_library.generated_models.docker_rest_api import Node as DockerNode -from types_aiobotocore_ec2.literals import InstanceTypeType - -from ..models import AssociatedInstance -from ..utils import utils_docker - - -@dataclass -class BaseAutoscaling(ABC): # pragma: no cover - @staticmethod - @abstractmethod - async def get_monitored_nodes(app: FastAPI) -> list[DockerNode]: ... - - @staticmethod - @abstractmethod - def get_ec2_tags(app: FastAPI) -> EC2Tags: ... 
- - @staticmethod - @abstractmethod - def get_new_node_docker_tags( - app: FastAPI, ec2_instance_data: EC2InstanceData - ) -> dict[DockerLabelKey, str]: ... - - @staticmethod - @abstractmethod - async def list_unrunnable_tasks(app: FastAPI) -> list: ... - - @staticmethod - @abstractmethod - def get_task_required_resources(task) -> Resources: ... - - @staticmethod - @abstractmethod - async def get_task_defined_instance( - app: FastAPI, task - ) -> InstanceTypeType | None: ... - - @staticmethod - @abstractmethod - async def compute_node_used_resources( - app: FastAPI, instance: AssociatedInstance - ) -> Resources: ... - - @staticmethod - @abstractmethod - async def compute_cluster_used_resources( - app: FastAPI, instances: list[AssociatedInstance] - ) -> Resources: ... - - @staticmethod - @abstractmethod - async def compute_cluster_total_resources( - app: FastAPI, instances: list[AssociatedInstance] - ) -> Resources: ... - - @staticmethod - @abstractmethod - async def is_instance_active( - app: FastAPI, instance: AssociatedInstance - ) -> bool: ... - - @staticmethod - @abstractmethod - async def is_instance_retired( - app: FastAPI, instance: AssociatedInstance - ) -> bool: ... - - @staticmethod - def is_instance_drained(instance: AssociatedInstance) -> bool: - return not utils_docker.is_node_osparc_ready(instance.node) - - @staticmethod - @abstractmethod - async def try_retire_nodes(app: FastAPI) -> None: ... diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/__init__.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py similarity index 96% rename from services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py rename to services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py index a3a34e7b5d0..3b0e1fc0666 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py @@ -17,29 +17,29 @@ ) from aws_library.ec2._errors import EC2TooManyInstancesError from fastapi import FastAPI -from models_library.generated_models.docker_rest_api import Node, NodeState +from models_library.generated_models.docker_rest_api import Node from models_library.rabbitmq_messages import ProgressType from servicelib.logging_utils import log_catch, log_context from servicelib.utils import limited_gather from servicelib.utils_formatting import timedelta_as_minute_second from types_aiobotocore_ec2.literals import InstanceTypeType -from ..constants import DOCKER_JOIN_COMMAND_EC2_TAG_KEY, DOCKER_JOIN_COMMAND_NAME -from ..core.errors import ( +from ...constants import DOCKER_JOIN_COMMAND_EC2_TAG_KEY, DOCKER_JOIN_COMMAND_NAME +from ...core.errors import ( Ec2InvalidDnsNameError, TaskBestFittingInstanceNotFoundError, TaskRequirementsAboveRequiredEC2InstanceTypeError, TaskRequiresUnauthorizedEC2InstanceTypeError, ) -from ..core.settings import ApplicationSettings, get_application_settings -from ..models import ( +from ...core.settings import ApplicationSettings, get_application_settings +from ...models import ( AssignedTasksToInstanceType, AssociatedInstance, Cluster, NonAssociatedInstance, ) -from ..utils 
import utils_docker, utils_ec2 -from ..utils.auto_scaling_core import ( +from ...utils import utils_docker, utils_ec2 +from ...utils.auto_scaling_core import ( associate_ec2_instances_with_nodes, ec2_startup_script, find_selected_instance_type_for_task, @@ -47,33 +47,28 @@ node_host_name_from_ec2_private_dns, sort_drained_nodes, ) -from ..utils.buffer_machines_pool_core import ( +from ...utils.buffer_machines_pool_core import ( get_activated_buffer_ec2_tags, get_deactivated_buffer_ec2_tags, is_buffer_machine, ) -from ..utils.rabbitmq import ( +from ...utils.rabbitmq import ( post_autoscaling_status_message, post_tasks_log_message, post_tasks_progress_message, ) -from .auto_scaling_mode_base import BaseAutoscaling -from .docker import get_docker_client -from .ec2 import get_ec2_client -from .instrumentation import get_instrumentation, has_instrumentation -from .ssm import get_ssm_client +from ..docker import get_docker_client +from ..ec2 import get_ec2_client +from ..instrumentation import get_instrumentation, has_instrumentation +from ..ssm import get_ssm_client +from ._provider_protocol import AutoscalingProvider _logger = logging.getLogger(__name__) -def _node_not_ready(node: Node) -> bool: - assert node.status # nosec - return bool(node.status.state != NodeState.ready) - - async def _analyze_current_cluster( app: FastAPI, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, allowed_instance_types: list[EC2InstanceType], ) -> Cluster: app_settings = get_application_settings(app) @@ -97,7 +92,7 @@ async def _analyze_current_cluster( buffer_ec2_instances = await get_ec2_client(app).get_instances( key_names=[app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME], - tags=get_deactivated_buffer_ec2_tags(app, auto_scaling_mode), + tags=get_deactivated_buffer_ec2_tags(auto_scaling_mode.get_ec2_tags(app)), state_names=["stopped"], ) @@ -141,7 +136,7 @@ async def _analyze_current_cluster( - node_used_resources, ) ) - elif auto_scaling_mode.is_instance_drained(instance): + elif utils_docker.is_instance_drained(instance): all_drained_nodes.append(instance) elif await auto_scaling_mode.is_instance_retired(app, instance): # it should be drained, but it is not, so we force it to be drained such that it might be re-used if needed @@ -166,7 +161,9 @@ async def _analyze_current_cluster( terminated_instances=[ NonAssociatedInstance(ec2_instance=i) for i in terminated_ec2_instances ], - disconnected_nodes=[n for n in docker_nodes if _node_not_ready(n)], + disconnected_nodes=[ + n for n in docker_nodes if not utils_docker.is_node_ready(n) + ], retired_nodes=retired_nodes, ) _logger.info("current state: %s", f"{cluster!r}") @@ -278,7 +275,7 @@ async def _make_pending_buffer_ec2s_join_cluster( async def _try_attach_pending_ec2s( app: FastAPI, cluster: Cluster, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, allowed_instance_types: list[EC2InstanceType], ) -> Cluster: """label the drained instances that connected to the swarm which are missing the monitoring labels""" @@ -424,7 +421,7 @@ async def _activate_drained_nodes( async def _start_warm_buffer_instances( - app: FastAPI, cluster: Cluster, auto_scaling_mode: BaseAutoscaling + app: FastAPI, cluster: Cluster, auto_scaling_mode: AutoscalingProvider ) -> Cluster: """starts warm buffer if there are assigned tasks, or if a hot buffer of the same type is needed""" @@ -479,7 +476,7 @@ async def _start_warm_buffer_instances( # NOTE: first start the instance and then set the tags in case the 
instance cannot start (e.g. InsufficientInstanceCapacity) await get_ec2_client(app).set_instances_tags( started_instances, - tags=get_activated_buffer_ec2_tags(app, auto_scaling_mode), + tags=get_activated_buffer_ec2_tags(auto_scaling_mode.get_ec2_tags(app)), ) started_instance_ids = [i.id for i in started_instances] @@ -547,7 +544,7 @@ async def _assign_tasks_to_current_cluster( app: FastAPI, tasks: list, cluster: Cluster, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, ) -> tuple[list, Cluster]: """ Evaluates whether a task can be executed on any instance within the cluster. If the task's resource requirements are met, the task is *denoted* as assigned to the cluster. @@ -605,7 +602,7 @@ async def _find_needed_instances( unassigned_tasks: list, available_ec2_types: list[EC2InstanceType], cluster: Cluster, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, ) -> dict[EC2InstanceType, int]: # 1. check first the pending task needs needed_new_instance_types_for_tasks: list[AssignedTasksToInstanceType] = [] @@ -634,8 +631,8 @@ async def _find_needed_instances( defined_ec2 = find_selected_instance_type_for_task( task_required_ec2_instance, available_ec2_types, - auto_scaling_mode, task, + auto_scaling_mode.get_task_required_resources(task), ) needed_new_instance_types_for_tasks.append( AssignedTasksToInstanceType( @@ -778,7 +775,7 @@ async def _launch_instances( app: FastAPI, needed_instances: dict[EC2InstanceType, int], tasks: list, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, ) -> list[EC2InstanceData]: ec2_client = get_ec2_client(app) app_settings = get_application_settings(app) @@ -844,7 +841,7 @@ async def _launch_instances( elif isinstance(r, list): new_pending_instances.extend(r) else: - new_pending_instances.append(r) + new_pending_instances.append(r) # type: ignore[unreachable] log_message = ( f"{sum(n for n in capped_needed_machines.values())} new machines launched" @@ -1150,7 +1147,7 @@ async def _drain_retired_nodes( async def _scale_down_unused_cluster_instances( app: FastAPI, cluster: Cluster, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, ) -> Cluster: await auto_scaling_mode.try_retire_nodes(app) cluster = await _deactivate_empty_nodes(app, cluster) @@ -1160,7 +1157,7 @@ async def _scale_down_unused_cluster_instances( async def _scale_up_cluster( app: FastAPI, cluster: Cluster, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, allowed_instance_types: list[EC2InstanceType], unassigned_tasks: list, ) -> Cluster: @@ -1212,7 +1209,7 @@ async def _scale_up_cluster( async def _autoscale_cluster( app: FastAPI, cluster: Cluster, - auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, allowed_instance_types: list[EC2InstanceType], ) -> Cluster: # 1. 
check if we have pending tasks @@ -1245,7 +1242,7 @@ async def _autoscale_cluster( async def _notify_autoscaling_status( - app: FastAPI, cluster: Cluster, auto_scaling_mode: BaseAutoscaling + app: FastAPI, cluster: Cluster, auto_scaling_mode: AutoscalingProvider ) -> None: monitored_instances = list( itertools.chain( @@ -1274,7 +1271,7 @@ async def _notify_autoscaling_status( async def auto_scale_cluster( - *, app: FastAPI, auto_scaling_mode: BaseAutoscaling + *, app: FastAPI, auto_scaling_mode: AutoscalingProvider ) -> None: """Check that there are no pending tasks requiring additional resources in the cluster (docker swarm) If there are such tasks, this method will allocate new machines in AWS to cope with diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_buffer_machines_pool_core.py similarity index 95% rename from services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py rename to services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_buffer_machines_pool_core.py index d9f1c550568..d79d725a3ea 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_buffer_machines_pool_core.py @@ -35,24 +35,24 @@ from servicelib.logging_utils import log_context from types_aiobotocore_ec2.literals import InstanceTypeType -from ..constants import ( +from ...constants import ( BUFFER_MACHINE_PULLING_COMMAND_ID_EC2_TAG_KEY, BUFFER_MACHINE_PULLING_EC2_TAG_KEY, DOCKER_PULL_COMMAND, PREPULL_COMMAND_NAME, ) -from ..core.settings import get_application_settings -from ..models import BufferPool, BufferPoolManager -from ..utils.auto_scaling_core import ec2_buffer_startup_script -from ..utils.buffer_machines_pool_core import ( +from ...core.settings import get_application_settings +from ...models import BufferPool, BufferPoolManager +from ...utils.auto_scaling_core import ec2_buffer_startup_script +from ...utils.buffer_machines_pool_core import ( dump_pre_pulled_images_as_tags, get_deactivated_buffer_ec2_tags, load_pre_pulled_images_from_tags, ) -from .auto_scaling_mode_base import BaseAutoscaling -from .ec2 import get_ec2_client -from .instrumentation import get_instrumentation, has_instrumentation -from .ssm import get_ssm_client +from ..ec2 import get_ec2_client +from ..instrumentation import get_instrumentation, has_instrumentation +from ..ssm import get_ssm_client +from ._provider_protocol import AutoscalingProvider _logger = logging.getLogger(__name__) @@ -111,7 +111,7 @@ async def _analyze_running_instance_state( async def _analyse_current_state( - app: FastAPI, *, auto_scaling_mode: BaseAutoscaling + app: FastAPI, *, auto_scaling_mode: AutoscalingProvider ) -> BufferPoolManager: ec2_client = get_ec2_client(app) app_settings = get_application_settings(app) @@ -119,7 +119,7 @@ async def _analyse_current_state( all_buffer_instances = await ec2_client.get_instances( key_names=[app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME], - tags=get_deactivated_buffer_ec2_tags(app, auto_scaling_mode), + tags=get_deactivated_buffer_ec2_tags(auto_scaling_mode.get_ec2_tags(app)), state_names=["stopped", "pending", "running", "stopping"], ) buffers_manager = BufferPoolManager() @@ -229,7 +229,7 @@ async def _add_remove_buffer_instances( app: FastAPI, buffers_manager: BufferPoolManager, *, - 
auto_scaling_mode: BaseAutoscaling, + auto_scaling_mode: AutoscalingProvider, ) -> BufferPoolManager: ec2_client = get_ec2_client(app) app_settings = get_application_settings(app) @@ -265,7 +265,9 @@ async def _add_remove_buffer_instances( name=ec2_type, resources=Resources.create_as_empty(), # fake resources ), - tags=get_deactivated_buffer_ec2_tags(app, auto_scaling_mode), + tags=get_deactivated_buffer_ec2_tags( + auto_scaling_mode.get_ec2_tags(app) + ), startup_script=ec2_buffer_startup_script( ec2_boot_specific, app_settings ), @@ -397,7 +399,7 @@ async def _handle_image_pre_pulling( async def monitor_buffer_machines( - app: FastAPI, *, auto_scaling_mode: BaseAutoscaling + app: FastAPI, *, auto_scaling_mode: AutoscalingProvider ) -> None: """Buffer machine creation works like so: 1. a EC2 is created with an EBS attached volume wO auto prepulling and wO auto connect to swarm diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py similarity index 76% rename from services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py rename to services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index 2fb2344f22f..28cba92fd2f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -14,18 +14,17 @@ from servicelib.utils import logged_gather from types_aiobotocore_ec2.literals import InstanceTypeType -from ..core.errors import ( +from ...core.errors import ( DaskNoWorkersError, DaskSchedulerNotFoundError, DaskWorkerNotFoundError, ) -from ..core.settings import get_application_settings -from ..models import AssociatedInstance, DaskTask -from ..utils import computational_scaling as utils -from ..utils import utils_docker, utils_ec2 -from . import dask -from .auto_scaling_mode_base import BaseAutoscaling -from .docker import get_docker_client +from ...core.settings import get_application_settings +from ...models import AssociatedInstance, DaskTask +from ...utils import computational_scaling as utils +from ...utils import utils_docker, utils_ec2 +from .. 
import dask +from ..docker import get_docker_client _logger = logging.getLogger(__name__) @@ -42,27 +41,23 @@ def _scheduler_auth(app: FastAPI) -> ClusterAuthentication: return app_settings.AUTOSCALING_DASK.DASK_SCHEDULER_AUTH -class ComputationalAutoscaling(BaseAutoscaling): - @staticmethod - async def get_monitored_nodes(app: FastAPI) -> list[Node]: +class ComputationalAutoscalingProvider: + async def get_monitored_nodes(self, app: FastAPI) -> list[Node]: return await utils_docker.get_worker_nodes(get_docker_client(app)) - @staticmethod - def get_ec2_tags(app: FastAPI) -> EC2Tags: + def get_ec2_tags(self, app: FastAPI) -> EC2Tags: app_settings = get_application_settings(app) return utils_ec2.get_ec2_tags_computational(app_settings) - @staticmethod def get_new_node_docker_tags( - app: FastAPI, ec2_instance_data: EC2InstanceData + self, app: FastAPI, ec2_instance_data: EC2InstanceData ) -> dict[DockerLabelKey, str]: assert app # nosec return { DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: ec2_instance_data.type } - @staticmethod - async def list_unrunnable_tasks(app: FastAPI) -> list[DaskTask]: + async def list_unrunnable_tasks(self, app: FastAPI) -> list[DaskTask]: try: unrunnable_tasks = await dask.list_unrunnable_tasks( _scheduler_url(app), _scheduler_auth(app) @@ -87,18 +82,17 @@ async def list_unrunnable_tasks(app: FastAPI) -> list[DaskTask]: ) return [] - @staticmethod - def get_task_required_resources(task) -> Resources: + def get_task_required_resources(self, task) -> Resources: return utils.resources_from_dask_task(task) - @staticmethod - async def get_task_defined_instance(app: FastAPI, task) -> InstanceTypeType | None: + async def get_task_defined_instance( + self, app: FastAPI, task + ) -> InstanceTypeType | None: assert app # nosec return cast(InstanceTypeType | None, utils.get_task_instance_restriction(task)) - @staticmethod async def compute_node_used_resources( - app: FastAPI, instance: AssociatedInstance + self, app: FastAPI, instance: AssociatedInstance ) -> Resources: try: resource = await dask.get_worker_used_resources( @@ -127,24 +121,19 @@ async def compute_node_used_resources( _logger.debug("no resource found for %s", f"{instance.ec2_instance.id}") return Resources.create_as_empty() - @staticmethod async def compute_cluster_used_resources( - app: FastAPI, instances: list[AssociatedInstance] + self, app: FastAPI, instances: list[AssociatedInstance] ) -> Resources: list_of_used_resources: list[Resources] = await logged_gather( - *( - ComputationalAutoscaling.compute_node_used_resources(app, i) - for i in instances - ) + *(self.compute_node_used_resources(app, i) for i in instances) ) - counter = collections.Counter({k: 0 for k in Resources.model_fields}) + counter = collections.Counter(dict.fromkeys(Resources.model_fields, 0)) for result in list_of_used_resources: counter.update(result.model_dump()) return Resources.model_validate(dict(counter)) - @staticmethod async def compute_cluster_total_resources( - app: FastAPI, instances: list[AssociatedInstance] + self, app: FastAPI, instances: list[AssociatedInstance] ) -> Resources: try: return await dask.compute_cluster_total_resources( @@ -153,8 +142,9 @@ async def compute_cluster_total_resources( except DaskNoWorkersError: return Resources.create_as_empty() - @staticmethod - async def is_instance_active(app: FastAPI, instance: AssociatedInstance) -> bool: + async def is_instance_active( + self, app: FastAPI, instance: AssociatedInstance + ) -> bool: if not utils_docker.is_node_osparc_ready(instance.node): return 
False @@ -163,14 +153,14 @@ async def is_instance_active(app: FastAPI, instance: AssociatedInstance) -> bool _scheduler_url(app), _scheduler_auth(app), instance.ec2_instance ) - @staticmethod - async def is_instance_retired(app: FastAPI, instance: AssociatedInstance) -> bool: + async def is_instance_retired( + self, app: FastAPI, instance: AssociatedInstance + ) -> bool: if not utils_docker.is_node_osparc_ready(instance.node): return False return await dask.is_worker_retired( _scheduler_url(app), _scheduler_auth(app), instance.ec2_instance ) - @staticmethod - async def try_retire_nodes(app: FastAPI) -> None: + async def try_retire_nodes(self, app: FastAPI) -> None: await dask.try_retire_nodes(_scheduler_url(app), _scheduler_auth(app)) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_dynamic.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py similarity index 67% rename from services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_dynamic.py rename to services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py index a8dcd7552ac..a5e2171fd36 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_dynamic.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py @@ -4,16 +4,14 @@ from models_library.generated_models.docker_rest_api import Node, Task from types_aiobotocore_ec2.literals import InstanceTypeType -from ..core.settings import get_application_settings -from ..models import AssociatedInstance -from ..utils import utils_docker, utils_ec2 -from .auto_scaling_mode_base import BaseAutoscaling -from .docker import get_docker_client +from ...core.settings import get_application_settings +from ...models import AssociatedInstance +from ...utils import utils_docker, utils_ec2 +from ..docker import get_docker_client -class DynamicAutoscaling(BaseAutoscaling): - @staticmethod - async def get_monitored_nodes(app: FastAPI) -> list[Node]: +class DynamicAutoscalingProvider: + async def get_monitored_nodes(self, app: FastAPI) -> list[Node]: app_settings = get_application_settings(app) assert app_settings.AUTOSCALING_NODES_MONITORING # nosec return await utils_docker.get_monitored_nodes( @@ -21,20 +19,17 @@ async def get_monitored_nodes(app: FastAPI) -> list[Node]: node_labels=app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS, ) - @staticmethod - def get_ec2_tags(app: FastAPI) -> EC2Tags: + def get_ec2_tags(self, app: FastAPI) -> EC2Tags: app_settings = get_application_settings(app) return utils_ec2.get_ec2_tags_dynamic(app_settings) - @staticmethod def get_new_node_docker_tags( - app: FastAPI, ec2_instance_data: EC2InstanceData + self, app: FastAPI, ec2_instance_data: EC2InstanceData ) -> dict[DockerLabelKey, str]: app_settings = get_application_settings(app) return utils_docker.get_new_node_docker_tags(app_settings, ec2_instance_data) - @staticmethod - async def list_unrunnable_tasks(app: FastAPI) -> list[Task]: + async def list_unrunnable_tasks(self, app: FastAPI) -> list[Task]: app_settings = get_application_settings(app) assert app_settings.AUTOSCALING_NODES_MONITORING # nosec return await utils_docker.pending_service_tasks_with_insufficient_resources( @@ -42,19 +37,18 @@ async def list_unrunnable_tasks(app: FastAPI) -> list[Task]: service_labels=app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_SERVICE_LABELS, ) - @staticmethod - 
def get_task_required_resources(task) -> Resources: + def get_task_required_resources(self, task) -> Resources: return utils_docker.get_max_resources_from_docker_task(task) - @staticmethod - async def get_task_defined_instance(app: FastAPI, task) -> InstanceTypeType | None: + async def get_task_defined_instance( + self, app: FastAPI, task + ) -> InstanceTypeType | None: return await utils_docker.get_task_instance_restriction( get_docker_client(app), task ) - @staticmethod async def compute_node_used_resources( - app: FastAPI, instance: AssociatedInstance + self, app: FastAPI, instance: AssociatedInstance ) -> Resources: docker_client = get_docker_client(app) app_settings = get_application_settings(app) @@ -65,37 +59,36 @@ async def compute_node_used_resources( service_labels=app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_SERVICE_LABELS, ) - @staticmethod async def compute_cluster_used_resources( - app: FastAPI, instances: list[AssociatedInstance] + self, app: FastAPI, instances: list[AssociatedInstance] ) -> Resources: docker_client = get_docker_client(app) return await utils_docker.compute_cluster_used_resources( docker_client, [i.node for i in instances] ) - @staticmethod async def compute_cluster_total_resources( - app: FastAPI, instances: list[AssociatedInstance] + self, app: FastAPI, instances: list[AssociatedInstance] ) -> Resources: assert app # nosec return await utils_docker.compute_cluster_total_resources( [i.node for i in instances] ) - @staticmethod - async def is_instance_active(app: FastAPI, instance: AssociatedInstance) -> bool: + async def is_instance_active( + self, app: FastAPI, instance: AssociatedInstance + ) -> bool: assert app # nosec return utils_docker.is_node_osparc_ready(instance.node) - @staticmethod - async def is_instance_retired(app: FastAPI, instance: AssociatedInstance) -> bool: + async def is_instance_retired( + self, app: FastAPI, instance: AssociatedInstance + ) -> bool: assert app # nosec assert instance # nosec # nothing to do here return False - @staticmethod - async def try_retire_nodes(app: FastAPI) -> None: + async def try_retire_nodes(self, app: FastAPI) -> None: assert app # nosec # nothing to do here diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py new file mode 100644 index 00000000000..355394b9f1d --- /dev/null +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py @@ -0,0 +1,49 @@ +from typing import Protocol + +from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources +from fastapi import FastAPI +from models_library.docker import DockerLabelKey +from models_library.generated_models.docker_rest_api import Node as DockerNode +from types_aiobotocore_ec2.literals import InstanceTypeType + +from ...models import AssociatedInstance + + +class AutoscalingProvider(Protocol): + async def get_monitored_nodes(self, app: FastAPI) -> list[DockerNode]: ... + + def get_ec2_tags(self, app: FastAPI) -> EC2Tags: ... + + def get_new_node_docker_tags( + self, app: FastAPI, ec2_instance_data: EC2InstanceData + ) -> dict[DockerLabelKey, str]: ... + + async def list_unrunnable_tasks(self, app: FastAPI) -> list: ... + + def get_task_required_resources(self, task) -> Resources: ... + + async def get_task_defined_instance( + self, app: FastAPI, task + ) -> InstanceTypeType | None: ... 
+ + async def compute_node_used_resources( + self, app: FastAPI, instance: AssociatedInstance + ) -> Resources: ... + + async def compute_cluster_used_resources( + self, app: FastAPI, instances: list[AssociatedInstance] + ) -> Resources: ... + + async def compute_cluster_total_resources( + self, app: FastAPI, instances: list[AssociatedInstance] + ) -> Resources: ... + + async def is_instance_active( + self, app: FastAPI, instance: AssociatedInstance + ) -> bool: ... + + async def is_instance_retired( + self, app: FastAPI, instance: AssociatedInstance + ) -> bool: ... + + async def try_retire_nodes(self, app: FastAPI) -> None: ... diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_task.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/auto_scaling_task.py similarity index 82% rename from services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_task.py rename to services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/auto_scaling_task.py index 5ebc6a190f8..964e8558f9f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_task.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/auto_scaling_task.py @@ -7,12 +7,12 @@ from servicelib.background_task import create_periodic_task from servicelib.redis import exclusive -from ..core.settings import ApplicationSettings -from ..utils.redis import create_lock_key_and_value -from .auto_scaling_core import auto_scale_cluster -from .auto_scaling_mode_computational import ComputationalAutoscaling -from .auto_scaling_mode_dynamic import DynamicAutoscaling -from .redis import get_redis_client +from ...core.settings import ApplicationSettings +from ...utils.redis import create_lock_key_and_value +from ..redis import get_redis_client +from ._auto_scaling_core import auto_scale_cluster +from ._provider_computational import ComputationalAutoscalingProvider +from ._provider_dynamic import DynamicAutoscalingProvider _TASK_NAME: Final[str] = "Autoscaling EC2 instances" @@ -33,9 +33,9 @@ async def _startup() -> None: task_name=_TASK_NAME, app=app, auto_scaling_mode=( - DynamicAutoscaling() + DynamicAutoscalingProvider() if app_settings.AUTOSCALING_NODES_MONITORING is not None - else ComputationalAutoscaling() + else ComputationalAutoscalingProvider() ), ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_task.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/buffer_machines_pool_task.py similarity index 88% rename from services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_task.py rename to services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/buffer_machines_pool_task.py index 2985e2ffcc4..347e7467abf 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_task.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/buffer_machines_pool_task.py @@ -7,11 +7,11 @@ from servicelib.background_task import create_periodic_task from servicelib.redis import exclusive -from ..core.settings import ApplicationSettings -from ..utils.redis import create_lock_key_and_value -from .auto_scaling_mode_dynamic import DynamicAutoscaling -from .buffer_machines_pool_core import monitor_buffer_machines -from .redis import get_redis_client +from ...core.settings import ApplicationSettings +from ...utils.redis import 
create_lock_key_and_value +from ..redis import get_redis_client +from ._buffer_machines_pool_core import monitor_buffer_machines +from ._provider_dynamic import DynamicAutoscalingProvider _TASK_NAME_BUFFER: Final[str] = "Autoscaling Buffer Machines Pool" @@ -35,7 +35,7 @@ async def _startup() -> None: interval=app_settings.AUTOSCALING_POLL_INTERVAL, task_name=_TASK_NAME_BUFFER, app=app, - auto_scaling_mode=(DynamicAutoscaling()), + auto_scaling_mode=(DynamicAutoscalingProvider()), ) return _startup diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index d57508babf8..00e8fdd8e02 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -30,7 +30,7 @@ async def _wrap_client_async_routine( - client_coroutine: Coroutine[Any, Any, Any] | Any | None + client_coroutine: Coroutine[Any, Any, Any] | Any | None, ) -> Any: """Dask async behavior does not go well with Pylance as it returns a union of types. this wrapper makes both mypy and pylance happy""" @@ -96,7 +96,7 @@ def _dask_worker_from_ec2_instance( # dict is of type dask_worker_address: worker_details def _find_by_worker_host( - dask_worker: tuple[DaskWorkerUrl, DaskWorkerDetails] + dask_worker: tuple[DaskWorkerUrl, DaskWorkerDetails], ) -> bool: _, details = dask_worker if match := re.match(DASK_NAME_PATTERN, details["name"]): @@ -108,9 +108,9 @@ def _find_by_worker_host( raise DaskWorkerNotFoundError( worker_host=ec2_instance.aws_private_dns, url=client.scheduler.address ) - assert ( - len(filtered_workers) == 1 - ), f"returned workers {filtered_workers}, {node_hostname=}" # nosec + assert len(filtered_workers) == 1, ( + f"returned workers {filtered_workers}, {node_hostname=}" + ) # nosec return next(iter(filtered_workers.items())) @@ -147,8 +147,8 @@ async def is_worker_retired( def _dask_key_to_dask_task_id(key: dask.typing.Key) -> DaskTaskId: - if isinstance(key, bytes): - return key.decode("utf-8") + if isinstance(key, bytes): # type: ignore[unreachable] + return key.decode("utf-8") # type: ignore[unreachable] if isinstance(key, tuple): return "(" + ", ".join(_dask_key_to_dask_task_id(k) for k in key) + ")" return f"{key}" diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py index d7f69d50b54..3dfa6c12a1c 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py @@ -1,9 +1,14 @@ import functools import logging import re -from typing import Final +from typing import Final, TypeAlias -from aws_library.ec2 import EC2InstanceBootSpecific, EC2InstanceData, EC2InstanceType +from aws_library.ec2 import ( + EC2InstanceBootSpecific, + EC2InstanceData, + EC2InstanceType, + Resources, +) from models_library.generated_models.docker_rest_api import Node from types_aiobotocore_ec2.literals import InstanceTypeType @@ -14,7 +19,6 @@ ) from ..core.settings import ApplicationSettings from ..models import AssociatedInstance -from ..modules.auto_scaling_mode_base import BaseAutoscaling from . 
import utils_docker _EC2_INTERNAL_DNS_RE: Final[re.Pattern] = re.compile(r"^(?Pip-[^.]+).*$") @@ -130,16 +134,14 @@ def ec2_buffer_startup_script( def _instance_type_by_type_name( ec2_type: EC2InstanceType, *, type_name: InstanceTypeType | None ) -> bool: - if type_name is None: - return True - return bool(ec2_type.name == type_name) + return type_name is None or ec2_type.name == type_name def find_selected_instance_type_for_task( instance_type_name: InstanceTypeType, available_ec2_types: list[EC2InstanceType], - auto_scaling_mode: BaseAutoscaling, task, + task_required_resources: Resources, ) -> EC2InstanceType: filtered_instances = list( filter( @@ -158,14 +160,11 @@ def find_selected_instance_type_for_task( selected_instance = filtered_instances[0] # check that the assigned resources and the machine resource fit - if ( - auto_scaling_mode.get_task_required_resources(task) - > selected_instance.resources - ): + if task_required_resources > selected_instance.resources: raise TaskRequirementsAboveRequiredEC2InstanceTypeError( task=task, instance_type=selected_instance, - resources=auto_scaling_mode.get_task_required_resources(task), + resources=task_required_resources, ) return selected_instance @@ -178,9 +177,9 @@ def get_machine_buffer_type( return available_ec2_types[0] -DrainedNodes = list[AssociatedInstance] -BufferDrainedNodes = list[AssociatedInstance] -TerminatingNodes = list[AssociatedInstance] +DrainedNodes: TypeAlias = list[AssociatedInstance] +BufferDrainedNodes: TypeAlias = list[AssociatedInstance] +TerminatingNodes: TypeAlias = list[AssociatedInstance] def sort_drained_nodes( diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/buffer_machines_pool_core.py b/services/autoscaling/src/simcore_service_autoscaling/utils/buffer_machines_pool_core.py index 66ff7972306..3a72b14bed8 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/buffer_machines_pool_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/buffer_machines_pool_core.py @@ -4,7 +4,6 @@ from aws_library.ec2 import AWS_TAG_VALUE_MAX_LENGTH, AWSTagKey, AWSTagValue, EC2Tags from common_library.json_serialization import json_dumps -from fastapi import FastAPI from models_library.docker import DockerGenericTag from pydantic import TypeAdapter @@ -15,27 +14,20 @@ PRE_PULLED_IMAGES_EC2_TAG_KEY, PRE_PULLED_IMAGES_RE, ) -from ..modules.auto_scaling_mode_base import BaseAutoscaling _NAME_EC2_TAG_KEY: Final[AWSTagKey] = TypeAdapter(AWSTagKey).validate_python("Name") -def get_activated_buffer_ec2_tags( - app: FastAPI, auto_scaling_mode: BaseAutoscaling -) -> EC2Tags: - return auto_scaling_mode.get_ec2_tags(app) | ACTIVATED_BUFFER_MACHINE_EC2_TAGS +def get_activated_buffer_ec2_tags(base_ec2_tags: EC2Tags) -> EC2Tags: + return base_ec2_tags | ACTIVATED_BUFFER_MACHINE_EC2_TAGS -def get_deactivated_buffer_ec2_tags( - app: FastAPI, auto_scaling_mode: BaseAutoscaling -) -> EC2Tags: - base_ec2_tags = ( - auto_scaling_mode.get_ec2_tags(app) | DEACTIVATED_BUFFER_MACHINE_EC2_TAGS +def get_deactivated_buffer_ec2_tags(base_ec2_tags: EC2Tags) -> EC2Tags: + new_base_ec2_tags = base_ec2_tags | DEACTIVATED_BUFFER_MACHINE_EC2_TAGS + new_base_ec2_tags[_NAME_EC2_TAG_KEY] = TypeAdapter(AWSTagValue).validate_python( + f"{new_base_ec2_tags[_NAME_EC2_TAG_KEY]}-buffer" ) - base_ec2_tags[_NAME_EC2_TAG_KEY] = AWSTagValue( - f"{base_ec2_tags[_NAME_EC2_TAG_KEY]}-buffer" - ) - return base_ec2_tags + return new_base_ec2_tags def is_buffer_machine(tags: EC2Tags) -> bool: diff --git 
a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py index 9c3f187a78f..ac3ff4325c5 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py @@ -35,6 +35,7 @@ from types_aiobotocore_ec2.literals import InstanceTypeType from ..core.settings import ApplicationSettings +from ..models import AssociatedInstance from ..modules.docker import AutoscalingDocker logger = logging.getLogger(__name__) @@ -278,24 +279,32 @@ def get_max_resources_from_docker_task(task: Task) -> Resources: return Resources( cpus=max( ( - task.spec.resources.reservations - and task.spec.resources.reservations.nano_cp_us + ( + task.spec.resources.reservations + and task.spec.resources.reservations.nano_cp_us + ) or 0 ), ( - task.spec.resources.limits - and task.spec.resources.limits.nano_cp_us + ( + task.spec.resources.limits + and task.spec.resources.limits.nano_cp_us + ) or 0 ), ) / _NANO_CPU, ram=TypeAdapter(ByteSize).validate_python( max( - task.spec.resources.reservations - and task.spec.resources.reservations.memory_bytes + ( + task.spec.resources.reservations + and task.spec.resources.reservations.memory_bytes + ) or 0, - task.spec.resources.limits - and task.spec.resources.limits.memory_bytes + ( + task.spec.resources.limits + and task.spec.resources.limits.memory_bytes + ) or 0, ) ), @@ -382,7 +391,7 @@ async def compute_cluster_used_resources( list_of_used_resources = await logged_gather( *(compute_node_used_resources(docker_client, node) for node in nodes) ) - counter = collections.Counter({k: 0 for k in list(Resources.model_fields)}) + counter = collections.Counter(dict.fromkeys(list(Resources.model_fields), 0)) for result in list_of_used_resources: counter.update(result.model_dump()) @@ -570,14 +579,14 @@ def get_new_node_docker_tags( ) -> dict[DockerLabelKey, str]: assert app_settings.AUTOSCALING_NODES_MONITORING # nosec return ( - { - tag_key: "true" - for tag_key in app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS - } - | { - tag_key: "true" - for tag_key in app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS - } + dict.fromkeys( + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS, + "true", + ) + | dict.fromkeys( + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS, + "true", + ) | {DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: ec2_instance.type} ) @@ -601,6 +610,10 @@ def is_node_osparc_ready(node: Node) -> bool: ) +def is_instance_drained(instance: AssociatedInstance) -> bool: + return not is_node_osparc_ready(instance.node) + + async def set_node_osparc_ready( app_settings: ApplicationSettings, docker_client: AutoscalingDocker, @@ -702,3 +715,8 @@ async def attach_node( tags=new_tags, available=app_settings.AUTOSCALING_DRAIN_NODES_WITH_LABELS, # NOTE: full drain sometimes impede on performance ) + + +def is_node_ready(node: Node) -> bool: + assert node.status # nosec + return bool(node.status.state is NodeState.ready) diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py index a49ec4e46b2..14960ce696c 100644 --- a/services/autoscaling/tests/unit/conftest.py +++ b/services/autoscaling/tests/unit/conftest.py @@ -78,9 +78,9 @@ Cluster, DaskTaskResources, ) -from simcore_service_autoscaling.modules import auto_scaling_core -from 
simcore_service_autoscaling.modules.auto_scaling_mode_dynamic import ( - DynamicAutoscaling, +from simcore_service_autoscaling.modules.cluster_scaling import _auto_scaling_core +from simcore_service_autoscaling.modules.cluster_scaling._provider_dynamic import ( + DynamicAutoscalingProvider, ) from simcore_service_autoscaling.modules.docker import AutoscalingDocker from simcore_service_autoscaling.modules.ec2 import SimcoreEC2API @@ -324,12 +324,12 @@ def mocked_ec2_instances_envs( @pytest.fixture def disable_autoscaling_background_task(mocker: MockerFixture) -> None: mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_task.create_periodic_task", + "simcore_service_autoscaling.modules.cluster_scaling.auto_scaling_task.create_periodic_task", autospec=True, ) mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_task.cancel_wait_task", + "simcore_service_autoscaling.modules.cluster_scaling.auto_scaling_task.cancel_wait_task", autospec=True, ) @@ -337,12 +337,12 @@ def disable_autoscaling_background_task(mocker: MockerFixture) -> None: @pytest.fixture def disable_buffers_pool_background_task(mocker: MockerFixture) -> None: mocker.patch( - "simcore_service_autoscaling.modules.buffer_machines_pool_task.create_periodic_task", + "simcore_service_autoscaling.modules.cluster_scaling.buffer_machines_pool_task.create_periodic_task", autospec=True, ) mocker.patch( - "simcore_service_autoscaling.modules.buffer_machines_pool_task.cancel_wait_task", + "simcore_service_autoscaling.modules.cluster_scaling.buffer_machines_pool_task.cancel_wait_task", autospec=True, ) @@ -445,10 +445,10 @@ def service_monitored_labels( app_settings: ApplicationSettings, ) -> dict[DockerLabelKey, str]: assert app_settings.AUTOSCALING_NODES_MONITORING - return { - key: "true" - for key in app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_SERVICE_LABELS - } + return dict.fromkeys( + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_SERVICE_LABELS, + "true", + ) @pytest.fixture @@ -902,7 +902,7 @@ async def _fake_set_node_availability( return returned_node return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.set_node_availability", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.set_node_availability", autospec=True, side_effect=_fake_set_node_availability, ) @@ -926,7 +926,7 @@ async def fake_tag_node( return updated_node return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.tag_node", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.tag_node", autospec=True, side_effect=fake_tag_node, ) @@ -1043,7 +1043,7 @@ def hot_buffer_instance_type(app_settings: ApplicationSettings) -> InstanceTypeT @pytest.fixture def mock_find_node_with_name_returns_none(mocker: MockerFixture) -> Iterator[mock.Mock]: return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.find_node_with_name", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.find_node_with_name", autospec=True, return_value=None, ) @@ -1070,7 +1070,7 @@ def with_short_ec2_instances_max_start_time( @pytest.fixture async def spied_cluster_analysis(mocker: MockerFixture) -> MockType: - return mocker.spy(auto_scaling_core, "_analyze_current_cluster") + return mocker.spy(_auto_scaling_core, "_analyze_current_cluster") @pytest.fixture @@ -1081,7 +1081,7 @@ async def _( return [], ec2_instances return mocker.patch( - 
"simcore_service_autoscaling.modules.auto_scaling_core.associate_ec2_instances_with_nodes", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.associate_ec2_instances_with_nodes", autospec=True, side_effect=_, ) @@ -1185,7 +1185,7 @@ async def _do( resource_tags: list[TagTypeDef] = [ {"Key": tag_key, "Value": tag_value} for tag_key, tag_value in get_deactivated_buffer_ec2_tags( - initialized_app, DynamicAutoscaling() + DynamicAutoscalingProvider().get_ec2_tags(initialized_app) ).items() ] if pre_pull_images is not None and instance_state_name == "stopped": diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_task.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_auto_scaling_task.py similarity index 96% rename from services/autoscaling/tests/unit/test_modules_auto_scaling_task.py rename to services/autoscaling/tests/unit/test_modules_cluster_scaling_auto_scaling_task.py index 4a3d3e85bae..8778996d9e6 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_task.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_auto_scaling_task.py @@ -40,7 +40,7 @@ def app_environment( @pytest.fixture def mock_background_task(mocker: MockerFixture) -> mock.Mock: return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_task.auto_scale_cluster", + "simcore_service_autoscaling.modules.cluster_scaling.auto_scaling_task.auto_scale_cluster", autospec=True, ) diff --git a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_buffer_machine_core.py similarity index 97% rename from services/autoscaling/tests/unit/test_modules_buffer_machine_core.py rename to services/autoscaling/tests/unit/test_modules_cluster_scaling_buffer_machine_core.py index 32d38c0eea9..b945d709334 100644 --- a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_buffer_machine_core.py @@ -28,12 +28,12 @@ from pytest_simcore.helpers.logging_tools import log_context from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict from simcore_service_autoscaling.constants import PRE_PULLED_IMAGES_EC2_TAG_KEY -from simcore_service_autoscaling.modules.auto_scaling_mode_dynamic import ( - DynamicAutoscaling, -) -from simcore_service_autoscaling.modules.buffer_machines_pool_core import ( +from simcore_service_autoscaling.modules.cluster_scaling._buffer_machines_pool_core import ( monitor_buffer_machines, ) +from simcore_service_autoscaling.modules.cluster_scaling._provider_dynamic import ( + DynamicAutoscalingProvider, +) from types_aiobotocore_ec2 import EC2Client from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType from types_aiobotocore_ec2.type_defs import FilterTypeDef @@ -95,7 +95,7 @@ async def test_if_send_command_is_mocked_by_moto( # 1. run, this will create as many buffer machines as needed await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_warm_pools_ec2_instances( ec2_client, @@ -112,7 +112,7 @@ async def test_if_send_command_is_mocked_by_moto( # 2. 
this should generate a failure as current version of moto does not handle this await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) @@ -168,7 +168,7 @@ async def _test_monitor_buffer_machines( # 1. run, this will create as many buffer machines as needed with log_context(logging.INFO, "create buffer machines"): await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) with log_context( logging.INFO, f"waiting for {buffer_count} buffer instances to be running" @@ -211,7 +211,7 @@ async def _assert_buffer_machines_running() -> None: ) async def _assert_run_ssm_command_for_pulling() -> None: await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_warm_pools_ec2_instances( ec2_client, @@ -248,7 +248,7 @@ async def _assert_run_ssm_command_for_pulling() -> None: ) async def _assert_wait_for_ssm_command_to_finish() -> None: await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_warm_pools_ec2_instances( ec2_client, @@ -354,7 +354,7 @@ async def test_monitor_buffer_machines_terminates_supernumerary_instances( ) # this will terminate the supernumerary instances and start new ones await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_warm_pools_ec2_instances( ec2_client, @@ -414,7 +414,7 @@ async def test_monitor_buffer_machines_terminates_instances_with_incorrect_pre_p ) # this will terminate the wrong instances and start new ones and pre-pull the new set of images await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_warm_pools_ec2_instances( ec2_client, @@ -491,7 +491,7 @@ async def test_monitor_buffer_machines_terminates_unneeded_pool( # this will terminate the unwanted buffer pool and replace with the expected ones await monitor_buffer_machines( - initialized_app, auto_scaling_mode=DynamicAutoscaling() + initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_warm_pools_ec2_instances( ec2_client, diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py similarity index 97% rename from services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py rename to services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py index 8a9f82ec847..dfafec0b21f 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py @@ -27,9 +27,14 @@ from faker import Faker from fastapi import FastAPI from models_library.docker import DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY -from models_library.generated_models.docker_rest_api import Availability +from models_library.generated_models.docker_rest_api import ( + Availability, +) from models_library.generated_models.docker_rest_api import Node as 
DockerNode -from models_library.generated_models.docker_rest_api import NodeState, NodeStatus +from models_library.generated_models.docker_rest_api import ( + NodeState, + NodeStatus, +) from models_library.rabbitmq_messages import RabbitAutoscalingStatusMessage from pydantic import ByteSize, TypeAdapter from pytest_mock import MockerFixture, MockType @@ -41,9 +46,11 @@ from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict from simcore_service_autoscaling.core.settings import ApplicationSettings from simcore_service_autoscaling.models import EC2InstanceData -from simcore_service_autoscaling.modules.auto_scaling_core import auto_scale_cluster -from simcore_service_autoscaling.modules.auto_scaling_mode_computational import ( - ComputationalAutoscaling, +from simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core import ( + auto_scale_cluster, +) +from simcore_service_autoscaling.modules.cluster_scaling._provider_computational import ( + ComputationalAutoscalingProvider, ) from simcore_service_autoscaling.modules.dask import DaskTaskResources from simcore_service_autoscaling.modules.docker import get_docker_client @@ -128,7 +135,7 @@ def mock_docker_find_node_with_name_returns_fake_node( mocker: MockerFixture, fake_node: DockerNode ) -> Iterator[mock.Mock]: return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.find_node_with_name", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.find_node_with_name", autospec=True, return_value=fake_node, ) @@ -137,7 +144,7 @@ def mock_docker_find_node_with_name_returns_fake_node( @pytest.fixture def mock_docker_compute_node_used_resources(mocker: MockerFixture) -> mock.Mock: return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.compute_node_used_resources", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.compute_node_used_resources", autospec=True, return_value=Resources.create_as_empty(), ) @@ -326,7 +333,7 @@ async def test_cluster_scaling_with_no_tasks_does_nothing( dask_spec_local_cluster: distributed.SpecCluster, ): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) mock_launch_instances.assert_not_called() mock_terminate_instances.assert_not_called() @@ -364,7 +371,7 @@ async def test_cluster_scaling_with_disabled_ssm_does_not_block_autoscaling( dask_spec_local_cluster: distributed.SpecCluster, ): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) mock_launch_instances.assert_not_called() mock_terminate_instances.assert_not_called() @@ -405,7 +412,7 @@ async def test_cluster_scaling_with_task_with_too_much_resources_starts_nothing( assert dask_future await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) mock_launch_instances.assert_not_called() mock_terminate_instances.assert_not_called() @@ -497,7 +504,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 assert dask_futures # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, 
auto_scaling_mode=ComputationalAutoscalingProvider() ) # check the instance was started and we have exactly 1 @@ -531,7 +538,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 # 2. running this again should not scale again, but tag the node and make it available await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) mock_dask_get_worker_has_results_in_memory.assert_called_once() mock_dask_get_worker_has_results_in_memory.reset_mock() @@ -629,7 +636,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 assert fake_attached_node.description fake_attached_node.description.hostname = internal_dns_name - auto_scaling_mode = ComputationalAutoscaling() + auto_scaling_mode = ComputationalAutoscalingProvider() mocker.patch.object( auto_scaling_mode, "get_monitored_nodes", @@ -766,7 +773,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 < app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION ) mocked_docker_remove_node = mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.remove_nodes", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.remove_nodes", return_value=None, autospec=True, ) @@ -873,7 +880,7 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_allowed( # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) # nothing runs @@ -924,7 +931,7 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_fitting_reso # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) # nothing runs @@ -991,7 +998,7 @@ async def test_cluster_scaling_up_starts_multiple_instances( # run the code await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) # check the instances were started @@ -1083,7 +1090,7 @@ async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) await assert_autoscaled_computational_ec2_instances( ec2_client, @@ -1115,7 +1122,7 @@ async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and num_useless_calls = 10 for _ in range(num_useless_calls): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) await assert_autoscaled_computational_ec2_instances( ec2_client, @@ -1184,7 +1191,7 @@ async def test_cluster_scaling_up_more_than_allowed_with_multiple_types_max_star # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) # one of each type is created with some that will have 2 instances @@ -1228,7 +1235,7 @@ async def 
test_cluster_scaling_up_more_than_allowed_with_multiple_types_max_star num_useless_calls = 10 for _ in range(num_useless_calls): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) all_instances = await ec2_client.describe_instances() assert len(all_instances["Reservations"]) == len( @@ -1294,7 +1301,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) # check the instance was started and we have exactly 1 @@ -1338,7 +1345,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( # 2. running again several times the autoscaler, the node does not join for i in range(7): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) # there should be no scaling up, since there is already a pending instance instances = await assert_autoscaled_computational_ec2_instances( @@ -1382,7 +1389,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( ) # scaling now will terminate the broken ec2 that did not connect, and directly create a replacement await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) # we have therefore 2 reservations, first instance is terminated and a second one started all_instances = await ec2_client.describe_instances() @@ -1485,7 +1492,7 @@ async def test_cluster_adapts_machines_on_the_fly( # it will only scale once and do nothing else await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) await assert_autoscaled_computational_ec2_instances( ec2_client, @@ -1512,7 +1519,7 @@ async def test_cluster_adapts_machines_on_the_fly( # # 2. 
now the machines are associated await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1535,7 +1542,7 @@ async def test_cluster_adapts_machines_on_the_fly( # scaling will do nothing since we have hit the maximum number of machines for _ in range(3): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) await assert_autoscaled_computational_ec2_instances( ec2_client, @@ -1565,11 +1572,11 @@ async def test_cluster_adapts_machines_on_the_fly( # first call to auto_scale_cluster will mark 1 node as empty with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.set_node_found_empty", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.set_node_found_empty", autospec=True, ) as mock_docker_set_node_found_empty: await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1587,14 +1594,14 @@ async def test_cluster_adapts_machines_on_the_fly( # now we mock the get_node_found_empty so the next call will actually drain the machine with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_empty_since", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.get_node_empty_since", autospec=True, return_value=arrow.utcnow().datetime - 1.5 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_DRAINING, ) as mocked_get_node_empty_since: await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) mocked_get_node_empty_since.assert_called_once() analyzed_cluster = assert_cluster_state( @@ -1610,7 +1617,7 @@ async def test_cluster_adapts_machines_on_the_fly( create_fake_node, drained_machine_instance_id, None ) await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1622,7 +1629,7 @@ async def test_cluster_adapts_machines_on_the_fly( # this will initiate termination now with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_last_readyness_update", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.get_node_last_readyness_update", autospec=True, return_value=arrow.utcnow().datetime - 1.5 @@ -1630,7 +1637,7 @@ async def test_cluster_adapts_machines_on_the_fly( ): mock_docker_tag_node.reset_mock() await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1649,7 +1656,7 @@ async def test_cluster_adapts_machines_on_the_fly( create_fake_node, drained_machine_instance_id, drained_machine_instance_id ) await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, 
auto_scaling_mode=ComputationalAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1662,19 +1669,19 @@ async def test_cluster_adapts_machines_on_the_fly( # now this will terminate it and straight away start a new machine type with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_termination_started_since", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.get_node_termination_started_since", autospec=True, return_value=arrow.utcnow().datetime - 1.5 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION, ): mocked_docker_remove_node = mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.remove_nodes", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.remove_nodes", return_value=None, autospec=True, ) await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + app=initialized_app, auto_scaling_mode=ComputationalAutoscalingProvider() ) mocked_docker_remove_node.assert_called_once() diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py similarity index 97% rename from services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py rename to services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py index 19bb4c69c89..98a76ae7172 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py @@ -53,14 +53,14 @@ from simcore_service_autoscaling.constants import BUFFER_MACHINE_TAG_KEY from simcore_service_autoscaling.core.settings import ApplicationSettings from simcore_service_autoscaling.models import AssociatedInstance, Cluster -from simcore_service_autoscaling.modules.auto_scaling_core import ( +from simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core import ( _activate_drained_nodes, _find_terminateable_instances, _try_scale_down_cluster, auto_scale_cluster, ) -from simcore_service_autoscaling.modules.auto_scaling_mode_dynamic import ( - DynamicAutoscaling, +from simcore_service_autoscaling.modules.cluster_scaling._provider_dynamic import ( + DynamicAutoscalingProvider, ) from simcore_service_autoscaling.modules.docker import ( AutoscalingDocker, @@ -113,7 +113,7 @@ def mock_find_node_with_name_returns_fake_node( mocker: MockerFixture, fake_node: Node ) -> Iterator[mock.Mock]: return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.find_node_with_name", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.find_node_with_name", autospec=True, return_value=fake_node, ) @@ -122,7 +122,7 @@ def mock_find_node_with_name_returns_fake_node( @pytest.fixture def mock_remove_nodes(mocker: MockerFixture) -> mock.Mock: return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.remove_nodes", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.remove_nodes", autospec=True, ) @@ -130,7 +130,7 @@ def mock_remove_nodes(mocker: MockerFixture) -> mock.Mock: @pytest.fixture def mock_compute_node_used_resources(mocker: MockerFixture) -> mock.Mock: return mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.compute_node_used_resources", + 
"simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.compute_node_used_resources", autospec=True, return_value=Resources.create_as_empty(), ) @@ -323,7 +323,7 @@ async def test_cluster_scaling_with_no_services_does_nothing( mock_rabbitmq_post_message: mock.Mock, ): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) mock_launch_instances.assert_not_called() mock_terminate_instances.assert_not_called() @@ -362,7 +362,7 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect ): assert app_settings.AUTOSCALING_EC2_INSTANCES await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_ec2_instances( ec2_client, @@ -387,7 +387,7 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect mock_rabbitmq_post_message.reset_mock() # calling again should attach the new nodes to the reserve, but nothing should start await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_ec2_instances( ec2_client, @@ -426,7 +426,7 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect # calling it again should not create anything new for _ in range(10): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_ec2_instances( ec2_client, @@ -486,7 +486,7 @@ async def test_cluster_scaling_with_service_asking_for_too_much_resources_starts await create_services_batch(scale_up_params) await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) mock_launch_instances.assert_not_called() mock_terminate_instances.assert_not_called() @@ -529,7 +529,7 @@ async def _test_cluster_scaling_up_and_down( # noqa: PLR0915 # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) assert_cluster_state( spied_cluster_analysis, expected_calls=1, expected_num_machines=0 @@ -578,7 +578,7 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: # 2. 
running this again should not scale again, but tag the node and make it available await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) assert_cluster_state( spied_cluster_analysis, expected_calls=1, expected_num_machines=1 @@ -591,13 +591,11 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: ) assert fake_attached_node.spec.labels assert app_settings.AUTOSCALING_NODES_MONITORING - expected_docker_node_tags = { - tag_key: "true" - for tag_key in ( - app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS - + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS - ) - } | { + expected_docker_node_tags = dict.fromkeys( + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS + + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS, + "true", + ) | { DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: scale_up_params.expected_instance_type } fake_attached_node.spec.labels |= expected_docker_node_tags | { @@ -713,7 +711,7 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: fake_attached_node.spec.availability = Availability.active fake_attached_node.description.hostname = internal_dns_name - auto_scaling_mode = DynamicAutoscaling() + auto_scaling_mode = DynamicAutoscalingProvider() mocker.patch.object( auto_scaling_mode, "get_monitored_nodes", @@ -862,7 +860,7 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: < app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION ) mocked_docker_remove_node = mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.remove_nodes", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.remove_nodes", return_value=None, autospec=True, ) @@ -1192,7 +1190,7 @@ async def test_cluster_scaling_up_starts_multiple_instances( # run the code await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) # check the instances were started @@ -1294,7 +1292,7 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 # it will only scale once and do nothing else await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_ec2_instances( ec2_client, @@ -1319,7 +1317,7 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 # # 2. 
now the machines are associated await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1341,7 +1339,7 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 # scaling will do nothing since we have hit the maximum number of machines for _ in range(3): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_ec2_instances( ec2_client, @@ -1380,11 +1378,11 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 # first call to auto_scale_cluster will mark 1 node as empty with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.set_node_found_empty", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.set_node_found_empty", autospec=True, ) as mock_docker_set_node_found_empty: await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1402,14 +1400,14 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 # now we mock the get_node_found_empty so the next call will actually drain the machine with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_empty_since", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.get_node_empty_since", autospec=True, return_value=arrow.utcnow().datetime - 1.5 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_DRAINING, ) as mocked_get_node_empty_since: await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) mocked_get_node_empty_since.assert_called_once() analyzed_cluster = assert_cluster_state( @@ -1425,7 +1423,7 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 create_fake_node, drained_machine_instance_id, None ) await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1437,7 +1435,7 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 # this will initiate termination now with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_last_readyness_update", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.get_node_last_readyness_update", autospec=True, return_value=arrow.utcnow().datetime - 1.5 @@ -1445,7 +1443,7 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 ): mock_docker_tag_node.reset_mock() await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1464,7 +1462,7 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 create_fake_node, drained_machine_instance_id, drained_machine_instance_id ) await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, 
auto_scaling_mode=DynamicAutoscalingProvider() ) analyzed_cluster = assert_cluster_state( spied_cluster_analysis, @@ -1477,19 +1475,19 @@ async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 # now this will terminate it and straight away start a new machine type with mock.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_termination_started_since", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.get_node_termination_started_since", autospec=True, return_value=arrow.utcnow().datetime - 1.5 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION, ): mocked_docker_remove_node = mocker.patch( - "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.remove_nodes", + "simcore_service_autoscaling.modules.cluster_scaling._auto_scaling_core.utils_docker.remove_nodes", return_value=None, autospec=True, ) await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) mocked_docker_remove_node.assert_called_once() @@ -1578,7 +1576,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( # this should trigger a scaling up as we have no nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) # check the instance was started and we have exactly 1 @@ -1622,7 +1620,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( # 2. running again several times the autoscaler, the node does not join for i in range(7): await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) # there should be no scaling up, since there is already a pending instance instances = await assert_autoscaled_dynamic_ec2_instances( @@ -1666,7 +1664,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( ) # scaling now will terminate the broken ec2 that did not connect, and directly create a replacement await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) # we have therefore 2 reservations, first instance is terminated and a second one started all_instances = await ec2_client.describe_instances() @@ -2006,7 +2004,7 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers( # let's autoscale, this should move the warm buffers to hot buffers await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) mock_docker_tag_node.assert_not_called() # at analysis time, we had no machines running @@ -2041,7 +2039,7 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers( # let's autoscale again, to check the cluster analysis and tag the nodes await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) mock_docker_tag_node.assert_called() assert ( @@ -2124,7 +2122,7 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7 # ensure we get our running hot buffer await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, 
auto_scaling_mode=DynamicAutoscalingProvider() ) await assert_autoscaled_dynamic_ec2_instances( ec2_client, @@ -2137,7 +2135,7 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7 ) # this brings a new analysis await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + app=initialized_app, auto_scaling_mode=DynamicAutoscalingProvider() ) spied_cluster = assert_cluster_state( spied_cluster_analysis, expected_calls=2, expected_num_machines=5 @@ -2150,13 +2148,11 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7 ) assert fake_attached_node_base.spec.labels assert app_settings.AUTOSCALING_NODES_MONITORING - expected_docker_node_tags = { - tag_key: "true" - for tag_key in ( - app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS - + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS - ) - } | { + expected_docker_node_tags = dict.fromkeys( + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS + + app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS, + "true", + ) | { DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: f"{hot_buffer_instance_type}" } fake_attached_node_base.spec.labels |= expected_docker_node_tags | { @@ -2172,7 +2168,7 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7 spied_cluster.pending_ec2s[i].ec2_instance ) fake_hot_buffer_nodes.append(node) - auto_scaling_mode = DynamicAutoscaling() + auto_scaling_mode = DynamicAutoscalingProvider() mocker.patch.object( auto_scaling_mode, "get_monitored_nodes", diff --git a/services/autoscaling/tests/unit/test_utils_buffer_machines_pool_core.py b/services/autoscaling/tests/unit/test_utils_buffer_machines_pool_core.py index 19cc33c2575..2507fedc860 100644 --- a/services/autoscaling/tests/unit/test_utils_buffer_machines_pool_core.py +++ b/services/autoscaling/tests/unit/test_utils_buffer_machines_pool_core.py @@ -14,11 +14,11 @@ DEACTIVATED_BUFFER_MACHINE_EC2_TAGS, PRE_PULLED_IMAGES_EC2_TAG_KEY, ) -from simcore_service_autoscaling.modules.auto_scaling_mode_computational import ( - ComputationalAutoscaling, +from simcore_service_autoscaling.modules.cluster_scaling._provider_computational import ( + ComputationalAutoscalingProvider, ) -from simcore_service_autoscaling.modules.auto_scaling_mode_dynamic import ( - DynamicAutoscaling, +from simcore_service_autoscaling.modules.cluster_scaling._provider_dynamic import ( + DynamicAutoscalingProvider, ) from simcore_service_autoscaling.utils.buffer_machines_pool_core import ( dump_pre_pulled_images_as_tags, @@ -37,9 +37,9 @@ def test_get_activated_buffer_ec2_tags_dynamic( enabled_dynamic_mode: EnvVarsDict, initialized_app: FastAPI, ): - auto_scaling_mode = DynamicAutoscaling() + auto_scaling_mode = DynamicAutoscalingProvider() activated_buffer_tags = get_activated_buffer_ec2_tags( - initialized_app, auto_scaling_mode + auto_scaling_mode.get_ec2_tags(initialized_app) ) assert ( auto_scaling_mode.get_ec2_tags(initialized_app) @@ -55,9 +55,9 @@ def test_get_deactivated_buffer_ec2_tags_dynamic( enabled_dynamic_mode: EnvVarsDict, initialized_app: FastAPI, ): - auto_scaling_mode = DynamicAutoscaling() + auto_scaling_mode = DynamicAutoscalingProvider() deactivated_buffer_tags = get_deactivated_buffer_ec2_tags( - initialized_app, auto_scaling_mode + auto_scaling_mode.get_ec2_tags(initialized_app) ) # when deactivated the buffer EC2 name has an additional -buffer suffix expected_tags = ( @@ 
-79,9 +79,9 @@ def test_get_activated_buffer_ec2_tags_computational( enabled_computational_mode: EnvVarsDict, initialized_app: FastAPI, ): - auto_scaling_mode = ComputationalAutoscaling() + auto_scaling_mode = ComputationalAutoscalingProvider() activated_buffer_tags = get_activated_buffer_ec2_tags( - initialized_app, auto_scaling_mode + auto_scaling_mode.get_ec2_tags(initialized_app) ) assert ( auto_scaling_mode.get_ec2_tags(initialized_app) @@ -97,9 +97,9 @@ def test_get_deactivated_buffer_ec2_tags_computational( enabled_computational_mode: EnvVarsDict, initialized_app: FastAPI, ): - auto_scaling_mode = ComputationalAutoscaling() + auto_scaling_mode = ComputationalAutoscalingProvider() deactivated_buffer_tags = get_deactivated_buffer_ec2_tags( - initialized_app, auto_scaling_mode + auto_scaling_mode.get_ec2_tags(initialized_app) ) # when deactivated the buffer EC2 name has an additional -buffer suffix expected_tags = ( diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py index abaedd698ed..c3214776d72 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py @@ -38,8 +38,8 @@ class GracefulKiller: """ kill_now = False - worker = None - task = None + worker: distributed.Worker | None = None + task: asyncio.Task | None = None def __init__(self, worker: distributed.Worker): signal.signal(signal.SIGINT, self.exit_gracefully) diff --git a/services/datcore-adapter/src/simcore_service_datcore_adapter/modules/pennsieve.py b/services/datcore-adapter/src/simcore_service_datcore_adapter/modules/pennsieve.py index 79148b72f7c..a67093fc6c8 100644 --- a/services/datcore-adapter/src/simcore_service_datcore_adapter/modules/pennsieve.py +++ b/services/datcore-adapter/src/simcore_service_datcore_adapter/modules/pennsieve.py @@ -215,7 +215,7 @@ async def _get_dataset_packages( api_secret: str, dataset_id: str, page_size: int, - cursor: str, + cursor: str | None, ) -> dict[str, Any]: packages = cast( dict[str, Any], @@ -227,7 +227,7 @@ async def _get_dataset_packages( params={ "includeSourceFiles": False, "pageSize": page_size, - "cursor": cursor, + "cursor": cursor if cursor is not None else "", }, ), ) @@ -464,8 +464,8 @@ async def list_all_dataset_files( ) -> list[FileMetaData]: """returns ALL the files belonging to the dataset, can be slow if there are a lot of files""" - file_meta_data = [] - cursor = "" + file_meta_data: list[FileMetaData] = [] + cursor: str | None = "" PAGE_SIZE = 1000 num_packages, dataset_details = await logged_gather( @@ -480,7 +480,7 @@ async def list_all_dataset_files( while resp := await self._get_dataset_packages( api_key, api_secret, dataset_id, PAGE_SIZE, cursor ): - cursor = resp.get("cursor") # type: ignore[assignment] + cursor = resp.get("cursor") assert isinstance(cursor, str | None) # nosec all_packages.update( {p["content"]["id"]: p for p in resp.get("packages", [])} @@ -491,6 +491,7 @@ async def list_all_dataset_files( num_packages, cursor, ) + if cursor is None: # the whole collection is there now break diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py index 49b9e0c5670..00dc20a3ed8 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py +++ 
b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py @@ -86,14 +86,14 @@ def extract_service_port_from_settings( return PortInt(param.value) # REST-API compatible if ( - param.setting_type == "EndpointSpec" + param.setting_type == "EndpointSpec" # type: ignore[unreachable] and "Ports" in param.value and ( isinstance(param.value["Ports"], list) and "TargetPort" in param.value["Ports"][0] ) ): - return PortInt(param.value["Ports"][0]["TargetPort"]) + return PortInt(param.value["Ports"][0]["TargetPort"]) # type: ignore[unreachable] msg = "service port not found!" raise ValueError(msg) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py index bf375b29eed..0922af46473 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py @@ -78,7 +78,7 @@ def _get_s3_volume_driver_config( "s3-acl": "private", } else: - msg = f"Unexpected, all {S3Provider.__name__} should be covered" + msg = f"Unexpected, all {S3Provider.__name__} should be covered" # type: ignore[unreachable] raise DynamicSidecarError(msg=msg) assert extra_options is not None # nosec diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 7e6f6c7b638..ac96e05bd6c 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -1,6 +1,5 @@ import json import logging -import os import sys import time from asyncio import CancelledError @@ -47,22 +46,22 @@ class PortTypeName(str, Enum): _logger = logging.getLogger(__name__) -# OUTPUTS section - -def _get_size_of_value(value: tuple[ItemConcreteValue | None, SetKWargs | None]) -> int: - if value is None: +def _get_size_of_value( + value: tuple[ItemConcreteValue | None, SetKWargs | None], +) -> int: + concrete_value, _ = value + if concrete_value is None: return 0 - if isinstance(value, Path): + if isinstance(concrete_value, Path): # if symlink we need to fetch the pointer to the file # relative symlink need to know which their parent is # in oder to properly resolve the path since the workdir # does not equal to their parent dir - path = value - if value.is_symlink(): - path = Path(value.parent) / Path(os.readlink(value)) - size_bytes = path.stat().st_size - return size_bytes + path = concrete_value + if concrete_value.is_symlink(): + path = Path(concrete_value.parent) / Path(Path.readlink(concrete_value)) + return path.stat().st_size return sys.getsizeof(value) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 8628413e83c..deb83bd8179 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -111,7 +111,7 @@ async def result( exc_msg = f"{exception}" if exception is None: - _logger.warning("Was not expecting '%s': '%s'", exc_type, exc_msg) + _logger.warning("Was not expecting '%s': '%s'", exc_type, exc_msg) # type: ignore[unreachable] # NOTE: cannot transfer original exception since this will not be able to be 
serialized # outside of storage diff --git a/services/storage/src/simcore_service_storage/models.py b/services/storage/src/simcore_service_storage/models.py index 1e4166c89c9..53c9fe25168 100644 --- a/services/storage/src/simcore_service_storage/models.py +++ b/services/storage/src/simcore_service_storage/models.py @@ -48,8 +48,7 @@ ) -class DatasetMetaData(DatasetMetaDataGet): - ... +class DatasetMetaData(DatasetMetaDataGet): ... def is_uuid(value: str) -> bool: @@ -305,7 +304,7 @@ class FilePathParams(LocationPathParams): def unquote(cls, v: str) -> str: if v is not None: return urllib.parse.unquote(f"{v}") - return v + return v # type: ignore[unreachable] class FilePathIsUploadCompletedParams(FilePathParams): @@ -324,7 +323,7 @@ class CopyAsSoftLinkParams(BaseModel): def unquote(cls, v: str) -> str: if v is not None: return urllib.parse.unquote(f"{v}") - return v + return v # type: ignore[unreachable] class UserOrProjectFilter(NamedTuple): @@ -395,9 +394,11 @@ def from_s3_object_in_dir( node_id=dir_fmd.node_id, created_at=dir_fmd.created_at, last_modified=dir_fmd.last_modified, - file_meta_data=None - if isinstance(s3_object, S3DirectoryMetaData) - else FileMetaData.from_s3_object_in_dir(s3_object, dir_fmd), + file_meta_data=( + None + if isinstance(s3_object, S3DirectoryMetaData) + else FileMetaData.from_s3_object_in_dir(s3_object, dir_fmd) + ), ) def to_api_model(self) -> PathMetaDataGet: diff --git a/services/storage/src/simcore_service_storage/modules/datcore_adapter/datcore_adapter_client_utils.py b/services/storage/src/simcore_service_storage/modules/datcore_adapter/datcore_adapter_client_utils.py index 3972d07d72d..7423b3f4708 100644 --- a/services/storage/src/simcore_service_storage/modules/datcore_adapter/datcore_adapter_client_utils.py +++ b/services/storage/src/simcore_service_storage/modules/datcore_adapter/datcore_adapter_client_utils.py @@ -33,8 +33,6 @@ async def request( session = get_client_session(app) try: - if request_kwargs is None: - request_kwargs = {} response = await session.request( method.upper(), url, diff --git a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py index 67106abddcc..bd743299abf 100644 --- a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py +++ b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py @@ -145,9 +145,9 @@ async def replace_current_owner( ) return - # the result might me none + # the result might me none (really? 
that is not what the function is supposed to do) if new_project_owner_id is None: - _logger.warning( + _logger.warning( # type: ignore[unreachable] "Could not recover a new user id from gid %s", new_project_owner_gid ) return diff --git a/services/web/server/src/simcore_service_webserver/groups/_groups_repository.py b/services/web/server/src/simcore_service_webserver/groups/_groups_repository.py index 83740fce392..1f081424f58 100644 --- a/services/web/server/src/simcore_service_webserver/groups/_groups_repository.py +++ b/services/web/server/src/simcore_service_webserver/groups/_groups_repository.py @@ -312,7 +312,6 @@ async def create_standard_group( user_id: UserID, create: StandardGroupCreate, ) -> tuple[Group, AccessRightsDict]: - async with transaction_context(get_asyncpg_engine(app), connection) as conn: user = await conn.scalar( sa.select( @@ -358,7 +357,6 @@ async def update_standard_group( group_id: GroupID, update: StandardGroupUpdate, ) -> tuple[Group, AccessRightsDict]: - values = update.model_dump(mode="json", exclude_unset=True) async with transaction_context(get_asyncpg_engine(app), connection) as conn: @@ -560,12 +558,7 @@ async def update_user_in_group( the_user_id_in_group: UserID, access_rights: AccessRightsDict, ) -> GroupMember: - if not access_rights: - msg = f"Cannot update empty {access_rights}" - raise ValueError(msg) - async with transaction_context(get_asyncpg_engine(app), connection) as conn: - # first check if the group exists await _get_group_and_access_rights_or_raise( conn, caller_id=caller_id, group_id=group_id, permission="write" @@ -715,7 +708,6 @@ async def auto_add_user_to_groups( *, user: dict, ) -> None: - user_id: UserID = user["id"] # auto add user to the groups with the right rules diff --git a/services/web/server/src/simcore_service_webserver/projects/_projects_repository_legacy.py b/services/web/server/src/simcore_service_webserver/projects/_projects_repository_legacy.py index f9a2340db76..527a1cc3480 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_projects_repository_legacy.py +++ b/services/web/server/src/simcore_service_webserver/projects/_projects_repository_legacy.py @@ -124,11 +124,11 @@ class ProjectDBAPI(BaseProjectDB): def __init__(self, app: web.Application) -> None: self._app = app - self._engine = cast(Engine, app.get(APP_AIOPG_ENGINE_KEY)) + self._engine = cast(Engine | None, app.get(APP_AIOPG_ENGINE_KEY)) def _init_engine(self) -> None: # Delays creation of engine because it setup_db does it on_startup - self._engine = cast(Engine, self._app.get(APP_AIOPG_ENGINE_KEY)) + self._engine = cast(Engine | None, self._app.get(APP_AIOPG_ENGINE_KEY)) if self._engine is None: msg = "Database subsystem was not initialized" raise ValueError(msg) @@ -435,7 +435,6 @@ def _create_shared_workspace_query( is_search_by_multi_columns: bool, user_groups: list[GroupID], ) -> sql.Select | None: - if workspace_query.workspace_scope is not WorkspaceScope.PRIVATE: assert workspace_query.workspace_scope in ( # nosec WorkspaceScope.SHARED, diff --git a/services/web/server/src/simcore_service_webserver/resource_manager/registry.py b/services/web/server/src/simcore_service_webserver/resource_manager/registry.py index 8ebd3b57ab6..b49b07f9bde 100644 --- a/services/web/server/src/simcore_service_webserver/resource_manager/registry.py +++ b/services/web/server/src/simcore_service_webserver/resource_manager/registry.py @@ -130,9 +130,6 @@ async def find_resources( return resources async def find_keys(self, resource: tuple[str, str]) -> 
list[UserSessionDict]: - if not resource: - return [] - field, value = resource return [ self._decode_hash_key(hash_key) diff --git a/services/web/server/src/simcore_service_webserver/resource_usage/_pricing_plans_admin_service.py b/services/web/server/src/simcore_service_webserver/resource_usage/_pricing_plans_admin_service.py index 60c62f89aaf..d13667f4787 100644 --- a/services/web/server/src/simcore_service_webserver/resource_usage/_pricing_plans_admin_service.py +++ b/services/web/server/src/simcore_service_webserver/resource_usage/_pricing_plans_admin_service.py @@ -135,7 +135,7 @@ def _validate_pricing_unit(classification: PricingPlanClassification, unit_extra msg = "Expected UnitExtraInfoTier (CPU, RAM, VRAM) for TIER classification" raise ValueError(msg) else: - msg = "Not known pricing plan classification" + msg = "Not known pricing plan classification" # type: ignore[unreachable] raise ValueError(msg) diff --git a/services/web/server/src/simcore_service_webserver/security/_authz_policy.py b/services/web/server/src/simcore_service_webserver/security/_authz_policy.py index 0df137dece4..ce63ac99a26 100644 --- a/services/web/server/src/simcore_service_webserver/security/_authz_policy.py +++ b/services/web/server/src/simcore_service_webserver/security/_authz_policy.py @@ -131,8 +131,8 @@ async def authorized_userid(self, identity: IdentityStr) -> int | None: async def permits( self, - identity: IdentityStr, - permission: str, + identity: IdentityStr | None, + permission: str | None, context: OptionalContext = None, ) -> bool: """Implements Interface: Determines whether an identified user has permission
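
# --- Editor's note (illustrative sketch, not part of the patch) ---------------
# Two patterns recur in the autoscaling hunks above: dict comprehensions of the
# form `{k: "true" for k in keys}` are replaced by `dict.fromkeys(keys, "true")`,
# and the buffer-tag helpers become pure functions of `base_ec2_tags` instead of
# taking `(app, auto_scaling_mode)`, with callers passing
# `auto_scaling_mode.get_ec2_tags(app)` explicitly. The snippet below is a
# minimal, self-contained approximation of that new shape; the tag keys/values
# here are hypothetical, and the real code uses EC2Tags/AWSTagKey/AWSTagValue
# from aws_library rather than plain dict[str, str].

ACTIVATED_TAGS = {"io.simcore.autoscaling.buffer-machine": "false"}   # hypothetical
DEACTIVATED_TAGS = {"io.simcore.autoscaling.buffer-machine": "true"}  # hypothetical


def get_activated_buffer_ec2_tags(base_ec2_tags: dict[str, str]) -> dict[str, str]:
    # pure function: merge the caller-provided base tags with the "activated" markers
    return base_ec2_tags | ACTIVATED_TAGS


def get_deactivated_buffer_ec2_tags(base_ec2_tags: dict[str, str]) -> dict[str, str]:
    # deactivated (warm) buffer machines additionally get a "-buffer" suffix on their Name tag
    new_tags = base_ec2_tags | DEACTIVATED_TAGS
    new_tags["Name"] = f"{new_tags['Name']}-buffer"
    return new_tags


if __name__ == "__main__":
    # dict.fromkeys(keys, value) marks every monitored label as "true" in one call
    node_labels = ["io.simcore.monitored", "io.simcore.new-node"]  # hypothetical labels
    docker_tags = dict.fromkeys(node_labels, "true")
    assert docker_tags == {"io.simcore.monitored": "true", "io.simcore.new-node": "true"}

    base = {"Name": "autoscaled-worker"}  # stands in for auto_scaling_mode.get_ec2_tags(app)
    assert get_deactivated_buffer_ec2_tags(base)["Name"] == "autoscaled-worker-buffer"
# ------------------------------------------------------------------------------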