diff --git a/.github/workflows/downstreams.yml b/.github/workflows/downstreams.yml deleted file mode 100644 index 6f57457cd..000000000 --- a/.github/workflows/downstreams.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: downstreams - -on: - pull_request: - types: [opened, synchronize] - merge_group: - types: [checks_requested] - push: - # Always run on push to main. The build cache can only be reused - # if it was saved by a run from the repository's default branch. - # The run result will be identical to that from the merge queue - # because the commit is identical, yet we need to perform it to - # seed the build cache. - branches: - - main - -permissions: - id-token: write - contents: read - pull-requests: write - -jobs: - compatibility: - strategy: - fail-fast: false - matrix: - downstream: - - name: ucx - org: databrickslabs - - name: blueprint - org: databrickslabs - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Install Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install toolchain - run: | - pip install hatch==1.9.4 - - - name: Acceptance - uses: databrickslabs/sandbox/downstreams@downstreams/v0.0.1 - with: - repo: ${{ matrix.downstream.name }} - org: ${{ matrix.downstream.org }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/external-message.yml b/.github/workflows/external-message.yml deleted file mode 100644 index 6771057c7..000000000 --- a/.github/workflows/external-message.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: PR Comment - -# WARNING: -# THIS WORKFLOW ALWAYS RUNS FOR EXTERNAL CONTRIBUTORS WITHOUT ANY APPROVAL. -# THIS WORKFLOW RUNS FROM MAIN BRANCH, NOT FROM THE PR BRANCH. -# DO NOT PULL THE PR OR EXECUTE ANY CODE FROM THE PR. - -on: - pull_request_target: - types: [opened, reopened, synchronize] - branches: - - main - -jobs: - comment-on-pr: - runs-on: - group: databricks-deco-testing-runner-group - labels: ubuntu-latest-deco - - permissions: - pull-requests: write - - steps: - - uses: actions/checkout@v4 - - - name: Delete old comments - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - # Delete previous comment if it exists - previous_comment_ids=$(gh api "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \ - --jq '.[] | select(.body | startswith("")) | .id') - echo "Previous comment IDs: $previous_comment_ids" - # Iterate over each comment ID and delete the comment - if [ ! -z "$previous_comment_ids" ]; then - echo "$previous_comment_ids" | while read -r comment_id; do - echo "Deleting comment with ID: $comment_id" - gh api "repos/${{ github.repository }}/issues/comments/$comment_id" -X DELETE - done - fi - - - name: Comment on PR - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMMIT_SHA: ${{ github.event.pull_request.head.sha }} - run: | - gh pr comment ${{ github.event.pull_request.number }} --body \ - " - If integration tests don't run automatically, an authorized user can run them manually by following the instructions below: - - Trigger: - [go/deco-tests-run/sdk-py](https://go/deco-tests-run/sdk-py) - - Inputs: - * PR number: ${{github.event.pull_request.number}} - * Commit SHA: \`${{ env.COMMIT_SHA }}\` - - Checks will be approved automatically on success. - " diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml deleted file mode 100644 index c308cc03c..000000000 --- a/.github/workflows/integration-tests.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: Integration Tests - -on: - - pull_request: - types: [opened, synchronize] - - merge_group: - - -jobs: - check-token: - name: Check secrets access - - runs-on: - group: databricks-deco-testing-runner-group - labels: ubuntu-latest-deco - - environment: "test-trigger-is" - outputs: - has_token: ${{ steps.set-token-status.outputs.has_token }} - steps: - - name: Check if DECO_WORKFLOW_TRIGGER_APP_ID is set - id: set-token-status - run: | - if [ -z "${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }}" ]; then - echo "DECO_WORKFLOW_TRIGGER_APP_ID is empty. User has no access to secrets." - echo "::set-output name=has_token::false" - else - echo "DECO_WORKFLOW_TRIGGER_APP_ID is set. User has access to secrets." - echo "::set-output name=has_token::true" - fi - - trigger-tests: - name: Trigger Tests - - runs-on: - group: databricks-deco-testing-runner-group - labels: ubuntu-latest-deco - - needs: check-token - if: github.event_name == 'pull_request' && needs.check-token.outputs.has_token == 'true' - environment: "test-trigger-is" - - steps: - - uses: actions/checkout@v3 - - - name: Generate GitHub App Token - id: generate-token - uses: actions/create-github-app-token@v1 - with: - app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} - private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} - owner: ${{ secrets.ORG_NAME }} - repositories: ${{secrets.REPO_NAME}} - - - name: Trigger Workflow in Another Repo - env: - GH_TOKEN: ${{ steps.generate-token.outputs.token }} - run: | - gh workflow run sdk-py-isolated-pr.yml -R ${{ secrets.ORG_NAME }}/${{secrets.REPO_NAME}} \ - --ref main \ - -f pull_request_number=${{ github.event.pull_request.number }} \ - -f commit_sha=${{ github.event.pull_request.head.sha }} - - # Statuses and checks apply to specific commits (by hash). - # Enforcement of required checks is done both at the PR level and the merge queue level. - # In case of multiple commits in a single PR, the hash of the squashed commit - # will not match the one for the latest (approved) commit in the PR. - # We auto approve the check for the merge queue for two reasons: - # * Queue times out due to duration of tests. - # * Avoid running integration tests twice, since it was already run at the tip of the branch before squashing. - auto-approve: - if: github.event_name == 'merge_group' - - runs-on: - group: databricks-deco-testing-runner-group - labels: ubuntu-latest-deco - - steps: - - name: Mark Check - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash - run: | - gh api -X POST -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/statuses/${{ github.sha }} \ - -f 'state=success' \ - -f 'context=Integration Tests Check' diff --git a/.github/workflows/message.yml b/.github/workflows/message.yml deleted file mode 100644 index 057556895..000000000 --- a/.github/workflows/message.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Validate Commit Message - -on: - pull_request: - types: [opened, synchronize, edited] - merge_group: - types: [checks_requested] - -jobs: - validate: - runs-on: ubuntu-latest - # GitHub required checks are shared between PRs and the Merge Queue. - # Since there is no PR title on Merge Queue, we need to trigger and - # skip this test for Merge Queue to succeed. - if: github.event_name == 'pull_request' - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Validate Tag - env: - TITLE: ${{ github.event.pull_request.title }} - run: | - TAG=$(echo "$TITLE" | sed -ne 's/\[\(.*\)\].*/\1/p') - if grep -q "tag: \"\[$TAG\]\"" .codegen/changelog_config.yml; then - echo "Valid tag found: [$TAG]" - else - echo "Invalid or missing tag in commit message: [$TAG]" - exit 1 - fi \ No newline at end of file diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 70f094c18..bef41718f 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -2,40 +2,30 @@ name: build on: pull_request: - types: [opened, synchronize] + types: [ opened, synchronize ] merge_group: - types: [checks_requested] + types: [ checks_requested ] + push: + branches: + - main jobs: - tests-ubuntu: - uses: ./.github/workflows/test.yml + tests: strategy: fail-fast: false matrix: - pyVersion: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] - with: - os: ubuntu-latest - pyVersion: ${{ matrix.pyVersion }} - - tests-windows: - uses: ./.github/workflows/test.yml - strategy: - fail-fast: false - matrix: - pyVersion: [ '3.9', '3.10', '3.11', '3.12' ] - with: - os: windows-latest - pyVersion: ${{ matrix.pyVersion }} - - fmt: - runs-on: ubuntu-latest - + pyVersion: [ '3.7', '3.8', '3.9', '3.10', '3.11', '3.12' ] + runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 + + - name: Unshallow + run: git fetch --prune --unshallow - - name: Format all files - run: make dev fmt + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.pyVersion }} - - name: Fail on differences - run: git diff --exit-code + - name: Run tests + run: make dev install test diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 32890bde6..892bbc5c6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,16 +7,11 @@ on: jobs: publish: - runs-on: - group: databricks-deco-testing-runner-group - labels: ubuntu-latest-deco - + runs-on: ubuntu-latest environment: release - permissions: contents: write id-token: write - steps: - uses: actions/checkout@v3 @@ -31,7 +26,4 @@ jobs: draft: true files: | dist/databricks-*.whl - dist/databricks-*.tar.gz - - - uses: pypa/gh-action-pypi-publish@release/v1 - name: Publish package distributions to PyPI + dist/databricks-*.tar.gz \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bb86e38a3..7e0db7234 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,8 +28,3 @@ jobs: - name: Run tests run: make dev install test - - - name: Publish test coverage - uses: codecov/codecov-action@v4 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index 9991c9cd0..f17435aef 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,18 @@ +# INFO + +This repo is Sync's fork of https://github.com/databricks/databricks-sdk-py/. It allows Sync to pull in commits / feature or create contributions to upstream that enhance both Sync's product offering and the Databricks ecosystem. + +This repo is *public*. + +To release this repo: + +1. Locally (on your terminal) tag the commit you want to release with a version for example: git tag v0.0.29-sync.0 +2. Push this tag git push origin v0.0.29-sync.0 +3. Pushing the tag triggers an automated github action that looks for tags that start with v: https://github.com/synccomputingcode/databricks-sdk-py/blob/main/.github/workflows/release.yml#L6 +4. The actions run in github and create a draft release, with the release artifacts tied to it. https://github.com/synccomputingcode/databricks-sdk-py/releases/tag/untagged-327af053f51d1f4da444 +5. To make the release "real", edit it and publish it +6. Then bump this the dependency inside of sync's codebase + # Databricks SDK for Python (Beta) [![PyPI - Downloads](https://img.shields.io/pypi/dw/databricks-sdk)](https://pypistats.org/packages/databricks-sdk) diff --git a/databricks/sdk/service/compute.py b/databricks/sdk/service/compute.py index c16f699bb..d4596e63e 100755 --- a/databricks/sdk/service/compute.py +++ b/databricks/sdk/service/compute.py @@ -1019,12 +1019,24 @@ class ClusterDetails: - Name: """ + disk_spec: Optional[dict] = None + """[PROD-2198] An APC attribute only. This field is missing in the API docs and the unforked databricks + sdk so it needed to be added here""" + docker_image: Optional[DockerImage] = None driver: Optional[SparkNode] = None """Node on which the Spark driver resides. The driver node contains the Spark master and the Databricks application that manages the per-notebook Spark REPLs.""" + driver_healthy: Optional[bool] = None + """[PROD-2198] An APC attribute only. This field is missing in the API docs and the unforked databricks + sdk so it needed to be added here""" + + driver_instance_source: Optional[dict] = None + """[PROD-2198] An APC attribute only. This field is missing in the API docs and the unforked databricks + sdk so it needed to be added here""" + driver_instance_pool_id: Optional[str] = None """The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned.""" @@ -1033,6 +1045,10 @@ class ClusterDetails: """The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above.""" + effective_spark_version: Optional[str] = None + """[PROD-2198] An APC attribute only. This field is missing in the API docs and the unforked databricks + sdk so it needed to be added here""" + enable_elastic_disk: Optional[bool] = None """Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS @@ -1053,9 +1069,17 @@ class ClusterDetails: scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`.""" + init_scripts_safe_mode: Optional[int] = None + """[PROD-2198] An APC attribute only. This field is missing in the API docs and the unforked databricks + sdk so it needed to be added here""" + instance_pool_id: Optional[str] = None """The optional ID of the instance pool to which the cluster belongs.""" + instance_source: Optional[dict] = None + """[PROD-2198] An APC attribute only. This field is missing in the API docs and the unforked databricks + sdk so it needed to be added here""" + is_single_node: Optional[bool] = None """This field can only be used with `kind`. @@ -1074,6 +1098,10 @@ class ClusterDetails: The first usage of this value is for the simple cluster form where it sets `kind = CLASSIC_PREVIEW`.""" + last_activity_time: Optional[int] = None + """[PROD-2198] An APC attribute only. This field is missing in the API docs and the unforked databricks + sdk so it needed to be added here""" + last_restarted_time: Optional[int] = None """the timestamp that the cluster was started/restarted""" @@ -7550,6 +7578,7 @@ class TerminationReasonCode(Enum): AWS_INSUFFICIENT_INSTANCE_CAPACITY_FAILURE = 'AWS_INSUFFICIENT_INSTANCE_CAPACITY_FAILURE' AWS_MAX_SPOT_INSTANCE_COUNT_EXCEEDED_FAILURE = 'AWS_MAX_SPOT_INSTANCE_COUNT_EXCEEDED_FAILURE' AWS_REQUEST_LIMIT_EXCEEDED = 'AWS_REQUEST_LIMIT_EXCEEDED' + AWS_RESOURCE_QUOTA_EXCEEDED = 'AWS_RESOURCE_QUOTA_EXCEEDED' AWS_UNSUPPORTED_FAILURE = 'AWS_UNSUPPORTED_FAILURE' AZURE_BYOK_KEY_PERMISSION_FAILURE = 'AZURE_BYOK_KEY_PERMISSION_FAILURE' AZURE_EPHEMERAL_DISK_FAILURE = 'AZURE_EPHEMERAL_DISK_FAILURE' @@ -7587,6 +7616,8 @@ class TerminationReasonCode(Enum): INSTANCE_UNREACHABLE = 'INSTANCE_UNREACHABLE' INTERNAL_ERROR = 'INTERNAL_ERROR' INVALID_ARGUMENT = 'INVALID_ARGUMENT' + # [PROD-2800] Add missing termination reason code + INVALID_INSTANCE_PLACEMENT_PROTOCOL = 'INVALID_INSTANCE_PLACEMENT_PROTOCOL' INVALID_SPARK_IMAGE = 'INVALID_SPARK_IMAGE' IP_EXHAUSTION_FAILURE = 'IP_EXHAUSTION_FAILURE' JOB_FINISHED = 'JOB_FINISHED' diff --git a/databricks/sdk/service/jobs.py b/databricks/sdk/service/jobs.py index 6cc2e4213..c0d4240bf 100755 --- a/databricks/sdk/service/jobs.py +++ b/databricks/sdk/service/jobs.py @@ -1640,6 +1640,13 @@ class GitProvider(Enum): GIT_LAB = 'gitLab' GIT_LAB_ENTERPRISE_EDITION = 'gitLabEnterpriseEdition' + # [PROD-2302] The API treats this enum as case insensitive and the strictness here was causing failures + @classmethod + def _missing_(cls, value): + for member in cls: + if member.value.lower() == value.lower(): + return member + @dataclass class GitSnapshot: @@ -4871,7 +4878,8 @@ def from_dict(cls, d: Dict[str, any]) -> RunStatus: class RunTask: """Used when outputting a child run, in GetRun or ListRuns.""" - task_key: str + # [PROD-2198] adding the default here is necessary to process legacy cluster reports + task_key: str = None """A unique name for the task. This field is used to refer to this task from other tasks. This field is required and must be unique within its parent job. On Update or Reset, this field is used to reference the tasks to be updated or reset.""" @@ -6904,6 +6912,8 @@ class TriggerType(Enum): RETRY = 'RETRY' RUN_JOB_TASK = 'RUN_JOB_TASK' TABLE = 'TABLE' + # [PROD-2364] this trigger type is missing from api docs and sdk + CONTINUOUS = 'CONTINUOUS' @dataclass diff --git a/setup.py b/setup.py index b756e6d0d..bc7327e31 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with version_file.open('r') as f: exec(f.read(), version_data) -setup(name="databricks-sdk", +setup(name="sync-databricks-sdk", version=version_data['__version__'], packages=find_packages(exclude=["tests", "*tests.*", "*tests"]), package_data = {"databricks.sdk": ["py.typed"]}, @@ -21,9 +21,9 @@ 'langchain-openai; python_version > "3.7"', "httpx"], "notebook": ["ipython>=8,<9", "ipywidgets>=8,<9"], "openai": ["openai", 'langchain-openai; python_version > "3.7"', "httpx"]}, - author="Serge Smertin", - author_email="serge.smertin@databricks.com", - description="Databricks SDK for Python (Beta)", + author="Sync Computing", + author_email="info@synccomputing.com", + description="Sync Fork Databricks SDK for Python (Beta)", long_description=io.open("README.md", encoding="utf-8").read(), long_description_content_type='text/markdown', url="https://databricks-sdk-py.readthedocs.io",