Skip to content

Commit 5f46db1

Browse files
Merge origin/master into feat/omni-source
- Resolve setup.py extras conflict: keep omni extra; glue includes sqlglot_lib (master) - Add [project.optional-dependencies] omni group to pyproject.toml (pip install .[omni]) Made-with: Cursor
2 parents 0098d6b + 89abbad commit 5f46db1

File tree

2,414 files changed

+87745
-116423
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,414 files changed

+87745
-116423
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
*.tsx text eol=lf
44
gradlew text eol=lf
55
metadata-utils/src/test/resources/filterQuery/* text eol=lf
6+

.github/actions/ci-optimization/action.yml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ outputs:
1111
backend-only:
1212
description: "Backend only change"
1313
value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'true' }}
14+
connector-source-only:
15+
description: "Only connector source/test files changed (no ingestion core, no backend, no frontend)"
16+
value: ${{ steps.filter.outputs.ingestion == 'true' && steps.filter.outputs.ingestion-core == 'false' && steps.filter.outputs.backend == 'false' && steps.filter.outputs.frontend == 'false' }}
1417
backend-change:
1518
description: "Backend code has changed"
1619
value: ${{ steps.filter.outputs.backend == 'true' || steps.trigger.outputs.trigger == 'manual' }}
@@ -51,7 +54,7 @@ runs:
5154
else
5255
echo "trigger=pr" >> $GITHUB_OUTPUT
5356
fi
54-
- uses: dorny/paths-filter@v3
57+
- uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3
5558
id: filter
5659
with:
5760
token: "" # Empty token forces it to use raw git commands.
@@ -66,6 +69,27 @@ runs:
6669
- "metadata-models/**"
6770
- "docker/datahub-ingestion-base/**"
6871
- "docker/datahub-ingestion/**"
72+
ingestion-core:
73+
- "metadata-ingestion/setup.py"
74+
- "metadata-ingestion/pyproject.toml"
75+
- "metadata-ingestion/constraints.txt"
76+
- "metadata-ingestion/src/datahub/api/**"
77+
- "metadata-ingestion/src/datahub/cli/**"
78+
- "metadata-ingestion/src/datahub/configuration/**"
79+
- "metadata-ingestion/src/datahub/emitter/**"
80+
- "metadata-ingestion/src/datahub/ingestion/api/**"
81+
- "metadata-ingestion/src/datahub/ingestion/graph/**"
82+
- "metadata-ingestion/src/datahub/ingestion/run/**"
83+
- "metadata-ingestion/src/datahub/ingestion/sink/**"
84+
- "metadata-ingestion/src/datahub/metadata/**"
85+
- "metadata-ingestion/src/datahub/sdk/**"
86+
- "metadata-ingestion/src/datahub/sql_parsing/**"
87+
- "metadata-ingestion/src/datahub/telemetry/**"
88+
- "metadata-ingestion/src/datahub/utilities/**"
89+
- "metadata-ingestion-modules/**"
90+
- "metadata-models/**"
91+
- "docker/datahub-ingestion-base/**"
92+
- "docker/datahub-ingestion/**"
6993
ingestion-base:
7094
- "docker/datahub-ingestion-base/**"
7195
docker:
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
name: Determine Runners
2+
description: "Centralized steps to determine which runners are available"
3+
# Refer: https://depot.dev/docs/github-actions/runner-types and https://docs.github.com/en/actions/reference/runners/github-hosted-runners for details of various available runners.
4+
5+
# Prefer pinning the runner to specific OS version to avoid surprise breakages due to new OS release
6+
7+
# 1. Running on a PR from a fork (community contribution). We use GitHub runners, unless the "depot" label exists -- in which case, we run it on Depot.
8+
# Note: Concurrency is lower when using GitHub runners, so queue times can be longer and test time is longer due to fewer parallel jobs.
9+
# 2. Running on a PR from a branch in the datahub-project org and push/schedule events on master.
10+
# Depot is used here for remote container builds in base_build and also for all runners. Depot runners support unlimited concurrency
11+
# and hence short queue times and higher parallelism of smoke tests
12+
13+
inputs:
14+
is-community-pr:
15+
description: "Whether this is a PR from a fork repository"
16+
required: false
17+
default: "true"
18+
has-depot-label:
19+
description: "Whether the PR has the 'depot' label (allows Community PRs to opt-in to Depot runners)"
20+
required: false
21+
default: "false"
22+
23+
outputs:
24+
depot-config-set:
25+
description: "true if required depot vars are set"
26+
value: ${{ steps.depot-config-set.outputs.value }}
27+
default-runner:
28+
description: "Default runner commonly used for regular jobs. Appropriately set if Depot runners are available"
29+
value: ${{ steps.default-runner.outputs.value }}
30+
default-gh-runner:
31+
description: "The runner to use for most regular jobs to be run on Github Hosted Runners"
32+
value: ubuntu-24.04
33+
default-depot-runner:
34+
description: "The runner to use for most regular jobs to be run on Depot Runners. Use when you always want to run a job on Depot runners"
35+
value: "depot-ubuntu-24.04"
36+
small-runner:
37+
description: "Appropriately set small runner based on Depot availability"
38+
value: ${{ steps.small-runner.outputs.value }}
39+
depot-runner-medium:
40+
description: "depot-ubuntu-24.04-2 Runner. Use for medium jobs"
41+
value: "depot-ubuntu-24.04-2"
42+
depot-runner-large:
43+
description: "depot-ubuntu-24.04-4 Runner. Use for heavy jobs"
44+
value: "depot-ubuntu-24.04-4"
45+
depot-label-runner:
46+
description: "Set to depot-runner if depot label is added to Community PR"
47+
value: ${{ steps.depot-label-runner.outputs.value }} # regular-size runner from the "Set depot-label-runner" step, not the -small variant
48+
depot-label-small-runner:
49+
description: "Set to depot-runner if depot label is added to Community PR"
50+
value: ${{ steps.depot-label-small-runner.outputs.value }}
51+
52+
runs:
53+
using: "composite"
54+
steps:
55+
- name: Check Depot Configuration is set
56+
id: depot-config-set
57+
shell: bash
58+
run: |
59+
if [[ -n "$DEPOT_PROJECT_ID" ]]; then
60+
echo "value=true" >> "$GITHUB_OUTPUT"
61+
else
62+
echo "value=false" >> "$GITHUB_OUTPUT"
63+
fi
64+
65+
- name: Set default-runner
66+
id: default-runner
67+
shell: bash
68+
env:
69+
IS_COMMUNITY_PR: ${{ inputs.is-community-pr }}
70+
DEPOT_CONFIG_SET: ${{ steps.depot-config-set.outputs.value }}
71+
# Use depot for internal branches and push/schedule events when Depot is configured; fall back to GitHub-hosted for fork PRs or when Depot is not configured.
72+
run: |
73+
if [[ "$IS_COMMUNITY_PR" == "false" && "$DEPOT_CONFIG_SET" == "true" ]]; then
74+
echo "value=depot-ubuntu-24.04" >> "$GITHUB_OUTPUT"
75+
else
76+
echo "value=ubuntu-24.04" >> "$GITHUB_OUTPUT"
77+
fi
78+
79+
- name: Set small-runner
80+
id: small-runner
81+
shell: bash
82+
env:
83+
IS_COMMUNITY_PR: ${{ inputs.is-community-pr }}
84+
DEPOT_CONFIG_SET: ${{ steps.depot-config-set.outputs.value }}
85+
# depot-ubuntu-24.04-small for internal runs when Depot is configured; ubuntu-24.04 otherwise (e.g. forks).
86+
run: |
87+
if [[ "$IS_COMMUNITY_PR" == "false" && "$DEPOT_CONFIG_SET" == "true" ]]; then
88+
echo "value=depot-ubuntu-24.04-small" >> "$GITHUB_OUTPUT"
89+
else
90+
echo "value=ubuntu-24.04" >> "$GITHUB_OUTPUT"
91+
fi
92+
93+
- name: Set depot-label-small-runner
94+
id: depot-label-small-runner
95+
env:
96+
HAS_DEPOT_LABEL: ${{ inputs.has-depot-label }}
97+
shell: bash
98+
# Community (fork) PRs can opt-in to Depot runners by adding the 'depot' label.
99+
run: |
100+
if [[ "$HAS_DEPOT_LABEL" == "true" ]]; then
101+
echo "value=depot-ubuntu-24.04-small" >> "$GITHUB_OUTPUT"
102+
else
103+
echo "value=ubuntu-24.04" >> "$GITHUB_OUTPUT"
104+
fi
105+
- name: Set depot-label-runner
106+
id: depot-label-runner
107+
shell: bash
108+
env:
109+
HAS_DEPOT_LABEL: ${{ inputs.has-depot-label }}
110+
# Community (fork) PRs can opt-in to Depot runners by adding the 'depot' label.
111+
run: |
112+
if [[ "$HAS_DEPOT_LABEL" == "true" ]]; then
113+
echo "value=depot-ubuntu-24.04" >> "$GITHUB_OUTPUT"
114+
else
115+
echo "value=ubuntu-24.04" >> "$GITHUB_OUTPUT"
116+
fi

.github/actions/docker-custom-build-and-push/action.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ runs:
5353
steps:
5454
- name: Docker meta
5555
id: docker_meta
56-
uses: docker/metadata-action@v5
56+
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5
5757
with:
5858
images: ${{ inputs.images }}
5959
flavor: |
@@ -78,7 +78,7 @@ runs:
7878
7979
# Code for testing the build when not pushing to Docker Hub.
8080
- name: Build and Load image for testing (if not publishing)
81-
uses: docker/build-push-action@v6
81+
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
8282
if: ${{ inputs.publish != 'true' }}
8383
with:
8484
context: ${{ inputs.context }}
@@ -97,32 +97,32 @@ runs:
9797
cache-to: |
9898
type=inline
9999
- name: Upload image locally for testing (if not publishing)
100-
uses: ishworkh/container-image-artifact-upload@v2.0.0
100+
uses: ishworkh/container-image-artifact-upload@5d71a2417f0576fa11fe770fb04ece58c4587714 # v2.0.0
101101
if: ${{ inputs.publish != 'true' }}
102102
with:
103103
image: ${{ steps.single_tag.outputs.SINGLE_TAG }}
104104
retention_days: "2"
105105

106106
# Code for building multi-platform images and pushing to Docker Hub.
107107
- name: Set up QEMU
108-
uses: docker/setup-qemu-action@v3
108+
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3
109109
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
110110
- name: Set up Docker Buildx
111-
uses: docker/setup-buildx-action@v3
111+
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
112112
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
113113
- name: Setup Depot CLI
114-
uses: depot/setup-action@v1
114+
uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1
115115
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
116116
- name: Login to DockerHub
117-
uses: docker/login-action@v3
117+
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
118118
if: ${{ inputs.publish == 'true' }}
119119
with:
120120
username: ${{ inputs.username }}
121121
password: ${{ inputs.password }}
122122

123123
# Depot variant.
124124
- name: Build and Push Multi-Platform image
125-
uses: depot/build-push-action@v1
125+
uses: depot/build-push-action@5f3b3c2e5a00f0093de47f657aeaefcedff27d18 # v1
126126
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
127127
with:
128128
project: ${{ inputs.depot-project }}
@@ -140,7 +140,7 @@ runs:
140140
type=inline
141141
142142
- name: Build and Push Multi-Platform image
143-
uses: docker/build-push-action@v6
143+
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
144144
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
145145
with:
146146
context: ${{ inputs.context }}

.github/actions/report-test-results/action.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ runs:
1818
steps:
1919
- name: Upload test results
2020
if: (!cancelled())
21-
uses: actions/upload-artifact@v4
21+
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
2222
with:
2323
name: ${{ inputs.artifact-name }}
2424
path: ${{ inputs.test-results-paths }}
2525
retention-days: 7
2626

2727
- name: Publish test results
2828
if: (!cancelled())
29-
uses: test-summary/action@v2
29+
uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2
3030
with:
3131
paths: ${{ inputs.junit-file-globs }}
3232
show: fail
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
lint-job:
2+
- modified: ".github/workflows/lint-jobs.yml"
3+
- ".github/path-filters/lint-job-filter.yml"
4+
markdown:
5+
- added|modified: "**/*.md"
6+
github-actions:
7+
- added|modified: ".github/**/*.yml"
8+
- added|modified: ".github/**/*.yaml"
9+
workflow-files:
10+
- added|modified: ".github/workflows/**"
11+
workflow-validation:
12+
- ".github/workflows/**"
13+
- ".github/scripts/validate_post_workflow_list.py"
14+
code-check-sources:
15+
- "metadata-io/**"
16+
- "datahub-web-react/**"
17+
- "metadata-service/war/src/main/resources/boot/policies.json"
18+
- "**/build.gradle"
19+
- "**/build.gradle.kts"
20+
- "**/gradle.lockfile"
21+
- ".github/scripts/check_*.py"
22+
smoke-test-python:
23+
- added|modified: "smoke-test/**/*.py"
24+
- "smoke-test/pyproject.toml"
25+
- "smoke-test/requirements.txt"
26+
- "smoke-test/build.gradle"
27+
smoke-test-cypress:
28+
- "smoke-test/tests/cypress/**"
29+
datahub-web-react:
30+
- added|modified: "datahub-web-react/**"
31+
- "!datahub-web-react/**/*.md"

.github/pull_request_template.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Before you submit your PR, please go through the checklist below:
1111
- [ ] For any breaking change/potential downtime/deprecation/big changes an entry has been made in [Updating DataHub](https://github.com/datahub-project/datahub/blob/master/docs/how/updating-datahub.md)
1212
1313
14-
Allowed Types in PR Title: _feat_, _fix_, _refactor_, _docs_, _test_, _perf_, _style_, _build_, _ci_
14+
Allowed Types in PR Title: _feat_, _fix_, _refactor_, _docs_, _test_, _perf_, _style_, _build_, _ci_, _chore_
1515
1616
1717
-->

0 commit comments

Comments
 (0)