Skip to content

Commit 2ed5914

Browse files
committed
Merge branch 'master' of github.com:elementary-data/elementary into core-113-remove-deprecated-dags
2 parents a03dc64 + ef50438 commit 2ed5914

File tree

249 files changed

+5705
-1296
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

249 files changed

+5705
-1296
lines changed

.github/workflows/stale.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ jobs:
1111
issues: write
1212
pull-requests: write
1313
steps:
14-
- uses: actions/stale@v5
14+
- uses: actions/stale@v10
1515
with:
16-
days-before-stale: 180
16+
days-before-stale: 60
17+
days-before-close: 30
1718
stale-issue-message: |
1819
This issue is stale because it has been open for too long with no activity.
1920
If you would like the issue to remain open, please remove the stale label or leave a comment.

.github/workflows/test-warehouse.yml

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,6 @@ on:
2626
type: string
2727
required: false
2828
description: dbt's version to test with
29-
should-run-tests:
30-
type: boolean
31-
required: false
32-
default: true
33-
description: Whether to run E2E tests
34-
clear-tests:
35-
type: boolean
36-
required: false
37-
default: true
38-
description: Whether to clean test environment
3929
generate-data:
4030
type: boolean
4131
required: false
@@ -47,10 +37,6 @@ on:
4737
warehouse-type:
4838
type: string
4939
required: true
50-
should-run-tests:
51-
type: boolean
52-
required: false
53-
default: true
5440
elementary-ref:
5541
type: string
5642
required: false
@@ -60,19 +46,16 @@ on:
6046
dbt-version:
6147
type: string
6248
required: false
63-
clear-tests:
64-
type: boolean
65-
required: false
66-
default: true
6749
generate-data:
6850
type: boolean
6951
required: false
7052
default: false
7153

7254
env:
7355
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
74-
DBT_PKG_INTEG_TESTS_DIR: ${{ github.workspace }}/dbt-data-reliability/integration_tests/deprecated_tests
75-
ELMENTARY_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project
56+
ELEMENTARY_DBT_PACKAGE_PATH: ${{ github.workspace }}/dbt-data-reliability
57+
CLI_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project
58+
E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary/tests/e2e_dbt_project
7659

7760
jobs:
7861
# PRs from forks require approval, specifically with the "pull_request_target" event as it contains repo secrets.
@@ -117,12 +100,12 @@ jobs:
117100

118101
- name: Start Postgres
119102
if: inputs.warehouse-type == 'postgres'
120-
working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }}
103+
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
121104
run: docker compose up -d postgres
122105

123106
# - name: Start Clickhouse
124107
# if: inputs.warehouse-type == 'clickhouse'
125-
# working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }}
108+
# working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
126109
# run: docker compose up -d clickhouse
127110

128111
- name: Setup Python
@@ -138,8 +121,7 @@ jobs:
138121
run: >
139122
pip install
140123
"dbt-core${{ inputs.dbt-version && format('=={0}', inputs.dbt-version) }}"
141-
# TODO: remove the <1.10.2 once we have a fix for https://github.com/elementary-data/elementary/issues/1931
142-
"dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks<1.10.2,') || inputs.warehouse-type }}${{ inputs.dbt-version && format('~={0}', inputs.dbt-version) }}"
124+
"dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}${{ inputs.dbt-version && format('~={0}', inputs.dbt-version) }}"
143125
144126
- name: Install Elementary
145127
run: |
@@ -170,12 +152,46 @@ jobs:
170152
rm -rf "$DBT_PKGS_PATH/elementary"
171153
ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary"
172154
173-
- name: Run dbt package integration tests
174-
if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests
175-
working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }}
155+
- name: Run deps for E2E dbt project
156+
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
157+
env:
158+
ELEMENTARY_DBT_PACKAGE_PATH: ${{ env.ELEMENTARY_DBT_PACKAGE_PATH }}
176159
run: |
177160
dbt deps
178-
python run_e2e_tests.py -t "${{ inputs.warehouse-type }}" -g "${{ inputs.warehouse-type == 'postgres' || inputs.generate-data }}" --clear-tests "${{ inputs.clear-tests }}"
161+
162+
- name: Seed e2e dbt project
163+
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
164+
if: inputs.warehouse-type == 'postgres' || inputs.generate-data
165+
run: |
166+
python generate_data.py
167+
dbt seed -f --target "${{ inputs.warehouse-type }}"
168+
169+
- name: Run e2e dbt project
170+
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
171+
run: |
172+
dbt run --target "${{ inputs.warehouse-type }}" || true
173+
174+
# Validate run_results.json: only error_model should be non-success
175+
jq -e '
176+
[.results[] | select(.status != "success") | .unique_id]
177+
| length == 1 and .[0] == "model.elementary_integration_tests.error_model"
178+
' target/run_results.json > /dev/null
179+
jq_exit=$?
180+
181+
if [ $jq_exit -eq 0 ]; then
182+
echo "✅ Validation passed: only error_model failed."
183+
else
184+
echo "❌ Validation failed. Unexpected failures:"
185+
jq '[.results[] | select(.status != "success") | .unique_id] | join(", ")' target/run_results.json
186+
fi
187+
188+
exit $jq_exit
189+
190+
- name: Test e2e dbt project
191+
working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}
192+
continue-on-error: true
193+
run: |
194+
dbt test --target "${{ inputs.warehouse-type }}"
179195
180196
- name: Run help
181197
run: edr --help
@@ -187,12 +203,12 @@ jobs:
187203
edr monitor
188204
-t "${{ inputs.warehouse-type }}"
189205
--group-by table
190-
--project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}"
206+
--project-dir "${{ env.E2E_DBT_PROJECT_DIR }}"
191207
--project-profile-target "${{ inputs.warehouse-type }}"
192208
--slack-webhook "$SLACK_WEBHOOK"
193209
194210
- name: Validate alerts statuses were updated
195-
working-directory: ${{ env.ELMENTARY_INTERNAL_DBT_PKG_DIR }}
211+
working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }}
196212
run: |
197213
dbt deps
198214
dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}"
@@ -201,7 +217,7 @@ jobs:
201217
run: >
202218
edr monitor report
203219
-t "${{ inputs.warehouse-type }}"
204-
--project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}"
220+
--project-dir "${{ env.E2E_DBT_PROJECT_DIR }}"
205221
--project-profile-target "${{ inputs.warehouse-type }}"
206222
207223
- name: Set report artifact name
@@ -230,7 +246,7 @@ jobs:
230246
run: >
231247
edr monitor send-report
232248
-t "${{ inputs.warehouse-type }}"
233-
--project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}"
249+
--project-dir "${{ env.E2E_DBT_PROJECT_DIR }}"
234250
--project-profile-target "${{ inputs.warehouse-type }}"
235251
--slack-file-name "report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html"
236252
--slack-token "$SLACK_TOKEN"
@@ -259,5 +275,4 @@ jobs:
259275
path: elementary/edr_target/edr.log
260276

261277
- name: Run Python package e2e tests
262-
if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests
263278
run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }}

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,4 @@ venv/
9696

9797
# elementary outputs
9898
edr_target/
99-
tests/tests_with_db/dbt_project/dbt_packages/
99+
**/dbt_packages/

docs/_snippets/quickstart-package-install.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ Some packages we recommend you check out: [dbt_utils](https://github.com/dbt-lab
3939
```yml packages.yml
4040
packages:
4141
- package: elementary-data/elementary
42-
version: 0.19.4
42+
version: 0.20.0
4343
## Docs: https://docs.elementary-data.com
4444
```
4545
</Step>

elementary/clients/dbt/api_dbt_runner.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ def collect_dbt_command_logs(event):
5757
logs=[DbtLog.from_log_line(log) for log in dbt_logs],
5858
)
5959

60-
return APIDbtCommandResult(success=res.success, output=output, result_obj=res)
60+
return APIDbtCommandResult(
61+
success=res.success, output=output, stderr=None, result_obj=res
62+
)
6163

6264
def _parse_ls_command_result(
6365
self, select: Optional[str], result: DbtCommandResult

elementary/clients/dbt/command_line_dbt_runner.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
class DbtCommandResult:
2626
success: bool
2727
output: Optional[str]
28+
stderr: Optional[str]
2829

2930

3031
class CommandLineDbtRunner(BaseDbtRunner):
@@ -190,16 +191,23 @@ def run_operation(
190191
log_pattern = (
191192
RAW_EDR_LOGS_PATTERN if return_raw_edr_logs else MACRO_RESULT_PATTERN
192193
)
193-
if capture_output and result.output is not None:
194-
for log in parse_dbt_output(result.output):
195-
if log_errors and log.level == "error":
196-
logger.error(log.msg)
197-
continue
198-
199-
if log.msg:
200-
match = log_pattern.match(log.msg)
201-
if match:
202-
run_operation_results.append(match.group(1))
194+
if capture_output:
195+
if result.output is not None:
196+
for log in parse_dbt_output(result.output):
197+
if log_errors and log.level == "error":
198+
logger.error(log.msg)
199+
continue
200+
201+
if log.msg:
202+
match = log_pattern.match(log.msg)
203+
if match:
204+
run_operation_results.append(match.group(1))
205+
206+
if result.stderr is not None and log_errors:
207+
for log in parse_dbt_output(result.stderr):
208+
if log.level == "error":
209+
logger.error(log.msg)
210+
continue
203211

204212
return run_operation_results
205213

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import os
2+
3+
from elementary.clients.dbt.subprocess_dbt_runner import SubprocessDbtRunner
4+
5+
DBT_FUSION_PATH = os.getenv("DBT_FUSION_PATH", "~/.local/bin/dbt")
6+
7+
8+
class DbtFusionRunner(SubprocessDbtRunner):
9+
def _get_dbt_command_name(self) -> str:
10+
return os.path.expanduser(DBT_FUSION_PATH)
11+
12+
def _run_deps_if_needed(self):
13+
# Currently we don't support auto-updating deps for dbt fusion
14+
return

elementary/clients/dbt/factory.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,21 @@
11
import os
2+
from enum import Enum
23
from typing import Any, Dict, Optional, Type
34

45
from dbt.version import __version__ as dbt_version_string
56
from packaging import version
67

78
from elementary.clients.dbt.command_line_dbt_runner import CommandLineDbtRunner
9+
from elementary.clients.dbt.dbt_fusion_runner import DbtFusionRunner
10+
from elementary.clients.dbt.subprocess_dbt_runner import SubprocessDbtRunner
811

912
DBT_VERSION = version.Version(dbt_version_string)
1013

11-
RUNNER_CLASS: Type[CommandLineDbtRunner]
12-
if (
13-
DBT_VERSION >= version.Version("1.5.0")
14-
and os.getenv("DBT_RUNNER_METHOD") != "subprocess"
15-
):
16-
from elementary.clients.dbt.api_dbt_runner import APIDbtRunner
1714

18-
RUNNER_CLASS = APIDbtRunner
19-
else:
20-
from elementary.clients.dbt.subprocess_dbt_runner import SubprocessDbtRunner
21-
22-
RUNNER_CLASS = SubprocessDbtRunner
15+
class RunnerMethod(Enum):
16+
SUBPROCESS = "subprocess"
17+
API = "api"
18+
FUSION = "fusion"
2319

2420

2521
def create_dbt_runner(
@@ -33,8 +29,11 @@ def create_dbt_runner(
3329
allow_macros_without_package_prefix: bool = False,
3430
run_deps_if_needed: bool = True,
3531
force_dbt_deps: bool = False,
32+
runner_method: Optional[RunnerMethod] = None,
3633
) -> CommandLineDbtRunner:
37-
return RUNNER_CLASS(
34+
runner_method = runner_method or get_dbt_runner_method()
35+
runner_class = get_dbt_runner_class(runner_method)
36+
return runner_class(
3837
project_dir=project_dir,
3938
profiles_dir=profiles_dir,
4039
target=target,
@@ -46,3 +45,27 @@ def create_dbt_runner(
4645
run_deps_if_needed=run_deps_if_needed,
4746
force_dbt_deps=force_dbt_deps,
4847
)
48+
49+
50+
def get_dbt_runner_method() -> RunnerMethod:
51+
runner_method = os.getenv("DBT_RUNNER_METHOD")
52+
if runner_method:
53+
return RunnerMethod(runner_method)
54+
55+
if DBT_VERSION >= version.Version("1.5.0"):
56+
return RunnerMethod.API
57+
return RunnerMethod.SUBPROCESS
58+
59+
60+
def get_dbt_runner_class(runner_method: RunnerMethod) -> Type[CommandLineDbtRunner]:
61+
if runner_method == RunnerMethod.API:
62+
# Import it internally since it will fail if the dbt version is below 1.5.0
63+
from elementary.clients.dbt.api_dbt_runner import APIDbtRunner
64+
65+
return APIDbtRunner
66+
elif runner_method == RunnerMethod.SUBPROCESS:
67+
return SubprocessDbtRunner
68+
elif runner_method == RunnerMethod.FUSION:
69+
return DbtFusionRunner
70+
else:
71+
raise ValueError(f"Invalid runner method: {runner_method}")

elementary/clients/dbt/subprocess_dbt_runner.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,17 @@ def _inner_run_command(
2626
) -> DbtCommandResult:
2727
try:
2828
result = subprocess.run(
29-
["dbt"] + dbt_command_args,
29+
[self._get_dbt_command_name()] + dbt_command_args,
3030
check=self.raise_on_failure,
3131
capture_output=capture_output or quiet,
3232
env=self._get_command_env(),
3333
cwd=self.project_dir,
3434
)
3535
success = result.returncode == 0
3636
output = result.stdout.decode() if result.stdout else None
37+
stderr = result.stderr.decode() if result.stderr else None
3738

38-
return DbtCommandResult(success=success, output=output)
39+
return DbtCommandResult(success=success, output=output, stderr=stderr)
3940
except subprocess.CalledProcessError as err:
4041
logs = (
4142
list(parse_dbt_output(err.output.decode(), log_format))
@@ -49,6 +50,9 @@ def _inner_run_command(
4950
base_command_args=dbt_command_args, logs=logs, err=err
5051
)
5152

53+
def _get_dbt_command_name(self) -> str:
54+
return "dbt"
55+
5256
def _parse_ls_command_result(
5357
self, select: Optional[str], result: DbtCommandResult
5458
) -> List[str]:

elementary/config/config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def __init__(
7171
azure_container_name: Optional[str] = None,
7272
report_url: Optional[str] = None,
7373
teams_webhook: Optional[str] = None,
74+
maximum_columns_in_alert_samples: Optional[int] = None,
7475
env: str = DEFAULT_ENV,
7576
run_dbt_deps_if_needed: Optional[bool] = None,
7677
project_name: Optional[str] = None,
@@ -103,6 +104,12 @@ def __init__(
103104
False,
104105
)
105106

107+
self.maximum_columns_in_alert_samples = self._first_not_none(
108+
maximum_columns_in_alert_samples,
109+
config.get("maximum_columns_in_alert_samples"),
110+
4,
111+
)
112+
106113
self.timezone = self._first_not_none(
107114
timezone,
108115
config.get("timezone"),

0 commit comments

Comments
 (0)