Skip to content

Commit 9212136

Browse files
Dbt producer compatibility test
Signed-off-by: Pawel Marut <[email protected]>
1 parent 4e2a9a1 commit 9212136

File tree

19 files changed

+107
-1717
lines changed

19 files changed

+107
-1717
lines changed

.github/actions/run_event_validation/action.yml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,22 @@ runs:
5555
run: |
5656
cd tmp
5757
IFS=',' read -ra TAGS <<< "${{ inputs.release_tags }}"
58+
git fetch --tags --quiet
5859
for TAG in "${TAGS[@]}"; do
5960
echo "Checking out tag: $TAG"
60-
git fetch --tags --quiet
6161
if git checkout --quiet "$TAG"; then
6262
DEST_DIR="../specs/$TAG"
63-
if [ -d "spec" ]; then
64-
mkdir -p "../specs/$TAG"
65-
find spec -path './website' -prune -o -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \) -exec cp {} "../specs/$TAG/" \;
63+
if [[ -d "spec" || -d "integration/common/openlineage" ]]; then
64+
mkdir -p "$DEST_DIR"
65+
if [ -d "spec" ]; then
66+
find spec -path './website' -prune -o -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \) -exec cp {} "$DEST_DIR" \;
67+
fi
68+
if [ -d "integration/common/openlineage" ]; then
69+
find integration/common/openlineage -type f -iname '*facet.json' -exec cp {} "$DEST_DIR" \;
70+
fi
6671
echo "success"
6772
else
68-
echo "Spec directory not found in $TAG"
73+
echo "Neither spec nor integration/common/openlineage directory found in $TAG"
6974
fi
7075
else
7176
echo "Tag $TAG not found!"

.github/workflows/producer_dbt.yml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ jobs:
7474
pip install dbt-core==${{ inputs.dbt_release }}
7575
pip install dbt-postgres
7676
pip install openlineage-dbt==${{ inputs.ol_release }}
77-
pip install -r producer/dbt/test_runner/requirements.txt
7877
7978
- name: Set producer output event dir
8079
if: ${{ steps.init.outputs.scenarios }}
@@ -93,20 +92,22 @@ jobs:
9392
for scenario in "${scenarios[@]}"
9493
do
9594
echo "Running dbt scenario: $scenario"
96-
97-
if ! python3 producer/dbt/test_runner/cli.py run-scenario \
98-
--scenario "$scenario" \
99-
--output-dir "${{ steps.set-producer-output.outputs.event_dir }}"
100-
then
101-
echo "Error: dbt scenario failed: $scenario"
102-
exit 1
103-
fi
95+
96+
mkdir -p "${{ steps.set-producer-output.outputs.event_dir }}/$scenario"
97+
bash producer/dbt/scenarios/$scenario/test/run.sh "${{ steps.set-producer-output.outputs.event_dir }}/$scenario"
10498
10599
echo "Finished running scenario: $scenario"
106100
done
107101
108102
echo "Finished running all scenarios"
109103
104+
- uses: actions/upload-artifact@v4
105+
if: ${{ steps.init.outputs.scenarios }}
106+
with:
107+
name: dbt-${{inputs.dbt_release}}-ol-${{inputs.ol_release}}-events
108+
path: ${{ steps.set-producer-output.outputs.event_dir }}
109+
retention-days: 1
110+
110111
- name: Validation
111112
if: ${{ steps.init.outputs.scenarios }}
112113
uses: ./.github/actions/run_event_validation

.gitignore

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -170,25 +170,7 @@ ignored/
170170
bin/
171171

172172
# OpenLineage event files generated during local testing
173-
openlineage_events.json
174-
openlineage_events.jsonl
175-
*/openlineage_events.json
176-
*/openlineage_events.jsonl
177-
**/events/openlineage_events.json
178-
**/events/openlineage_events.jsonl
179-
180-
# Test output files (keep directory structure, ignore contents)
181-
producer/dbt/test_output/*
182-
!producer/dbt/test_output/.gitkeep
183-
184-
# Auto-generated report files (generated by CI/CD)
185-
*_producer_report.json
186-
*_consumer_report.json
187-
generated-files/report.json
188-
189-
# Virtual environments
190-
venv/
191-
test_venv/
192-
*/venv/
193-
*/test_venv/
194-
**/test_venv/
173+
**/specs/
174+
**/output/
175+
**/test/openlineage.yml
176+
dbt_producer_report.json

producer/dbt/run_dbt_tests.sh

Lines changed: 33 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ OPENLINEAGE_DIRECTORY=""
2525

2626
# Variables with default values
2727
PRODUCER_OUTPUT_EVENTS_DIR=output
28-
OPENLINEAGE_RELEASE=2-0-2
29-
REPORT_PATH="../dbt_producer_report.json"
28+
OPENLINEAGE_RELEASE=1.41.0
29+
REPORT_PATH="./dbt_producer_report.json"
3030

3131
# If -h or --help is passed, print usage and exit
3232
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
@@ -51,10 +51,8 @@ if [[ -z "$OPENLINEAGE_DIRECTORY" ]]; then
5151
usage
5252
fi
5353

54-
OL_SPEC_DIRECTORIES=$OPENLINEAGE_DIRECTORY/spec/,$OPENLINEAGE_DIRECTORY/spec/facets/,$OPENLINEAGE_DIRECTORY/spec/registry/gcp/dataproc/facets,$OPENLINEAGE_DIRECTORY/spec/registry/gcp/lineage/facets
55-
5654
# fail if scenarios are not defined in scenario directory
57-
[[ $(ls scenarios | wc -l) -gt 0 ]] || { echo >&2 "NO SCENARIOS DEFINED IN scenarios"; exit 1; }
55+
[[ $(find scenarios | wc -l) -gt 0 ]] || { echo >&2 "NO SCENARIOS DEFINED IN scenarios"; exit 1; }
5856

5957
mkdir -p "$PRODUCER_OUTPUT_EVENTS_DIR"
6058

@@ -73,208 +71,73 @@ echo "==========================================================================
7371
#
7472
################################################################################
7573

76-
echo "Setting up test environment..."
77-
78-
# Get script directory for relative paths
79-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
80-
cd "$SCRIPT_DIR"
81-
82-
# Check if Python test runner exists
83-
if [[ ! -f "test_runner/cli.py" ]]; then
84-
echo "Error: Python test runner not found at test_runner/cli.py"
85-
exit 1
86-
fi
87-
8874
# Check if scenario directory exists
8975
if [[ ! -d "scenarios" ]]; then
9076
echo "Error: scenarios directory not found"
9177
exit 1
9278
fi
9379

80+
#install python dependencies
81+
#python -m pip install --upgrade pip
82+
#
83+
#if [ -f ./runner/requirements.txt ]; then
84+
# pip install -r ./runner/requirements.txt
85+
#fi
86+
9487
################################################################################
9588
#
9689
# RUN dbt PRODUCER TESTS
9790
#
9891
################################################################################
9992

10093
echo "Running dbt producer tests..."
101-
102-
# Set up Python environment
103-
export PYTHONPATH="$SCRIPT_DIR/test_runner:$PYTHONPATH"
94+
POSIX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
95+
BASE_DIR="$(cygpath -m "$POSIX_DIR")"
10496

10597
# Run tests for each scenario
106-
TOTAL_SCENARIOS=0
107-
PASSED_SCENARIOS=0
108-
FAILED_SCENARIOS=0
109-
11098
echo "Discovering test scenarios..."
11199
for scenario_dir in scenarios/*/; do
112100
if [[ -d "$scenario_dir" && -f "${scenario_dir}config.json" ]]; then
113101
SCENARIO_NAME=$(basename "$scenario_dir")
114102
echo "Found scenario: $SCENARIO_NAME"
115-
TOTAL_SCENARIOS=$((TOTAL_SCENARIOS + 1))
116-
117-
echo "----------------------------------------"
118-
echo "Running scenario: $SCENARIO_NAME"
119-
echo "----------------------------------------"
120-
121-
# Run the atomic tests for this scenario
122-
echo "Step 1: Running atomic tests..."
123-
if python3 test_runner/cli.py run-atomic --base-path "." --verbose; then
124-
echo "✅ Atomic tests passed for $SCENARIO_NAME"
125-
126-
# Run OpenLineage event validation if events exist
127-
echo "Step 2: Validating OpenLineage events..."
128-
EVENTS_FILE="events/openlineage_events.jsonl"
129-
if [[ -f "$EVENTS_FILE" ]]; then
130-
echo "📋 Validating events from: $EVENTS_FILE"
131-
echo "📋 Against spec version: $OPENLINEAGE_RELEASE"
132-
if python3 test_runner/cli.py validate-events --events-file "$EVENTS_FILE" --spec-dir "$OPENLINEAGE_DIRECTORY/spec"; then
133-
echo "✅ Event validation passed for $SCENARIO_NAME (spec: $OPENLINEAGE_RELEASE)"
134-
PASSED_SCENARIOS=$((PASSED_SCENARIOS + 1))
135-
else
136-
echo "❌ Event validation failed for $SCENARIO_NAME (spec: $OPENLINEAGE_RELEASE)"
137-
FAILED_SCENARIOS=$((FAILED_SCENARIOS + 1))
138-
fi
139-
else
140-
echo "⚠️ No OpenLineage events found at $EVENTS_FILE, skipping validation for $SCENARIO_NAME"
141-
PASSED_SCENARIOS=$((PASSED_SCENARIOS + 1))
142-
fi
143-
else
144-
echo "❌ Atomic tests failed for $SCENARIO_NAME"
145-
FAILED_SCENARIOS=$((FAILED_SCENARIOS + 1))
146-
fi
147-
148-
echo ""
103+
104+
mkdir -p "$PRODUCER_OUTPUT_EVENTS_DIR/$SCENARIO_NAME"
105+
"$scenario_dir"test/run.sh "$BASE_DIR/$PRODUCER_OUTPUT_EVENTS_DIR/$SCENARIO_NAME"
106+
107+
echo "Scenario $SCENARIO_NAME completed"
149108
fi
150109
done
151110

152-
################################################################################
153-
#
154-
# GENERATE REPORT
155-
#
156-
################################################################################
157-
158-
echo "=============================================================================="
159-
echo " TEST RESULTS "
160-
echo "=============================================================================="
161-
echo "Total scenarios: $TOTAL_SCENARIOS"
162-
echo "Passed scenarios: $PASSED_SCENARIOS"
163-
echo "Failed scenarios: $FAILED_SCENARIOS"
164-
echo "OpenLineage Spec Version: $OPENLINEAGE_RELEASE"
165-
echo "Events File: events/openlineage_events.jsonl"
166-
echo "Report File: $REPORT_PATH"
167-
echo "=============================================================================="
168-
echo "Failed scenarios: $FAILED_SCENARIOS"
169-
echo "=============================================================================="
111+
echo "EVENT VALIDATION FOR SPEC VERSION $OPENLINEAGE_RELEASE"
170112

171113
# Generate JSON report
172114
REPORT_DIR=$(dirname "$REPORT_PATH")
173115
mkdir -p "$REPORT_DIR"
174116

175-
cat > "$REPORT_PATH" << EOF
176-
{
177-
"producer": "dbt",
178-
"openlineage_release": "$OPENLINEAGE_RELEASE",
179-
"test_execution_time": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
180-
"total_scenarios": $TOTAL_SCENARIOS,
181-
"passed_scenarios": $PASSED_SCENARIOS,
182-
"failed_scenarios": $FAILED_SCENARIOS,
183-
"success_rate": $(echo "scale=2; $PASSED_SCENARIOS * 100 / $TOTAL_SCENARIOS" | bc -l 2>/dev/null || echo "0"),
184-
"output_events_directory": "$PRODUCER_OUTPUT_EVENTS_DIR",
185-
"scenarios": []
186-
}
187-
EOF
117+
SPECS_BASE_DIR="./specs"
118+
DEST_DIR="$SPECS_BASE_DIR/$OPENLINEAGE_RELEASE"
188119

189-
echo "Report generated: $REPORT_PATH"
120+
mkdir -p "$DEST_DIR"
190121

191-
################################################################################
192-
#
193-
# CLEANUP AND EXIT
194-
#
195-
################################################################################
196-
197-
echo "Cleaning up temporary files..."
198-
199-
# Exit with appropriate code
200-
if [[ $FAILED_SCENARIOS -eq 0 ]]; then
201-
echo "🎉 All tests passed!"
202-
exit 0
203-
else
204-
echo "❌ Some tests failed. Check the output above for details."
205-
exit 1
122+
if [ -d "$OPENLINEAGE_DIRECTORY"/spec ]; then
123+
find "$OPENLINEAGE_DIRECTORY"/spec -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \) -exec cp -t "$DEST_DIR" {} +
206124
fi
207-
EOF
208-
209-
# Create staging models
210-
cat > dbt_project/models/staging/stg_customers.sql << EOF
211-
SELECT
212-
customer_id,
213-
UPPER(name) as customer_name,
214-
LOWER(email) as email,
215-
signup_date,
216-
status
217-
FROM {{ ref('customers') }}
218-
WHERE status = 'active'
219-
EOF
220-
221-
cat > dbt_project/models/staging/stg_orders.sql << EOF
222-
SELECT
223-
order_id,
224-
customer_id,
225-
product,
226-
amount,
227-
order_date
228-
FROM {{ ref('orders') }}
229-
EOF
230-
231-
# Create mart model
232-
mkdir -p dbt_project/models/marts
233-
cat > dbt_project/models/marts/customer_orders.sql << EOF
234-
SELECT
235-
c.customer_id,
236-
c.customer_name,
237-
COUNT(o.order_id) as total_orders,
238-
SUM(o.amount) as total_spent
239-
FROM {{ ref('stg_customers') }} c
240-
LEFT JOIN {{ ref('stg_orders') }} o
241-
ON c.customer_id = o.customer_id
242-
GROUP BY c.customer_id, c.customer_name
243-
EOF
244-
245-
echo "Running dbt with OpenLineage..."
246-
cd dbt_project
247-
248-
# Install dependencies and run dbt
249-
dbt deps --no-version-check || echo "No packages to install"
250-
dbt seed --no-version-check
251-
dbt run --no-version-check
252-
253-
cd ..
254-
255-
echo "dbt execution completed. Checking for generated events..."
256-
257-
# Check the events file
258-
if [[ -f "events/openlineage_events.jsonl" ]]; then
259-
event_count=$(wc -l < "events/openlineage_events.jsonl")
260-
echo "Generated $event_count OpenLineage events"
261-
echo "Events saved to: events/openlineage_events.jsonl"
262-
else
263-
echo "Warning: No OpenLineage events file generated at events/openlineage_events.jsonl"
264-
echo "Creating minimal event file for testing..."
265-
mkdir -p "events"
266-
echo '{"eventType": "COMPLETE", "eventTime": "2023-01-01T00:00:00Z", "run": {"runId": "test-run-id"}, "job": {"namespace": "dbt://local", "name": "test-job"}, "inputs": [], "outputs": [], "schemaURL": "https://openlineage.io/spec/'$OPENLINEAGE_RELEASE'/OpenLineage.json#/$defs/RunEvent"}' > "events/openlineage_events.jsonl"
125+
if [ -d "$OPENLINEAGE_DIRECTORY"/integration/common/openlineage ]; then
126+
find "$OPENLINEAGE_DIRECTORY"/integration/common/openlineage -type f -iname '*facet.json' -exec cp -t "$DEST_DIR" {} +
267127
fi
268128

269-
echo "EVENT VALIDATION FOR SPEC VERSION $OPENLINEAGE_RELEASE"
129+
if [ -z "$(ls -A "$DEST_DIR")" ]; then
130+
echo "Cannot collect OpenLineage specs"
131+
exit 1
132+
fi
270133

271-
pip install -r ../../scripts/requirements.txt
134+
#pip install -r ../../scripts/requirements.txt
272135

273136
python ../../scripts/validate_ol_events.py \
274-
--event_base_dir="events" \
275-
--spec_dirs="$OL_SPEC_DIRECTORIES" \
137+
--event_base_dir="$PRODUCER_OUTPUT_EVENTS_DIR" \
138+
--spec_base_dir="$SPECS_BASE_DIR" \
276139
--target="$REPORT_PATH" \
277-
--component="dbt_producer" \
140+
--component="scenarios" \
278141
--producer_dir=. \
279142
--openlineage_version="$OPENLINEAGE_RELEASE"
280143

producer/dbt/runner/models/marts/customer_analytics.sql

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
{{ config(materialized='table') }}
2-
31
select
42
c.customer_id,
53
c.customer_name,

producer/dbt/runner/models/staging/stg_customers.sql

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
{{ config(materialized='table') }}
2-
31
select
42
customer_id,
53
name as customer_name,

producer/dbt/runner/models/staging/stg_orders.sql

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
{{ config(materialized='table') }}
2-
31
select
42
order_id,
53
customer_id,
-2.01 MB
Binary file not shown.

producer/dbt/runner/profiles.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
openlineage_compatibility_test:
2-
target: dev
2+
target: postgres
33
outputs:
4-
dev:
4+
postgres:
55
type: postgres
66
host: "{{ env_var('DBT_POSTGRES_HOST', 'localhost') }}"
77
port: "{{ env_var('DBT_POSTGRES_PORT', '5432') | as_number }}"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# OpenLineage dbt Producer Test Dependencies
2+
# Install: pip install -r requirements.txt
3+
4+
# dbt dependencies
5+
dbt-core>=1.5.0
6+
dbt-postgres>=1.5.0
7+
8+
# OpenLineage integration (if available)
9+
openlineage-dbt>=0.28.0

0 commit comments

Comments
 (0)