Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
77c48d0
Convert ROAS values to percentages in cpa_and_roas model
MikaKerman Jul 6, 2025
e566b07
Enhance data generation scripts for Jaffle Shop
MikaKerman Aug 10, 2025
a7a8310
Update elementary-data dependency version in setup.py to 0.19.3
MikaKerman Aug 10, 2025
d78300b
Refactor order date filtering in historical and real-time orders SQL …
MikaKerman Aug 10, 2025
250fbe4
Add database configuration to sources in sources.yml
MikaKerman Aug 10, 2025
9cdad5a
Update attribution date in cpa_and_roas model to use converted_at ins…
MikaKerman Aug 10, 2025
6ffc9c2
Update session and ad configurations for improved data handling
MikaKerman Aug 10, 2025
39db6b8
Update anomaly sensitivity and training period in marketing schema
MikaKerman Aug 10, 2025
030e5f2
Add complete pipeline runner script and update README for quick start…
MikaKerman Aug 10, 2025
22d1472
Refactor DbtRunner import in data injection script
MikaKerman Sep 8, 2025
d629d9e
Refactor DbtRunner import across data generation and injection scripts
MikaKerman Sep 8, 2025
541fed4
Refactor DbtRunner import to SubprocessDbtRunner across data generati…
MikaKerman Sep 8, 2025
43b5bc7
Refactor session data generation calls in incremental data flow script
MikaKerman Sep 8, 2025
17b84ef
Add Elementary metadata initialization in Jaffle Shop test injection
MikaKerman Sep 8, 2025
5a2be8a
delete: Add comprehensive debug logging to injection system
MikaKerman Sep 8, 2025
1930a1e
Merge pull request #581 from elementary-data/convert-to-team-ownershi…
MikaKerman Oct 6, 2025
25858fe
Add intentional data quality failures to training data generator
MikaKerman Feb 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@

Elementary UI demo dbt project!

## Quick Start - Run Complete Pipeline

To run the complete data pipeline (generate data + build models + run tests):

```bash
./run_pipeline.sh
```

This script will:
1. Generate training data
2. Generate validation data
3. Generate marketing ads data
4. Generate session data
5. Load seed data into database
6. Build all dbt models
7. Run data quality tests (including ROAS anomaly detection)

**Expected Result:** The ROAS anomaly tests should FAIL. This is the intended outcome: a failure indicates that the tests successfully detected the artificial drop in return on advertising spend (ROAS).

## Generate new demo

To generate a new demo please do the following steps:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Optional
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from data_creation.data_injection.data_generator.specs.exposures.exposure_spec import (
ExposureSpec,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from pydantic import BaseModel

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)


class BaseSpec(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
ExposuresInjector,
)

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)

from data_creation.data_injection.injectors.models.models_injector import ModelsInjector

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
)
from data_creation.data_injection.data_generator.specs.base_spec import BaseSpec

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)


class SourceFreshnessSpec(BaseSpec):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from typing import Any, Optional

import numpy
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from pydantic import BaseModel

from data_creation.data_injection.data_generator.specs.tests.test_spec import TestSpec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
TestSubTypes,
TestTypes,
)
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)


class AutomatedFreshnessTestsSpec(AutomatedTestsSpec):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from data_creation.data_injection.data_generator.specs.base_spec import BaseSpec

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)

from data_creation.data_injection.injectors.models.models_injector import ModelsInjector

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
TestSubTypes,
TestTypes,
)
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)


class AutomatedVolumeTestsSpec(AutomatedTestsSpec):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from enum import Enum
from typing import Any, Optional

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from pydantic import validator

from data_creation.data_injection.data_generator.specs.tests.test_spec import TestSpec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from typing import Any

import numpy
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)

from data_creation.data_injection.data_generator.specs.tests.anomaly_test_spec import (
AnomalyTestSpec,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from datetime import datetime, timedelta
from typing import Any

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)

from data_creation.data_injection.data_generator.specs.base_spec import BaseSpec
from data_creation.data_injection.injectors.models.models_injector import ModelsInjector
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from data_creation.data_injection.data_generator.specs.tests.test_spec import TestSpec


Expand Down
4 changes: 3 additions & 1 deletion data_creation/data_injection/inject_jaffle_shop_exposures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
from pathlib import Path
from typing import Optional
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)

from datetime import datetime
from data_creation.data_injection.data_generator.exposures_data_generator import (
Expand Down
14 changes: 13 additions & 1 deletion data_creation/data_injection/inject_jaffle_shop_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from typing import Optional
from uuid import uuid4

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)

from data_creation.data_injection.data_generator.specs.tests.anomaly_test_spec import (
AnomalyTestSpec,
Expand Down Expand Up @@ -54,6 +56,16 @@ def inject_jaffle_shop_tests(
)
dbt_runner.deps()

# Ensure Elementary metadata tables are populated before injecting tests
print("Initializing Elementary metadata...")
jaffle_runner = DbtRunner(
project_dir=os.path.join(REPO_DIR, "jaffle_shop_online"),
profiles_dir=profiles_dir,
target=target,
)
# Run Elementary models to collect metadata about existing models
jaffle_runner.run(select="elementary")

start_time = datetime.now()

generator = TestDataGenerator(dbt_runner)
Expand Down
4 changes: 3 additions & 1 deletion data_creation/data_injection/injectors/base_injector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import os
from pathlib import Path
from typing import Optional
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)

DATA_INJECTION_DB_PROJECT_DIR_NAME = "dbt_project"
DATA_INJECTION_DIR = Path(os.path.dirname(__file__)).parent.absolute()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Optional
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from data_creation.data_injection.injectors.base_injector import BaseInjector


Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from datetime import datetime
from enum import Enum
from typing import List, Optional
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from pydantic import BaseModel

from data_creation.data_injection.injectors.models.models_injector import ModelsInjector
Expand Down
96 changes: 85 additions & 11 deletions data_creation/data_injection/injectors/models/models_injector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
from typing import List, Optional
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from data_creation.data_injection.injectors.base_injector import BaseInjector


Expand All @@ -14,16 +16,72 @@ def __init__(
super().__init__(dbt_runner, target, profiles_dir)

def get_model_ids(self, select: Optional[str] = None) -> List[str]:
model_ids_output = self.dbt_runner.run_operation(
macro_name="data_injection.get_models_unique_ids",
macro_args=dict(filter=select),
print(f"🔍 DEBUG: Getting model IDs with filter: {select}")

# First, let's see all available model IDs directly from Elementary
debug_query = """
select unique_id, alias, package_name
from {{ ref('elementary', 'dbt_models') }}
order by package_name, alias
"""
all_models_debug = self.run_query(debug_query)
print(
f"📊 DEBUG: Elementary has {len(all_models_debug)} total models in dbt_models table:"
)
model_ids = json.loads(model_ids_output[0])
return model_ids

# Group by package
by_package = {}
for model in all_models_debug:
pkg = model.get("package_name", "unknown")
if pkg not in by_package:
by_package[pkg] = []
by_package[pkg].append(model)

for pkg, models in by_package.items():
print(f" 📦 Package '{pkg}': {len(models)} models")
for model in models[:3]: # Show first 3 per package
print(
f" - {model.get('alias', 'N/A')} ({model.get('unique_id', 'N/A')})"
)
if len(models) > 3:
print(f" ... and {len(models) - 3} more")

# Now run the original macro

def get_model_id_from_name(self, model_name: str) -> str:
return self.run_query(
"""
print(f"🔍 DEBUG: Searching for model: '{model_name}'")

# First, let's see what models are actually available
debug_query = """
select unique_id, alias, name, package_name
from {{ ref('elementary', 'dbt_models') }}
where package_name <> 'elementary'
order by alias
"""
all_models = self.run_query(debug_query)
print(f"📊 DEBUG: Found {len(all_models)} total models:")
for model in all_models[:10]: # Show first 10
print(
f" - alias: '{model.get('alias', 'N/A')}', unique_id: '{model.get('unique_id', 'N/A')}', package: '{model.get('package_name', 'N/A')}'"
)
if len(all_models) > 10:
print(f" ... and {len(all_models) - 10} more models")

# Also check sources
sources_query = """
select unique_id, name, package_name
from {{ ref('elementary', 'dbt_sources') }}
order by name
"""
all_sources = self.run_query(sources_query)
print(f"📊 DEBUG: Found {len(all_sources)} total sources:")
for source in all_sources[:5]: # Show first 5
print(
f" - name: '{source.get('name', 'N/A')}', unique_id: '{source.get('unique_id', 'N/A')}', package: '{source.get('package_name', 'N/A')}'"
)

# Now try the original query
query = """
(
select unique_id as model_id
from {{ ref('elementary', 'dbt_models') }}
Expand All @@ -35,9 +93,25 @@ def get_model_id_from_name(self, model_name: str) -> str:
from {{ ref('elementary', 'dbt_sources') }}
where name = '%(model_name)s'
)
"""
% {"model_name": model_name},
)[0]["model_id"]
""" % {
"model_name": model_name
}

print(f"🔍 DEBUG: Running query for '{model_name}'")
result = self.run_query(query)
print(f"📋 DEBUG: Query returned {len(result)} results: {result}")

if not result:
print(f"❌ ERROR: No model found with name '{model_name}'")
# Show exact matches we're looking for
exact_matches = [m for m in all_models if m.get("alias") == model_name]
source_matches = [s for s in all_sources if s.get("name") == model_name]
print(f"🔍 DEBUG: Exact alias matches: {exact_matches}")
print(f"🔍 DEBUG: Exact source name matches: {source_matches}")
raise ValueError(f"No model found with name '{model_name}'")

print(f"✅ DEBUG: Found model '{model_name}' with ID: {result[0]['model_id']}")
return result[0]["model_id"]

def get_nodes(self):
return self.run_query(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from datetime import datetime
from typing import Optional

from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from pydantic import BaseModel, root_validator

from data_creation.data_injection.injectors.tests.tests_injector import (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from enum import Enum
from typing import List, Optional
from elementary.clients.dbt.dbt_runner import DbtRunner
from elementary.clients.dbt.subprocess_dbt_runner import (
SubprocessDbtRunner as DbtRunner,
)
from pydantic import BaseModel

from data_creation.data_injection.injectors.base_injector import BaseInjector
Expand Down
Loading