Skip to content

Commit bd3df5a

Browse files
Merge pull request #9 from Eppo-exp/v0.0.3
added tests for non Eppo dbt configuration files, safer loading, desc…
2 parents 1e38724 + 65c34ca commit bd3df5a

File tree

9 files changed

+79
-56
lines changed

9 files changed

+79
-56
lines changed

eppo_metrics_sync/dbt_model_parser.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from collections import Counter
22
from itertools import chain
33

4-
54
class DbtModelParser():
65

76
def __init__(self, model, dbt_model_prefix):
@@ -44,14 +43,14 @@ def _parse_one_column(self, column):
4443
self.eppo_facts.append({
4544
"name": column["name"],
4645
"column": column["name"],
47-
"description": column["description"]
46+
"description": column.get("description", "")
4847
})
4948

5049
if 'eppo_property' in tags:
5150
self.eppo_properties.append({
5251
"name": column["name"],
5352
"column": column["name"],
54-
"description": column["description"]
53+
"description": column.get("description", "")
5554
})
5655

5756

@@ -131,7 +130,12 @@ def format(self):
131130
}
132131

133132
def build(self):
134-
self.parse_columns()
135-
self.validate()
136-
self.format()
137-
return self.eppo_fact_source
133+
if isinstance(self.model, dict):
134+
model_tags = self.model.get('tags', [])
135+
if 'eppo_fact_source' in model_tags:
136+
self.parse_columns()
137+
self.validate()
138+
self.format()
139+
return self.eppo_fact_source
140+
else:
141+
raise ValueError(f"Expected model to be a dictionary, got model = {self.model}")

eppo_metrics_sync/eppo_metrics_sync.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
)
1212

1313
from eppo_metrics_sync.dbt_model_parser import DbtModelParser
14+
from eppo_metrics_sync.helper import load_yaml
15+
1416

1517
API_ENDPOINT = 'https://eppo.cloud/api/v1/metrics/sync'
1618

@@ -35,38 +37,36 @@ def __init__(
3537
with open(schema_path) as schema_file:
3638
self.schema = json.load(schema_file)
3739

40+
3841
def load_eppo_yaml(self, path):
39-
with open(path, 'r') as yaml_file:
40-
yaml_data = yaml.safe_load(yaml_file)
41-
if 'fact_sources' in yaml_data:
42-
self.fact_sources.extend(yaml_data['fact_sources'])
43-
if 'metrics' in yaml_data:
44-
self.metrics.extend(yaml_data['metrics'])
42+
yaml_data = load_yaml(path)
43+
if 'fact_sources' in yaml_data:
44+
self.fact_sources.extend(yaml_data['fact_sources'])
45+
if 'metrics' in yaml_data:
46+
self.metrics.extend(yaml_data['metrics'])
4547

4648
def load_dbt_yaml(self, path):
4749
if not self.dbt_model_prefix:
4850
raise ValueError('Must specify dbt_model_prefix when schema_type=dbt-model')
49-
with open(path, 'r') as yaml_file:
50-
yaml_data = yaml.safe_load(yaml_file)
51+
yaml_data = load_yaml(path)
5152
models = yaml_data.get('models')
5253
if models:
5354
for model in models:
54-
self.fact_sources.append(
55-
DbtModelParser(model, self.dbt_model_prefix).build()
56-
)
55+
dbt_model_parser = DbtModelParser(model, self.dbt_model_prefix).build()
56+
if dbt_model_parser:
57+
self.fact_sources.append(dbt_model_parser)
5758

5859
def yaml_is_valid(self, yaml_path):
5960
"""
6061
Validate a single YAML file against the schema
6162
6263
"""
63-
with open(yaml_path, 'r') as yaml_file:
64-
data = yaml.safe_load(yaml_file)
65-
try:
66-
jsonschema.validate(data, self.schema)
67-
return {"passed": True}
68-
except jsonschema.exceptions.ValidationError as e:
69-
return {"passed": False, "error_message": e}
64+
data = load_yaml(yaml_path)
65+
try:
66+
jsonschema.validate(data, self.schema)
67+
return {"passed": True}
68+
except jsonschema.exceptions.ValidationError as e:
69+
return {"passed": False, "error_message": e}
7070

7171
def read_yaml_files(self):
7272
# Recursively scan the directory for YAML files and load valid ones

eppo_metrics_sync/helper.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import yaml
2+
3+
def load_yaml(path):
    """Read the file at ``path`` and parse it with ``yaml.safe_load``.

    Args:
        path: Location of the YAML file to open.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.

    Raises:
        ValueError: If the content is not valid YAML, or if any other error
            occurs while opening or reading the file. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        with open(path, 'r') as file:
            return yaml.safe_load(file)
    except yaml.YAMLError as e:
        # Chain explicitly so the parser's own traceback stays attached.
        raise ValueError(f"Error loading YAML file '{path}': {e}") from e
    except Exception as e:
        # Deliberately broad: every failure of this helper (missing file,
        # permissions, decoding, ...) is reported as ValueError.
        raise ValueError(f"Unexpected error loading file '{path}': {e}") from e
12+
13+

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='eppo_metrics_sync',
5-
version='0.0.2',
5+
version='0.0.3',
66
packages=find_packages(),
77
install_requires=[
88
'PyYAML', 'jsonschema', 'requests'

tests/test_cli.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,6 @@ def runner(args):
1313
return result
1414
return runner
1515

16-
"""def test_cli_with_valid_directory(run_cli):
17-
result = run_cli(['tests/yaml/valid'])
18-
assert result.returncode == 0
19-
"""
2016
def test_cli_dryrun_option(run_cli):
2117
result = run_cli(['tests/yaml/valid', '--dryrun'])
2218
assert result.returncode == 0

tests/test_validation_dbt.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from eppo_metrics_sync.validation import unique_names, valid_fact_references, aggregation_is_valid
44
from eppo_metrics_sync.eppo_metrics_sync import EppoMetricsSync
55

6-
test_yaml_dir = "tests/yaml/dbt/invalid"
6+
test_yaml_dir = "tests/yaml/dbt/"
77

88

99
def test_invalid_entity_tag():
@@ -17,7 +17,7 @@ def test_invalid_entity_tag():
1717
AssertionError,
1818
match = "Invalid entity tag eppo_entity:anonymous_user:foo in model revenue"
1919
):
20-
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "/invalid_entity_tag.yml")
20+
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "invalid/invalid_entity_tag.yml")
2121

2222

2323
def test_missing_entity():
@@ -31,7 +31,7 @@ def test_missing_entity():
3131
ValueError,
3232
match = 'At least 1 column must have tag "eppo_entity:<entity_name>"'
3333
):
34-
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "/missing_entity.yml")
34+
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "invalid/missing_entity.yml")
3535

3636

3737
def test_missing_timestamp():
@@ -45,7 +45,7 @@ def test_missing_timestamp():
4545
ValueError,
4646
match = 'Exactly 1 column must be have tag "eppo_timestamp"'
4747
):
48-
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "/missing_timestamp.yml")
48+
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "invalid/missing_timestamp.yml")
4949

5050

5151
def test_overlapping_tags():
@@ -59,14 +59,39 @@ def test_overlapping_tags():
5959
ValueError,
6060
match = 'The following columns had tags to multiple Eppo fields: gross_revenue'
6161
):
62-
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "/overlapping_tags.yml")
62+
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "invalid/overlapping_tags.yml")
6363

64-
# test that package handles yml without 'models' member gracefully
64+
65+
def test_model_is_not_a_dictionary():
    """A ``models`` entry that is not a dictionary must raise ValueError."""
    yaml_path = test_yaml_dir + "invalid/model_is_not_a_dictionary.yml"
    sync = EppoMetricsSync(
        directory=None,
        schema_type='dbt-model',
        dbt_model_prefix='foo',
    )

    # NOTE(review): the trailing "r" presumably comes from the fixture's
    # scalar ``models: revenue`` being iterated character by character;
    # confirm against the fixture file.
    expected_error = 'Expected model to be a dictionary, got model = r'
    with pytest.raises(ValueError, match=expected_error):
        sync.load_dbt_yaml(path=yaml_path)
77+
78+
# test that the package handles yml without 'models' member gracefully
79+
def test_no_model_property():
    """Loading a dbt YAML file that has no ``models`` member must not raise.

    The name follows the fixture file (``no_model_property.yml``). It must
    stay distinct from ``test_no_model_tag`` defined later in this module:
    two functions with the same name would leave only the later one bound,
    and pytest would never collect this test.
    """
    eppo_metrics_sync = EppoMetricsSync(
        directory=None,
        schema_type='dbt-model',
        dbt_model_prefix='foo',
    )
    # Passing means simply returning without an exception.
    eppo_metrics_sync.load_dbt_yaml(path=test_yaml_dir + "valid/no_model_property.yml")
87+
88+
89+
# test that the package handles dbt models without eppo tags gracefully
6590
def test_no_model_tag():
6691

6792
eppo_metrics_sync = EppoMetricsSync(
6893
directory = None,
6994
schema_type = 'dbt-model',
7095
dbt_model_prefix = 'foo'
7196
)
72-
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "/no_model_tag.yml")
97+
eppo_metrics_sync.load_dbt_yaml(path = test_yaml_dir + "valid/no_dbt_tags.yml")
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version: 2
2+
3+
models: revenue
Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,19 @@
11

22
version: 2
33

4-
foo:
4+
models:
55
- name: revenue
66
description: "An example revenue model"
7-
tags:
8-
- eppo_fact_source
97
columns:
108
- name: revenue_id
119
description: "The primary key for this table"
1210
tests:
1311
- unique
1412
- not_null
1513
- name: anonymous_id
16-
tags:
17-
- eppo_entity:anonymous_user
1814
- name: purchase_timestamp
1915
description: "The time at which the revenue was created"
2016
tests:
2117
- not_null
22-
tags:
23-
- eppo_timestamp
2418
- name: gross_revenue
2519
description: "The gross revenue amount"
26-
tags:
27-
- eppo_fact
28-
- eppo_property

tests/yaml/dbt/invalid/no_model_tag.yml renamed to tests/yaml/dbt/valid/no_model_property.yml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,16 @@ version: 2
44
foo:
55
- name: revenue
66
description: "An example revenue model"
7-
tags:
8-
- eppo_fact_source
97
columns:
108
- name: revenue_id
119
description: "The primary key for this table"
1210
tests:
1311
- unique
1412
- not_null
1513
- name: anonymous_id
16-
tags:
17-
- eppo_entity:anonymous_user
1814
- name: purchase_timestamp
1915
description: "The time at which the revenue was created"
2016
tests:
2117
- not_null
22-
tags:
23-
- eppo_timestamp
2418
- name: gross_revenue
2519
description: "The gross revenue amount"
26-
tags:
27-
- eppo_fact
28-
- eppo_property

0 commit comments

Comments
 (0)