Skip to content

Commit d78d379

Browse files
authored
Merge pull request #11 from Eppo-exp/update-window-definitions
updated schema to use new timeframe parameters
2 parents bd3df5a + 3057e53 commit d78d379

File tree

4 files changed

+74
-63
lines changed

4 files changed

+74
-63
lines changed

eppo_metrics_sync/eppo_metrics_sync.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def read_yaml_files(self):
8282
self.load_eppo_yaml(yaml_path)
8383
else:
8484
self.validation_errors.append(
85-
f"Schema violation in {yaml_path}: \n{valid.error_message}"
85+
f"Schema violation in {yaml_path}: \n{valid['error_message']}"
8686
)
8787

8888
elif self.schema_type == 'dbt-model':

eppo_metrics_sync/schema/eppo_metric_schema.json

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,17 @@
216216
}
217217
}
218218
},
219-
"aggregation_timeframe_value": {
220-
"description": "How many timeframe units since assignment to include (optional)",
221-
"value": "number"
219+
"aggregation_timeframe_start_value": {
220+
"description": "The start of the timeframe window defined in number of timeframe units following assignment (optional)",
221+
"type": "number"
222+
},
223+
"aggregation_timeframe_end_value": {
224+
"description": "The end of the timeframe window defined in number of timeframe units following assignment (optional)",
225+
"type": "number"
222226
},
223227
"aggregation_timeframe_unit": {
224228
"description": "What time unit to use: minutes, hours, days, weeks, or calendar_days (optional)",
225-
"enum": ["minutes", "hours", "days", "weeks"]
229+
"enum": ["minutes", "hours", "days", "weeks", "calendar_days"]
226230
},
227231
"winsorization_lower_percentile": {
228232
"description": "Percentile at which to clip aggregated metrics (optional)",
@@ -274,13 +278,17 @@
274278
}
275279
}
276280
},
277-
"aggregation_timeframe_value": {
278-
"description": "How many timeframe units since assignment to include (optional)",
281+
"aggregation_timeframe_start_value": {
282+
"description": "The start of the timeframe window defined in number of timeframe units following assignment (optional)",
283+
"type": "number"
284+
},
285+
"aggregation_timeframe_end_value": {
286+
"description": "The end of the timeframe window defined in number of timeframe units following assignment (optional)",
279287
"type": "number"
280288
},
281289
"aggregation_timeframe_unit": {
282290
"description": "What time unit to use: minutes, hours, days, weeks, or calendar_days (optional)",
283-
"enum": ["minutes", "hours", "days", "weeks"]
291+
"enum": ["minutes", "hours", "days", "weeks", "calendar_days"]
284292
},
285293
"winsorization_lower_percentile": {
286294
"description": "Percentile at which to clip aggregated metrics (optional)",

eppo_metrics_sync/validation.py

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
]
1313

1414
timeframe_parameters = [
15-
'aggregation_timeframe_value',
15+
'aggregation_timeframe_start_value',
16+
'aggregation_timeframe_end_value',
1617
'aggregation_timeframe_unit'
1718
]
1819

@@ -27,15 +28,14 @@ def check_for_duplicated_names(payload, names, object_name):
2728

2829

2930
def unique_names(payload):
30-
3131
fact_source_names = []
3232
fact_names = []
3333
fact_property_names = []
3434

3535
for fact_source in payload.fact_sources:
3636
fact_source_names.append(fact_source['name'])
3737
fact_names.extend([f['name'] for f in fact_source['facts']])
38-
if('properties' in fact_source):
38+
if ('properties' in fact_source):
3939
fact_property_names.extend(
4040
[f['name'] for f in fact_source['properties']]
4141
)
@@ -45,14 +45,13 @@ def unique_names(payload):
4545
check_for_duplicated_names(payload, fact_source_names, 'Fact source')
4646
check_for_duplicated_names(payload, fact_names, 'Fact')
4747
# TODO: check for distinct names within a given fact source
48-
#check_for_duplicated_names(payload, fact_property_names, 'Fact property')
48+
# check_for_duplicated_names(payload, fact_property_names, 'Fact property')
4949
check_for_duplicated_names(payload, metric_names, 'Metric')
50-
50+
5151
return True
5252

5353

5454
def valid_fact_references(payload):
55-
5655
fact_references = set()
5756
for metric in payload.metrics:
5857
fact_references.add(metric['numerator']['fact_name'])
@@ -66,7 +65,7 @@ def valid_fact_references(payload):
6665

6766
if fact_references.issubset(set(fact_names)) == False:
6867
payload.validation_errors.append(
69-
"Invalid fact reference(s): " +
68+
"Invalid fact reference(s): " +
7069
str(', '.join(fact_references.difference(fact_names)))
7170
)
7271

@@ -79,7 +78,7 @@ def metric_aggregation_is_valid(payload):
7978
payload.validation_errors.append(
8079
f"{m['name']} has invalid numerator: {numerator_error}"
8180
)
82-
81+
8382
if 'denominator' in m:
8483
denominator_error = aggregation_is_valid(m['denominator'])
8584
if denominator_error:
@@ -89,11 +88,11 @@ def metric_aggregation_is_valid(payload):
8988

9089

9190
def distinct_advanced_aggregation_parameter_set(
92-
aggregation,
93-
operation,
91+
aggregation,
92+
operation,
9493
aggregation_parameter,
95-
error_message
96-
):
94+
error_message
95+
):
9796
if aggregation['operation'] == operation:
9897
matched = [p for p in advanced_aggregation_parameters if p in aggregation]
9998
if len(matched) == 0:
@@ -110,10 +109,10 @@ def distinct_advanced_aggregation_parameter_set(
110109

111110

112111
def aggregation_is_valid(aggregation):
113-
114112
error_message = []
115113

116-
if aggregation['operation'] not in ['sum', 'count', 'count_distinct', 'distinct_entity', 'threshold', 'retention', 'conversion']:
114+
if aggregation['operation'] not in ['sum', 'count', 'count_distinct', 'distinct_entity', 'threshold', 'retention',
115+
'conversion']:
117116
error_message.append(
118117
'Invalid aggregation operation: ' + aggregation['operation']
119118
)
@@ -124,12 +123,19 @@ def aggregation_is_valid(aggregation):
124123
error_message.append(
125124
'Cannot winsorize a metric with operation ' + aggregation['operation']
126125
)
127-
128-
# either 0 or 2 of timeframe_parameters must be set
129-
if len([name for name in timeframe_parameters if name in aggregation]) == 1:
126+
127+
# The aggregation_timeframe_unit must be specified if timeframe parameters are set
128+
included_timeframe_parameters = [name for name in timeframe_parameters if name in aggregation]
129+
130+
if 'aggregation_timeframe_value' in aggregation:
131+
error_message.append(
132+
'The aggregation_timeframe_value parameter has been deprecated. Please use aggregation_timeframe_end_value instead.'
133+
)
134+
135+
timeframe_unit_specified = 'aggregation_timeframe_unit' in included_timeframe_parameters
136+
if len(included_timeframe_parameters) > 0 and not timeframe_unit_specified:
130137
error_message.append(
131-
'Either both or neither aggregation_timeframe_value and ' +
132-
'aggregation_timeframe_unit must be set'
138+
'The aggregation_timeframe_unit must be set to use timeframe parameters.'
133139
)
134140

135141
# only set timeframe_parameters on a some operation types
@@ -154,20 +160,20 @@ def aggregation_is_valid(aggregation):
154160
pass
155161

156162
distinct_advanced_aggregation_parameter_set(
157-
aggregation,
158-
'retention',
163+
aggregation,
164+
'retention',
159165
'retention_threshold_days',
160166
error_message
161167
)
162168
distinct_advanced_aggregation_parameter_set(
163-
aggregation,
164-
'conversion',
169+
aggregation,
170+
'conversion',
165171
'conversion_threshold_days',
166172
error_message
167173
)
168174
distinct_advanced_aggregation_parameter_set(
169-
aggregation,
170-
'threshold',
175+
aggregation,
176+
'threshold',
171177
'threshold_metric_settings',
172178
error_message
173179
)
@@ -176,4 +182,3 @@ def aggregation_is_valid(aggregation):
176182
return '\n'.join(error_message)
177183
else:
178184
return None
179-

tests/test_validation.py

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,39 +5,36 @@
55
from eppo_metrics_sync.eppo_metrics_sync import EppoMetricsSync
66

77
# If we use context.py we can do something like this instead
8-
#from .context import eppo_metric_sync
9-
#from .context import validation
8+
# from .context import eppo_metric_sync
9+
# from .context import validation
1010

1111

1212
test_yaml_dir = "tests/yaml/invalid"
1313

1414

1515
def test_unique_fact_source_names():
16-
17-
eppo_metrics_sync = EppoMetricsSync(directory = None)
16+
eppo_metrics_sync = EppoMetricsSync(directory=None)
1817
eppo_metrics_sync.load_eppo_yaml(
19-
path = test_yaml_dir + "/duplicated_fact_source_names.yaml")
20-
21-
with pytest.raises(ValueError, match = "Fact source names are not unique: upgrades_table"):
18+
path=test_yaml_dir + "/duplicated_fact_source_names.yaml")
19+
20+
with pytest.raises(ValueError, match="Fact source names are not unique: upgrades_table"):
2221
eppo_metrics_sync.validate()
2322

2423

2524
def test_unique_metric_names():
26-
27-
eppo_metrics_sync = EppoMetricsSync(directory = None)
25+
eppo_metrics_sync = EppoMetricsSync(directory=None)
2826
eppo_metrics_sync.load_eppo_yaml(
29-
path = test_yaml_dir + "/duplicated_metric_names.yaml")
30-
31-
with pytest.raises(ValueError, match = "Metric names are not unique: Total Upgrades to Paid Plan"):
27+
path=test_yaml_dir + "/duplicated_metric_names.yaml")
28+
29+
with pytest.raises(ValueError, match="Metric names are not unique: Total Upgrades to Paid Plan"):
3230
eppo_metrics_sync.validate()
3331

3432

3533
def test_unique_fact_names():
36-
37-
eppo_metrics_sync = EppoMetricsSync(directory = None)
34+
eppo_metrics_sync = EppoMetricsSync(directory=None)
3835
eppo_metrics_sync.load_eppo_yaml(
39-
path = test_yaml_dir + "/duplicated_fact_names.yaml")
40-
36+
path=test_yaml_dir + "/duplicated_fact_names.yaml")
37+
4138
with pytest.raises(ValueError, match="Fact names are not unique: upgrades"):
4239
eppo_metrics_sync.validate()
4340

@@ -53,10 +50,10 @@ def test_unique_fact_names():
5350

5451

5552
def test_invalid_fact_reference():
56-
eppo_metrics_sync = EppoMetricsSync(directory = None)
53+
eppo_metrics_sync = EppoMetricsSync(directory=None)
5754
eppo_metrics_sync.load_eppo_yaml(
58-
path = test_yaml_dir + "/invalid_fact_reference.yaml")
59-
with pytest.raises(ValueError, match = re.escape("Invalid fact reference(s): revenue")):
55+
path=test_yaml_dir + "/invalid_fact_reference.yaml")
56+
with pytest.raises(ValueError, match=re.escape("Invalid fact reference(s): revenue")):
6057
eppo_metrics_sync.validate()
6158

6259

@@ -72,23 +69,22 @@ def test_invalid_winsorization_operation():
7269
def test_invalid_aggregation_for_timeframe():
7370
test_agg = {
7471
'operation': 'conversion',
75-
'aggregation_timeframe_value': 1,
72+
'aggregation_timeframe_end_value': 1,
7673
'aggregation_timeframe_unit': 'days',
7774
'conversion_threshold_days': 1
7875
}
79-
76+
8077
res = aggregation_is_valid(test_agg)
81-
assert res == 'Cannot specify aggregation_timeframe_value for operation conversion'
78+
assert res == 'Cannot specify aggregation_timeframe_end_value for operation conversion'
8279

8380

8481
def test_invalid_timeframe_parameters():
8582
test_agg = {
8683
'operation': 'sum',
87-
'aggregation_timeframe_value': 1
84+
'aggregation_timeframe_end_value': 1
8885
}
8986

90-
expected_error = 'Either both or neither aggregation_timeframe_value and ' + \
91-
'aggregation_timeframe_unit must be set'
87+
expected_error = 'The aggregation_timeframe_unit must be set to use timeframe parameters.'
9288

9389
res = aggregation_is_valid(test_agg)
9490
assert res == expected_error
@@ -102,7 +98,7 @@ def test_invalid_aggregation_parameter():
10298

10399
res = aggregation_is_valid(test_agg)
104100
assert res == 'retention_threshold_days specified, but operation is sum'
105-
101+
106102

107103
def test_missing_conversion_threshold():
108104
test_agg = {
@@ -122,12 +118,14 @@ def test_extra_parameter_on_retention_metric():
122118
res = aggregation_is_valid(test_agg)
123119
assert res == 'Invalid parameter for retention aggregation: conversion_threshold_days'
124120

121+
125122
def test_count_distinct():
126123
test_agg = {
127124
'operation': 'count_distinct',
128-
'aggregation_timeframe_value': 1,
125+
'aggregation_timeframe_start_value': 1,
126+
'aggregation_timeframe_end_value': 7,
129127
'aggregation_timeframe_unit': 'days'
130128
}
131-
129+
132130
res = aggregation_is_valid(test_agg)
133-
assert res == None
131+
assert res == None

0 commit comments

Comments
 (0)