Skip to content

Commit 74fd8bd

Browse files
authored
Add support for percentile metrics (#34)
* Add support for percentile metric types * Bump version * Add checks for numerator presence
1 parent c394206 commit 74fd8bd

File tree

7 files changed

+325
-80
lines changed

7 files changed

+325
-80
lines changed

eppo_metrics_sync/schema/eppo_metric_schema.json

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
"items": {
119119
"type": "object",
120120
"additionalProperties": false,
121-
"required": ["name", "entity", "numerator"],
121+
"required": ["name", "entity"],
122122
"properties": {
123123
"name": {
124124
"description": "A user-friendly name shown in the Eppo UI",
@@ -130,7 +130,7 @@
130130
},
131131
"type": {
132132
"description": "The kind of metric to be calculated",
133-
"enum": ["simple", "ratio", "funnel", "percentile"]
133+
"enum": ["simple", "ratio", "percentile"]
134134
},
135135
"entity": {
136136
"description": "Must exactly match entity's name in Eppo UI",
@@ -315,6 +315,24 @@
315315
"type": "number"
316316
}
317317
}
318+
},
319+
"percentile": {
320+
"description": "For percentile metrics only: specify the fact and percentile value",
321+
"type": "object",
322+
"additionalProperties": false,
323+
"required": ["fact_name", "percentile_value"],
324+
"properties": {
325+
"fact_name": {
326+
"description": "Must match one of the values specified in fact_sources.facts.name",
327+
"type": "string"
328+
},
329+
"percentile_value": {
330+
"description": "The percentile to calculate (between 0 and 1)",
331+
"type": "number",
332+
"minimum": 0,
333+
"maximum": 1
334+
}
335+
}
318336
}
319337
}
320338
}

eppo_metrics_sync/validation.py

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,12 @@ def unique_names(payload):
5454
def valid_fact_references(payload):
5555
fact_references = set()
5656
for metric in payload.metrics:
57-
fact_references.add(metric['numerator']['fact_name'])
57+
if 'numerator' in metric:
58+
fact_references.add(metric['numerator']['fact_name'])
5859
if 'denominator' in metric:
5960
fact_references.add(metric['denominator']['fact_name'])
61+
if 'percentile' in metric:
62+
fact_references.add(metric['percentile']['fact_name'])
6063

6164
fact_names = set()
6265
for fact_source in payload.fact_sources:
@@ -81,12 +84,22 @@ def valid_experiment_computation(payload):
8184

8285
def metric_aggregation_is_valid(payload):
8386
for m in payload.metrics:
87+
if m.get('type') == 'percentile':
88+
percentile_error = percentile_metric_is_valid(m)
89+
if percentile_error:
90+
payload.validation_errors.append(
91+
f"{m['name']} has invalid percentile configuration: {percentile_error}"
92+
)
93+
# Skip the rest of the loop iteration for percentile metrics
94+
# since they don't have numerator/denominator to validate
95+
continue
8496

85-
numerator_error = aggregation_is_valid(m['numerator'])
86-
if numerator_error:
87-
payload.validation_errors.append(
88-
f"{m['name']} has invalid numerator: {numerator_error}"
89-
)
97+
if 'numerator' in m:
98+
numerator_error = aggregation_is_valid(m['numerator'])
99+
if numerator_error:
100+
payload.validation_errors.append(
101+
f"{m['name']} has invalid numerator: {numerator_error}"
102+
)
90103

91104
if 'denominator' in m:
92105
denominator_error = aggregation_is_valid(m['denominator'])
@@ -103,13 +116,27 @@ def valid_guardrail_cutoff_signs(payload):
103116
facts[fact['name']] = fact
104117

105118
for m in payload.metrics:
106-
numerator_fact_name = m['numerator']['fact_name']
107-
if is_guardrail_cutoff_exist(m) and numerator_fact_name in facts and 'desired_change' in facts[numerator_fact_name]:
108-
error = is_valid_guardrail_cutoff_sign(m, facts[numerator_fact_name])
109-
if error:
110-
payload.validation_errors.append(
111-
f"{m['name']} is having invalid guardrail_cutoff sign: {error}"
112-
)
119+
if m.get('type') == 'percentile':
120+
if is_guardrail_cutoff_exist(m):
121+
percentile_fact_name = m['percentile']['fact_name']
122+
if percentile_fact_name in facts and 'desired_change' in facts[percentile_fact_name]:
123+
error = is_valid_guardrail_cutoff_sign(m, facts[percentile_fact_name])
124+
if error:
125+
payload.validation_errors.append(
126+
f"{m['name']} is having invalid guardrail_cutoff sign: {error}"
127+
)
128+
# Skip the rest of the loop iteration for percentile metrics
129+
# since they don't have numerator/denominator to validate
130+
continue
131+
132+
if 'numerator' in m:
133+
numerator_fact_name = m['numerator']['fact_name']
134+
if is_guardrail_cutoff_exist(m) and numerator_fact_name in facts and 'desired_change' in facts[numerator_fact_name]:
135+
error = is_valid_guardrail_cutoff_sign(m, facts[numerator_fact_name])
136+
if error:
137+
payload.validation_errors.append(
138+
f"{m['name']} is having invalid guardrail_cutoff sign: {error}"
139+
)
113140

114141

115142
def is_valid_guardrail_cutoff_sign(metric, numerator_fact):
@@ -218,3 +245,30 @@ def aggregation_is_valid(aggregation):
218245
return '\n'.join(error_message)
219246
else:
220247
return None
248+
249+
def percentile_metric_is_valid(metric):
250+
error_message = []
251+
252+
# Check for required percentile field
253+
if 'percentile' not in metric:
254+
error_message.append("Missing 'percentile' field for percentile metric")
255+
return '\n'.join(error_message)
256+
257+
percentile = metric['percentile']
258+
259+
# Check for required fact_name
260+
if 'fact_name' not in percentile:
261+
error_message.append("Missing 'fact_name' in percentile configuration")
262+
263+
# Check for required percentile_value
264+
if 'percentile_value' not in percentile:
265+
error_message.append("Missing 'percentile_value' in percentile configuration")
266+
elif not isinstance(percentile['percentile_value'], (int, float)):
267+
error_message.append("'percentile_value' must be a number")
268+
elif percentile['percentile_value'] < 0 or percentile['percentile_value'] > 1:
269+
error_message.append("'percentile_value' must be between 0 and 1")
270+
271+
if error_message:
272+
return '\n'.join(error_message)
273+
else:
274+
return None

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "eppo_metrics_sync"
7-
version = "0.1.7"
7+
version = "0.1.8"
88
description = "Sync metrics to Eppo"
99
readme = "README.md"
1010
requires-python = ">=3.7"

tests/test_validation.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,93 @@ def test_valid_yaml():
156156
eppo_metrics_sync = EppoMetricsSync(directory=None)
157157
eppo_metrics_sync.load_eppo_yaml(path='tests/yaml/valid/purchases.yaml')
158158
eppo_metrics_sync.validate()
159+
160+
def test_valid_percentile_yaml():
161+
eppo_metrics_sync = EppoMetricsSync(directory=None)
162+
eppo_metrics_sync.load_eppo_yaml(path='tests/yaml/valid/percentile_test.yaml')
163+
eppo_metrics_sync.validate()
164+
165+
def test_invalid_percentile_yaml():
166+
eppo_metrics_sync = EppoMetricsSync(directory=None)
167+
eppo_metrics_sync.load_eppo_yaml(path='tests/yaml/invalid/invalid_percentile.yaml')
168+
169+
with pytest.raises(ValueError) as excinfo:
170+
eppo_metrics_sync.validate()
171+
172+
error_msg = str(excinfo.value)
173+
assert "Invalid App opens percentile has invalid percentile configuration: Missing 'percentile_value' in percentile configuration" in error_msg
174+
assert "Invalid percentile value has invalid percentile configuration: 'percentile_value' must be between 0 and 1" in error_msg
175+
176+
def test_percentile_metrics():
177+
# Test with missing percentile field
178+
metric = {
179+
'name': 'Test Percentile Metric',
180+
'type': 'percentile',
181+
'entity': 'User'
182+
}
183+
from eppo_metrics_sync.validation import percentile_metric_is_valid
184+
error = percentile_metric_is_valid(metric)
185+
assert error == "Missing 'percentile' field for percentile metric"
186+
187+
# Test with missing fact_name
188+
metric = {
189+
'name': 'Test Percentile Metric',
190+
'type': 'percentile',
191+
'entity': 'User',
192+
'percentile': {
193+
'percentile_value': 0.95
194+
}
195+
}
196+
error = percentile_metric_is_valid(metric)
197+
assert error == "Missing 'fact_name' in percentile configuration"
198+
199+
# Test with missing percentile_value
200+
metric = {
201+
'name': 'Test Percentile Metric',
202+
'type': 'percentile',
203+
'entity': 'User',
204+
'percentile': {
205+
'fact_name': 'App open'
206+
}
207+
}
208+
error = percentile_metric_is_valid(metric)
209+
assert error == "Missing 'percentile_value' in percentile configuration"
210+
211+
# Test with invalid percentile_value type
212+
metric = {
213+
'name': 'Test Percentile Metric',
214+
'type': 'percentile',
215+
'entity': 'User',
216+
'percentile': {
217+
'fact_name': 'App open',
218+
'percentile_value': 'invalid'
219+
}
220+
}
221+
error = percentile_metric_is_valid(metric)
222+
assert error == "'percentile_value' must be a number"
223+
224+
# Test with percentile_value out of range
225+
metric = {
226+
'name': 'Test Percentile Metric',
227+
'type': 'percentile',
228+
'entity': 'User',
229+
'percentile': {
230+
'fact_name': 'App open',
231+
'percentile_value': 1.5
232+
}
233+
}
234+
error = percentile_metric_is_valid(metric)
235+
assert error == "'percentile_value' must be between 0 and 1"
236+
237+
# Test with valid percentile metric
238+
metric = {
239+
'name': 'Test Percentile Metric',
240+
'type': 'percentile',
241+
'entity': 'User',
242+
'percentile': {
243+
'fact_name': 'App open',
244+
'percentile_value': 0.95
245+
}
246+
}
247+
error = percentile_metric_is_valid(metric)
248+
assert error is None
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
fact_sources:
2+
- name: App Usage
3+
sql: |
4+
SELECT
5+
timestamp as TS,
6+
user_id,
7+
app_open_duration
8+
from customer_db.onboarding.app_usage
9+
timestamp_column: TS
10+
entities:
11+
- entity_name: User
12+
column: user_id
13+
facts:
14+
- name: App open
15+
column: app_open_duration
16+
reference_url: https://github.com/Eppo-exp/eppo-metrics-sync
17+
metrics:
18+
- name: Invalid App opens percentile
19+
description: Missing percentile_value
20+
type: percentile
21+
entity: User
22+
metric_display_style: decimal
23+
minimum_detectable_effect: 0.05
24+
reference_url: ""
25+
percentile:
26+
fact_name: App open
27+
guardrail_cutoff: null
28+
- name: Invalid percentile value
29+
description: Percentile value out of range
30+
type: percentile
31+
entity: User
32+
metric_display_style: decimal
33+
minimum_detectable_effect: 0.05
34+
reference_url: ""
35+
percentile:
36+
fact_name: App open
37+
percentile_value: 2.0
38+
guardrail_cutoff: null
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
fact_sources:
2+
- name: App Usage
3+
sql: |
4+
SELECT
5+
timestamp as TS,
6+
user_id,
7+
app_open_duration
8+
from customer_db.onboarding.app_usage
9+
timestamp_column: TS
10+
entities:
11+
- entity_name: User
12+
column: user_id
13+
- entity_name: Session
14+
column: session_id
15+
facts:
16+
- name: App open
17+
column: app_open_duration
18+
desired_change: increase
19+
reference_url: https://github.com/Eppo-exp/eppo-metrics-sync
20+
metrics:
21+
- name: App opens (p99)
22+
description: User app opens of 99 percentile
23+
type: percentile
24+
entity: Session
25+
metric_display_style: decimal
26+
minimum_detectable_effect: 0.05
27+
reference_url: ""
28+
percentile:
29+
fact_name: App open
30+
percentile_value: 0.99
31+
guardrail_cutoff: null
32+
- name: App opens (p90)
33+
description: User app opens of 90 percentile
34+
type: percentile
35+
entity: User
36+
metric_display_style: decimal
37+
minimum_detectable_effect: 0.05
38+
reference_url: ""
39+
percentile:
40+
fact_name: App open
41+
percentile_value: 0.90
42+
guardrail_cutoff: null

0 commit comments

Comments
 (0)