Skip to content

Commit c832d9d

Browse files
authored
fix: chaoss metrics aggregation issues (#4652)
* feat: add entity types to timeseries metrics and enhance contributor absence factor tests
* feat(tests): add multiple artifacts test cases for burstiness and self-merge rates
* fix(sql): correct aggregation logic in self merge rates calculation
* fix(sqlmesh): update variance calculation to ensure correct aggregation in burstiness metrics

---------

Co-authored-by: ken <[email protected]>
1 parent c7fd273 commit c832d9d

File tree

7 files changed: 239 additions and 30 deletions.

warehouse/oso_sqlmesh/models/metrics_factories.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -833,6 +833,7 @@ def no_gaps_audit_factory(config: MetricQueryConfig) -> tuple[str, dict] | None:
833833
"biannually",
834834
"yearly",
835835
],
836+
entity_types=["artifact", "project", "collection"],
836837
over_all_time=True,
837838
metadata=MetricMetadata(
838839
display_name="Contributor Absence Factor",

warehouse/oso_sqlmesh/oso_metrics/code/burstiness.sql

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -52,13 +52,14 @@ variance_calc as (
5252
from activity_stats
5353
)
5454
select
55-
variance_calc.metrics_sample_date as metrics_sample_date,
56-
variance_calc.event_source as event_source,
57-
variance_calc.to_artifact_id as to_artifact_id,
55+
variance_calc.metrics_sample_date,
56+
variance_calc.event_source,
57+
variance_calc.to_artifact_id,
5858
'' as from_artifact_id,
5959
@metric_name() as metric,
6060
cast(case
61-
when mean_activity > 0 then variance_activity * variance_activity / mean_activity
61+
when sum(mean_activity) > 0 then sum(variance_activity * variance_activity) / sum(mean_activity)
6262
else NULL
6363
end as DOUBLE) as amount
6464
from variance_calc
65+
group by 1, 2, 3, 4, 5

warehouse/oso_sqlmesh/oso_metrics/code/contributor_absence_factor.sql

Lines changed: 9 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ ranked_contributors as (
3535
cc.contribution_amount,
3636
tc.total_amount,
3737
row_number() over (
38-
partition by cc.metrics_sample_date, cc.event_source, cc.to_artifact_id
38+
partition by cc.metrics_sample_date, cc.event_source, cc.to_artifact_id
3939
order by cc.contribution_amount desc
4040
) as contributor_rank
4141
from contributor_contributions cc
@@ -55,29 +55,20 @@ cumulative_contributions as (
5555
total_amount,
5656
contributor_rank,
5757
sum(contribution_amount) over (
58-
partition by metrics_sample_date, event_source, to_artifact_id
58+
partition by metrics_sample_date, event_source, to_artifact_id
5959
order by contributor_rank
6060
rows unbounded preceding
6161
) as cumulative_amount
6262
from ranked_contributors
63-
),
64-
65-
absence_factor as (
66-
select
67-
metrics_sample_date,
68-
event_source,
69-
to_artifact_id,
70-
min(contributor_rank) as contributor_absence_factor
71-
from cumulative_contributions
72-
where cumulative_amount >= (total_amount * 0.5)
73-
group by 1, 2, 3
7463
)
7564

7665
select
77-
af.metrics_sample_date,
78-
af.event_source,
79-
af.to_artifact_id,
66+
metrics_sample_date,
67+
event_source,
68+
to_artifact_id,
8069
'' as from_artifact_id,
8170
@metric_name() as metric,
82-
af.contributor_absence_factor as amount
83-
from absence_factor af
71+
min(contributor_rank) as amount
72+
from cumulative_contributions
73+
where cumulative_amount >= (total_amount * 0.5)
74+
group by 1, 2, 3, 4, 5

warehouse/oso_sqlmesh/oso_metrics/code/self_merge_rates.sql

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -64,18 +64,19 @@ total_merged_prs as (
6464
to_artifact_id
6565
)
6666
select
67-
total.metrics_sample_date as metrics_sample_date,
68-
total.event_source as event_source,
69-
total.to_artifact_id as to_artifact_id,
67+
total.metrics_sample_date,
68+
total.event_source,
69+
total.to_artifact_id,
7070
'' as from_artifact_id,
7171
@metric_name() as metric,
72-
cast(case
73-
when total.total_merged_count > 0 then
74-
coalesce(self_merged.self_merged_count, 0) / cast(total.total_merged_count as double)
75-
else NULL
72+
cast(case
73+
when sum(total.total_merged_count) > 0
74+
then sum(coalesce(self_merged.self_merged_count, 0)) / cast(sum(total.total_merged_count) as double)
75+
else null
7676
end as double) as amount
7777
from total_merged_prs as total
7878
left join self_merged_prs_without_comments as self_merged
7979
on total.metrics_sample_date = self_merged.metrics_sample_date
8080
and total.event_source = self_merged.event_source
8181
and total.to_artifact_id = self_merged.to_artifact_id
82+
group by 1, 2, 3, 4, 5

warehouse/oso_sqlmesh/tests/test_burstiness_to_project_daily.yml

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -420,3 +420,74 @@ test_burstiness_to_project_daily_complex_multi_user_events:
420420
from_artifact_id: ''
421421
metric: burstiness_daily
422422
amount: 5.671296296
423+
424+
425+
test_burstiness_to_project_daily_multiple_artifacts_in_project:
426+
model: oso.burstiness_to_project_daily
427+
vars:
428+
start: 2024-01-01
429+
end: 2024-01-02
430+
inputs:
431+
oso.int_events_daily__github:
432+
rows:
433+
# Day 1
434+
- to_artifact_id: repo_1
435+
from_artifact_id: contributor_1
436+
event_source: SOURCE_PROVIDER
437+
event_type: COMMIT_CODE
438+
bucket_day: 2024-01-01
439+
amount: 100
440+
- to_artifact_id: repo_2
441+
from_artifact_id: contributor_2
442+
event_source: SOURCE_PROVIDER
443+
event_type: COMMIT_CODE
444+
bucket_day: 2024-01-01
445+
amount: 100
446+
# Day 2
447+
- to_artifact_id: repo_1
448+
from_artifact_id: contributor_1
449+
event_source: SOURCE_PROVIDER
450+
event_type: COMMIT_CODE
451+
bucket_day: 2024-01-02
452+
amount: 100
453+
- to_artifact_id: repo_2
454+
from_artifact_id: contributor_2
455+
event_source: SOURCE_PROVIDER
456+
event_type: COMMIT_CODE
457+
bucket_day: 2024-01-02
458+
amount: 100
459+
oso.int_first_last_commit_to_github_repository:
460+
rows:
461+
- artifact_id: repo_1
462+
artifact_source_id: github
463+
artifact_namespace: test
464+
artifact_name: repo_1
465+
first_commit_time: 2024-01-01 00:00:00
466+
last_commit_time: 2024-01-02 00:00:00
467+
- artifact_id: repo_2
468+
artifact_source_id: github
469+
artifact_namespace: test
470+
artifact_name: repo_2
471+
first_commit_time: 2024-01-01 00:00:00
472+
last_commit_time: 2024-01-02 00:00:00
473+
oso.artifacts_by_project_v1:
474+
rows:
475+
- artifact_id: repo_1
476+
project_id: project_1
477+
- artifact_id: repo_2
478+
project_id: project_1
479+
outputs:
480+
query:
481+
rows:
482+
- metrics_sample_date: 2024-01-01
483+
event_source: SOURCE_PROVIDER
484+
to_project_id: project_1
485+
from_artifact_id: ''
486+
metric: burstiness_daily
487+
amount: 0
488+
- metrics_sample_date: 2024-01-02
489+
event_source: SOURCE_PROVIDER
490+
to_project_id: project_1
491+
from_artifact_id: ''
492+
metric: burstiness_daily
493+
amount: 0

warehouse/oso_sqlmesh/tests/test_contributor_absence_factor_to_project_daily.yml

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -209,3 +209,40 @@ test_contributor_absence_factor_to_project_daily_no_activity:
209209
outputs:
210210
query:
211211
rows: []
212+
213+
214+
test_contributor_absence_factor_to_project_daily_multiple_artifacts_in_project:
215+
model: oso.contributor_absence_factor_to_project_daily
216+
vars:
217+
start: 2024-01-01
218+
end: 2024-01-01
219+
inputs:
220+
oso.int_events_daily__github:
221+
rows:
222+
- to_artifact_id: repo_1
223+
from_artifact_id: contributor_1
224+
event_source: SOURCE_PROVIDER
225+
event_type: COMMIT_CODE
226+
bucket_day: 2024-01-01
227+
amount: 100
228+
- to_artifact_id: repo_2
229+
from_artifact_id: contributor_2
230+
event_source: SOURCE_PROVIDER
231+
event_type: COMMIT_CODE
232+
bucket_day: 2024-01-01
233+
amount: 100
234+
oso.artifacts_by_project_v1:
235+
rows:
236+
- artifact_id: repo_1
237+
project_id: project_1
238+
- artifact_id: repo_2
239+
project_id: project_1
240+
outputs:
241+
query:
242+
rows:
243+
- metrics_sample_date: 2024-01-01
244+
event_source: SOURCE_PROVIDER
245+
to_project_id: project_1
246+
from_artifact_id: ''
247+
metric: contributor_absence_factor_daily
248+
amount: 1

warehouse/oso_sqlmesh/tests/test_self_merge_rates_to_project_daily.yml

Lines changed: 108 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ test_self_merge_rates_to_project_daily:
7070
project_id: project_1
7171
outputs:
7272
query:
73-
rows:
73+
rows:
7474
- metrics_sample_date: 2024-01-01
7575
event_source: GITHUB
7676
to_project_id: project_1
@@ -89,3 +89,110 @@ test_self_merge_rates_to_project_daily:
8989
from_artifact_id: ''
9090
metric: self_merge_rates_daily
9191
amount: 0
92+
93+
94+
test_self_merge_rates_to_project_daily_multiple_artifacts_in_project:
95+
model: oso.self_merge_rates_to_project_daily
96+
vars:
97+
start: 2024-01-01
98+
end: 2024-01-02
99+
inputs:
100+
oso.int_events_aux_prs:
101+
rows:
102+
# repo_1, Day 1, self-merged
103+
- time: "2024-01-01 09:00:00"
104+
to_artifact_id: repo_1
105+
from_artifact_id: user_1
106+
pr_id: pr_1_1
107+
pr_number: 1
108+
event_type: PULL_REQUEST_OPENED
109+
event_source: GITHUB
110+
merged_at: null
111+
comments: 0
112+
- time: "2024-01-01 10:00:00"
113+
to_artifact_id: repo_1
114+
from_artifact_id: user_1
115+
pr_id: pr_1_1
116+
pr_number: 1
117+
event_type: PULL_REQUEST_MERGED
118+
event_source: GITHUB
119+
merged_at: "2024-01-01 10:00:00"
120+
comments: 0
121+
# repo_2, Day 1, self-merged
122+
- time: "2024-01-01 11:00:00"
123+
to_artifact_id: repo_2
124+
from_artifact_id: user_2
125+
pr_id: pr_2_1
126+
pr_number: 1
127+
event_type: PULL_REQUEST_OPENED
128+
event_source: GITHUB
129+
merged_at: null
130+
comments: 0
131+
- time: "2024-01-01 12:00:00"
132+
to_artifact_id: repo_2
133+
from_artifact_id: user_2
134+
pr_id: pr_2_1
135+
pr_number: 1
136+
event_type: PULL_REQUEST_MERGED
137+
event_source: GITHUB
138+
merged_at: "2024-01-01 12:00:00"
139+
comments: 0
140+
# repo_1, Day 2, not self-merged
141+
- time: "2024-01-02 09:00:00"
142+
to_artifact_id: repo_1
143+
from_artifact_id: user_1
144+
pr_id: pr_1_2
145+
pr_number: 2
146+
event_type: PULL_REQUEST_OPENED
147+
event_source: GITHUB
148+
merged_at: null
149+
comments: 0
150+
- time: "2024-01-02 10:00:00"
151+
to_artifact_id: repo_1
152+
from_artifact_id: user_3 # Merged by another user
153+
pr_id: pr_1_2
154+
pr_number: 2
155+
event_type: PULL_REQUEST_MERGED
156+
event_source: GITHUB
157+
merged_at: "2024-01-02 10:00:00"
158+
comments: 0
159+
# repo_2, Day 2, self-merged
160+
- time: "2024-01-02 11:00:00"
161+
to_artifact_id: repo_2
162+
from_artifact_id: user_2
163+
pr_id: pr_2_2
164+
pr_number: 2
165+
event_type: PULL_REQUEST_OPENED
166+
event_source: GITHUB
167+
merged_at: null
168+
comments: 0
169+
- time: "2024-01-02 12:00:00"
170+
to_artifact_id: repo_2
171+
from_artifact_id: user_2
172+
pr_id: pr_2_2
173+
pr_number: 2
174+
event_type: PULL_REQUEST_MERGED
175+
event_source: GITHUB
176+
merged_at: "2024-01-02 12:00:00"
177+
comments: 0
178+
oso.artifacts_by_project_v1:
179+
rows:
180+
- artifact_id: repo_1
181+
project_id: project_1
182+
- artifact_id: repo_2
183+
project_id: project_1
184+
outputs:
185+
query:
186+
rows:
187+
- metrics_sample_date: 2024-01-01
188+
event_source: GITHUB
189+
to_project_id: project_1
190+
from_artifact_id: ''
191+
metric: self_merge_rates_daily
192+
amount: 1.0
193+
- metrics_sample_date: 2024-01-02
194+
event_source: GITHUB
195+
to_project_id: project_1
196+
from_artifact_id: ''
197+
metric: self_merge_rates_daily
198+
amount: 0.5

0 commit comments

Comments
 (0)