Skip to content

Commit c4a329f

Browse files
authored
Merge pull request #3140 from athenianco/persist-health
[DEV-5541] Persist account data health metrics in rdb
2 parents 0454b65 + a1b8180 commit c4a329f

File tree

12 files changed

+169
-18
lines changed

12 files changed

+169
-18
lines changed
Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,49 @@
1+
import dataclasses
12
from dataclasses import dataclass
3+
from datetime import datetime, timezone
24

5+
from sqlalchemy import insert
6+
7+
from athenian.api.db import DatabaseLike
38
from athenian.api.internal.features.entries import MinePullRequestMetrics
49
from athenian.api.internal.miners.github.branches import BranchMinerMetrics
510
from athenian.api.internal.miners.github.deployment import MineDeploymentsMetrics
611
from athenian.api.internal.miners.github.release_load import MineReleaseMetrics
712
from athenian.api.internal.reposet import RepositorySetMetrics
13+
from athenian.api.models.persistentdata.models import HealthMetric
814

915

1016
@dataclass(frozen=True, slots=True)
class DataHealthMetrics:
    """Collection of data error statistics to report.

    Each field holds the metrics object of one subsystem, or None when the measurement is
    disabled (see `skip()`).
    """

    branches: BranchMinerMetrics | None
    deployments: MineDeploymentsMetrics | None
    prs: MinePullRequestMetrics | None
    releases: MineReleaseMetrics | None
    reposet: RepositorySetMetrics | None

    @classmethod
    def empty(cls) -> "DataHealthMetrics":
        """Initialize a new DataHealthMetrics instance filled with zeros."""
        # every field is annotated as `<Metrics> | None`: the first union member is the
        # metrics class that knows how to build its own zero value
        return cls(**{f.name: f.type.__args__[0].empty() for f in dataclasses.fields(cls)})

    @classmethod
    def skip(cls) -> "DataHealthMetrics":
        """Initialize a new DataHealthMetrics instance filled with None-s.

        This effectively disables the feature.
        """
        return cls(**{f.name: None for f in dataclasses.fields(cls)})

    async def persist(self, account: int, rdb: DatabaseLike) -> None:
        """Insert all the measured metrics to the database.

        Fields that are None (e.g. the instance was created by `skip()`) are ignored.
        Nothing is executed when there is nothing to insert.

        :param account: Account ID assigned to every inserted metric.
        :param rdb: Database used to execute the insert.
        """
        # a single timestamp is shared by the whole batch
        now = datetime.now(timezone.utc)
        values = []
        for f in dataclasses.fields(self):
            if (metrics := getattr(self, f.name)) is None:
                # this group of metrics was not measured
                continue
            for m in metrics.as_db():
                m.account_id = account
                m.created_at = now
                values.append(m.explode(with_primary_keys=True))
        if values:
            await rdb.execute_many(insert(HealthMetric), values)

server/athenian/api/internal/features/entries.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
Release,
130130
)
131131
from athenian.api.models.metadata.jira import Issue
132+
from athenian.api.models.persistentdata.models import HealthMetric
132133
from athenian.api.models.web import NoSourceDataError
133134
from athenian.api.pandas_io import deserialize_args, serialize_args
134135
from athenian.api.response import ResponseError
@@ -1660,6 +1661,13 @@ def empty(cls) -> MinePullRequestMetrics:
16601661
"""Initialize a new MinePullRequestMetrics instance filled with zeros."""
16611662
return MinePullRequestMetrics(0, 0, 0, 0)
16621663

1664+
def as_db(self) -> Iterator[HealthMetric]:
    """Emit the pull request counters of this instance as HealthMetric-s."""
    for metric_name, attr in (
        ("prs_count", "count"),
        ("prs_done_count", "done_count"),
        ("prs_merged_count", "merged_count"),
        ("prs_open_count", "open_count"),
    ):
        yield HealthMetric(name=metric_name, value=getattr(self, attr))
1670+
16631671

16641672
class PRFactsCalculator:
16651673
"""Calculator for Pull Requests facts."""
@@ -2047,12 +2055,12 @@ async def _call_cached(
20472055

20482056
@staticmethod
def _set_count_metrics(facts: pd.DataFrame, metrics: MinePullRequestMetrics) -> None:
    """Fill the pull request counters of `metrics` from the mined facts.

    :param facts: DataFrame indexed by the `done`, `merged`, and `closed` columns of \
                  PullRequestFacts.
    :param metrics: Metrics object that is updated in-place.
    """
    metrics.count = len(facts)
    done = facts[PullRequestFacts.f.done]
    # pandas reductions return numpy integers; cast them to the builtin int so that the
    # counters stay plain Python numbers when they are later stored as JSON metric values
    metrics.done_count = int(done.sum())
    # merged but not done
    merged_not_done = facts[PullRequestFacts.f.merged].notnull() & ~done
    metrics.merged_count = int(merged_not_done.sum())
    metrics.open_count = int(facts[PullRequestFacts.f.closed].isnull().sum())
20562064

20572065

20582066
class ParticipantsMerge:

server/athenian/api/internal/miners/github/branches.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from dataclasses import dataclass
22
from datetime import datetime, timezone
33
import logging
4-
from typing import Iterable, Optional
4+
from typing import Iterable, Iterator, Optional
55

66
import aiomcache
77
import numpy as np
@@ -16,6 +16,7 @@
1616
from athenian.api.internal.logical_repos import coerce_logical_repos
1717
from athenian.api.internal.prefixer import Prefixer
1818
from athenian.api.models.metadata.github import Branch, NodeCommit, NodeRepositoryRef, Repository
19+
from athenian.api.models.persistentdata.models import HealthMetric
1920
from athenian.api.pandas_io import deserialize_args, serialize_args
2021
from athenian.api.to_object_arrays import is_not_null
2122
from athenian.api.tracing import sentry_span
@@ -34,6 +35,12 @@ def empty(cls) -> "BranchMinerMetrics":
3435
"""Initialize a new BranchMinerMetrics instance filled with zeros."""
3536
return BranchMinerMetrics(0, 0, 0)
3637

38+
def as_db(self) -> Iterator[HealthMetric]:
    """Emit the branch counters of this instance as HealthMetric-s."""
    for metric_name, attr in (
        ("branches_count", "count"),
        ("branches_empty_count", "empty_count"),
        ("branches_no_default", "no_default"),
    ):
        yield HealthMetric(name=metric_name, value=getattr(self, attr))
43+
3744

3845
@cached_methods
3946
class BranchMiner:

server/athenian/api/internal/miners/github/commit.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from enum import Enum
44
import logging
55
import pickle
6-
from typing import Collection, Iterable, KeysView, Mapping, Optional, Sequence, Union
6+
from typing import Collection, Iterable, Iterator, KeysView, Mapping, Optional, Sequence, Union
77

88
import aiomcache
99
import numpy as np
@@ -48,6 +48,7 @@
4848
PushCommit,
4949
Release,
5050
)
51+
from athenian.api.models.persistentdata.models import HealthMetric
5152
from athenian.api.models.precomputed.models import GitHubCommitHistory
5253
from athenian.api.native.mi_heap_destroy_stl_allocator import make_mi_heap_allocator_capsule
5354
from athenian.api.pandas_io import deserialize_args, serialize_args
@@ -84,6 +85,12 @@ def empty(cls) -> "CommitDAGMetrics":
8485
"""Initialize a new CommitDAGMetrics instance filled with zeros."""
8586
return CommitDAGMetrics(set(), set(), set())
8687

88+
def as_db(self) -> Iterator[HealthMetric]:
    """Emit the sizes of the tracked commit collections as HealthMetric-s."""
    for metric_name, attr in (
        ("commits_pristine", "pristine"),
        ("commits_corrupted", "corrupted"),
        ("commits_orphaned", "orphaned"),
    ):
        # the persisted value is the number of items, not the items themselves
        yield HealthMetric(name=metric_name, value=len(getattr(self, attr)))
93+
8794

8895
def _postprocess_extract_commits(result, with_deployments=True, **_):
8996
if isinstance(result, tuple):

server/athenian/api/internal/miners/github/deployment.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import json
77
import logging
88
from operator import attrgetter
9-
from typing import Any, Collection, KeysView, Mapping, NamedTuple, Optional, Sequence
9+
from typing import Any, Collection, Iterator, KeysView, Mapping, NamedTuple, Optional, Sequence
1010

1111
import aiomcache
1212
import numpy as np
@@ -124,6 +124,7 @@
124124
DeployedComponent,
125125
DeployedLabel,
126126
DeploymentNotification,
127+
HealthMetric,
127128
ReleaseNotification,
128129
)
129130
from athenian.api.models.precomputed.models import (
@@ -153,6 +154,11 @@ def empty(cls) -> "MineDeploymentsMetrics":
153154
"""Initialize a new MineDeploymentsMetrics instance filled with zeros."""
154155
return MineDeploymentsMetrics(0, 0)
155156

157+
def as_db(self) -> Iterator[HealthMetric]:
    """Emit the deployment counters of this instance as HealthMetric-s."""
    for metric_name, attr in (
        ("deployments_count", "count"),
        ("deployments_unresolved", "unresolved"),
    ):
        yield HealthMetric(name=metric_name, value=getattr(self, attr))
161+
156162

157163
async def mine_deployments(
158164
repositories: Collection[str],

server/athenian/api/internal/miners/github/release_load.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66
import pickle
77
import re
8-
from typing import Iterable, KeysView, Mapping, Optional, Sequence
8+
from typing import Iterable, Iterator, KeysView, Mapping, Optional, Sequence
99

1010
import aiomcache
1111
import morcilla
@@ -57,7 +57,7 @@
5757
default_branch_alias,
5858
)
5959
from athenian.api.models.metadata.github import Branch, NodeCommit, PushCommit, Release, User
60-
from athenian.api.models.persistentdata.models import ReleaseNotification
60+
from athenian.api.models.persistentdata.models import HealthMetric, ReleaseNotification
6161
from athenian.api.models.precomputed.models import (
6262
GitHubRelease as PrecomputedRelease,
6363
GitHubReleaseMatchTimespan,
@@ -88,6 +88,15 @@ def empty(cls) -> "MineReleaseMetrics":
8888
"""Initialize a new MineReleaseMetrics instance filled with zeros."""
8989
return MineReleaseMetrics(CommitDAGMetrics.empty(), 0, 0, 0, {}, 0)
9090

91+
def as_db(self) -> Iterator[HealthMetric]:
    """Emit the release statistics of this instance as HealthMetric-s.

    The metrics of the nested commit DAG statistics go first.
    """
    yield from self.commits.as_db()
    for metric_name, attr in (
        ("releases_by_branch", "count_by_branch"),
        ("releases_by_tag", "count_by_tag"),
        ("releases_by_event", "count_by_event"),
    ):
        yield HealthMetric(name=metric_name, value=getattr(self, attr))
    # `empty_releases` is a mapping; only the total over its values is reported
    empty_total = sum(self.empty_releases.values())
    yield HealthMetric(name="releases_empty", value=empty_total)
    yield HealthMetric(name="releases_unresolved", value=self.unresolved)
99+
91100

92101
class ReleaseLoader:
93102
"""Loader for releases."""

server/athenian/api/internal/reposet.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from athenian.api.internal.miners.access_classes import access_classes
3131
from athenian.api.internal.prefixer import Prefixer, RepositoryName
3232
from athenian.api.models.metadata.github import AccountRepository, NodeUser
33+
from athenian.api.models.persistentdata.models import HealthMetric
3334
from athenian.api.models.state.models import LogicalRepository, RepositorySet, UserAccount
3435
from athenian.api.models.web import (
3536
ForbiddenError,
@@ -48,14 +49,19 @@
4849
class RepositorySetMetrics:
    """Statistics about reposet synchronization errors."""

    length: int
    undead: int

    @classmethod
    def empty(cls) -> RepositorySetMetrics:
        """Build a RepositorySetMetrics instance with every counter set to zero."""
        return RepositorySetMetrics(0, 0)

    def as_db(self) -> Iterator[HealthMetric]:
        """Emit the reposet counters of this instance as HealthMetric-s."""
        for metric_name, attr in (
            ("reposet_length", "length"),
            ("reposet_undead", "undead"),
        ):
            yield HealthMetric(name=metric_name, value=getattr(self, attr))
64+
5965

6066
def reposet_items_to_refs(items: list[tuple[int, str, str]]) -> Iterator[RepositoryReference]:
6167
"""Convert the raw DB repository tuples to RepositoryReference."""

server/athenian/api/models/persistentdata/models.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,18 @@ class DeployedComponent(create_time_mixin(created_at=True, updated_at=False), Ba
125125
# de-referenced commit node ID in metadata DB
126126
resolved_commit_node_id = Column(BigInteger(), info={"reset_nulls": True})
127127
resolved_at = Column(TIMESTAMP(timezone=True), nullable=True)
128+
129+
130+
class HealthMetric(Base):
    """Account data health statistics.

    One row is a single named metric value of an account at the `created_at` moment.
    """

    __tablename__ = "health_metrics"

    # The migration that creates `health_metrics` declares `account_id` as a part of the
    # primary key, so the model must map it, otherwise the value never reaches the INSERT.
    # NOTE(review): assumes `Integer` is already imported from sqlalchemy in this module - confirm
    account_id = Column(Integer(), primary_key=True)
    name = Column(Text(), primary_key=True)
    created_at = Column(
        TIMESTAMP(timezone=True),
        primary_key=True,
        default=lambda: datetime.now(timezone.utc),
        server_default=func.now(),
    )
    # JSONB by default, plain JSON on SQLite
    value = Column(JSONB().with_variant(JSON(), sqlite.dialect.name), nullable=False)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""Add health_metrics table
2+
3+
Revision ID: 41fcc01cccc0
4+
Revises: f0ae6bfc60b1
5+
Create Date: 2022-12-16 15:55:24.105902+00:00
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import sqlite
11+
from sqlalchemy.dialects.postgresql import JSONB
12+
13+
# revision identifiers, used by Alembic.
14+
revision = "41fcc01cccc0"
15+
down_revision = "f0ae6bfc60b1"
16+
branch_labels = None
17+
depends_on = None
18+
19+
20+
def upgrade():
    """Create the health_metrics table keyed by account, metric name, and creation time."""
    schema_arg = {}
    name = "health_metrics"
    if op.get_bind().dialect.name == "postgresql":
        schema_arg["schema"] = "athenian"
    else:
        # no schema argument for the other dialects: the prefix becomes a part of the name
        name = "athenian." + name
    columns = (
        sa.Column("account_id", sa.Integer(), primary_key=True),
        sa.Column("name", sa.Text(), primary_key=True),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(timezone=True),
            primary_key=True,
            server_default=sa.func.now(),
        ),
        sa.Column(
            "value",
            JSONB().with_variant(sa.JSON(), sqlite.dialect.name),
            nullable=False,
        ),
    )
    op.create_table(name, *columns, **schema_arg)
44+
45+
46+
def downgrade():
    """Drop the health_metrics table."""
    name = "health_metrics"
    if op.get_bind().dialect.name != "postgresql":
        # no schema argument for the other dialects: the prefix is a part of the table name
        op.drop_table("athenian." + name)
    else:
        op.drop_table(name, schema="athenian")

server/athenian/api/precompute/__main__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ class Formatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpForm
115115
accounts_parser.add_argument(
116116
"--skip-teams", action="store_true", help="Do not touch anything related to teams.",
117117
)
118+
accounts_parser.add_argument(
119+
"--skip-health-metrics",
120+
action="store_true",
121+
help="Neither measure nor persist account data health metrics.",
122+
)
118123
accounts_parser.add_argument(
119124
"--time-from",
120125
type=deserialize_datetime,

0 commit comments

Comments
 (0)