Skip to content

Commit df4d6a9

Browse files
committed
[DEV-5349] Fix missing JIRA in unfresh PR facts
1 parent 2e0f6ff commit df4d6a9

File tree

5 files changed

+54
-51
lines changed

5 files changed

+54
-51
lines changed

server/athenian/api/internal/features/github/pull_request_filter.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@
8989
Deployment,
9090
JIRAEntityToFetch,
9191
Label,
92+
LoadedJIRADetails,
9293
MinedPullRequest,
9394
PRParticipants,
9495
PullRequestEvent,
@@ -1293,6 +1294,10 @@ async def noop():
12931294
for k, v in unreleased.items():
12941295
if k not in facts:
12951296
facts[k] = v
1297+
empty_jira = LoadedJIRADetails.empty()
1298+
for v in facts.values():
1299+
if v.jira is None:
1300+
v.jira = empty_jira
12961301
dfs, _, _ = await PullRequestMiner.mine_by_ids(
12971302
prs_df,
12981303
unreleased,

server/athenian/api/internal/features/github/unfresh_pull_request_metrics.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import asyncio
22
from collections import defaultdict
33
from datetime import datetime, timedelta, timezone
4+
from itertools import chain
45
import logging
56
from typing import Dict, KeysView, List, Optional, Set, Tuple, Type, Union
67

@@ -206,6 +207,7 @@ async def dummy_inactive_prs():
206207
merged_prs = unreleased_prs.index.take(np.flatnonzero(merged_mask)).union(
207208
inactive_merged_prs,
208209
)
210+
del unreleased_prs
209211
tasks = [
210212
cls.open_prs_facts_loader.load_open_pull_request_facts_unfresh(
211213
open_prs,
@@ -254,10 +256,10 @@ async def dummy_inactive_prs():
254256
if pr_jira_mapper is not None:
255257
unreleased_jira_map = unreleased_jira_map[0]
256258
empty_jira = LoadedJIRADetails.empty()
257-
for node_id, repo in zip(
258-
unreleased_prs.index.get_level_values(0).values,
259-
unreleased_prs.index.get_level_values(1).values,
260-
):
259+
# It is not enough to iterate over unreleased_prs.index:
260+
# we may encounter a (logical repository, node_id) pair that is not yet precomputed
261+
# while its (physical repository, node_id) counterpart is precomputed and exists in the facts.
262+
for node_id, repo in chain(open_facts.keys(), merged_facts.keys()):
261263
try:
262264
jira = unreleased_jira_map[node_id]
263265
except KeyError:

server/athenian/api/internal/miners/github/precomputed_prs/done_prs.py

Lines changed: 33 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -3,19 +3,7 @@
33
from itertools import chain
44
import logging
55
import pickle
6-
from typing import (
7-
Any,
8-
Callable,
9-
Collection,
10-
Dict,
11-
Iterable,
12-
KeysView,
13-
List,
14-
Mapping,
15-
Optional,
16-
Set,
17-
Tuple,
18-
)
6+
from typing import Any, Callable, Collection, Iterable, KeysView, Mapping, Optional
197

208
import aiomcache
219
import morcilla
@@ -87,11 +75,11 @@ async def load_precomputed_done_candidates(
8775
time_from: datetime,
8876
time_to: datetime,
8977
repos: Collection[str],
90-
default_branches: Dict[str, str],
78+
default_branches: dict[str, str],
9179
release_settings: ReleaseSettings,
9280
account: int,
9381
pdb: morcilla.Database,
94-
) -> Tuple[Set[int], Dict[str, List[int]]]:
82+
) -> tuple[set[int], dict[str, list[int]]]:
9583
"""
9684
Load the set of done PR identifiers and specifically ambiguous PR node IDs.
9785
@@ -134,13 +122,13 @@ async def load_precomputed_done_facts_filters(
134122
repos: Collection[str],
135123
participants: PRParticipants,
136124
labels: LabelFilter,
137-
default_branches: Dict[str, str],
125+
default_branches: dict[str, str],
138126
exclude_inactive: bool,
139127
release_settings: ReleaseSettings,
140128
prefixer: Prefixer,
141129
account: int,
142130
pdb: morcilla.Database,
143-
) -> Tuple[PullRequestFactsMap, Dict[str, List[int]]]:
131+
) -> tuple[PullRequestFactsMap, dict[str, list[int]]]:
144132
"""
145133
Fetch precomputed done PR facts.
146134
@@ -188,13 +176,13 @@ async def load_precomputed_done_facts_filters(
188176
async def load_precomputed_done_facts_all(
189177
cls,
190178
repos: Collection[str],
191-
default_branches: Dict[str, str],
179+
default_branches: dict[str, str],
192180
release_settings: ReleaseSettings,
193181
prefixer: Prefixer,
194182
account: int,
195183
pdb: morcilla.Database,
196184
extra: Iterable[InstrumentedAttribute] = (),
197-
) -> Tuple[PullRequestFactsMap, Dict[str, Mapping[str, Any]]]:
185+
) -> tuple[PullRequestFactsMap, dict[str, Mapping[str, Any]]]:
198186
"""
199187
Fetch all the precomputed done PR facts we have.
200188
@@ -240,13 +228,13 @@ async def load_precomputed_done_timestamp_filters(
240228
repos: Collection[str],
241229
participants: PRParticipants,
242230
labels: LabelFilter,
243-
default_branches: Dict[str, str],
231+
default_branches: dict[str, str],
244232
exclude_inactive: bool,
245233
release_settings: ReleaseSettings,
246234
prefixer: Prefixer,
247235
account: int,
248236
pdb: morcilla.Database,
249-
) -> Tuple[Dict[PullRequestID, datetime], Dict[str, List[int]]]:
237+
) -> tuple[dict[PullRequestID, datetime], dict[str, list[int]]]:
250238
"""
251239
Fetch precomputed done PR "pr_done_at" timestamps.
252240
@@ -278,13 +266,13 @@ async def load_precomputed_done_timestamp_filters(
278266
@sentry_span
279267
async def load_precomputed_done_facts_reponums(
280268
cls,
281-
repos: Dict[str, Set[int]],
282-
default_branches: Dict[str, str],
269+
repos: dict[str, set[int]],
270+
default_branches: dict[str, str],
283271
release_settings: ReleaseSettings,
284272
prefixer: Prefixer,
285273
account: int,
286274
pdb: morcilla.Database,
287-
) -> Tuple[PullRequestFactsMap, Dict[str, List[int]]]:
275+
) -> tuple[PullRequestFactsMap, dict[str, list[int]]]:
288276
"""
289277
Load PullRequestFacts belonging to released or rejected PRs from the precomputed DB.
290278
@@ -380,13 +368,13 @@ async def load_precomputed_done_facts_reponums(
380368
async def load_precomputed_done_facts_ids(
381369
cls,
382370
node_ids: Iterable[int],
383-
default_branches: Dict[str, str],
371+
default_branches: dict[str, str],
384372
release_settings: ReleaseSettings,
385373
prefixer: Prefixer,
386374
account: int,
387375
pdb: morcilla.Database,
388376
panic_on_missing_repositories: bool = True,
389-
) -> Tuple[PullRequestFactsMap, Dict[str, List[int]]]:
377+
) -> tuple[PullRequestFactsMap, dict[str, list[int]]]:
390378
"""
391379
Load PullRequestFacts belonging to released or rejected PRs from the precomputed DB.
392380
@@ -464,8 +452,8 @@ async def load_precomputed_pr_releases(
464452
cls,
465453
prs: pd.DataFrame,
466454
time_to: datetime,
467-
matched_bys: Dict[str, ReleaseMatch],
468-
default_branches: Dict[str, str],
455+
matched_bys: dict[str, ReleaseMatch],
456+
default_branches: dict[str, str],
469457
release_settings: ReleaseSettings,
470458
prefixer: Prefixer,
471459
account: int,
@@ -566,19 +554,19 @@ async def load_precomputed_pr_releases(
566554
@sentry_span
567555
async def _load_precomputed_done_filters(
568556
cls,
569-
columns: List[InstrumentedAttribute],
557+
columns: list[InstrumentedAttribute],
570558
time_from: Optional[datetime],
571559
time_to: Optional[datetime],
572560
repos: Collection[str],
573561
participants: PRParticipants,
574562
labels: LabelFilter,
575-
default_branches: Dict[str, str],
563+
default_branches: dict[str, str],
576564
exclude_inactive: bool,
577565
release_settings: ReleaseSettings,
578566
prefixer: Prefixer,
579567
account: int,
580568
pdb: morcilla.Database,
581-
) -> Tuple[Dict[PullRequestID, Mapping[str, Any]], Dict[str, List[int]]]:
569+
) -> tuple[dict[PullRequestID, Mapping[str, Any]], dict[str, list[int]]]:
582570
"""
583571
Load some data belonging to released or rejected PRs from the precomputed DB.
584572
@@ -685,8 +673,8 @@ def or_items():
685673
@classmethod
686674
async def _compose_query_filters_undeployed(
687675
cls,
688-
selected: Set[InstrumentedAttribute],
689-
or_items: Callable[[], List[ClauseElement]],
676+
selected: set[InstrumentedAttribute],
677+
or_items: Callable[[], list[ClauseElement]],
690678
time_from: Optional[datetime],
691679
time_to: Optional[datetime],
692680
participants: PRParticipants,
@@ -695,7 +683,7 @@ async def _compose_query_filters_undeployed(
695683
prefixer: Prefixer,
696684
account: int,
697685
postgres: bool,
698-
) -> Tuple[List[Select], Set[datetime]]:
686+
) -> tuple[list[Select], set[datetime]]:
699687
ghprt = GitHubDonePullRequestFacts
700688
filters = cls._create_common_filters(time_from, time_to, None, account)
701689
selected = selected.copy()
@@ -734,8 +722,8 @@ async def _compose_query_filters_undeployed(
734722
@classmethod
735723
async def _compose_query_filters_deployed(
736724
cls,
737-
selected: Set[InstrumentedAttribute],
738-
or_items: Callable[[], List[ClauseElement]],
725+
selected: set[InstrumentedAttribute],
726+
or_items: Callable[[], list[ClauseElement]],
739727
time_from: Optional[datetime],
740728
time_to: Optional[datetime],
741729
participants: PRParticipants,
@@ -744,7 +732,7 @@ async def _compose_query_filters_deployed(
744732
prefixer: Prefixer,
745733
account: int,
746734
postgres: bool,
747-
) -> List[Select]:
735+
) -> list[Select]:
748736
ghprt = GitHubDonePullRequestFacts
749737
filters = cls._create_common_filters(None, time_to, None, account)
750738
selected = selected.copy()
@@ -780,7 +768,7 @@ def _create_common_filters(
780768
time_to: Optional[datetime],
781769
repos: Optional[Collection[str]],
782770
account: int,
783-
) -> List[ClauseElement]:
771+
) -> list[ClauseElement]:
784772
assert isinstance(time_from, (datetime, type(None)))
785773
assert isinstance(time_to, (datetime, type(None)))
786774
ghprt = GitHubDonePullRequestFacts
@@ -799,9 +787,9 @@ def _create_common_filters(
799787
@classmethod
800788
def _post_process_ambiguous_done_prs(
801789
cls,
802-
result: Dict[Tuple[int, str], Mapping[str, Any]],
803-
ambiguous: Dict[ReleaseMatch, Dict[Tuple[int, str], Mapping[str, Any]]],
804-
) -> Tuple[Dict[Tuple[int, str], Mapping[str, Any]], Dict[str, List[int]]]:
790+
result: dict[tuple[int, str], Mapping[str, Any]],
791+
ambiguous: dict[ReleaseMatch, dict[tuple[int, str], Mapping[str, Any]]],
792+
) -> tuple[dict[tuple[int, str], Mapping[str, Any]], dict[str, list[int]]]:
805793
"""Figure out what to do with uncertain `tag_or_branch` release matches."""
806794
result.update(ambiguous[ReleaseMatch.tag.name])
807795
# we've found PRs released by tag belonging to these repos.
@@ -821,7 +809,7 @@ async def _build_participants_filters(
821809
cls,
822810
participants: PRParticipants,
823811
filters: list,
824-
selected: Set[InstrumentedAttribute],
812+
selected: set[InstrumentedAttribute],
825813
postgres: bool,
826814
prefixer: Prefixer,
827815
) -> None:
@@ -872,7 +860,7 @@ async def _build_participants_conditions(
872860
cls,
873861
participants: PRParticipants,
874862
prefixer: Prefixer,
875-
) -> Tuple[list, list]:
863+
) -> tuple[list, list]:
876864
user_login_to_node_get = prefixer.user_login_to_node.get
877865

878866
def _build_conditions(roles):
@@ -945,7 +933,7 @@ async def store_precomputed_done_facts(
945933
prs: Iterable[MinedPullRequest],
946934
pr_facts: Iterable[Optional[PullRequestFacts]],
947935
time_to: datetime,
948-
default_branches: Dict[str, str],
936+
default_branches: dict[str, str],
949937
release_settings: ReleaseSettings,
950938
account: int,
951939
pdb: morcilla.Database,
@@ -1053,7 +1041,7 @@ async def delete_force_push_dropped_prs(
10531041
repos: Iterable[str],
10541042
branches: pd.DataFrame,
10551043
account: int,
1056-
meta_ids: Tuple[int, ...],
1044+
meta_ids: tuple[int, ...],
10571045
mdb: Database,
10581046
pdb: Database,
10591047
cache: Optional[aiomcache.Client],

server/athenian/api/typing_utils.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -660,12 +660,19 @@ def __init__(self, first_item: NumpyStruct, length: Optional[int]):
660660
direct_columns = []
661661
dtype = [(f, v[0]) for f, v in first_item.dtype.fields.items()]
662662
direct_nested_fields = first_item.nested_dtypes
663+
side_fields = {}
664+
try:
665+
side_fields |= first_item.Optional.__annotations__
666+
side_fields |= first_item.Virtual.__annotations__
667+
except AttributeError:
668+
pass
663669
coerced_datas = [first_item.coerced_data]
664670
indirect_columns = {}
665671

666672
for k, v in first_item.items():
667673
if k not in first_item.dtype.names or k in direct_nested_fields:
668-
if dataclasses.is_dataclass(v):
674+
if dataclasses.is_dataclass(side_fields.get(k)):
675+
assert v is not None, f"{type(first_item).__name__}.{k} may not be None"
669676
for subfield, subvalue in dataclass_asdict(v).items():
670677
full_k = f"{k}_{subfield}"
671678
indirect_columns.setdefault(k, []).append((full_k, subfield))

server/tests/testutils/factory/miners.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
import pandas as pd
99

10-
from athenian.api.internal.miners.types import PullRequestFacts
10+
from athenian.api.internal.miners.types import LoadedJIRADetails, PullRequestFacts
1111

1212
_faker = faker.Faker()
1313
_dt_between = partial(_faker.date_time_between, tzinfo=timezone.utc)
@@ -19,6 +19,7 @@ class Meta:
1919

2020
force_push_dropped = False
2121
release_ignored = False
22+
jira = LoadedJIRADetails.empty()
2223

2324
@factory.lazy_attribute
2425
def created(self) -> pd.Timestamp:

0 commit comments

Comments
 (0)