+from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from enum import Enum
 import logging
@@ -67,6 +68,23 @@ class FilterCommitsProperty(Enum):
 DAG = tuple[np.ndarray, np.ndarray, np.ndarray]


+@dataclass(slots=True)
+class CommitDAGMetrics:
+    """Commit DAG error statistics, per repository.
+
+    Intersections are possible. For example, `bool(pristine & corrupted)` can be `True`.
+    """
+
+    pristine: set[str]
+    corrupted: set[str]
+    orphaned: set[str]
+
+    @classmethod
+    def empty(cls) -> "CommitDAGMetrics":
+        """Initialize a new CommitDAGMetrics instance with empty sets."""
+        return CommitDAGMetrics(set(), set(), set())
+
+
 def _postprocess_extract_commits(result, with_deployments=True, **_):
     if isinstance(result, tuple):
         if with_deployments:
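# A minimal usage sketch of the new dataclass (not part of the diff). It assumes
# CommitDAGMetrics is importable from this module; the repository names are made up.
metrics = CommitDAGMetrics.empty()
metrics.pristine.add("org/empty-repo")
metrics.corrupted.add("org/empty-repo")  # the sets may intersect, per the class docstring
metrics.orphaned.add("org/another-repo")
assert bool(metrics.pristine & metrics.corrupted)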
@@ -367,6 +385,7 @@ async def fetch_repository_commits(
     mdb: Database,
     pdb: Database,
     cache: Optional[aiomcache.Client],
+    metrics: Optional[CommitDAGMetrics] = None,
 ) -> dict[str, tuple[bool, DAG]]:
     """
     Load full commit DAGs for the given repositories.
@@ -379,6 +398,7 @@ async def fetch_repository_commits(
                     3. Commit timestamp. \
                     4. Commit repository name.
     :param prune: Remove any commits that are not accessible from `branches`.
+    :param metrics: Mutable error statistics; updated whenever new commits are fetched.
     :return: Map from repository names to their DAG consistency indicators and bodies.
     """
     if branches.empty:
@@ -495,6 +515,8 @@ async def execute():
     for repo, pdag in repos.items():
         if repo not in result:
             result[repo] = (True, _empty_dag()) if prune else pdag
+        if metrics is not None and len(result[repo][1][0]) == 0:
+            metrics.pristine.add(repo)
     return result


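# Sketch of what the pristine check above tests (not part of the diff). DAG is
# tuple[np.ndarray, np.ndarray, np.ndarray]; assuming the first array holds the
# commit hashes, len(result[repo][1][0]) == 0 means no commits are known for the
# repository, so it is recorded in metrics.pristine. The dtypes below are assumptions.
import numpy as np

empty_dag = (
    np.array([], dtype="S40"),     # commit hashes
    np.array([], dtype=np.uint32),
    np.array([], dtype=np.uint32),
)
entry = (True, empty_dag)  # (consistency flag, DAG body), as stored in `result`
assert len(entry[1][0]) == 0  # -> metrics.pristine.add(repo)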
@@ -636,10 +658,12 @@ async def _fetch_commit_history_dag(
     meta_ids: tuple[int, ...],
     mdb: Database,
     alloc=None,
+    metrics: Optional[CommitDAGMetrics] = None,
 ) -> tuple[bool, str, np.ndarray, np.ndarray, np.ndarray]:
+    # these are approximately sensible defaults, found by experiment
     max_stop_heads = 25
     max_inner_partitions = 25
-    log = logging.getLogger("%s._fetch_commit_history_dag" % metadata.__package__)
+    log = logging.getLogger(f"{metadata.__package__}._fetch_commit_history_dag")
     # there can be duplicates, remove them
     head_hashes = np.asarray(head_hashes, dtype="S40")
     head_ids = np.asarray(head_ids, dtype=int)
@@ -688,13 +712,16 @@ async def _fetch_commit_history_dag(
     bads, bad_seeds, bad_hashes = verify_edges_integrity(new_edges, alloc)
     if bads:
         log.warning(
-            "%d @ %d new DAG edges are not consistent (%d commits / %d existing): %s",
+            "%s: %d @ %d new DAG edges are not consistent (%d commits / %d existing): %s",
+            repo,
             len(bads),
             len(bad_seeds),
             len(bad_hashes),
             len(hashes),
             [new_edges[i] for i in bad_seeds[:10]],
         )
+        if metrics is not None:
+            metrics.corrupted.add(repo)
         consistent = False
         for i in bads[::-1]:
             new_edges.pop(i)
@@ -733,6 +760,8 @@ async def _fetch_commit_history_dag(
             "skipping orphans which are suspiciously young: %s",
             ", ".join(removed_orphans_hashes),
         )
+        if metrics is not None:
+            metrics.orphaned.add(repo)
         consistent = False
         for i in sorted(removed_orphans_indexes, reverse=True):
             new_edges.pop(i)
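# End-to-end sketch of how a caller could collect the new error statistics (not part
# of the diff). The wrapper name and the keyword passthrough are hypothetical; only
# fetch_repository_commits and CommitDAGMetrics come from this module.
import logging


async def fetch_repository_commits_with_metrics(**fetch_kwargs):
    """Hypothetical wrapper: fetch commit DAGs and report repositories with DAG problems."""
    log = logging.getLogger(__name__)
    metrics = CommitDAGMetrics.empty()
    result = await fetch_repository_commits(metrics=metrics, **fetch_kwargs)
    for repo in sorted(metrics.corrupted | metrics.orphaned):
        log.warning("commit DAG of %s is inconsistent", repo)
    if metrics.pristine:
        log.info("repositories without any fetched commits: %s", ", ".join(sorted(metrics.pristine)))
    return result, metrics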