 """
 
 from dataclasses import dataclass
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from typing import Dict, Iterable, List, Optional, Set, Tuple
 
 from .job_agg_index import JobAggIndex, JobMeta, SignalStatus as AggStatus
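A minimal standalone sketch of the timestamp pattern the timedelta and timezone imports support: an aware UTC "now" shifted one minute forward, then made naive again to match the naive datetimes already used in this module.

from datetime import datetime, timedelta, timezone

# Aware UTC now, plus one minute, converted back to a naive datetime.
now_plus = (datetime.now(timezone.utc) + timedelta(minutes=1)).replace(tzinfo=None)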
@@ -92,7 +92,11 @@ def extract(self) -> List[Signal]:
         # When that happens, the jobs have identical timestamps but DIFFERENT job ids.
         # But since they are still the same job logically, we want to deduplicate them
         # for the purposes of signal events.
-        return self._dedup_signal_events(test_signals + job_signals)
+        signals = self._dedup_signal_events(test_signals + job_signals)
+
+        # Inject synthetic PENDING events for workflow runs that are known to be
+        # pending but have no events in a given signal (e.g. multi-stage workflows).
+        return self._inject_pending_workflow_events(signals, jobs)
 
     # -----------------------------
     # Deduplication (GitHub-specific)
@@ -125,6 +129,90 @@ def _dedup_signal_events(self, signals: List[Signal]) -> List[Signal]:
             )
         return deduped
 
+    # -----------------------------
+    # Pending workflow synthesis
+    # -----------------------------
+    def _inject_pending_workflow_events(
+        self,
+        signals: List[Signal],
+        jobs: List[JobRow],
+    ) -> List[Signal]:
+        """
+        For each signal/commit, if there exists a pending workflow run and the
+        signal has no event for that wf_run_id, insert a synthetic PENDING event
+        with started_at set slightly in the future (now + 1 minute).
+        """
+        if not signals or not jobs:
+            return signals
+
+        # Simple pass over JobRows to collect pending workflow run ids per (sha, workflow)
+        pending_runs: Dict[Tuple[Sha, WorkflowName], Set[int]] = {}
+        for j in jobs:
+            if j.is_pending:
+                pending_runs.setdefault((j.head_sha, j.workflow_name), set()).add(
+                    int(j.wf_run_id)
+                )
+
+        # Avoid deprecated utcnow(); derive UTC then store naive to match existing code
+        now_plus = (datetime.now(timezone.utc) + timedelta(minutes=1)).replace(
+            tzinfo=None
+        )
+
+        out: List[Signal] = []
+        for s in signals:
+            new_commits: List[SignalCommit] = []
+            for c in s.commits:
+                pending_ids = pending_runs.get(
+                    (Sha(c.head_sha), WorkflowName(s.workflow_name))
+                )
+                if not pending_ids:
+                    new_commits.append(c)
+                    continue
+
+                have_ids = {e.wf_run_id for e in c.events}
+                missing_ids = pending_ids - have_ids
+                if not missing_ids:
+                    new_commits.append(c)
+                    continue
+
+                # Build synthetic pending events for the missing wf_run_ids;
+                # set started_at to the future.
+                synth_events: List[SignalEvent] = list(c.events)
+                for wf_run_id in missing_ids:
+                    name = self._fmt_event_name(
+                        workflow=s.workflow_name,
+                        kind="synthetic",
+                        identifier=str(s.key),
+                        wf_run_id=WfRunId(wf_run_id),
+                        run_attempt=RunAttempt(0),
+                    )
+                    synth_events.append(
+                        SignalEvent(
+                            name=name,
+                            status=SignalStatus.PENDING,
+                            started_at=now_plus,
+                            ended_at=None,
+                            wf_run_id=int(wf_run_id),
+                            run_attempt=0,
+                            job_id=None,
+                        )
+                    )
+                new_commits.append(
+                    SignalCommit(
+                        head_sha=c.head_sha, timestamp=c.timestamp, events=synth_events
+                    )
+                )
+
+            out.append(
+                Signal(
+                    key=s.key,
+                    workflow_name=s.workflow_name,
+                    commits=new_commits,
+                    job_base_name=s.job_base_name,
+                )
+            )
+        return out
+
     # -----------------------------
     # Phase B — Tests (test_run_s3 only)
     # -----------------------------
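Below is a minimal runnable sketch of the event shape the new method produces, using simplified stand-in dataclasses in place of this module's real SignalEvent and SignalCommit types (their definitions are not part of this diff); the field names mirror the constructor calls above, and the event name format is illustrative only.

from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from typing import List, Optional


@dataclass
class SignalEvent:  # stand-in for the module's real SignalEvent
    name: str
    status: str  # the real code uses SignalStatus.PENDING
    started_at: datetime
    ended_at: Optional[datetime]
    wf_run_id: int
    run_attempt: int
    job_id: Optional[int]


@dataclass
class SignalCommit:  # stand-in for the module's real SignalCommit
    head_sha: str
    timestamp: datetime
    events: List[SignalEvent] = field(default_factory=list)


# A commit that has a pending workflow run (id 1234) but no event for it
# receives a synthetic PENDING event whose started_at is one minute in the
# future, stored naive to match the module's other timestamps.
now_plus = (datetime.now(timezone.utc) + timedelta(minutes=1)).replace(tzinfo=None)

commit = SignalCommit(head_sha="abc123", timestamp=datetime(2024, 1, 1))
commit.events.append(
    SignalEvent(
        name="some-workflow / synthetic / 1234",  # illustrative name only
        status="pending",
        started_at=now_plus,
        ended_at=None,
        wf_run_id=1234,
        run_attempt=0,
        job_id=None,
    )
)

# The synthetic event is timestamped slightly in the future.
assert commit.events[0].started_at > datetime.now(timezone.utc).replace(tzinfo=None)

The real method builds one such event per missing wf_run_id and leaves commits that already have events for all of their pending runs untouched.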