|
| 1 | +package bot |
| 2 | + |
| 3 | +import ( |
| 4 | + "testing" |
| 5 | + "time" |
| 6 | +) |
| 7 | + |
| 8 | +// TestClosedPRPollingWindow verifies that the closed PR polling window is sufficient |
| 9 | +// to catch PRs closed during sprinkler outages. |
| 10 | +// |
| 11 | +// This test documents a known bug: If sprinkler is down for more than 1 hour when |
| 12 | +// a PR is closed, polling will permanently miss the closed PR update because |
| 13 | +// ListClosedPRs only looks back 1 hour. |
| 14 | +// |
| 15 | +// See: internal/bot/polling.go:98 - ListClosedPRs(ctx, org, 1) |
| 16 | +func TestClosedPRPollingWindow(t *testing.T) { |
| 17 | + tests := []struct { |
| 18 | + name string |
| 19 | + prClosedAt time.Time |
| 20 | + pollingRunsAt time.Time |
| 21 | + lookbackHours int |
| 22 | + expectPRIncluded bool |
| 23 | + scenario string |
| 24 | + }{ |
| 25 | + { |
| 26 | + name: "PR closed 5 minutes ago - should be caught", |
| 27 | + prClosedAt: time.Now().Add(-5 * time.Minute), |
| 28 | + pollingRunsAt: time.Now(), |
| 29 | + lookbackHours: 1, |
| 30 | + expectPRIncluded: true, |
| 31 | + scenario: "Normal operation: polling catches recent closure", |
| 32 | + }, |
| 33 | + { |
| 34 | + name: "PR closed 59 minutes ago - should be caught", |
| 35 | + prClosedAt: time.Now().Add(-59 * time.Minute), |
| 36 | + pollingRunsAt: time.Now(), |
| 37 | + lookbackHours: 1, |
| 38 | + expectPRIncluded: true, |
| 39 | + scenario: "Edge case: just within 1-hour window", |
| 40 | + }, |
| 41 | + { |
| 42 | + name: "PR closed 61 minutes ago - MISSED (BUG)", |
| 43 | + prClosedAt: time.Now().Add(-61 * time.Minute), |
| 44 | + pollingRunsAt: time.Now(), |
| 45 | + lookbackHours: 1, |
| 46 | + expectPRIncluded: false, // BUG: This PR will never be updated |
| 47 | + scenario: "BUG: Sprinkler down 1h+ when PR closed - update permanently missed", |
| 48 | + }, |
| 49 | + { |
| 50 | + name: "PR closed 2 hours ago - MISSED (BUG)", |
| 51 | + prClosedAt: time.Now().Add(-2 * time.Hour), |
| 52 | + pollingRunsAt: time.Now(), |
| 53 | + lookbackHours: 1, |
| 54 | + expectPRIncluded: false, // BUG: This PR will never be updated |
| 55 | + scenario: "BUG: Extended sprinkler outage - update permanently missed", |
| 56 | + }, |
| 57 | + { |
| 58 | + name: "PR closed 23 hours ago - would be caught with 24h window", |
| 59 | + prClosedAt: time.Now().Add(-23 * time.Hour), |
| 60 | + pollingRunsAt: time.Now(), |
| 61 | + lookbackHours: 24, |
| 62 | + expectPRIncluded: true, |
| 63 | + scenario: "With 24h window: catches PRs from extended outages", |
| 64 | + }, |
| 65 | + { |
| 66 | + name: "PR closed 25 hours ago - missed even with 24h window", |
| 67 | + prClosedAt: time.Now().Add(-25 * time.Hour), |
| 68 | + pollingRunsAt: time.Now(), |
| 69 | + lookbackHours: 24, |
| 70 | + expectPRIncluded: false, |
| 71 | + scenario: "Even 24h window has limits - very extended outage", |
| 72 | + }, |
| 73 | + } |
| 74 | + |
| 75 | + for _, tt := range tests { |
| 76 | + t.Run(tt.name, func(t *testing.T) { |
| 77 | + // Calculate the lookback window start time |
| 78 | + windowStart := tt.pollingRunsAt.Add(-time.Duration(tt.lookbackHours) * time.Hour) |
| 79 | + |
| 80 | + // Simulate what ListClosedPRs does: filter by updated >= windowStart |
| 81 | + // This mimics the logic in internal/github/graphql.go:137-143 |
| 82 | + prIncluded := !tt.prClosedAt.Before(windowStart) |
| 83 | + |
| 84 | + if prIncluded != tt.expectPRIncluded { |
| 85 | + t.Errorf("SCENARIO: %s\n"+ |
| 86 | + "PR closed at: %s\n"+ |
| 87 | + "Polling at: %s\n"+ |
| 88 | + "Lookback: %dh (window start: %s)\n"+ |
| 89 | + "Time since close: %v\n"+ |
| 90 | + "Expected included: %v\n"+ |
| 91 | + "Actually included: %v\n", |
| 92 | + tt.scenario, |
| 93 | + tt.prClosedAt.Format(time.RFC3339), |
| 94 | + tt.pollingRunsAt.Format(time.RFC3339), |
| 95 | + tt.lookbackHours, |
| 96 | + windowStart.Format(time.RFC3339), |
| 97 | + tt.pollingRunsAt.Sub(tt.prClosedAt), |
| 98 | + tt.expectPRIncluded, |
| 99 | + prIncluded) |
| 100 | + } |
| 101 | + |
| 102 | + // Document the bug explicitly |
| 103 | + if !tt.expectPRIncluded && tt.lookbackHours == 1 { |
| 104 | + t.Logf("✅ TEST CONFIRMS BUG: PR closed %v ago will be PERMANENTLY MISSED with 1h lookback window", |
| 105 | + tt.pollingRunsAt.Sub(tt.prClosedAt)) |
| 106 | + } |
| 107 | + }) |
| 108 | + } |
| 109 | +} |
| 110 | + |
| 111 | +// TestClosedPRRecoveryScenarios tests various sprinkler outage recovery scenarios. |
| 112 | +func TestClosedPRRecoveryScenarios(t *testing.T) { |
| 113 | + scenarios := []struct { |
| 114 | + name string |
| 115 | + sprinklerDownAt time.Time |
| 116 | + prClosedAt time.Time |
| 117 | + sprinklerUpAt time.Time |
| 118 | + pollingInterval time.Duration |
| 119 | + lookbackWindow time.Duration |
| 120 | + expectRecovery bool |
| 121 | + recoveryMechanism string |
| 122 | + }{ |
| 123 | + { |
| 124 | + name: "30-minute outage - polling catches it", |
| 125 | + sprinklerDownAt: parseTime("10:00"), |
| 126 | + prClosedAt: parseTime("10:15"), |
| 127 | + sprinklerUpAt: parseTime("10:30"), |
| 128 | + pollingInterval: 5 * time.Minute, |
| 129 | + lookbackWindow: 1 * time.Hour, |
| 130 | + expectRecovery: true, |
| 131 | + recoveryMechanism: "Next poll at 10:35 catches PR (closed 20min ago)", |
| 132 | + }, |
| 133 | + { |
| 134 | + name: "90-minute outage - PERMANENT LOSS (current bug)", |
| 135 | + sprinklerDownAt: parseTime("10:00"), |
| 136 | + prClosedAt: parseTime("10:30"), |
| 137 | + sprinklerUpAt: parseTime("11:30"), |
| 138 | + pollingInterval: 5 * time.Minute, |
| 139 | + lookbackWindow: 1 * time.Hour, |
| 140 | + expectRecovery: false, // BUG: PR closed 1h+ ago, outside window |
| 141 | + recoveryMechanism: "NONE - PR closed 60min+ ago is outside 1h lookback window", |
| 142 | + }, |
| 143 | + { |
| 144 | + name: "2-hour outage - PERMANENT LOSS (current bug)", |
| 145 | + sprinklerDownAt: parseTime("10:00"), |
| 146 | + prClosedAt: parseTime("10:30"), |
| 147 | + sprinklerUpAt: parseTime("12:00"), |
| 148 | + pollingInterval: 5 * time.Minute, |
| 149 | + lookbackWindow: 1 * time.Hour, |
| 150 | + expectRecovery: false, // BUG: PR closed 90min+ ago |
| 151 | + recoveryMechanism: "NONE - even when sprinkler returns, polling can't recover", |
| 152 | + }, |
| 153 | + { |
| 154 | + name: "2-hour outage - WOULD RECOVER with 24h window", |
| 155 | + sprinklerDownAt: parseTime("10:00"), |
| 156 | + prClosedAt: parseTime("10:30"), |
| 157 | + sprinklerUpAt: parseTime("12:00"), |
| 158 | + pollingInterval: 5 * time.Minute, |
| 159 | + lookbackWindow: 24 * time.Hour, |
| 160 | + expectRecovery: true, // Fixed: 24h window catches it |
| 161 | + recoveryMechanism: "Next poll catches PR (closed 90min ago, within 24h window)", |
| 162 | + }, |
| 163 | + } |
| 164 | + |
| 165 | + for _, sc := range scenarios { |
| 166 | + t.Run(sc.name, func(t *testing.T) { |
| 167 | + // First poll after sprinkler comes back up |
| 168 | + firstPollAfterRecovery := sc.sprinklerUpAt.Add(sc.pollingInterval) |
| 169 | + timeSinceClose := firstPollAfterRecovery.Sub(sc.prClosedAt) |
| 170 | + |
| 171 | + // Check if PR would be in lookback window |
| 172 | + wouldBeCaught := timeSinceClose <= sc.lookbackWindow |
| 173 | + |
| 174 | + if wouldBeCaught != sc.expectRecovery { |
| 175 | + t.Errorf("SCENARIO: %s\n"+ |
| 176 | + "Sprinkler down: %s\n"+ |
| 177 | + "PR closed: %s\n"+ |
| 178 | + "Sprinkler up: %s\n"+ |
| 179 | + "First poll: %s\n"+ |
| 180 | + "Time since close: %v\n"+ |
| 181 | + "Lookback window: %v\n"+ |
| 182 | + "Expected recovery: %v\n"+ |
| 183 | + "Actual recovery: %v\n"+ |
| 184 | + "Mechanism: %s\n", |
| 185 | + sc.name, |
| 186 | + sc.sprinklerDownAt.Format("15:04"), |
| 187 | + sc.prClosedAt.Format("15:04"), |
| 188 | + sc.sprinklerUpAt.Format("15:04"), |
| 189 | + firstPollAfterRecovery.Format("15:04"), |
| 190 | + timeSinceClose, |
| 191 | + sc.lookbackWindow, |
| 192 | + sc.expectRecovery, |
| 193 | + wouldBeCaught, |
| 194 | + sc.recoveryMechanism) |
| 195 | + } |
| 196 | + |
| 197 | + // Document bug cases |
| 198 | + if !sc.expectRecovery && sc.lookbackWindow == 1*time.Hour { |
| 199 | + t.Logf("✅ TEST CONFIRMS BUG: %s - Update permanently lost", sc.name) |
| 200 | + } |
| 201 | + |
| 202 | + // Document fix validation |
| 203 | + if sc.expectRecovery && sc.lookbackWindow == 24*time.Hour && timeSinceClose > 1*time.Hour { |
| 204 | + t.Logf("✅ TEST VALIDATES FIX: 24h window would recover this scenario") |
| 205 | + } |
| 206 | + }) |
| 207 | + } |
| 208 | +} |
| 209 | + |
| 210 | +// parseTime is a helper to create times on today's date for testing. |
| 211 | +func parseTime(hhMM string) time.Time { |
| 212 | + now := time.Now() |
| 213 | + parsed, _ := time.Parse("15:04", hhMM) |
| 214 | + return time.Date(now.Year(), now.Month(), now.Day(), parsed.Hour(), parsed.Minute(), 0, 0, now.Location()) |
| 215 | +} |
0 commit comments