@@ -25,7 +25,7 @@ func httpMonitorScanAndAlertFailures(ctx context.Context, app *amstate.App) erro
2525 failures := scanMonitors (
2626 ctx ,
2727 app .State .HttpMonitors (),
28- newScanner (),
28+ newRetryScanner ( newScanner () ),
2929 logex .Prefix ("httpscanner" , app .Logger ))
3030
3131 // convert monitor failures into alerts
@@ -56,7 +56,7 @@ func scanMonitors(
5656 failedMu := sync.Mutex {}
5757
5858 checkOne := func (monitor amstate.HttpMonitor ) {
59- ctx , cancel := context .WithTimeout (ctx , 10 * time .Second )
59+ ctx , cancel := context .WithTimeout (ctx , 30 * time .Second )
6060 defer cancel ()
6161
6262 started := time .Now ()
@@ -101,6 +101,31 @@ type HttpMonitorScanner interface {
101101 Scan (context.Context , amstate.HttpMonitor ) error
102102}
103103
104+ type retryScanner struct {
105+ actualScanner HttpMonitorScanner
106+ }
107+
108+ // retries once, but only if it looks retryable
109+ func newRetryScanner (actual HttpMonitorScanner ) HttpMonitorScanner {
110+ return & retryScanner {actual }
111+ }
112+
113+ func (r * retryScanner ) Scan (ctx context.Context , monitor amstate.HttpMonitor ) error {
114+ firstTryCtx , cancel := context .WithTimeout (ctx , 15 * time .Second )
115+ defer cancel ()
116+
117+ if err := r .actualScanner .Scan (firstTryCtx , monitor ); err != nil {
118+ if err != context .DeadlineExceeded { // non-retryable error
119+ return err
120+ }
121+
122+ // now use the longer context
123+ return r .actualScanner .Scan (ctx , monitor )
124+ }
125+
126+ return nil
127+ }
128+
// scanner carries the HTTP client used for monitor checks.
// NOTE(review): this is presumably the default HttpMonitorScanner
// implementation (newScanner is passed to newRetryScanner) — confirm against
// its methods, which are not visible here.
type scanner struct {
	// NOTE(review): the name suggests this client is configured not to follow
	// redirects — confirm where the client is constructed.
	noRedirects *http.Client
}
0 commit comments