Skip to content

Commit 444d690

Browse files
committed
fix: list the pods again if the pod ip is not yet assigned
1 parent 1054697 commit 444d690

File tree

7 files changed

+96
-54
lines changed

7 files changed

+96
-54
lines changed

.goreleaser.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ release:
8585
owner: "meysam81"
8686
name: "{{ .ProjectName }}"
8787
draft: false
88-
mode: replace
8988
prerelease: auto
9089
header: |
9190
## What's Changed

cmd/checker/http.go

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package checker
33
import (
44
"context"
55
"fmt"
6-
"math/rand"
76
"net/http"
87
"time"
98
)
@@ -15,38 +14,20 @@ type StaticHTTPChecker struct {
1514
}
1615

1716
func (h *StaticHTTPChecker) Check(ctx context.Context) error {
18-
var attempts uint
1917

20-
for {
18+
retriable := func() error {
2119
result := h.Common.performSingleCheck(ctx, h.Upstream)
22-
attempts++
2320

2421
if result.Success {
2522
h.Common.Logger.Info().Msgf("check successful in %s with status: %s",
2623
result.Duration.Round(time.Millisecond), result.Status)
2724
return nil
2825
}
2926

30-
numTries := fmt.Sprintf("%d", attempts)
31-
if h.Common.Retries > 0 {
32-
numTries = fmt.Sprintf("%d/%d", attempts, h.Common.Retries)
33-
34-
if attempts >= h.Common.Retries {
35-
h.Common.Logger.Error().Msgf("max retries reached: %d", h.Common.Retries)
36-
return fmt.Errorf("max retries (%d) exceeded", h.Common.Retries)
37-
}
38-
}
39-
40-
jitterSeconds := rand.Intn(6) + 5 // 5-10 seconds
41-
if result.Error != nil {
42-
h.Common.Logger.Info().Err(result.Error).Msgf("[%s] check failed, retrying in %ds...", numTries, jitterSeconds)
43-
}
44-
45-
if err := waitWithJitter(ctx, jitterSeconds); err != nil {
46-
h.Common.Logger.Info().Msg("shutdown signal received")
47-
return err
48-
}
27+
return result.Error
4928
}
29+
30+
return h.Common.runWithJitterBackoff(ctx, retriable)
5031
}
5132

5233
func (c *HTTPCommon) performSingleCheck(ctx context.Context, upstream string) checkResult {

cmd/checker/k8s.go

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ package checker
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
6-
"math/rand"
77
"os"
88
"path/filepath"
99
"strings"
@@ -76,46 +76,47 @@ func (k *K8sPodChecker) Check(ctx context.Context) error {
7676
return err
7777
}
7878

79-
k.Common.Logger.Info().Strs("labels", k.LabelSelector).Str("namespace", k.Namespace).Msg("listing pods")
80-
81-
pods, err := clientset.CoreV1().Pods(k.Namespace).List(ctx, metav1.ListOptions{
82-
LabelSelector: strings.Join(k.LabelSelector, ","),
83-
})
84-
if err != nil {
85-
return err
86-
}
79+
var latestPod *corev1.Pod
80+
var podIP string
8781

88-
k.Common.Logger.Info().Msgf("found %d pods with the selected filters", len(pods.Items))
82+
retriable := func() error {
83+
k.Common.Logger.Info().Strs("labels", k.LabelSelector).Str("namespace", k.Namespace).Msg("listing pods")
8984

90-
var latestPod *corev1.Pod
91-
for _, pod := range pods.Items {
92-
if k.Image != "" && k.Image != pod.Spec.Containers[0].Image {
93-
k.Common.Logger.Info().Msgf("found pod %s/%s but did not match on image", pod.Namespace, pod.Name)
94-
continue
85+
pods, err := clientset.CoreV1().Pods(k.Namespace).List(ctx, metav1.ListOptions{
86+
LabelSelector: strings.Join(k.LabelSelector, ","),
87+
})
88+
if err != nil {
89+
return err
9590
}
9691

97-
if latestPod == nil || latestPod.CreationTimestamp.After(pod.CreationTimestamp.Time) {
98-
latestPod = &pod
99-
}
100-
}
92+
k.Common.Logger.Info().Msgf("found %d pods with the selected filters", len(pods.Items))
10193

102-
if latestPod == nil {
103-
return fmt.Errorf("no matching pod found in %s namespace with labels: %s", k.Namespace, k.LabelSelector)
104-
}
94+
for _, pod := range pods.Items {
95+
if k.Image != "" && k.Image != pod.Spec.Containers[0].Image {
96+
k.Common.Logger.Info().Msgf("found pod %s/%s but did not match on image", pod.Namespace, pod.Name)
97+
continue
98+
}
10599

106-
var podIP string
107-
for {
100+
if latestPod == nil || latestPod.CreationTimestamp.After(pod.CreationTimestamp.Time) {
101+
latestPod = &pod
102+
}
103+
}
104+
105+
if latestPod == nil {
106+
return fmt.Errorf("no matching pod found in %s namespace with labels: %s", k.Namespace, k.LabelSelector)
107+
}
108108
podIP = latestPod.Status.PodIP
109109
if podIP != "" {
110110
k.Common.Logger.Info().Msgf("pod %s has an IP assigned: %s", latestPod.Name, latestPod.Status.PodIP)
111-
break
111+
return nil
112112
}
113113

114-
jitterSeconds := rand.Intn(6) + 5
115-
err := waitWithJitter(ctx, jitterSeconds)
116-
if err != nil {
117-
return err
118-
}
114+
return errors.New("no IP assigned to the pod yet")
115+
}
116+
117+
err = k.Common.runWithJitterBackoff(ctx, retriable)
118+
if err != nil {
119+
return err
119120
}
120121

121122
port := k.Port

cmd/checker/types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ type HTTPCommon struct {
1111
HTTPClient *http.Client
1212
Retries uint
1313
StatusCode int
14+
JitterMin int
15+
JitterMax int
1416
Logger *logging.Logger
1517
}
1618

cmd/checker/utils.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,48 @@ package checker
22

33
import (
44
"context"
5+
"fmt"
6+
"math/rand"
57
"time"
68
)
79

10+
func (c *HTTPCommon) runWithJitterBackoff(ctx context.Context, do func() error) error {
11+
low := c.JitterMin
12+
high := c.JitterMax - c.JitterMin + 1
13+
14+
var attempts uint
15+
16+
for {
17+
jitterSeconds := rand.Intn(high) + low
18+
19+
err := do()
20+
attempts++
21+
22+
tries := fmt.Sprintf("%d", attempts)
23+
if c.Retries > 0 {
24+
tries = fmt.Sprintf("%d/%d", attempts, c.Retries)
25+
}
26+
27+
if err != nil {
28+
c.Logger.Info().Err(err).Msgf("[%s] failed, retrying in %ds...", tries, jitterSeconds)
29+
} else {
30+
return nil
31+
}
32+
33+
if c.Retries > 0 {
34+
if attempts >= c.Retries {
35+
c.Logger.Error().Msgf("max retries reached: %d", c.Retries)
36+
return fmt.Errorf("max retries (%d) exceeded", c.Retries)
37+
}
38+
}
39+
40+
if err := waitWithJitter(ctx, jitterSeconds); err != nil {
41+
c.Logger.Info().Msg("shutdown signal received")
42+
return err
43+
}
44+
}
45+
}
46+
847
// waitWithJitter will only return error if the context is canceled/done
948
func waitWithJitter(ctx context.Context, jitterSeconds int) error {
1049
t := time.NewTicker(time.Duration(jitterSeconds) * time.Second)

cmd/cmd.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,20 @@ func (a *app) createGlobalFlags() []cli.Flag {
9595
Destination: &a.Config.StatusCode,
9696
Sources: cli.EnvVars("STATUS_CODE"),
9797
},
98+
&cli.IntFlag{
99+
Name: "jitter-min-seconds",
100+
Usage: "The min seconds when picking a random time for backoff",
101+
Value: 5,
102+
Destination: &a.Config.JitterMin,
103+
Sources: cli.EnvVars("JITTER_MIN"),
104+
},
105+
&cli.IntFlag{
106+
Name: "jitter-max-seconds",
107+
Usage: "The max seconds when picking a random time for backoff",
108+
Value: 10,
109+
Destination: &a.Config.JitterMax,
110+
Sources: cli.EnvVars("JITTER_MAX"),
111+
},
98112
}
99113
}
100114

@@ -186,6 +200,8 @@ func (a *app) createK8sCheckCommand() *cli.Command {
186200
Retries: a.Config.Retries,
187201
StatusCode: a.Config.StatusCode,
188202
Logger: a.Logger,
203+
JitterMin: a.Config.JitterMin,
204+
JitterMax: a.Config.JitterMax,
189205
},
190206
}
191207

@@ -225,6 +241,8 @@ func (a *app) staticHTTPCheck(ctx context.Context, c *cli.Command) error {
225241
Retries: a.Config.Retries,
226242
StatusCode: a.Config.StatusCode,
227243
Logger: a.Logger,
244+
JitterMin: a.Config.JitterMin,
245+
JitterMax: a.Config.JitterMax,
228246
},
229247
}
230248

cmd/config/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ type Config struct {
2626
LogLevel string
2727
Retries uint
2828
Timeout uint
29+
JitterMin int
30+
JitterMax int
2931
StatusCode int
3032

3133
StaticHTTPTarget *StaticHTTPTarget

0 commit comments

Comments
 (0)