Skip to content

Commit 77c72ad

Browse files
adityathebemoshloop
authored andcommitted
feat: Handle Secret Lookup Rate Limit errors
1 parent 6059e5c commit 77c72ad

File tree

11 files changed

+132
-46
lines changed

11 files changed

+132
-46
lines changed

api/context/context.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@ import (
55
"fmt"
66
"strings"
77

8-
"github.com/flanksource/canary-checker/api/external"
9-
v1 "github.com/flanksource/canary-checker/api/v1"
10-
"github.com/flanksource/canary-checker/pkg"
118
"github.com/flanksource/duty/connection"
129
dutyCtx "github.com/flanksource/duty/context"
1310
"github.com/flanksource/duty/models"
1411
"github.com/flanksource/duty/types"
1512
"github.com/flanksource/gomplate/v3"
13+
14+
"github.com/flanksource/canary-checker/api/external"
15+
v1 "github.com/flanksource/canary-checker/api/v1"
16+
"github.com/flanksource/canary-checker/pkg"
1617
)
1718

1819
var DefaultContext dutyCtx.Context

checks/kubernetes_resource.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ func (c *KubernetesResourceChecker) Check(ctx context.Context, check v1.Kubernet
200200

201201
ctx = _ctx.(context.Context)
202202
checkCtx := context.New(ctx.Context, virtualCanary)
203-
res, err := Exec(checkCtx)
203+
res, err := RunChecksNoPersistence(checkCtx)
204204
if err != nil {
205205
return fmt.Errorf("error executing check: %w", err)
206206
} else {

checks/runchecks.go

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,37 @@ import (
99
"time"
1010

1111
"github.com/flanksource/artifacts"
12+
dutyCtx "github.com/flanksource/duty/context"
13+
"github.com/flanksource/duty/models"
14+
"github.com/google/uuid"
15+
gocache "github.com/patrickmn/go-cache"
16+
"golang.org/x/time/rate"
17+
1218
"github.com/flanksource/canary-checker/api/context"
1319
"github.com/flanksource/canary-checker/api/external"
1420
v1 "github.com/flanksource/canary-checker/api/v1"
1521
"github.com/flanksource/canary-checker/pkg"
1622
"github.com/flanksource/canary-checker/pkg/utils"
17-
"github.com/flanksource/duty/models"
18-
"github.com/google/uuid"
19-
gocache "github.com/patrickmn/go-cache"
23+
)
24+
25+
const (
26+
defaultSecretLookupFailureThreshold = 50
27+
defaultSecretLookupFailureWindow = 30 * time.Minute
2028
)
2129

2230
var checksCache = gocache.New(5*time.Minute, 5*time.Minute)
2331

32+
var secretLookupRateLimiter = rate.NewLimiter(
33+
rate.Limit(float64(defaultSecretLookupFailureThreshold)/defaultSecretLookupFailureWindow.Seconds()),
34+
defaultSecretLookupFailureThreshold,
35+
)
36+
2437
var DisabledChecks []string
2538

39+
type RunChecksMeta struct {
40+
SecretLookupRateLimitSkipped int
41+
}
42+
2643
func getDisabledChecks(ctx *context.Context) (map[string]struct{}, error) {
2744
if val, ok := checksCache.Get("disabledChecks"); ok {
2845
return val.(map[string]struct{}), nil
@@ -150,8 +167,13 @@ func sortChecksByDependency(checks []external.Check) ([]external.Check, error) {
150167
return append(unnamedChecks, sorted...), nil
151168
}
152169

153-
// Exec runs the actions specified and returns the results, without saving artifacts
154-
func Exec(ctx *context.Context) ([]*pkg.CheckResult, error) {
170+
// RunChecksNoPersistence executes checks and returns transformed results without persistence side effects.
171+
//
172+
// Unlike RunChecks, it does not perform canary deletion checks, dependency ordering,
173+
// artifact saving, or final result post-processing (e.g. resultMode handling).
174+
// It exports only check spec metrics (not per-result emitted metrics) and is intended
175+
// for embedded execution paths (e.g. topology lookups and kubernetesResource sub-checks).
176+
func RunChecksNoPersistence(ctx *context.Context) ([]*pkg.CheckResult, error) {
155177
var results []*pkg.CheckResult
156178
disabledChecks, err := getDisabledChecks(ctx)
157179
if err != nil {
@@ -174,9 +196,11 @@ func Exec(ctx *context.Context) ([]*pkg.CheckResult, error) {
174196

175197
result := c.Run(ctx)
176198
transformedResults := TransformResults(ctx, result)
177-
results = append(results, transformedResults...)
178-
ExportCheckMetrics(ctx, transformedResults, false)
199+
_, filteredResults := filterSecretLookupRateLimitedResults(ctx, transformedResults)
200+
results = append(results, filteredResults...)
201+
ExportCheckMetrics(ctx, filteredResults, false)
179202
}
203+
180204
return results, nil
181205
}
182206

@@ -189,23 +213,24 @@ func hasDependencies(checks []external.Check) bool {
189213
return false
190214
}
191215

192-
func RunChecks(ctx *context.Context) ([]*pkg.CheckResult, error) {
216+
func RunChecks(ctx *context.Context) ([]*pkg.CheckResult, RunChecksMeta, error) {
193217
var results []*pkg.CheckResult
218+
meta := RunChecksMeta{}
194219
disabledChecks, err := getDisabledChecks(ctx)
195220
if err != nil {
196-
return nil, fmt.Errorf("error getting disabled checks: %v", err)
221+
return nil, meta, fmt.Errorf("error getting disabled checks: %v", err)
197222
}
198223

199224
// Check if canary is not marked deleted in DB
200225
if ctx.DB() != nil && ctx.Canary.GetPersistedID() != "" {
201226
var deletedAt sql.NullTime
202227
err := ctx.DB().Table("canaries").Select("deleted_at").Where("id = ? and deleted_at < now()", ctx.Canary.GetPersistedID()).Scan(&deletedAt).Error
203228
if err != nil {
204-
return nil, fmt.Errorf("error getting canary: %v", err)
229+
return nil, meta, fmt.Errorf("error getting canary: %v", err)
205230
}
206231

207232
if deletedAt.Valid {
208-
return nil, nil
233+
return nil, meta, nil
209234
}
210235
}
211236

@@ -215,7 +240,7 @@ func RunChecks(ctx *context.Context) ([]*pkg.CheckResult, error) {
215240
if hasDependencies(checks) {
216241
sortedChecks, err := sortChecksByDependency(checks)
217242
if err != nil {
218-
return nil, fmt.Errorf("failed to sort checks: %v", err)
243+
return nil, meta, fmt.Errorf("failed to sort checks: %v", err)
219244
}
220245

221246
for _, check := range sortedChecks {
@@ -241,11 +266,13 @@ func RunChecks(ctx *context.Context) ([]*pkg.CheckResult, error) {
241266

242267
result := singleRunner.Check(ctx, check)
243268
transformedResults := TransformResults(ctx, result)
244-
results = append(results, transformedResults...)
245-
ExportCheckMetrics(ctx, transformedResults, true)
269+
skippedCount, filteredResults := filterSecretLookupRateLimitedResults(ctx, transformedResults)
270+
meta.SecretLookupRateLimitSkipped += skippedCount
271+
results = append(results, filteredResults...)
272+
ExportCheckMetrics(ctx, filteredResults, true)
246273

247-
if check.GetName() != "" && len(transformedResults) > 0 && transformedResults[0].Pass {
248-
ctx.SetOutput(check.GetName(), transformedResults[0])
274+
if check.GetName() != "" && len(filteredResults) > 0 && filteredResults[0].Pass {
275+
ctx.SetOutput(check.GetName(), filteredResults[0])
249276
}
250277
}
251278
} else {
@@ -259,16 +286,18 @@ func RunChecks(ctx *context.Context) ([]*pkg.CheckResult, error) {
259286

260287
result := c.Run(ctx)
261288
transformedResults := TransformResults(ctx, result)
262-
results = append(results, transformedResults...)
263-
ExportCheckMetrics(ctx, transformedResults, true)
289+
skippedCount, filteredResults := filterSecretLookupRateLimitedResults(ctx, transformedResults)
290+
meta.SecretLookupRateLimitSkipped += skippedCount
291+
results = append(results, filteredResults...)
292+
ExportCheckMetrics(ctx, filteredResults, true)
264293
}
265294
}
266295

267296
if err := saveArtifacts(ctx, results); err != nil {
268297
ctx.Errorf("error saving artifacts: %v", err)
269298
}
270299

271-
return ProcessResults(ctx, results), nil
300+
return ProcessResults(ctx, results), meta, nil
272301
}
273302

274303
func saveArtifacts(ctx *context.Context, results pkg.Results) error {
@@ -356,6 +385,48 @@ func TransformResults(ctx *context.Context, in []*pkg.CheckResult) (out []*pkg.C
356385
return out
357386
}
358387

388+
func isSecretLookupRateLimitResult(result *pkg.CheckResult) bool {
389+
if result == nil {
390+
return false
391+
}
392+
if dutyCtx.IsSecretLookupRateLimited(result.ErrorObject) {
393+
return true
394+
}
395+
return strings.Contains(strings.ToLower(result.Error), strings.ToLower(dutyCtx.ErrSecretLookupRateLimited.Error()))
396+
}
397+
398+
func filterSecretLookupRateLimitedResults(ctx *context.Context, in []*pkg.CheckResult) (int, []*pkg.CheckResult) {
399+
if len(in) == 0 {
400+
return 0, in
401+
}
402+
403+
skipped := 0
404+
out := make([]*pkg.CheckResult, 0, len(in))
405+
406+
for _, result := range in {
407+
if !isSecretLookupRateLimitResult(result) {
408+
out = append(out, result)
409+
continue
410+
}
411+
412+
if secretLookupRateLimiter.Allow() {
413+
skipped++
414+
ctx.Warnf("skipping check result due to secret lookup rate limiting for check=%s (threshold=%d window=%s)", result.GetName(), defaultSecretLookupFailureThreshold, defaultSecretLookupFailureWindow.Round(time.Second))
415+
continue
416+
}
417+
418+
ctx.Warnf("secret lookup rate limit threshold exceeded for check=%s (threshold=%d window=%s), recording as failure", result.GetName(), defaultSecretLookupFailureThreshold, defaultSecretLookupFailureWindow.Round(time.Second))
419+
result.Invalid = false
420+
result.Pass = false
421+
if result.Error == "" {
422+
result.Error = "secret lookup repeatedly rate limited"
423+
}
424+
out = append(out, result)
425+
}
426+
427+
return skipped, out
428+
}
429+
359430
func ProcessResults(ctx *context.Context, results []*pkg.CheckResult) []*pkg.CheckResult {
360431
if ctx.Canary.Spec.ResultMode == "" {
361432
return results

cmd/run.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ var Run = &cobra.Command{
6767
go func() {
6868
defer wg.Done()
6969

70-
res, err := checks.RunChecks(apicontext.New(apicontext.DefaultContext.WithName(_config.Name), _config))
70+
res, _, err := checks.RunChecks(apicontext.New(apicontext.DefaultContext.WithName(_config.Name), _config))
7171
if err != nil {
7272
log.Errorf("error running checks: %v", err)
7373
return

go.mod

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ require (
7373
go.opentelemetry.io/otel/trace v1.39.0
7474
gocloud.dev v0.44.0
7575
golang.org/x/sync v0.19.0
76+
golang.org/x/time v0.14.0
7677
google.golang.org/api v0.262.0
7778
google.golang.org/genproto v0.0.0-20260126211449-d11affda4bed
7879
google.golang.org/grpc v1.78.0
@@ -219,7 +220,6 @@ require (
219220
github.com/go-git/go-billy/v5 v5.7.0 // indirect
220221
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
221222
github.com/go-jose/go-jose/v4 v4.1.3 // indirect
222-
github.com/go-json-experiment/json v0.0.0-20251027170946-4849db3c2f7e // indirect
223223
github.com/go-logr/stdr v1.2.2 // indirect
224224
github.com/go-logr/zapr v1.3.0 // indirect
225225
github.com/go-ole/go-ole v1.3.0 // indirect
@@ -307,10 +307,6 @@ require (
307307
github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 // indirect
308308
github.com/jpillora/backoff v1.0.0 // indirect
309309
github.com/json-iterator/go v1.1.12 // indirect
310-
github.com/kaptinlin/go-i18n v0.2.3 // indirect
311-
github.com/kaptinlin/jsonpointer v0.4.9 // indirect
312-
github.com/kaptinlin/jsonschema v0.6.8 // indirect
313-
github.com/kaptinlin/messageformat-go v0.4.9 // indirect
314310
github.com/kennygrant/sanitize v1.2.4 // indirect
315311
github.com/kevinburke/ssh_config v1.4.0 // indirect
316312
github.com/klauspost/compress v1.18.3 // indirect
@@ -446,7 +442,6 @@ require (
446442
golang.org/x/sys v0.40.0 // indirect
447443
golang.org/x/term v0.39.0 // indirect
448444
golang.org/x/text v0.33.0 // indirect
449-
golang.org/x/time v0.14.0 // indirect
450445
golang.org/x/tools v0.41.0 // indirect
451446
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
452447
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect

go.sum

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,8 +1092,6 @@ github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFS
10921092
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
10931093
github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs=
10941094
github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
1095-
github.com/go-json-experiment/json v0.0.0-20251027170946-4849db3c2f7e h1:Lf/gRkoycfOBPa42vU2bbgPurFong6zXeFtPoxholzU=
1096-
github.com/go-json-experiment/json v0.0.0-20251027170946-4849db3c2f7e/go.mod h1:uNVvRXArCGbZ508SxYYTC5v1JWoz2voff5pm25jU1Ok=
10971095
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
10981096
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
10991097
github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=
@@ -1514,14 +1512,6 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
15141512
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
15151513
github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
15161514
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
1517-
github.com/kaptinlin/go-i18n v0.2.3 h1:jyN/YOXXLcnGRBLdU+a8+6782B97fWE5aQqAHtvvk8Q=
1518-
github.com/kaptinlin/go-i18n v0.2.3/go.mod h1:O+Ax4HkMO0Jt4OaP4E4WCx0PAADeWkwk8Jgt9bjAU1w=
1519-
github.com/kaptinlin/jsonpointer v0.4.9 h1:o//bYf4PCvnMJIIX8bIg77KB6DO3wBPAabRyPRKh680=
1520-
github.com/kaptinlin/jsonpointer v0.4.9/go.mod h1:9y0LgXavlmVE5FSHShY5LRlURJJVhbyVJSRWkilrTqA=
1521-
github.com/kaptinlin/jsonschema v0.6.8 h1:CZt3HxwYCCVtAj50rys6Cg4Z/TWRUMd3U/ePV5FGXkw=
1522-
github.com/kaptinlin/jsonschema v0.6.8/go.mod h1:ZXZ4K5KrRmCCF1i6dgvBsQifl+WTb8XShKj0NpQNrz8=
1523-
github.com/kaptinlin/messageformat-go v0.4.9 h1:FR5j5n4aL4nG0afKn9vvANrKxLu7HjmbhJnw5ogIwAQ=
1524-
github.com/kaptinlin/messageformat-go v0.4.9/go.mod h1:qZzrGrlvWDz2KyyvN3dOWcK9PVSRV1BnfnNU+zB/RWc=
15251515
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
15261516
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
15271517
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
@@ -1737,8 +1727,6 @@ github.com/orcaman/concurrent-map/v2 v2.0.1/go.mod h1:9Eq3TG2oBe5FirmYWQfYO5iH1q
17371727
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
17381728
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
17391729
github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o=
1740-
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
1741-
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
17421730
github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI=
17431731
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
17441732
github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY=

pkg/api/run_now.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func RunCanaryHandler(c echo.Context) error {
4949
}
5050

5151
ctx := context.New(duty, *canary)
52-
results, err := checks.RunChecks(ctx)
52+
results, _, err := checks.RunChecks(ctx)
5353
if err != nil {
5454
return errorResponse(c, err, http.StatusInternalServerError)
5555
}

pkg/jobs/canary/canary_jobs.go

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package canary
33
import (
44
"errors"
55
"fmt"
6+
"math/rand"
67
"sync"
78
"time"
89

@@ -101,11 +102,15 @@ func (j CanaryJob) Run(ctx dutyjob.JobRuntime) error {
101102
attribute.String("canary.namespace", j.Canary.Namespace),
102103
)
103104

104-
results, err := checks.RunChecks(canaryCtx)
105+
results, runMeta, err := checks.RunChecks(canaryCtx)
105106
if err != nil {
106107
return err
107108
}
108109

110+
if runMeta.SecretLookupRateLimitSkipped > 0 {
111+
maybeRescheduleAfterSecretLookupRateLimit(ctx.Context, j.DBCanary, j.Canary, runMeta.SecretLookupRateLimitSkipped)
112+
}
113+
109114
// Get transformed checks before and after, and then delete the olds ones that are not in new set.
110115
// NOTE: Webhook checks, although they are transformed, are handled entirely by the webhook caller
111116
// and should not be deleted manually in here.
@@ -164,6 +169,28 @@ func (j CanaryJob) Run(ctx dutyjob.JobRuntime) error {
164169
return nil
165170
}
166171

172+
func maybeRescheduleAfterSecretLookupRateLimit(ctx context.Context, dbCanary pkg.Canary, canary v1.Canary, skipped int) {
173+
nextRuntime, err := canary.NextRuntime(time.Now())
174+
if err != nil || nextRuntime == nil {
175+
ctx.Warnf("failed to compute next runtime for canary[%s/%s]: %v", canary.Namespace, canary.Name, err)
176+
return
177+
}
178+
179+
if time.Until(*nextRuntime) <= 5*time.Minute {
180+
return
181+
}
182+
183+
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
184+
delay := time.Minute + time.Duration(rng.Intn(60))*time.Second
185+
runAt := time.Now().Add(delay)
186+
if err := TriggerAt(ctx, dbCanary, runAt); err != nil {
187+
ctx.Warnf("failed to schedule earlier retry for canary[%s/%s] after %d skipped results: %v", canary.Namespace, canary.Name, skipped, err)
188+
return
189+
}
190+
191+
ctx.Infof("scheduled earlier retry for canary[%s/%s] in %s after %d secret lookup rate-limited skips", canary.Namespace, canary.Name, delay.Round(time.Second), skipped)
192+
}
193+
167194
func SaveResults(ctx context.Context, results []*pkg.CheckResult) ([]string, map[string]string, error) {
168195
var transformedChecksCreated []string
169196
// Transformed checks have a delete strategy

pkg/jobs/canary/status.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ func UpdateCanaryStatusAndEvent(ctx context.Context, canary v1.Canary, results [
2222
return
2323
}
2424

25+
if len(results) == 0 {
26+
return
27+
}
28+
2529
// Skip function if canary is not sourced from Kubernetes CRD
2630
if !strings.HasPrefix(canary.Annotations["source"], "kubernetes") {
2731
return

pkg/topology/run.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ func lookup(ctx *ComponentContext, name string, spec v1.CanarySpec) ([]interface
220220
// canaryCtx.Environment = ctx.
221221
// canaryCtx.Logger = ctx.Logger
222222

223-
checkResults, err := checks.Exec(canaryCtx)
223+
checkResults, err := checks.RunChecksNoPersistence(canaryCtx)
224224
if err != nil {
225225
return nil, err
226226
}

0 commit comments

Comments
 (0)