Skip to content

Commit 4396721

Browse files
committed
feat: add retry logic for Envoy service discovery
Add 10-minute retry mechanism when getting Envoy service name to handle cases where the service is not immediately available after deployment.

- Retry every 5 seconds for up to 10 minutes
- Show elapsed time in verbose mode
- Respect context cancellation

Signed-off-by: bitliu <[email protected]>
1 parent 2afa815 commit 4396721

File tree

1 file changed

+28
-4
lines changed

1 file changed

+28
-4
lines changed

e2e/profiles/ai-gateway/testcases.go

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,34 @@ func testChatCompletionsRequest(ctx context.Context, client *kubernetes.Clientse
6262
fmt.Println("[Test] Testing chat completions endpoint")
6363
}
6464

65-
// Get the Envoy service name
66-
envoyService, err := getEnvoyServiceName(ctx, opts.Verbose)
67-
if err != nil {
68-
return fmt.Errorf("failed to get Envoy service name: %w", err)
65+
// Get the Envoy service name with retry
66+
var envoyService string
67+
var err error
68+
retryTimeout := 10 * time.Minute
69+
retryInterval := 5 * time.Second
70+
startTime := time.Now()
71+
72+
for {
73+
envoyService, err = getEnvoyServiceName(ctx, opts.Verbose)
74+
if err == nil {
75+
break
76+
}
77+
78+
if time.Since(startTime) >= retryTimeout {
79+
return fmt.Errorf("failed to get Envoy service name after %v: %w", retryTimeout, err)
80+
}
81+
82+
if opts.Verbose {
83+
fmt.Printf("[Test] Envoy service not found, retrying in %v... (elapsed: %v)\n",
84+
retryInterval, time.Since(startTime).Round(time.Second))
85+
}
86+
87+
select {
88+
case <-ctx.Done():
89+
return ctx.Err()
90+
case <-time.After(retryInterval):
91+
// Continue retry
92+
}
6993
}
7094

7195
if opts.Verbose {

0 commit comments

Comments
 (0)