-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Separate backoff strategies for peer registration vs login operations #4835
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
9a1905a
d226e21
c1ac190
f706892
0c9f55d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,7 +25,17 @@ import ( | |
| "github.com/netbirdio/netbird/util/wsproxy" | ||
| ) | ||
|
|
||
| const ConnectTimeout = 10 * time.Second | ||
| const ( | ||
| ConnectTimeout = 10 * time.Second | ||
|
|
||
| RegistrationRetryTimeout = 180 * time.Second | ||
| RegistrationRetryInterval = 5 * time.Second | ||
| RegistrationRetryMaxDelay = 30 * time.Second | ||
|
|
||
| LoginRetryTimeout = 45 * time.Second | ||
| LoginRetryInterval = 3 * time.Second | ||
| LoginRetryMaxDelay = 15 * time.Second | ||
| ) | ||
|
|
||
| const ( | ||
| errMsgMgmtPublicKey = "failed getting Management Service public key: %s" | ||
|
|
@@ -312,6 +322,38 @@ func (c *GrpcClient) IsHealthy() bool { | |
| return true | ||
| } | ||
|
|
||
| // newRegistrationBackoff creates a backoff strategy for peer registration operations | ||
| func newRegistrationBackoff(ctx context.Context) backoff.BackOff { | ||
| return backoff.WithContext(&backoff.ExponentialBackOff{ | ||
| InitialInterval: RegistrationRetryInterval, | ||
| RandomizationFactor: backoff.DefaultRandomizationFactor, | ||
| Multiplier: backoff.DefaultMultiplier, | ||
| MaxInterval: RegistrationRetryMaxDelay, | ||
| MaxElapsedTime: RegistrationRetryTimeout, | ||
| Stop: backoff.Stop, | ||
| Clock: backoff.SystemClock, | ||
| }, ctx) | ||
| } | ||
|
|
||
| // newLoginBackoff creates a backoff strategy for peer login operations | ||
| func newLoginBackoff(ctx context.Context) backoff.BackOff { | ||
| return backoff.WithContext(&backoff.ExponentialBackOff{ | ||
| InitialInterval: LoginRetryInterval, | ||
| RandomizationFactor: backoff.DefaultRandomizationFactor, | ||
| Multiplier: backoff.DefaultMultiplier, | ||
| MaxInterval: LoginRetryMaxDelay, | ||
| MaxElapsedTime: LoginRetryTimeout, | ||
| Stop: backoff.Stop, | ||
| Clock: backoff.SystemClock, | ||
| }, ctx) | ||
| } | ||
|
|
||
| // isRegistrationRequest determines if a login request is actually a registration. | ||
| // Returns true if either the setup key or the JWT token is present in the request. | ||
| func isRegistrationRequest(req *proto.LoginRequest) bool { | ||
| return req.SetupKey != "" || req.JwtToken != "" | ||
| } | ||
|
|
||
| func (c *GrpcClient) login(serverKey wgtypes.Key, req *proto.LoginRequest) (*proto.LoginResponse, error) { | ||
| if !c.ready() { | ||
| return nil, errors.New(errMsgNoMgmtConnection) | ||
|
|
@@ -334,17 +376,31 @@ func (c *GrpcClient) login(serverKey wgtypes.Key, req *proto.LoginRequest) (*pro | |
| Body: loginReq, | ||
| }) | ||
| if err != nil { | ||
| // retry only on context canceled | ||
| if s, ok := gstatus.FromError(err); ok && s.Code() == codes.Canceled { | ||
| return err | ||
| if s, ok := gstatus.FromError(err); ok { | ||
| if s.Code() == codes.Canceled { | ||
| log.Debugf("login canceled") | ||
| return backoff.Permanent(err) | ||
| } | ||
| if s.Code() == codes.DeadlineExceeded { | ||
| log.Debugf("login timeout, retrying...") | ||
| return err | ||
| } | ||
|
Comment on lines
386
to
398
|
||
| } | ||
| return backoff.Permanent(err) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| err = backoff.Retry(operation, nbgrpc.Backoff(c.ctx)) | ||
| isRegistration := isRegistrationRequest(req) | ||
| var backoffStrategy backoff.BackOff | ||
| if isRegistration { | ||
| backoffStrategy = newRegistrationBackoff(c.ctx) | ||
| } else { | ||
| backoffStrategy = newLoginBackoff(c.ctx) | ||
| } | ||
|
|
||
| err = backoff.Retry(operation, backoffStrategy) | ||
| if err != nil { | ||
| log.Errorf("failed to login to Management Service: %v", err) | ||
| return nil, err | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider wrapping the error to indicate registration was skipped.
Returning the original
PermissionDeniederror fromdoMgmLogindoesn't convey that registration was skipped due to missing credentials. This may complicate troubleshooting when callers or operators try to understand why the operation failed.Additionally, the debug log level might be insufficient for tracking this control flow decision in production environments.
🔧 Suggested improvements
Option 1: Wrap the error with context
// Only attempt registration if we have credentials (setup key or JWT token) if setupKey == "" && jwtToken == "" { - log.Debugf("peer registration required but no credentials provided") - return err + log.Warnf("peer registration required but no credentials provided") + return status.Errorf(codes.FailedPrecondition, "peer registration required but no credentials (setup key or JWT token) provided: %v", err) }Option 2: Keep original error but improve logging
// Only attempt registration if we have credentials (setup key or JWT token) if setupKey == "" && jwtToken == "" { - log.Debugf("peer registration required but no credentials provided") + log.Warnf("peer registration required but no credentials provided, returning original error: %v", err) return err }📝 Committable suggestion
🤖 Prompt for AI Agents