From aabbce1cfd6f5b19df18941f36cc6b6be665cfc6 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Wed, 30 Jul 2025 17:39:34 +0200 Subject: [PATCH 1/2] :seedling: When Robot API reaches a timeout, retry later. --- pkg/services/baremetal/host/host.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index 84156177b..d14ce3fdb 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -1403,6 +1403,14 @@ func (s *Service) actionImageInstallingFinished(ctx context.Context, sshClient s // Update name in robot API if _, err := s.scope.RobotClient.SetBMServerName(s.scope.HetznerBareMetalHost.Spec.ServerID, s.scope.Hostname()); err != nil { + if errors.Is(err, os.ErrDeadlineExceeded) { + // If the Hetzner API returns this, we just want to retry later: + // failed to get bare metal server: Get "https://robot-ws.your-server.de/server/1234": net/http: TLS handshake timeout + s.scope.Logger.Info("SetBMServerName timed out, will retry later", "error", err) + return actionContinue{ + delay: 10 * time.Second, + } + } record.Warn(s.scope.HetznerBareMetalHost, "SetBMServerNameFailed", err.Error()) s.handleRobotRateLimitExceeded(err, "SetBMServerName") return actionError{err: fmt.Errorf("failed to update name of host in robot API: %w", err)} From 6118742efbd3584c0fc68ae4c0c86a8245a4e471 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Wed, 30 Jul 2025 18:27:34 +0200 Subject: [PATCH 2/2] Check first Robot API call during provisioning, too. --- pkg/services/baremetal/host/host.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index d14ce3fdb..cf77c7e97 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -190,6 +190,14 @@ func (s *Service) actionPreparing(ctx context.Context) actionResult { s.scope.HetznerBareMetalHost.SetError(infrav1.PermanentError, msg) return actionStop{} } + if errors.Is(err, os.ErrDeadlineExceeded) { + // If the Hetzner API returns this, we just want to retry later: + // Get "https://robot-ws.your-server.de/server/1234": net/http: TLS handshake timeout + s.scope.Logger.Info("GetBMServer timed out, will retry later", "error", err) + return actionContinue{ + delay: 10 * time.Second, + } + } return actionError{err: fmt.Errorf("failed to get bare metal server: %w", err)} } @@ -1405,7 +1413,7 @@ func (s *Service) actionImageInstallingFinished(ctx context.Context, sshClient s if _, err := s.scope.RobotClient.SetBMServerName(s.scope.HetznerBareMetalHost.Spec.ServerID, s.scope.Hostname()); err != nil { if errors.Is(err, os.ErrDeadlineExceeded) { // If the Hetzner API returns this, we just want to retry later: - // failed to get bare metal server: Get "https://robot-ws.your-server.de/server/1234": net/http: TLS handshake timeout + // Post "https://robot-ws.your-server.de/server/1234": net/http: TLS handshake timeout s.scope.Logger.Info("SetBMServerName timed out, will retry later", "error", err) return actionContinue{ delay: 10 * time.Second,