diff --git a/internal/nvmeof/controller/controllerserver.go b/internal/nvmeof/controller/controllerserver.go index 2846b719706..9358c0c3616 100644 --- a/internal/nvmeof/controller/controllerserver.go +++ b/internal/nvmeof/controller/controllerserver.go @@ -796,12 +796,12 @@ func (cs *Server) createNVMeoFResources( // Step 6: If using auto-listeners, query them back for storing in metadata if networkMask != "" { - autoListeners, err := gateway.ListListeners(ctx, nvmeofData.SubsystemNQN) + listenersDetailsList, err := gateway.GetListeners(ctx, nvmeofData.SubsystemNQN) if err != nil { - return nvmeofData, fmt.Errorf("failed to list auto-created listeners: %w", err) + return nvmeofData, fmt.Errorf("failed to retrieve auto-created listeners after retries: %w", err) } - nvmeofData.ListenerInfo = nvmeof.ConvertListenersFromProto(autoListeners.GetListeners()) - log.DebugLog(ctx, "Retrieved %d auto-created listeners", len(nvmeofData.ListenerInfo)) + log.DebugLog(ctx, "Retrieved %d auto-created listeners", len(listenersDetailsList)) + nvmeofData.ListenerInfo = listenersDetailsList } uuid, err := gateway.GetUUIDBySubsystemAndNameSpaceID(ctx, nvmeofData.SubsystemNQN, nvmeofData.NamespaceID) diff --git a/internal/nvmeof/nvmeof.go b/internal/nvmeof/nvmeof.go index 5b55139ef83..ad31daf10ab 100644 --- a/internal/nvmeof/nvmeof.go +++ b/internal/nvmeof/nvmeof.go @@ -23,6 +23,7 @@ import ( "math/big" "syscall" + "github.com/avast/retry-go/v4" pb "github.com/ceph/ceph-nvmeof/lib/go/nvmeof" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -536,6 +537,30 @@ func ConvertListenersFromProto(protoListeners []*pb.ListenerInfo) []ListenerDeta return listeners } +// GetListeners retrieves listeners for a subsystem with retry logic. +// Auto-listeners feature may take time to sync to OMAP state, so this retries with +// exponential backoff. 
+func (gw *GatewayRpcClient) GetListeners( + ctx context.Context, + subsystemNQN string, +) ([]ListenerDetails, error) { + return retry.DoWithData( + func() ([]ListenerDetails, error) { + autoListeners, err := gw.ListListeners(ctx, subsystemNQN) + if err != nil { + return nil, fmt.Errorf("failed to list auto-created listeners: %w", err) + } + + if len(autoListeners.GetListeners()) == 0 { + return nil, fmt.Errorf("no auto-listeners found for subsystem %s", subsystemNQN) + } + + return ConvertListenersFromProto(autoListeners.GetListeners()), nil + }, + retry.Attempts(6), // ~100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s = ~6.3s total + ) +} + // Connect to Gateway gRPC server. func (c *GatewayRpcClient) connect() error { // Create connection using new gRPC API