diff --git a/backend/internal/core/workflows/deployer_activities.go b/backend/internal/core/workflows/deployer_activities.go index 0e8c57ad..382bbfba 100644 --- a/backend/internal/core/workflows/deployer_activities.go +++ b/backend/internal/core/workflows/deployer_activities.go @@ -65,6 +65,7 @@ func ensureClient(state ewf.State) { func DeployNetworkStep() ewf.StepFn { return func(ctx context.Context, state ewf.State) error { + log := logger.ForOperation("deployer_activities", "deploy_network") ensureClient(state) config, err := getConfig(state) @@ -91,7 +92,9 @@ func DeployNetworkStep() ewf.StepFn { statemanager.StoreCluster(state, cluster) err = kubeClient.DeployNetwork(ctx, &cluster) // Save GridClient state after network deployment - statemanager.SaveGridClientState(state, kubeClient) + if err := statemanager.SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state after network deployment") + } statemanager.StoreCluster(state, cluster) if err != nil { nodeIDs := make([]uint32, 0, len(cluster.Nodes)) @@ -114,6 +117,7 @@ func DeployNetworkStep() ewf.StepFn { func UpdateNetworkStep() ewf.StepFn { return func(ctx context.Context, state ewf.State) error { + log := logger.ForOperation("deployer_activities", "update_network") ensureClient(state) config, err := getConfig(state) @@ -144,7 +148,9 @@ func UpdateNetworkStep() ewf.StepFn { } // Save GridClient state after network update - statemanager.SaveGridClientState(state, kubeClient) + if err := statemanager.SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state after network update") + } statemanager.StoreCluster(state, cluster) state["node"] = node return nil @@ -153,6 +159,7 @@ func UpdateNetworkStep() ewf.StepFn { func AddNodeStep() ewf.StepFn { return func(ctx context.Context, state ewf.State) error { + log := logger.ForOperation("deployer_activities", "add_node") ensureClient(state) config, err := 
getConfig(state) @@ -184,7 +191,9 @@ func AddNodeStep() ewf.StepFn { } // Save GridClient state after node deployment - statemanager.SaveGridClientState(state, kubeClient) + if err := statemanager.SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state after node deployment") + } statemanager.StoreCluster(state, cluster) // Store the deployed node in state for verification step @@ -239,7 +248,9 @@ func DeployLeaderNodeStep() ewf.StepFn { log.Debug().Str("node_name", leaderNode.Name).Msg("Leader node deployed successfully") - statemanager.SaveGridClientState(state, kubeClient) + if err := statemanager.SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state after leader node deployment") + } statemanager.StoreCluster(state, cluster) return nil } @@ -291,7 +302,9 @@ func BatchDeployAllNodesStep(metrics *metricsLib.Metrics) ewf.StepFn { batchErr := kubeClient.BatchDeployNodes(ctx, &cluster, nodesToDeploy, config.SSHPublicKey) - statemanager.SaveGridClientState(state, kubeClient) + if err := statemanager.SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state after batch node deployment") + } statemanager.StoreCluster(state, cluster) if batchErr != nil { @@ -524,6 +537,7 @@ func DeleteAllUserClustersStep(clusterRepo models.ClusterRepository, metrics *me func RemoveDeploymentNodeStep() ewf.StepFn { return func(ctx context.Context, state ewf.State) error { + log := logger.ForOperation("deployer_activities", "remove_deployment_node") ensureClient(state) config, err := getConfig(state) @@ -553,7 +567,10 @@ func RemoveDeploymentNodeStep() ewf.StepFn { } // Save GridClient state after node removal - statemanager.SaveGridClientState(state, kubeClient) + if err := statemanager.SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state after node removal") + } + 
statemanager.StoreCluster(state, existingCluster) return nil } @@ -563,7 +580,9 @@ func closeClient(ctx context.Context, wf *ewf.Workflow, err error) { log := logger.ForOperation("deployer_activities", "close_client").With().Str("workflow_name", wf.Name).Logger() if kubeClient, ok := wf.State["kubeclient"].(*kubedeployer.Client); ok { // Save final GridClient state before closing - statemanager.SaveGridClientState(wf.State, kubeClient) + if err := statemanager.SaveGridClientState(wf.State, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state before closing client") + } kubeClient.Close() delete(wf.State, "kubeclient") diff --git a/backend/internal/deployment/kubedeployer/client.go b/backend/internal/deployment/kubedeployer/client.go index 8d7535f9..1fce265d 100644 --- a/backend/internal/deployment/kubedeployer/client.go +++ b/backend/internal/deployment/kubedeployer/client.go @@ -2,43 +2,41 @@ package kubedeployer import ( "fmt" + "kubecloud/internal/infrastructure/gridclient" - "github.com/threefoldtech/tfgrid-sdk-go/grid-client/deployer" - "go.opentelemetry.io/otel/trace" + sdktrace "go.opentelemetry.io/otel/sdk/trace" ) type Client struct { - GridClient deployer.TFPluginClient - mnemonic string + GridClient gridclient.GridClient } -func NewClient(mnemonic, gridNet string, debug bool, tp trace.TracerProvider) (*Client, error) { - pluginOpts := []deployer.PluginOpt{ - deployer.WithNetwork(gridNet), - deployer.WithDisableSentry(), +// NewClient creates a new Client instance +func NewClient(mnemonic, gridNet string, debug bool, tp *sdktrace.TracerProvider) (*Client, error) { + if gridNet == "" { + return nil, fmt.Errorf("gridNet is required and cannot be empty") } + var opts []gridclient.ClientOpts + opts = append(opts, gridclient.WithNetwork(gridNet)) if debug { - pluginOpts = append(pluginOpts, deployer.WithLogs()) + opts = append(opts, gridclient.WithDebug()) } - if tp != nil { - pluginOpts = append(pluginOpts, 
deployer.WithTraceProvider(tp)) + opts = append(opts, gridclient.WithTracerProvider(tp)) } + opts = append(opts, gridclient.WithDisableSentry()) - tfplugin, err := deployer.NewTFPluginClient( - mnemonic, - pluginOpts..., - ) + gridCl, err := gridclient.NewGridClient(mnemonic, opts...) if err != nil { - return nil, fmt.Errorf("failed to create TFPluginClient: %v", err) + return nil, err } return &Client{ - GridClient: tfplugin, - mnemonic: mnemonic, + GridClient: gridCl, }, nil } +// Close closes the underlying GridClient func (c *Client) Close() { c.GridClient.Close() } diff --git a/backend/internal/deployment/kubedeployer/core.go b/backend/internal/deployment/kubedeployer/core.go index a7bf272f..2a56b14d 100644 --- a/backend/internal/deployment/kubedeployer/core.go +++ b/backend/internal/deployment/kubedeployer/core.go @@ -5,10 +5,10 @@ import ( "fmt" "slices" + "kubecloud/internal/infrastructure/gridclient" "kubecloud/internal/infrastructure/logger" "kubecloud/internal/infrastructure/telemetry" - "github.com/threefoldtech/tfgrid-sdk-go/grid-client/deployer" "github.com/threefoldtech/tfgrid-sdk-go/grid-client/workloads" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -24,7 +24,7 @@ func (c *Cluster) GetLeaderNode() (Node, error) { return c.Nodes[0], nil } -func (n *Node) AssignNodeIP(ctx context.Context, gridClient deployer.TFPluginClient, networkName string) error { +func (n *Node) AssignNodeIP(ctx context.Context, gridClient gridclient.GridClient, networkName string) error { ctx, span := getTracer().Start(ctx, "Node.AssignNodeIP", trace.WithAttributes( attribute.String("node.name", n.Name), @@ -109,8 +109,8 @@ func (c *Client) DeployNode(ctx context.Context, cluster *Cluster, node Node, ma leaderIP, cluster.Token, masterPubKey, - c.mnemonic, - c.GridClient.Network, + c.GridClient.GetMnemonic(), + c.GridClient.GetGridNetwork(), ) if err != nil { telemetry.RecordError(span, err) @@ -128,7 +128,7 @@ func (c *Client) DeployNode(ctx 
context.Context, cluster *Cluster, node Node, ma Str("deployment_name", depl.Name). Msg("Deploying to grid") - if err := c.GridClient.DeploymentDeployer.Deploy(ctx, &depl); err != nil { + if err := c.GridClient.Deploy(ctx, &depl); err != nil { log.Error(). Err(err). Str("node_name", node.Name). @@ -144,7 +144,7 @@ func (c *Client) DeployNode(ctx context.Context, cluster *Cluster, node Node, ma Uint32("node_id", node.NodeID). Msg("Loading deployment result from grid") - result, err := c.GridClient.State.LoadDeploymentFromGrid(ctx, node.NodeID, node.Name) + result, err := c.GridClient.LoadDeploymentFromGrid(ctx, node.NodeID, node.Name) if err != nil { telemetry.RecordError(span, err) return fmt.Errorf("failed to load deployment for node %s: %v", node.Name, err) @@ -241,8 +241,8 @@ func (c *Client) BatchDeployNodes(ctx context.Context, cluster *Cluster, nodes [ leaderIP, cluster.Token, masterPubKey, - c.mnemonic, - c.GridClient.Network, + c.GridClient.GetMnemonic(), + c.GridClient.GetGridNetwork(), ) if err != nil { telemetry.RecordError(span, err) @@ -259,7 +259,7 @@ func (c *Client) BatchDeployNodes(ctx context.Context, cluster *Cluster, nodes [ log.Debug(). Int("deployment_count", len(deployments)). 
Msg("Starting batch deployment to grid") - batchErr := c.GridClient.DeploymentDeployer.BatchDeploy(ctx, deployments) + batchErr := c.GridClient.BatchDeploy(ctx, deployments) var successCount int var failedNodes []string @@ -278,7 +278,7 @@ func (c *Client) BatchDeployNodes(ctx context.Context, cluster *Cluster, nodes [ ), ) - result, err := c.GridClient.State.LoadDeploymentFromGrid(ctx, node.NodeID, node.Name) + result, err := c.GridClient.LoadDeploymentFromGrid(ctx, node.NodeID, node.Name) if err != nil { log.Warn().Err(err).Str("node_name", node.Name).Msg("Failed to load deployment for node") failedNodes = append(failedNodes, node.Name) @@ -418,7 +418,7 @@ func (c *Client) DeployNetwork(ctx context.Context, cluster *Cluster) error { Msg("Deploying network") span.AddEvent("Deploying network") - err = c.GridClient.NetworkDeployer.Deploy(ctx, &net) + err = c.GridClient.DeployNetwork(ctx, &net) cluster.Network = net if err != nil { telemetry.RecordError(span, err) diff --git a/backend/internal/deployment/kubedeployer/netutil.go b/backend/internal/deployment/kubedeployer/netutil.go index dc49d075..43cb1f1e 100644 --- a/backend/internal/deployment/kubedeployer/netutil.go +++ b/backend/internal/deployment/kubedeployer/netutil.go @@ -5,15 +5,15 @@ import ( "fmt" "net" + "kubecloud/internal/infrastructure/gridclient" + "github.com/pkg/errors" - "github.com/threefoldtech/tfgrid-sdk-go/grid-client/deployer" "github.com/threefoldtech/tfgrid-sdk-go/grid-client/workloads" "github.com/threefoldtech/tfgrid-sdk-go/grid-client/zos" ) -func getIpForVm(ctx context.Context, tfPluginClient deployer.TFPluginClient, networkName string, nodeID uint32) (string, error) { - network := tfPluginClient.State.Networks.GetNetwork(networkName) - ipRange := network.GetNodeSubnet(nodeID) +func getIpForVm(ctx context.Context, tfPluginClient gridclient.GridClient, networkName string, nodeID uint32) (string, error) { + ipRange := tfPluginClient.GetNodeSubnet(networkName, nodeID) ip, ipRangeCIDR, err 
:= net.ParseCIDR(ipRange) if err != nil { @@ -46,8 +46,8 @@ func getIpForVm(ctx context.Context, tfPluginClient deployer.TFPluginClient, net return "", fmt.Errorf("all IPs are exhausted for network %s on node %d", networkName, nodeID) } -func getUsedHostIDsFromGrid(ctx context.Context, tfPluginClient deployer.TFPluginClient, nodeID uint32, networkName string, ipRangeCIDR *net.IPNet) ([]byte, error) { - nodeClient, err := tfPluginClient.NcPool.GetNodeClient(tfPluginClient.SubstrateConn, nodeID) +func getUsedHostIDsFromGrid(ctx context.Context, tfPluginClient gridclient.GridClient, nodeID uint32, networkName string, ipRangeCIDR *net.IPNet) ([]byte, error) { + nodeClient, err := tfPluginClient.GetNodeClient(nodeID) if err != nil { return nil, errors.Wrapf(err, "could not get node client for node %d", nodeID) } diff --git a/backend/internal/deployment/kubedeployer/noderemove.go b/backend/internal/deployment/kubedeployer/noderemove.go index 191091d8..57e7f766 100644 --- a/backend/internal/deployment/kubedeployer/noderemove.go +++ b/backend/internal/deployment/kubedeployer/noderemove.go @@ -127,7 +127,7 @@ func (c *Client) isContractActive(ctx context.Context, contractID uint64) bool { log := logger.ForOperation("kubedeployer", "check_contract_active") log.Debug().Uint64("contract_id", contractID).Msg("Checking if contract is active") - _, err := c.GridClient.SubstrateConn.GetContract(contractID) + _, err := c.GridClient.GetContract(contractID) isActive := err == nil span.SetAttributes(attribute.Bool("contract.is_active", isActive)) diff --git a/backend/internal/deployment/statemanager/client_manager.go b/backend/internal/deployment/statemanager/client_manager.go index 353143b7..aa6d4eed 100644 --- a/backend/internal/deployment/statemanager/client_manager.go +++ b/backend/internal/deployment/statemanager/client_manager.go @@ -9,6 +9,7 @@ import ( "github.com/xmonader/ewf" "go.opentelemetry.io/otel" + sdktrace "go.opentelemetry.io/otel/sdk/trace" 
"go.opentelemetry.io/otel/trace/noop" ) @@ -65,8 +66,14 @@ func GetKubeClient(state ewf.State, config ClientConfig) (*kubedeployer.Client, log.Warn().Msg("Tracer provider is no-op, tracing will not work") } + // Extract SDK trace provider if available + var tp *sdktrace.TracerProvider + if globalTp != nil { + tp, _ = globalTp.(*sdktrace.TracerProvider) + } + // Create new client - kubeClient, err := kubedeployer.NewClient(config.Mnemonic, config.Network, config.Debug, globalTp) + kubeClient, err := kubedeployer.NewClient(config.Mnemonic, config.Network, config.Debug, tp) if err != nil { return nil, fmt.Errorf("failed to create kubeclient: %w", err) } @@ -78,7 +85,9 @@ func GetKubeClient(state ewf.State, config ClientConfig) (*kubedeployer.Client, // Store the new client in state for reuse state["kubeclient"] = kubeClient - SaveGridClientState(state, kubeClient) + if err := SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state after creating kubeclient") + } log.Debug().Msg("Created and stored fresh kubeclient") return kubeClient, nil @@ -107,7 +116,9 @@ func CloseClient(state ewf.State, kubeClient *kubedeployer.Client) error { return nil } - SaveGridClientState(state, kubeClient) + if err := SaveGridClientState(state, kubeClient); err != nil { + log.Warn().Err(err).Msg("failed to save GridClient state before closing client") + } kubeClient.Close() delete(state, "kubeclient") diff --git a/backend/internal/deployment/statemanager/gridclient_state.go b/backend/internal/deployment/statemanager/gridclient_state.go index 77227630..a6d64264 100644 --- a/backend/internal/deployment/statemanager/gridclient_state.go +++ b/backend/internal/deployment/statemanager/gridclient_state.go @@ -1,86 +1,48 @@ package statemanager import ( - "encoding/json" "fmt" - "kubecloud/internal/deployment/kubedeployer" - "kubecloud/internal/infrastructure/logger" - "github.com/threefoldtech/tfgrid-sdk-go/grid-client/state" 
"github.com/xmonader/ewf" ) -// GridClientState represents the critical state that needs to be preserved -type GridClientState struct { - CurrentNodeDeployments map[uint32][]uint64 `json:"current_node_deployments"` - NetworkSubnets map[string]map[uint32]string `json:"network_subnets"` -} +const gridClientStateKey = "gridclient_state" -// SaveGridClientState saves the critical GridClient state to workflow state -func SaveGridClientState(workflowState ewf.State, kubeClient *kubedeployer.Client) { +// SaveGridClientState saves the GridClient state to the workflow state +func SaveGridClientState(workflowState ewf.State, kubeClient *kubedeployer.Client) error { log := logger.ForOperation("statemanager", "save_gridclient_state") - if kubeClient == nil || kubeClient.GridClient.State == nil { - return - } - gridState := GridClientState{ - CurrentNodeDeployments: make(map[uint32][]uint64), - NetworkSubnets: make(map[string]map[uint32]string), + if kubeClient == nil || kubeClient.GridClient == nil { + return fmt.Errorf("kubeClient or its GridClient is nil") } - // Save CurrentNodeDeployments - for nodeID, contractIDs := range kubeClient.GridClient.State.CurrentNodeDeployments { - gridState.CurrentNodeDeployments[nodeID] = []uint64(contractIDs) + stateData, err := kubeClient.GridClient.GetState() + if err != nil { + return fmt.Errorf("failed to get GridClient state: %w", err) } - // Save network subnet information - for networkName, network := range kubeClient.GridClient.State.Networks.State { - gridState.NetworkSubnets[networkName] = network.Subnets - } - - // Store as JSON string in workflow state - if stateBytes, err := json.Marshal(gridState); err == nil { - workflowState["gridclient_state"] = string(stateBytes) - log.Debug().Msg("Saved GridClient state to workflow state") - } else { - log.Warn().Err(err).Msg("Failed to marshal GridClient state") - } + workflowState[gridClientStateKey] = stateData + log.Debug().Msg("GridClient state saved successfully") + return nil } -// 
RestoreGridClientState restores the critical GridClient state from workflow state +// RestoreGridClientState restores the GridClient state from the workflow state func RestoreGridClientState(workflowState ewf.State, kubeClient *kubedeployer.Client) error { - log := logger.ForOperation("statemanager", "restore_gridclient_state") - if kubeClient == nil || kubeClient.GridClient.State == nil { - return fmt.Errorf("invalid kubeclient or gridclient state") - } - - stateStr, ok := workflowState["gridclient_state"].(string) - if !ok || stateStr == "" { - log.Debug().Msg("No GridClient state found in workflow state") - return nil // Not an error, just no state to restore + if kubeClient == nil || kubeClient.GridClient == nil { + return nil } - var savedState GridClientState - if err := json.Unmarshal([]byte(stateStr), &savedState); err != nil { - return fmt.Errorf("failed to unmarshal GridClient state: %w", err) + stateData, ok := workflowState[gridClientStateKey] + if !ok { + return nil } - // Restore CurrentNodeDeployments - kubeClient.GridClient.State.CurrentNodeDeployments = make(map[uint32]state.ContractIDs) - for nodeID, contractIDs := range savedState.CurrentNodeDeployments { - kubeClient.GridClient.State.CurrentNodeDeployments[nodeID] = state.ContractIDs(contractIDs) + stateBytes, ok := stateData.([]byte) + if !ok { + return nil } - // Restore network subnet information - kubeClient.GridClient.State.Networks.State = make(map[string]state.Network) - for networkName, subnets := range savedState.NetworkSubnets { - kubeClient.GridClient.State.Networks.State[networkName] = state.Network{ - Subnets: subnets, - } - } - - log.Debug().Msg("Restored GridClient state from workflow state") - return nil + return kubeClient.GridClient.RestoreState(stateBytes) } diff --git a/backend/internal/infrastructure/gridclient/grid_client.go b/backend/internal/infrastructure/gridclient/grid_client.go index 44683165..41723845 100644 --- 
a/backend/internal/infrastructure/gridclient/grid_client.go +++ b/backend/internal/infrastructure/gridclient/grid_client.go @@ -11,6 +11,7 @@ import ( "kubecloud/internal/infrastructure/logger" "math" "net/http" + "sync" "time" "github.com/cosmos/go-bip39" @@ -18,6 +19,7 @@ import ( "github.com/threefoldtech/tfgrid-sdk-go/grid-client/calculator" "github.com/threefoldtech/tfgrid-sdk-go/grid-client/deployer" client "github.com/threefoldtech/tfgrid-sdk-go/grid-client/node" + "github.com/threefoldtech/tfgrid-sdk-go/grid-client/workloads" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" sdktrace "go.opentelemetry.io/otel/sdk/trace" ) @@ -57,12 +59,28 @@ type GridClient interface { Twins(ctx context.Context, filter types.TwinFilter, limit types.Limit) (res []types.Twin, totalCount int, err error) Stats(ctx context.Context, filter types.StatsFilter) (res types.Stats, err error) + // deployment methods + Deploy(ctx context.Context, dl *workloads.Deployment) error + BatchDeploy(ctx context.Context, dls []*workloads.Deployment) error + LoadDeploymentFromGrid(ctx context.Context, nodeID uint32, name string) (workloads.Deployment, error) + DeployNetwork(ctx context.Context, net *workloads.ZNet) error + GetContract(contractID uint64) (uint64, error) + BatchCancelContract(contracts []uint64) error + GetGridNetwork() string + GetNodeSubnet(networkName string, nodeID uint32) string + GetMnemonic() string + + // state management methods + GetState() ([]byte, error) + RestoreState(stateData []byte) error + Close() } type gridClient struct { gridClient *deployer.TFPluginClient systemMnemonic string + mu sync.RWMutex } var _ GridClient = (*gridClient)(nil) @@ -117,6 +135,9 @@ func NewGridClient(systemMnemonic string, opts ...ClientOpts) (GridClient, error if cfg.network != "" { pluginOpts = append(pluginOpts, deployer.WithNetwork(cfg.network)) } + if cfg.traceProvider != nil { + pluginOpts = append(pluginOpts, deployer.WithTraceProvider(cfg.traceProvider)) + } gridCl, err 
:= deployer.NewTFPluginClient( systemMnemonic, @@ -422,6 +443,56 @@ func (s *gridClient) Close() { s.gridClient.Close() } +// Deploy deploys a deployment to the grid +func (s *gridClient) Deploy(ctx context.Context, dl *workloads.Deployment) error { + return s.gridClient.DeploymentDeployer.Deploy(ctx, dl) +} + +// BatchDeploy batch deploys multiple deployments to the grid +func (s *gridClient) BatchDeploy(ctx context.Context, dls []*workloads.Deployment) error { + return s.gridClient.DeploymentDeployer.BatchDeploy(ctx, dls) +} + +// LoadDeploymentFromGrid loads a deployment from the grid by nodeID and deployment name +func (s *gridClient) LoadDeploymentFromGrid(ctx context.Context, nodeID uint32, name string) (workloads.Deployment, error) { + return s.gridClient.State.LoadDeploymentFromGrid(ctx, nodeID, name) +} + +// DeployNetwork deploys a network to the grid +func (s *gridClient) DeployNetwork(ctx context.Context, net *workloads.ZNet) error { + return s.gridClient.NetworkDeployer.Deploy(ctx, net) +} + +// GetContract gets a contract by its ID +func (s *gridClient) GetContract(contractID uint64) (uint64, error) { + contract, err := s.gridClient.SubstrateConn.GetContract(contractID) + if err != nil { + return 0, err + } + return uint64(contract.ContractID), nil +} + +// BatchCancelContract cancels multiple contracts +func (s *gridClient) BatchCancelContract(contracts []uint64) error { + return s.gridClient.BatchCancelContract(contracts) +} + +// GetGridNetwork returns the network of the grid client +func (s *gridClient) GetGridNetwork() string { + return s.gridClient.Network +} + +// GetNodeSubnet returns the node subnet for a given network and node ID +func (s *gridClient) GetNodeSubnet(networkName string, nodeID uint32) string { + network := s.gridClient.State.Networks.GetNetwork(networkName) + return network.GetNodeSubnet(nodeID) +} + +// GetMnemonic returns the system mnemonic +func (s *gridClient) GetMnemonic() string { + return s.systemMnemonic +} + // 
GenerateMnemonic generate mnemonic func GenerateMnemonic() (string, error) { entropy, err := bip39.NewEntropy(128) diff --git a/backend/internal/infrastructure/gridclient/grid_client_state.go b/backend/internal/infrastructure/gridclient/grid_client_state.go new file mode 100644 index 00000000..034e5eb2 --- /dev/null +++ b/backend/internal/infrastructure/gridclient/grid_client_state.go @@ -0,0 +1,76 @@ +package gridclient + +import ( + "encoding/json" + "fmt" + + "github.com/threefoldtech/tfgrid-sdk-go/grid-client/state" +) + +// gridClientState represents the critical state that needs to be preserved +type gridClientState struct { + CurrentNodeDeployments map[uint32][]uint64 `json:"current_node_deployments"` + NetworkSubnets map[string]map[uint32]string `json:"network_subnets"` +} + +// GetState returns the current GridClient state as JSON bytes +func (s *gridClient) GetState() ([]byte, error) { + + if s.gridClient.State == nil { + return nil, fmt.Errorf("gridclient state is nil") + } + + gridState := gridClientState{ + CurrentNodeDeployments: make(map[uint32][]uint64), + NetworkSubnets: make(map[string]map[uint32]string), + } + + s.mu.RLock() + // Save CurrentNodeDeployments + for nodeID, contractIDs := range s.gridClient.State.CurrentNodeDeployments { + gridState.CurrentNodeDeployments[nodeID] = []uint64(contractIDs) + } + + // Save network subnet information + for networkName, network := range s.gridClient.State.Networks.State { + gridState.NetworkSubnets[networkName] = network.Subnets + } + + s.mu.RUnlock() + + return json.Marshal(gridState) +} + +// RestoreState restores the GridClient state from JSON bytes +func (s *gridClient) RestoreState(stateData []byte) error { + if len(stateData) == 0 { + return nil // Nothing to restore + } + + var savedState gridClientState + if err := json.Unmarshal(stateData, &savedState); err != nil { + return fmt.Errorf("failed to unmarshal state: %w", err) + } + + if s.gridClient.State == nil { + return fmt.Errorf("gridclient 
state is nil") + } + + s.mu.Lock() + // Restore CurrentNodeDeployments + s.gridClient.State.CurrentNodeDeployments = make(map[uint32]state.ContractIDs) + for nodeID, contractIDs := range savedState.CurrentNodeDeployments { + s.gridClient.State.CurrentNodeDeployments[nodeID] = state.ContractIDs(contractIDs) + } + + // Restore network subnet information + s.gridClient.State.Networks.State = make(map[string]state.Network) + for networkName, subnets := range savedState.NetworkSubnets { + s.gridClient.State.Networks.State[networkName] = state.Network{ + Subnets: subnets, + } + } + s.mu.Unlock() + + return nil +}