Skip to content

Commit df3f5cc

Browse files
committed
fix: validate gridNet in NewClient and add logging for state management
1 parent 1ba2e43 commit df3f5cc

File tree

6 files changed, with 58 additions and 19 deletions.

backend/internal/core/workflows/deployer_activities.go

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@ func ensureClient(state ewf.State) {
6565

6666
func DeployNetworkStep() ewf.StepFn {
6767
return func(ctx context.Context, state ewf.State) error {
68+
log := logger.ForOperation("deployer_activities", "deploy_network")
6869
ensureClient(state)
6970

7071
config, err := getConfig(state)
@@ -91,7 +92,9 @@ func DeployNetworkStep() ewf.StepFn {
9192
statemanager.StoreCluster(state, cluster)
9293
err = kubeClient.DeployNetwork(ctx, &cluster)
9394
// Save GridClient state after network deployment
94-
statemanager.SaveGridClientState(state, kubeClient)
95+
if err := statemanager.SaveGridClientState(state, kubeClient); err != nil {
96+
log.Warn().Err(err).Msg("failed to save GridClient state after network deployment")
97+
}
9598
statemanager.StoreCluster(state, cluster)
9699
if err != nil {
97100
nodeIDs := make([]uint32, 0, len(cluster.Nodes))
@@ -114,6 +117,7 @@ func DeployNetworkStep() ewf.StepFn {
114117

115118
func UpdateNetworkStep() ewf.StepFn {
116119
return func(ctx context.Context, state ewf.State) error {
120+
log := logger.ForOperation("deployer_activities", "update_network")
117121
ensureClient(state)
118122

119123
config, err := getConfig(state)
@@ -144,7 +148,9 @@ func UpdateNetworkStep() ewf.StepFn {
144148
}
145149

146150
// Save GridClient state after network update
147-
statemanager.SaveGridClientState(state, kubeClient)
151+
if err := statemanager.SaveGridClientState(state, kubeClient); err != nil {
152+
log.Warn().Err(err).Msg("failed to save GridClient state after network update")
153+
}
148154
statemanager.StoreCluster(state, cluster)
149155
state["node"] = node
150156
return nil
@@ -153,6 +159,7 @@ func UpdateNetworkStep() ewf.StepFn {
153159

154160
func AddNodeStep() ewf.StepFn {
155161
return func(ctx context.Context, state ewf.State) error {
162+
log := logger.ForOperation("deployer_activities", "add_node")
156163
ensureClient(state)
157164

158165
config, err := getConfig(state)
@@ -184,7 +191,9 @@ func AddNodeStep() ewf.StepFn {
184191
}
185192

186193
// Save GridClient state after node deployment
187-
statemanager.SaveGridClientState(state, kubeClient)
194+
if err := statemanager.SaveGridClientState(state, kubeClient); err != nil {
195+
log.Warn().Err(err).Msg("failed to save GridClient state after node deployment")
196+
}
188197
statemanager.StoreCluster(state, cluster)
189198

190199
// Store the deployed node in state for verification step
@@ -239,7 +248,9 @@ func DeployLeaderNodeStep() ewf.StepFn {
239248

240249
log.Debug().Str("node_name", leaderNode.Name).Msg("Leader node deployed successfully")
241250

242-
statemanager.SaveGridClientState(state, kubeClient)
251+
if err := statemanager.SaveGridClientState(state, kubeClient); err != nil {
252+
log.Warn().Err(err).Msg("failed to save GridClient state after leader node deployment")
253+
}
243254
statemanager.StoreCluster(state, cluster)
244255
return nil
245256
}
@@ -291,7 +302,9 @@ func BatchDeployAllNodesStep(metrics *metricsLib.Metrics) ewf.StepFn {
291302

292303
batchErr := kubeClient.BatchDeployNodes(ctx, &cluster, nodesToDeploy, config.SSHPublicKey)
293304

294-
statemanager.SaveGridClientState(state, kubeClient)
305+
if err := statemanager.SaveGridClientState(state, kubeClient); err != nil {
306+
log.Warn().Err(err).Msg("failed to save GridClient state after batch node deployment")
307+
}
295308
statemanager.StoreCluster(state, cluster)
296309

297310
if batchErr != nil {
@@ -524,6 +537,7 @@ func DeleteAllUserClustersStep(clusterRepo models.ClusterRepository, metrics *me
524537

525538
func RemoveDeploymentNodeStep() ewf.StepFn {
526539
return func(ctx context.Context, state ewf.State) error {
540+
log := logger.ForOperation("deployer_activities", "remove_deployment_node")
527541
ensureClient(state)
528542

529543
config, err := getConfig(state)
@@ -553,7 +567,10 @@ func RemoveDeploymentNodeStep() ewf.StepFn {
553567
}
554568

555569
// Save GridClient state after node removal
556-
statemanager.SaveGridClientState(state, kubeClient)
570+
if err := statemanager.SaveGridClientState(state, kubeClient); err != nil {
571+
log.Warn().Err(err).Msg("failed to save GridClient state after node removal")
572+
}
573+
557574
statemanager.StoreCluster(state, existingCluster)
558575
return nil
559576
}
@@ -563,7 +580,9 @@ func closeClient(ctx context.Context, wf *ewf.Workflow, err error) {
563580
log := logger.ForOperation("deployer_activities", "close_client").With().Str("workflow_name", wf.Name).Logger()
564581
if kubeClient, ok := wf.State["kubeclient"].(*kubedeployer.Client); ok {
565582
// Save final GridClient state before closing
566-
statemanager.SaveGridClientState(wf.State, kubeClient)
583+
if err := statemanager.SaveGridClientState(wf.State, kubeClient); err != nil {
584+
log.Warn().Err(err).Msg("failed to save GridClient state before closing client")
585+
}
567586

568587
kubeClient.Close()
569588
delete(wf.State, "kubeclient")

backend/internal/deployment/kubedeployer/client.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package kubedeployer
22

33
import (
4+
"fmt"
45
"kubecloud/internal/infrastructure/gridclient"
56

67
sdktrace "go.opentelemetry.io/otel/sdk/trace"
@@ -12,10 +13,11 @@ type Client struct {
1213

1314
// NewClient creates a new Client instance
1415
func NewClient(mnemonic, gridNet string, debug bool, tp *sdktrace.TracerProvider) (*Client, error) {
15-
var opts []gridclient.ClientOpts
16-
if gridNet != "" {
17-
opts = append(opts, gridclient.WithNetwork(gridNet))
16+
if gridNet == "" {
17+
return nil, fmt.Errorf("gridNet is required and cannot be empty")
1818
}
19+
var opts []gridclient.ClientOpts
20+
opts = append(opts, gridclient.WithNetwork(gridNet))
1921
if debug {
2022
opts = append(opts, gridclient.WithDebug())
2123
}

backend/internal/deployment/statemanager/client_manager.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,9 @@ func GetKubeClient(state ewf.State, config ClientConfig) (*kubedeployer.Client,
8585

8686
// Store the new client in state for reuse
8787
state["kubeclient"] = kubeClient
88-
SaveGridClientState(state, kubeClient)
88+
if err := SaveGridClientState(state, kubeClient); err != nil {
89+
log.Warn().Err(err).Msg("failed to save GridClient state after creating kubeclient")
90+
}
8991

9092
log.Debug().Msg("Created and stored fresh kubeclient")
9193
return kubeClient, nil
@@ -114,7 +116,9 @@ func CloseClient(state ewf.State, kubeClient *kubedeployer.Client) error {
114116
return nil
115117
}
116118

117-
SaveGridClientState(state, kubeClient)
119+
if err := SaveGridClientState(state, kubeClient); err != nil {
120+
log.Warn().Err(err).Msg("failed to save GridClient state before closing client")
121+
}
118122
kubeClient.Close()
119123
delete(state, "kubeclient")
120124

backend/internal/deployment/statemanager/gridclient_state.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,31 @@
11
package statemanager
22

33
import (
4+
"fmt"
45
"kubecloud/internal/deployment/kubedeployer"
6+
"kubecloud/internal/infrastructure/logger"
57

68
"github.com/xmonader/ewf"
79
)
810

911
const gridClientStateKey = "gridclient_state"
1012

1113
// SaveGridClientState saves the GridClient state to the workflow state
12-
func SaveGridClientState(workflowState ewf.State, kubeClient *kubedeployer.Client) {
14+
func SaveGridClientState(workflowState ewf.State, kubeClient *kubedeployer.Client) error {
15+
log := logger.ForOperation("statemanager", "save_gridclient_state")
16+
1317
if kubeClient == nil || kubeClient.GridClient == nil {
14-
return
18+
return fmt.Errorf("kubeClient or its GridClient is nil")
1519
}
1620

1721
stateData, err := kubeClient.GridClient.GetState()
1822
if err != nil {
19-
return
23+
return fmt.Errorf("failed to get GridClient state: %w", err)
2024
}
2125

2226
workflowState[gridClientStateKey] = stateData
27+
log.Debug().Msg("GridClient state saved successfully")
28+
return nil
2329
}
2430

2531
// RestoreGridClientState restores the GridClient state from the workflow state

backend/internal/infrastructure/gridclient/grid_client.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"kubecloud/internal/infrastructure/logger"
1212
"math"
1313
"net/http"
14+
"sync"
1415
"time"
1516

1617
"github.com/cosmos/go-bip39"
@@ -79,6 +80,7 @@ type GridClient interface {
7980
type gridClient struct {
8081
gridClient *deployer.TFPluginClient
8182
systemMnemonic string
83+
mu sync.RWMutex
8284
}
8385

8486
var _ GridClient = (*gridClient)(nil)

backend/internal/infrastructure/gridclient/grid_client_state.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ type gridClientState struct {
1515

1616
// GetState returns the current GridClient state as JSON bytes
1717
func (s *gridClient) GetState() ([]byte, error) {
18+
1819
if s.gridClient.State == nil {
1920
return nil, fmt.Errorf("gridclient state is nil")
2021
}
@@ -24,6 +25,7 @@ func (s *gridClient) GetState() ([]byte, error) {
2425
NetworkSubnets: make(map[string]map[uint32]string),
2526
}
2627

28+
s.mu.RLock()
2729
// Save CurrentNodeDeployments
2830
for nodeID, contractIDs := range s.gridClient.State.CurrentNodeDeployments {
2931
gridState.CurrentNodeDeployments[nodeID] = []uint64(contractIDs)
@@ -34,15 +36,13 @@ func (s *gridClient) GetState() ([]byte, error) {
3436
gridState.NetworkSubnets[networkName] = network.Subnets
3537
}
3638

39+
s.mu.RUnlock()
40+
3741
return json.Marshal(gridState)
3842
}
3943

4044
// RestoreState restores the GridClient state from JSON bytes
4145
func (s *gridClient) RestoreState(stateData []byte) error {
42-
if s.gridClient.State == nil {
43-
return fmt.Errorf("gridclient state is nil")
44-
}
45-
4646
if len(stateData) == 0 {
4747
return nil // Nothing to restore
4848
}
@@ -52,6 +52,11 @@ func (s *gridClient) RestoreState(stateData []byte) error {
5252
return fmt.Errorf("failed to unmarshal state: %w", err)
5353
}
5454

55+
if s.gridClient.State == nil {
56+
return fmt.Errorf("gridclient state is nil")
57+
}
58+
59+
s.mu.Lock()
5560
// Restore CurrentNodeDeployments
5661
s.gridClient.State.CurrentNodeDeployments = make(map[uint32]state.ContractIDs)
5762
for nodeID, contractIDs := range savedState.CurrentNodeDeployments {
@@ -65,6 +70,7 @@ func (s *gridClient) RestoreState(stateData []byte) error {
6570
Subnets: subnets,
6671
}
6772
}
73+
s.mu.Unlock()
6874

6975
return nil
7076
}

0 commit comments

Comments
 (0)