-
Notifications
You must be signed in to change notification settings - Fork 4
sambuc/feat merge restart pr 1 #77
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: build/csi-rclone-improvements
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| package rclone | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "net" | ||
| "os" | ||
|
|
@@ -78,14 +79,27 @@ func NewNodeServer(csiDriver *csicommon.CSIDriver, cacheDir string, cacheSize st | |
| } | ||
| rcloneOps := NewRclone(kubeClient, rclonePort, cacheDir, cacheSize) | ||
|
|
||
| return &nodeServer{ | ||
| // Use kubelet plugin directory for state persistence | ||
| stateFile := "/var/lib/kubelet/plugins/csi-rclone/mounted_volumes.json" | ||
|
|
||
| ns := &nodeServer{ | ||
| DefaultNodeServer: csicommon.NewDefaultNodeServer(csiDriver), | ||
| mounter: &mount.SafeFormatAndMount{ | ||
| Interface: mount.New(""), | ||
| Exec: utilexec.New(), | ||
| }, | ||
| RcloneOps: rcloneOps, | ||
| }, nil | ||
| RcloneOps: rcloneOps, | ||
| mountedVolumes: make(map[string]*MountedVolume), | ||
| mutex: &sync.Mutex{}, | ||
| stateFile: stateFile, | ||
| } | ||
|
|
||
| // Load persisted state on startup | ||
| if err := ns.loadState(); err != nil { | ||
| klog.Warningf("Failed to load persisted volume state: %v", err) | ||
| } | ||
|
|
||
| return ns, nil | ||
| } | ||
|
|
||
| func NewControllerServer(csiDriver *csicommon.CSIDriver) *controllerServer { | ||
|
|
@@ -139,7 +153,13 @@ func (d *Driver) Run() error { | |
| ) | ||
| d.server = s | ||
| if d.ns != nil && d.ns.RcloneOps != nil { | ||
| return d.ns.RcloneOps.Run() | ||
| onDaemonReady := func() error { | ||
| if d.ns != nil { | ||
| return d.ns.remountTrackedVolumes(context.Background()) | ||
| } | ||
| return nil | ||
| } | ||
| return d.ns.RcloneOps.Run(onDaemonReady) | ||
|
Comment on lines
+156
to
+162
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: Here it is better to pass in a context to the enclosing function and propagate that into
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can even decide whether to have a timeout for the total remount of all storages or just make a new timeout context for each call to Mount for each mountpoint. And you could even do both. |
||
| } | ||
| s.Wait() | ||
| return nil | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,10 +7,13 @@ package rclone | |
|
|
||
| import ( | ||
| "bytes" | ||
| "encoding/json" | ||
| "errors" | ||
| "fmt" | ||
| "os" | ||
| "path/filepath" | ||
| "strings" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "gopkg.in/ini.v1" | ||
|
|
@@ -37,6 +40,23 @@ type nodeServer struct { | |
| *csicommon.DefaultNodeServer | ||
| mounter *mount.SafeFormatAndMount | ||
| RcloneOps Operations | ||
|
|
||
| // Track mounted volumes for automatic remounting | ||
| mountedVolumes map[string]*MountedVolume | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: Why pointer here and not just
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume it allows for the map to stay small, therefore making it simpler and faster to reallocate when needed as we append often to it. The MountedVolume struct is not that small, also this decouples it a bit more from the map.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You really should not overthink that in go. This map will not have intensive use, so it should stay as simple as it can get. In go, this means using values as much as possible. |
||
| mutex *sync.Mutex | ||
| stateFile string | ||
| } | ||
|
|
||
| type MountedVolume struct { | ||
| VolumeId string | ||
| TargetPath string | ||
| Remote string | ||
| RemotePath string | ||
| ConfigData string | ||
| ReadOnly bool | ||
| Parameters map[string]string | ||
| SecretName string | ||
| SecretNamespace string | ||
| } | ||
|
|
||
| // Mounting Volume (Preparation) | ||
|
|
@@ -141,6 +161,10 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis | |
| } | ||
| return nil, status.Error(codes.Internal, err.Error()) | ||
| } | ||
|
|
||
| // Track the mounted volume for automatic remounting | ||
| ns.trackMountedVolume(volumeId, targetPath, remote, remotePath, configData, readOnly, flags, secretName, secretNamespace) | ||
|
|
||
| // err = ns.WaitForMountAvailable(targetPath) | ||
| // if err != nil { | ||
| // return nil, status.Error(codes.Internal, err.Error()) | ||
|
|
@@ -323,6 +347,10 @@ func (ns *nodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu | |
| if err := ns.RcloneOps.Unmount(ctx, req.GetVolumeId(), targetPath); err != nil { | ||
| klog.Warningf("Unmounting volume failed: %s", err) | ||
| } | ||
|
|
||
| // Remove the volume from tracking | ||
| ns.removeTrackedVolume(req.GetVolumeId()) | ||
|
|
||
| mount.CleanupMountPoint(req.GetTargetPath(), ns.mounter, false) | ||
| return &csi.NodeUnpublishVolumeResponse{}, nil | ||
| } | ||
|
|
@@ -344,6 +372,84 @@ func (*nodeServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandVolu | |
| return nil, status.Errorf(codes.Unimplemented, "method NodeExpandVolume not implemented") | ||
| } | ||
|
|
||
| // Track mounted volume for automatic remounting | ||
| func (ns *nodeServer) trackMountedVolume(volumeId, targetPath, remote, remotePath, configData string, readOnly bool, parameters map[string]string, secretName, secretNamespace string) { | ||
| ns.mutex.Lock() | ||
|
|
||
| ns.mountedVolumes[volumeId] = &MountedVolume{ | ||
| VolumeId: volumeId, | ||
| TargetPath: targetPath, | ||
| Remote: remote, | ||
| RemotePath: remotePath, | ||
| ConfigData: configData, | ||
| ReadOnly: readOnly, | ||
| Parameters: parameters, | ||
| SecretName: secretName, | ||
| SecretNamespace: secretNamespace, | ||
| } | ||
| // Can't use defer here, as persistState also takes the mutex, and would fail as the Unlock happens after it returns | ||
| ns.mutex.Unlock() | ||
| klog.Infof("Tracked mounted volume %s at path %s", volumeId, targetPath) | ||
|
|
||
| if err := ns.persistState(); err != nil { | ||
| klog.Errorf("Failed to persist volume state: %v", err) | ||
| } | ||
|
Comment on lines
+391
to
+396
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: I think that updating the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What you could do is make You could even go as far as calling that function |
||
| } | ||
|
|
||
| // Remove tracked volume when unmounted | ||
| func (ns *nodeServer) removeTrackedVolume(volumeId string) { | ||
| ns.mutex.Lock() | ||
|
|
||
| delete(ns.mountedVolumes, volumeId) | ||
| // Can't use defer here, as persistState also takes the mutex, and would fail as the Unlock happens after it returns | ||
| ns.mutex.Unlock() | ||
| klog.Infof("Removed tracked volume %s", volumeId) | ||
|
|
||
| if err := ns.persistState(); err != nil { | ||
| klog.Errorf("Failed to persist volume state: %v", err) | ||
| } | ||
| } | ||
|
|
||
| // Automatically remount all tracked volumes after daemon restart | ||
| func (ns *nodeServer) remountTrackedVolumes(ctx context.Context) error { | ||
| ns.mutex.Lock() | ||
| defer ns.mutex.Unlock() | ||
|
|
||
| if len(ns.mountedVolumes) == 0 { | ||
| klog.Info("No tracked volumes to remount") | ||
| return nil | ||
| } | ||
|
|
||
| klog.Infof("Remounting %d tracked volumes", len(ns.mountedVolumes)) | ||
|
|
||
| for volumeId, mv := range ns.mountedVolumes { | ||
|
Comment on lines
+414
to
+425
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: This is similar to the other cases. It is safer to load the text file and remount in one lock/unlock cycle. |
||
| klog.Infof("Remounting volume %s to %s", volumeId, mv.TargetPath) | ||
|
|
||
| // Create the mount directory if it doesn't exist | ||
| if err := os.MkdirAll(mv.TargetPath, 0750); err != nil { | ||
| klog.Errorf("Failed to create mount directory %s: %v", mv.TargetPath, err) | ||
| continue | ||
| } | ||
|
|
||
| // Remount the volume | ||
| rcloneVol := &RcloneVolume{ | ||
| ID: mv.VolumeId, | ||
| Remote: mv.Remote, | ||
| RemotePath: mv.RemotePath, | ||
| } | ||
|
|
||
| err := ns.RcloneOps.Mount(ctx, rcloneVol, mv.TargetPath, mv.ConfigData, mv.ReadOnly, mv.Parameters) | ||
| if err != nil { | ||
| klog.Errorf("Failed to remount volume %s: %v", volumeId, err) | ||
| // Don't return error here - continue with other volumes | ||
| } else { | ||
| klog.Infof("Successfully remounted volume %s", volumeId) | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| func (ns *nodeServer) WaitForMountAvailable(mountpoint string) error { | ||
| for { | ||
| select { | ||
|
|
@@ -357,3 +463,57 @@ func (ns *nodeServer) WaitForMountAvailable(mountpoint string) error { | |
| } | ||
| } | ||
| } | ||
|
|
||
| // Persist volume state to disk | ||
| func (ns *nodeServer) persistState() error { | ||
| if ns.stateFile == "" { | ||
| return nil | ||
| } | ||
|
|
||
| if err := os.MkdirAll(filepath.Dir(ns.stateFile), 0755); err != nil { | ||
| return fmt.Errorf("failed to create state directory: %v", err) | ||
| } | ||
|
|
||
| ns.mutex.Lock() | ||
| defer ns.mutex.Unlock() | ||
|
|
||
| data, err := json.Marshal(ns.mountedVolumes) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to marshal volume state: %v", err) | ||
| } | ||
|
|
||
| if err := os.WriteFile(ns.stateFile, data, 0600); err != nil { | ||
| return fmt.Errorf("failed to write state file: %v", err) | ||
| } | ||
|
|
||
| klog.Infof("Persisted volume state to %s", ns.stateFile) | ||
| return nil | ||
| } | ||
|
|
||
| // Load volume state from disk | ||
| func (ns *nodeServer) loadState() error { | ||
| if ns.stateFile == "" { | ||
| return nil | ||
| } | ||
|
|
||
| ns.mutex.Lock() | ||
| defer ns.mutex.Unlock() | ||
|
|
||
| data, err := os.ReadFile(ns.stateFile) | ||
| if err != nil { | ||
| if os.IsNotExist(err) { | ||
| klog.Info("No persisted volume state found, starting fresh") | ||
| return nil | ||
| } | ||
| return fmt.Errorf("failed to read state file: %v", err) | ||
| } | ||
|
|
||
| var volumes map[string]*MountedVolume | ||
| if err := json.Unmarshal(data, &volumes); err != nil { | ||
| return fmt.Errorf("failed to unmarshal volume state: %v", err) | ||
| } | ||
|
|
||
| ns.mountedVolumes = volumes | ||
| klog.Infof("Loaded %d tracked volumes from %s", len(ns.mountedVolumes), ns.stateFile) | ||
| return nil | ||
| } | ||
|
Comment on lines
+493
to
+519
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion(non-blocking): I am being incredibly picky here.. but it is not a bad idea to do one of the following:
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not a big deal, but it does not hurt to make this a constant
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You will find this (and the comment) moved directly into the instantiation of nodeServer just below as this is the only place where this variable is used in a commit in #78.