Skip to content

Commit 2fc62a6

Browse files
author
Changwei Ge
authored
Merge pull request #385 from mofishzz/hjn/live-upgrade
Fix some small problems in the live upgrade procedure
2 parents 5338178 + c1971c3 commit 2fc62a6

File tree

6 files changed

+67
-70
lines changed

6 files changed

+67
-70
lines changed

pkg/daemon/daemon.go

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -211,23 +211,7 @@ func (d *Daemon) SharedMount(rafs *Rafs) error {
211211
return errors.Wrapf(err, "mount instance %s", rafs.SnapshotID)
212212
}
213213

214-
defer func() {
215-
su := d.Supervisor
216-
if su != nil {
217-
// TODO: This should be optional by checking snapshotter's configuration.
218-
// FIXME: Is it possible the states are overwritten during two API mounts.
219-
// FIXME: What if nydusd does not support sending states.
220-
err = su.FetchDaemonStates(func() error {
221-
if err := d.SendStates(); err != nil {
222-
return errors.Wrapf(err, "send daemon %s states", d.ID())
223-
}
224-
return nil
225-
})
226-
if err != nil {
227-
log.L.Warnf("Daemon %s does not support sending states, %v", d.ID(), err)
228-
}
229-
}
230-
}()
214+
defer d.SendStates()
231215

232216
if d.States.FsDriver == config.FsDriverFscache {
233217
if err := d.sharedErofsMount(rafs); err != nil {
@@ -266,6 +250,8 @@ func (d *Daemon) SharedUmount(rafs *Rafs) error {
266250
return errors.Wrapf(err, "umount instance %s", rafs.SnapshotID)
267251
}
268252

253+
defer d.SendStates()
254+
269255
if d.States.FsDriver == config.FsDriverFscache {
270256
if err := d.sharedErofsUmount(rafs); err != nil {
271257
return errors.Wrapf(err, "failed to erofs mount")
@@ -357,7 +343,25 @@ func (d *Daemon) sharedErofsUmount(rafs *Rafs) error {
357343
return nil
358344
}
359345

360-
func (d *Daemon) SendStates() error {
346+
func (d *Daemon) SendStates() {
347+
su := d.Supervisor
348+
if su != nil {
349+
// TODO: This should be optional by checking snapshotter's configuration.
350+
// FIXME: Is it possible the states are overwritten during two API mounts.
351+
// FIXME: What if nydusd does not support sending states.
352+
err := su.FetchDaemonStates(func() error {
353+
if err := d.doSendStates(); err != nil {
354+
return errors.Wrapf(err, "send daemon %s states", d.ID())
355+
}
356+
return nil
357+
})
358+
if err != nil {
359+
log.L.Warnf("Daemon %s does not support sending states, %v", d.ID(), err)
360+
}
361+
}
362+
}
363+
364+
func (d *Daemon) doSendStates() error {
361365
c, err := d.GetClient()
362366
if err != nil {
363367
return errors.Wrapf(err, "send states %s", d.ID())

pkg/filesystem/fs.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ type Filesystem struct {
5555
rootMountpoint string
5656
}
5757

58-
func (fs *Filesystem) tryRetainSharedDaemon(d *daemon.Daemon) {
58+
func (fs *Filesystem) TryRetainSharedDaemon(d *daemon.Daemon) {
5959
// FsDriver can be changed between two startups.
6060
if d.HostMountpoint() == fs.rootMountpoint || config.GetFsDriver() == config.FsDriverFscache {
6161
fs.sharedDaemon = d
@@ -110,13 +110,13 @@ func NewFileSystem(ctx context.Context, opt ...NewFSOpt) (*Filesystem, error) {
110110

111111
// Found shared daemon
112112
// Fscache userspace daemon has no host mountpoint.
113-
fs.tryRetainSharedDaemon(d)
113+
fs.TryRetainSharedDaemon(d)
114114

115115
}
116116

117117
for _, d := range liveDaemons {
118118
// Found shared daemon
119-
fs.tryRetainSharedDaemon(d)
119+
fs.TryRetainSharedDaemon(d)
120120
}
121121

122122
return &fs, nil

pkg/manager/daemon_adaptor.go

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,21 +99,7 @@ func (m *Manager) StartDaemon(d *daemon.Daemon) error {
9999
collector.NewDaemonInfoCollector(&d.Version, 1).Collect()
100100
d.Unlock()
101101

102-
if d.Supervisor == nil {
103-
return
104-
}
105-
106-
su := d.Supervisor
107-
err = su.FetchDaemonStates(func() error {
108-
if err := d.SendStates(); err != nil {
109-
return errors.Wrapf(err, "send daemon %s states", d.ID())
110-
}
111-
return nil
112-
})
113-
if err != nil {
114-
log.L.Errorf("send states")
115-
return
116-
}
102+
d.SendStates()
117103
}()
118104

119105
return nil

pkg/manager/manager.go

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,10 @@ func (m *Manager) UpdateDaemon(daemon *daemon.Daemon) error {
391391
m.mu.Lock()
392392
defer m.mu.Unlock()
393393

394+
return m.UpdateDaemonNoLock(daemon)
395+
}
396+
397+
func (m *Manager) UpdateDaemonNoLock(daemon *daemon.Daemon) error {
394398
if old := m.daemonStates.GetByDaemonID(daemon.ID(), nil); old == nil {
395399
return errdefs.ErrNotFound
396400
}
@@ -562,19 +566,7 @@ func (m *Manager) Recover(ctx context.Context) (map[string]*daemon.Daemon, map[s
562566
}
563567

564568
// Snapshotter's lost the daemons' states after exit, refetch them.
565-
su := d.Supervisor
566-
if su != nil {
567-
err = su.FetchDaemonStates(func() error {
568-
if err := d.SendStates(); err != nil {
569-
return errors.Wrapf(err, "send daemon %s states", d.ID())
570-
}
571-
return nil
572-
})
573-
if err != nil {
574-
log.L.Errorf("Send daemon %s states", d.ID())
575-
return
576-
}
577-
}
569+
d.SendStates()
578570
}()
579571

580572
return nil

pkg/system/system.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"regexp"
1818
"strconv"
1919
"strings"
20+
"time"
2021

2122
"github.com/gorilla/mux"
2223
"github.com/pkg/errors"
@@ -25,6 +26,7 @@ import (
2526
"github.com/containerd/nydus-snapshotter/pkg/daemon"
2627
"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
2728
"github.com/containerd/nydus-snapshotter/pkg/errdefs"
29+
"github.com/containerd/nydus-snapshotter/pkg/filesystem"
2830
"github.com/containerd/nydus-snapshotter/pkg/manager"
2931
metrics "github.com/containerd/nydus-snapshotter/pkg/metrics/tool"
3032
)
@@ -50,6 +52,7 @@ const defaultErrorCode string = "Unknown"
5052
// 3. Rolling update
5153
// 4. Daemons failures record as metrics
5254
type Controller struct {
55+
fs *filesystem.Filesystem
5356
manager *manager.Manager
5457
// httpSever *http.Server
5558
addr *net.UnixAddr
@@ -117,7 +120,7 @@ type rafsInstanceInfo struct {
117120
ImageID string `json:"image_id"`
118121
}
119122

120-
func NewSystemController(manager *manager.Manager, sock string) (*Controller, error) {
123+
func NewSystemController(fs *filesystem.Filesystem, manager *manager.Manager, sock string) (*Controller, error) {
121124
if err := os.MkdirAll(filepath.Dir(sock), os.ModePerm); err != nil {
122125
return nil, err
123126
}
@@ -134,6 +137,7 @@ func NewSystemController(manager *manager.Manager, sock string) (*Controller, er
134137
}
135138

136139
sc := Controller{
140+
fs: fs,
137141
manager: manager,
138142
addr: addr,
139143
router: mux.NewRouter(),
@@ -293,9 +297,11 @@ func (sc *Controller) upgradeNydusDaemon(d *daemon.Daemon, c upgradeRequest) err
293297
log.L.Infof("Upgrading nydusd %s, request %v", d.ID(), c)
294298

295299
manager := sc.manager
300+
fs := sc.fs
296301

297302
var new daemon.Daemon
298303
new.States = d.States
304+
new.Supervisor = d.Supervisor
299305
new.CloneInstances(d)
300306

301307
s := path.Base(d.GetAPISock())
@@ -312,6 +318,11 @@ func (sc *Controller) upgradeNydusDaemon(d *daemon.Daemon, c upgradeRequest) err
312318
return err
313319
}
314320

321+
su := manager.SupervisorSet.GetSupervisor(d.ID())
322+
if err := su.SendStatesTimeout(time.Second * 10); err != nil {
323+
return errors.Wrap(err, "Send states")
324+
}
325+
315326
if err := cmd.Start(); err != nil {
316327
return errors.Wrap(err, "start process")
317328
}
@@ -337,6 +348,8 @@ func (sc *Controller) upgradeNydusDaemon(d *daemon.Daemon, c upgradeRequest) err
337348
return errors.Wrap(err, "old daemon exits")
338349
}
339350

351+
fs.TryRetainSharedDaemon(&new)
352+
340353
if err := new.Start(); err != nil {
341354
return errors.Wrap(err, "start file system service")
342355
}
@@ -347,10 +360,12 @@ func (sc *Controller) upgradeNydusDaemon(d *daemon.Daemon, c upgradeRequest) err
347360

348361
log.L.Infof("Started service of upgraded daemon on socket %s", new.GetAPISock())
349362

350-
if err := manager.UpdateDaemon(&new); err != nil {
363+
if err := manager.UpdateDaemonNoLock(&new); err != nil {
351364
return err
352365
}
353366

367+
log.L.Infof("Upgraded daemon success on socket %s", new.GetAPISock())
368+
354369
return nil
355370
}
356371

snapshot/snapshot.go

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ import (
3939
"github.com/containerd/nydus-snapshotter/pkg/metrics"
4040
"github.com/containerd/nydus-snapshotter/pkg/metrics/collector"
4141
"github.com/containerd/nydus-snapshotter/pkg/pprof"
42+
"github.com/containerd/nydus-snapshotter/pkg/system"
4243

4344
"github.com/containerd/nydus-snapshotter/pkg/resolve"
4445
"github.com/containerd/nydus-snapshotter/pkg/store"
45-
"github.com/containerd/nydus-snapshotter/pkg/system"
4646

4747
"github.com/containerd/nydus-snapshotter/pkg/filesystem"
4848
"github.com/containerd/nydus-snapshotter/pkg/label"
@@ -122,24 +122,6 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho
122122
}()
123123
}
124124

125-
if config.IsSystemControllerEnabled() {
126-
systemController, err := system.NewSystemController(manager, config.SystemControllerAddress())
127-
if err != nil {
128-
return nil, errors.Wrap(err, "create system controller")
129-
}
130-
go func() {
131-
if err := systemController.Run(); err != nil {
132-
log.L.WithError(err).Error("Failed to start system controller")
133-
}
134-
}()
135-
pprofAddress := config.SystemControllerPprofAddress()
136-
if pprofAddress != "" {
137-
if err := pprof.NewPprofHTTPListener(pprofAddress); err != nil {
138-
return nil, errors.Wrap(err, "Failed to start pprof HTTP server")
139-
}
140-
}
141-
}
142-
143125
opts := []filesystem.NewFSOpt{
144126
filesystem.WithManager(manager),
145127
filesystem.WithNydusImageBinaryPath(cfg.DaemonConfig.NydusdPath),
@@ -169,6 +151,24 @@ func NewSnapshotter(ctx context.Context, cfg *config.SnapshotterConfig) (snapsho
169151
return nil, errors.Wrap(err, "failed to initialize nydus filesystem")
170152
}
171153

154+
if config.IsSystemControllerEnabled() {
155+
systemController, err := system.NewSystemController(nydusFs, manager, config.SystemControllerAddress())
156+
if err != nil {
157+
return nil, errors.Wrap(err, "create system controller")
158+
}
159+
go func() {
160+
if err := systemController.Run(); err != nil {
161+
log.L.WithError(err).Error("Failed to start system controller")
162+
}
163+
}()
164+
pprofAddress := config.SystemControllerPprofAddress()
165+
if pprofAddress != "" {
166+
if err := pprof.NewPprofHTTPListener(pprofAddress); err != nil {
167+
return nil, errors.Wrap(err, "Failed to start pprof HTTP server")
168+
}
169+
}
170+
}
171+
172172
// With fuse driver enabled and a fuse daemon configuration with "localfs"
173173
// storage backend, it indicates that a Blobs Manager is needed to download
174174
// blobs from registry alone with no help of nydusd or containerd.

0 commit comments

Comments
 (0)