@@ -36,8 +36,6 @@ import (
36
36
"github.com/elastic/elastic-agent/pkg/control/v2/client"
37
37
"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
38
38
"github.com/elastic/elastic-agent/pkg/core/logger"
39
- "github.com/elastic/elastic-agent/pkg/core/process"
40
- "github.com/elastic/elastic-agent/pkg/utils"
41
39
agtversion "github.com/elastic/elastic-agent/pkg/version"
42
40
currentagtversion "github.com/elastic/elastic-agent/version"
43
41
)
@@ -71,12 +69,21 @@ func init() {
71
69
}
72
70
}
73
71
74
- // TODO substitute all the references to watcher with calls to the interface
72
+ // WatcherHelper abstracts the operations that Upgrader triggers on the elastic-agent watcher.
73
+ // It exists so that tests can substitute the watcher and verify how Upgrader interacts with it.
75
74
type WatcherHelper interface {
75
+ // InvokeWatcher starts an elastic-agent watcher using the agentExecutable passed as argument and returns the resulting *exec.Cmd.
76
76
InvokeWatcher (log * logger.Logger , agentExecutable string ) (* exec.Cmd , error )
77
- SelectWatcherExecutable (topDir string , agentInstalls ... agentInstall ) string
77
+ // SelectWatcherExecutable returns the path of the elastic-agent executable used to invoke the watcher: the one belonging
78
+ // to the more recent of the previous (the agent that started the upgrade) and current (the agent that will run after restart)
79
+ // agent installations.
80
+ SelectWatcherExecutable (topDir string , previous agentInstall , current agentInstall ) string
81
+ // WaitForWatcher listens for changes to the update marker, waiting up to waitTime for the elastic-agent watcher to set the UPG_WATCHING state
82
+ // in the upgrade details' metadata.
78
83
WaitForWatcher (ctx context.Context , log * logger.Logger , markerFilePath string , waitTime time.Duration ) error
79
- TakeOverWatcher (ctx context.Context , topDir string ) (* filelock.AppLocker , error )
84
+ // TakeOverWatcher looks for watcher processes and terminates them while concurrently trying to acquire the watcher AppLocker.
85
+ // It returns once the AppLocker has been acquired, or with an error if the lock could not be acquired.
86
+ TakeOverWatcher (ctx context.Context , log * logger.Logger , topDir string ) (* filelock.AppLocker , error )
80
87
}
81
88
82
89
// Upgrader performs an upgrade
@@ -88,6 +95,7 @@ type Upgrader struct {
88
95
upgradeable bool
89
96
fleetServerURI string
90
97
markerWatcher MarkerWatcher
98
+ watcherHelper WatcherHelper
91
99
}
92
100
93
101
// IsUpgradeable when agent is installed and running as a service or flag was provided.
@@ -98,14 +106,15 @@ func IsUpgradeable() bool {
98
106
}
99
107
100
108
// NewUpgrader creates an Upgrader capable of performing upgrade operations; watcherHelper is stored without a nil check and used for watcher interactions.
101
- func NewUpgrader (log * logger.Logger , settings * artifact.Config , upgradeConfig * configuration.UpgradeConfig , agentInfo info.Agent ) (* Upgrader , error ) {
109
+ func NewUpgrader (log * logger.Logger , settings * artifact.Config , upgradeConfig * configuration.UpgradeConfig , agentInfo info.Agent , watcherHelper WatcherHelper ) (* Upgrader , error ) {
102
110
return & Upgrader {
103
111
log : log ,
104
112
settings : settings ,
105
113
upgradeSettings : upgradeConfig ,
106
114
agentInfo : agentInfo ,
107
115
upgradeable : IsUpgradeable (),
108
116
markerWatcher : newMarkerFileWatcher (markerFilePath (paths .Data ()), log ),
117
+ watcherHelper : watcherHelper ,
109
118
}, nil
110
119
}
111
120
@@ -368,16 +377,16 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s
368
377
return nil , goerrors .Join (err , rollbackErr )
369
378
}
370
379
371
- watcherExecutable := selectWatcherExecutable (paths .Top (), previous , current )
380
+ watcherExecutable := u . watcherHelper . SelectWatcherExecutable (paths .Top (), previous , current )
372
381
373
382
var watcherCmd * exec.Cmd
374
- if watcherCmd , err = InvokeWatcher (u .log , watcherExecutable ); err != nil {
383
+ if watcherCmd , err = u . watcherHelper . InvokeWatcher (u .log , watcherExecutable ); err != nil {
375
384
u .log .Errorw ("Rolling back: starting watcher failed" , "error.message" , err )
376
385
rollbackErr := rollbackInstall (ctx , u .log , paths .Top (), hashedDir , currentVersionedHome )
377
386
return nil , goerrors .Join (err , rollbackErr )
378
387
}
379
388
380
- watcherWaitErr := waitForWatcher (ctx , u .log , markerFilePath (paths .Data ()), watcherMaxWaitTime )
389
+ watcherWaitErr := u . watcherHelper . WaitForWatcher (ctx , u .log , markerFilePath (paths .Data ()), watcherMaxWaitTime )
381
390
if watcherWaitErr != nil {
382
391
killWatcherErr := watcherCmd .Process .Kill ()
383
392
rollbackErr := rollbackInstall (ctx , u .log , paths .Top (), hashedDir , currentVersionedHome )
@@ -408,7 +417,7 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir st
408
417
}
409
418
410
419
// Invoke watcher again
411
- _ , err = InvokeWatcher (u .log , paths .BinaryPath (paths .VersionedHome (topDir ), agentName ))
420
+ _ , err = u . watcherHelper . InvokeWatcher (u .log , paths .BinaryPath (paths .VersionedHome (topDir ), agentName ))
412
421
if err != nil {
413
422
return nil , fmt .Errorf ("invoking watcher: %w" , err )
414
423
}
@@ -418,7 +427,7 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir st
418
427
}
419
428
420
429
func (u * Upgrader ) PersistManualRollback (ctx context.Context , topDir string ) error {
421
- watcherApplock , err := u .takeOverWatcher (ctx , topDir )
430
+ watcherApplock , err := u .watcherHelper . TakeOverWatcher (ctx , u . log , topDir )
422
431
if err != nil {
423
432
return fmt .Errorf ("taking over watcher processes: %w" , err )
424
433
}
@@ -443,107 +452,6 @@ func (u *Upgrader) PersistManualRollback(ctx context.Context, topDir string) err
443
452
return nil
444
453
}
445
454
446
- func (u * Upgrader ) takeOverWatcher (ctx context.Context , topDir string ) (* filelock.AppLocker , error ) {
447
-
448
- takeoverCtx , takeoverCancel := context .WithTimeout (ctx , 30 * time .Second )
449
- defer takeoverCancel ()
450
- go func () {
451
- killingTicker := time .NewTicker (500 * time .Millisecond )
452
- defer killingTicker .Stop ()
453
- for {
454
- select {
455
- case <- takeoverCtx .Done ():
456
- return
457
- case <- killingTicker .C :
458
- pids , err := utils .GetWatcherPIDs ()
459
- if err != nil {
460
- u .log .Errorf ("error listing watcher processes: %s" , err )
461
- continue
462
- }
463
-
464
- // this should run continuously, concurrently with the attempts to acquire the app locker
465
- for _ , pid := range pids {
466
- u .log .Debugf ("attempting to kill watcher process with PID: %d" , pid )
467
- watcherProcess , findProcErr := os .FindProcess (pid )
468
- if findProcErr != nil {
469
- u .log .Errorf ("error finding process with PID: %d: %s" , pid , findProcErr )
470
- continue
471
- }
472
- killProcErr := process .Terminate (watcherProcess )
473
- if killProcErr != nil {
474
- u .log .Errorf ("error killing process with PID: %d: %s" , pid , killProcErr )
475
- }
476
- u .log .Debugf ("killed watcher process with PID: %d" , pid )
477
- }
478
- }
479
- }
480
- }()
481
-
482
- // we should retry to take over the AppLocker for 30s, but AppLocker interface is limited
483
- takeOverTicker := time .NewTicker (100 * time .Millisecond )
484
- defer takeOverTicker .Stop ()
485
- for {
486
- select {
487
- case <- takeoverCtx .Done ():
488
- return nil , fmt .Errorf ("timed out taking over watcher applocker" )
489
- case <- takeOverTicker .C :
490
- locker := filelock .NewAppLocker (topDir , "watcher.lock" )
491
- err := locker .TryLock ()
492
- if err != nil {
493
- u .log .Errorf ("error locking watcher applocker: %s" , err )
494
- continue
495
- }
496
- return locker , nil
497
- }
498
- }
499
- }
500
-
501
- func selectWatcherExecutable (topDir string , previous agentInstall , current agentInstall ) string {
502
- // check if the version being installed (current) is lower than the previous (currently installed) version, i.e. this is a downgrade
503
- if current .parsedVersion .Less (* previous .parsedVersion ) {
504
- // downgrade: keep using the previous (currently installed) agent executable for the watcher, because the older agent doesn't understand the newer agent's path structure.
505
- return paths .BinaryPath (filepath .Join (topDir , previous .versionedHome ), agentName )
506
- } else {
507
- // otherwise use the new agent executable as it should be able to parse the new update marker
508
- return paths .BinaryPath (filepath .Join (topDir , current .versionedHome ), agentName )
509
- }
510
- }
511
-
512
- func waitForWatcher (ctx context.Context , log * logger.Logger , markerFilePath string , waitTime time.Duration ) error {
513
- return waitForWatcherWithTimeoutCreationFunc (ctx , log , markerFilePath , waitTime , context .WithTimeout )
514
- }
515
-
516
- type createContextWithTimeout func (ctx context.Context , timeout time.Duration ) (context.Context , context.CancelFunc )
517
-
518
- func waitForWatcherWithTimeoutCreationFunc (ctx context.Context , log * logger.Logger , markerFilePath string , waitTime time.Duration , createTimeoutContext createContextWithTimeout ) error {
519
- // Wait for the watcher to be up and running
520
- watcherContext , cancel := createTimeoutContext (ctx , waitTime )
521
- defer cancel ()
522
-
523
- markerWatcher := newMarkerFileWatcher (markerFilePath , log )
524
- err := markerWatcher .Run (watcherContext )
525
- if err != nil {
526
- return fmt .Errorf ("error starting update marker watcher: %w" , err )
527
- }
528
-
529
- log .Infof ("waiting up to %s for upgrade watcher to set %s state in upgrade marker" , waitTime , details .StateWatching )
530
-
531
- for {
532
- select {
533
- case updMarker := <- markerWatcher .Watch ():
534
- if updMarker .Details != nil && updMarker .Details .State == details .StateWatching {
535
- // watcher started and it is watching, all good
536
- log .Infof ("upgrade watcher set %s state in upgrade marker: exiting wait loop" , details .StateWatching )
537
- return nil
538
- }
539
-
540
- case <- watcherContext .Done ():
541
- log .Errorf ("upgrade watcher did not start watching within %s or context has expired" , waitTime )
542
- return goerrors .Join (ErrWatcherNotStarted , watcherContext .Err ())
543
- }
544
- }
545
- }
546
-
547
455
// Ack acks last upgrade action
548
456
func (u * Upgrader ) Ack (ctx context.Context , acker acker.Acker ) error {
549
457
// get upgrade action
0 commit comments