Skip to content

Commit e04caf4

Browse files
committed
Added abort upgrade command
1 parent c00ec6a commit e04caf4

File tree

6 files changed

+232
-30
lines changed

6 files changed

+232
-30
lines changed

client/api.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ type API interface {
5757
// such that the starters will retry the upgrade once more.
5858
RetryDatabaseUpgrade(ctx context.Context) error
5959

60+
// AbortDatabaseUpgrade removes the existing upgrade plan.
61+
// Note that Starters working on an entry of the upgrade
62+
// will finish that entry.
63+
// If there is no plan, a NotFoundError will be returned.
64+
AbortDatabaseUpgrade(ctx context.Context) error
65+
6066
// UpgradeStatus returns the status of any upgrade plan
6167
UpgradeStatus(context.Context) (UpgradeStatus, error)
6268
}

client/client.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,31 @@ func (c *client) RetryDatabaseUpgrade(ctx context.Context) error {
247247
return nil
248248
}
249249

250+
// AbortDatabaseUpgrade removes the existing upgrade plan.
251+
// Note that Starters working on an entry of the upgrade
252+
// will finish that entry.
253+
// If there is no plan, a NotFoundError will be returned.
254+
func (c *client) AbortDatabaseUpgrade(ctx context.Context) error {
255+
url := c.createURL("/database-auto-upgrade", nil)
256+
257+
req, err := http.NewRequest("DELETE", url, nil)
258+
if err != nil {
259+
return maskAny(err)
260+
}
261+
if ctx != nil {
262+
req = req.WithContext(ctx)
263+
}
264+
resp, err := c.client.Do(req)
265+
if err != nil {
266+
return maskAny(err)
267+
}
268+
if err := c.handleResponse(resp, "DELETE", url, nil); err != nil {
269+
return maskAny(err)
270+
}
271+
272+
return nil
273+
}
274+
250275
// UpgradeStatus returns the status of any upgrade plan
251276
func (c *client) UpgradeStatus(ctx context.Context) (UpgradeStatus, error) {
252277
url := c.createURL("/database-auto-upgrade", nil)

client/error.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ type ErrorResponse struct {
8181
Error string
8282
}
8383

84+
// IsNotFound returns true if the given error is caused by a NotFoundError.
85+
func IsNotFound(err error) bool {
86+
return IsStatusErrorWithCode(err, http.StatusNotFound)
87+
}
88+
8489
// IsServiceUnavailable returns true if the given error is caused by a ServiceUnavailableError.
8590
func IsServiceUnavailable(err error) bool {
8691
return IsStatusErrorWithCode(err, http.StatusServiceUnavailable)
@@ -101,6 +106,11 @@ func IsInternalServer(err error) bool {
101106
return IsStatusErrorWithCode(err, http.StatusInternalServerError)
102107
}
103108

109+
// NewNotFoundError creates a not found error with given message.
110+
func NewNotFoundError(msg string) error {
111+
return StatusError{StatusCode: http.StatusNotFound, message: msg}
112+
}
113+
104114
// NewServiceUnavailableError creates a service unavailable error with given message.
105115
func NewServiceUnavailableError(msg string) error {
106116
return StatusError{StatusCode: http.StatusServiceUnavailable, message: msg}

service/server.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,32 @@ func (s *httpServer) databaseAutoUpgradeHandler(w http.ResponseWriter, r *http.R
668668
w.Write([]byte("OK"))
669669
}
670670
}
671+
case "DELETE":
672+
// Abort the upgrade process
673+
if !isRunningMaster {
674+
// We're not the starter leader.
675+
// Forward the request to the leader.
676+
c, err := createMasterClient()
677+
if err != nil {
678+
handleError(w, err)
679+
} else {
680+
if err := c.AbortDatabaseUpgrade(ctx); err != nil {
681+
s.log.Debug().Err(err).Msg("Forwarding AbortDatabaseUpgrade failed")
682+
handleError(w, err)
683+
} else {
684+
w.WriteHeader(http.StatusOK)
685+
w.Write([]byte("OK"))
686+
}
687+
}
688+
} else {
689+
// We're the starter leader, process the request
690+
if err := s.context.UpgradeManager().AbortDatabaseUpgrade(ctx); err != nil {
691+
handleError(w, err)
692+
} else {
693+
w.WriteHeader(http.StatusOK)
694+
w.Write([]byte("OK"))
695+
}
696+
}
671697
case "GET":
672698
if status, err := s.context.UpgradeManager().Status(ctx); err != nil {
673699
handleError(w, err)

service/upgrade_manager.go

Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ type UpgradeManager interface {
4848
// such that the starters will retry the upgrade once more.
4949
RetryDatabaseUpgrade(ctx context.Context) error
5050

51+
// AbortDatabaseUpgrade removes the existing upgrade plan.
52+
// Note that Starters working on an entry of the upgrade
53+
// will finish that entry.
54+
// If there is no plan, a NotFoundError will be returned.
55+
AbortDatabaseUpgrade(ctx context.Context) error
56+
5157
// Status returns the status of any upgrade plan
5258
Status(context.Context) (client.UpgradeStatus, error)
5359

@@ -353,7 +359,9 @@ func (m *upgradeManager) StartDatabaseUpgrade(ctx context.Context, force bool) e
353359
// Save plan
354360
m.log.Debug().Msg("Writing upgrade plan")
355361
overwrite := true
356-
if _, err := m.writeUpgradePlan(ctx, plan, overwrite); err != nil {
362+
if _, err := m.writeUpgradePlan(ctx, plan, overwrite); driver.IsPreconditionFailed(err) {
363+
return errors.Wrap(err, "Failed to write upgrade plan because is was outdated or removed")
364+
} else if err != nil {
357365
return errors.Wrap(err, "Failed to write upgrade plan")
358366
}
359367

@@ -397,7 +405,9 @@ func (m *upgradeManager) RetryDatabaseUpgrade(ctx context.Context) error {
397405
// Reset failures and write plan
398406
plan.ResetFailures()
399407
overwrite := false
400-
if _, err := m.writeUpgradePlan(ctx, plan, overwrite); err != nil {
408+
if _, err := m.writeUpgradePlan(ctx, plan, overwrite); driver.IsPreconditionFailed(err) {
409+
return errors.Wrap(err, "Failed to write upgrade plan because is was outdated or removed")
410+
} else if err != nil {
401411
return errors.Wrap(err, "Failed to write upgrade plan")
402412
}
403413

@@ -407,12 +417,73 @@ func (m *upgradeManager) RetryDatabaseUpgrade(ctx context.Context) error {
407417
return nil
408418
}
409419

420+
// AbortDatabaseUpgrade removes the existing upgrade plan.
421+
// Note that Starters working on an entry of the upgrade
422+
// will finish that entry.
423+
// If there is no plan, a NotFoundError will be returned.
424+
func (m *upgradeManager) AbortDatabaseUpgrade(ctx context.Context) error {
425+
m.mutex.Lock()
426+
defer m.mutex.Unlock()
427+
428+
// Fetch mode
429+
_, _, mode := m.upgradeManagerContext.ClusterConfig()
430+
431+
if !mode.HasAgency() {
432+
// Without an agency there is not upgrade plan to abort
433+
return maskAny(client.NewBadRequestError("Abort needs an agency"))
434+
}
435+
436+
// Run upgrade with agency.
437+
// Create an agency lock, so we know we're the only one to create a plan.
438+
m.log.Debug().Msg("Creating agency API")
439+
api, err := m.createAgencyAPI()
440+
if err != nil {
441+
return maskAny(err)
442+
}
443+
m.log.Debug().Msg("Creating lock")
444+
lock, err := agency.NewLock(m, api, upgradeManagerLockKey, "", upgradeManagerLockTTL)
445+
if err != nil {
446+
return maskAny(err)
447+
}
448+
449+
// Claim the upgrade lock
450+
m.log.Debug().Msg("Locking lock")
451+
if err := lock.Lock(ctx); err != nil {
452+
m.log.Debug().Err(err).Msg("Lock failed")
453+
return maskAny(err)
454+
}
455+
456+
// Close agency lock when we're done
457+
defer func() {
458+
m.log.Debug().Msg("Unlocking lock")
459+
lock.Unlock(context.Background())
460+
}()
461+
462+
// Check plan
463+
if _, err := m.readUpgradePlan(ctx); agency.IsKeyNotFound(err) {
464+
// There is no plan
465+
return maskAny(client.NewNotFoundError("There is no upgrade plan"))
466+
}
467+
468+
// Remove plan
469+
m.log.Debug().Msg("Removing upgrade plan")
470+
if err := m.removeUpgradePlan(ctx); err != nil {
471+
return errors.Wrap(err, "Failed to remove upgrade plan")
472+
}
473+
474+
// Inform user
475+
m.log.Info().Msgf("Removed upgrade plan")
476+
477+
// We're done
478+
return nil
479+
}
480+
410481
// Status returns the current status of the upgrade process.
411482
func (m *upgradeManager) Status(ctx context.Context) (client.UpgradeStatus, error) {
412483
plan, err := m.readUpgradePlan(ctx)
413484
if agency.IsKeyNotFound(err) {
414-
// No plan, return empty status
415-
return client.UpgradeStatus{}, nil
485+
// No plan, return not found error
486+
return client.UpgradeStatus{}, maskAny(client.NewNotFoundError("There is no upgrade plan"))
416487
} else if err != nil {
417488
return client.UpgradeStatus{}, maskAny(err)
418489
}
@@ -625,6 +696,18 @@ func (m *upgradeManager) writeUpgradePlan(ctx context.Context, plan UpgradePlan,
625696
return plan, nil
626697
}
627698

699+
// removeUpgradePlan removes the current upgrade plan from the agency.
700+
func (m *upgradeManager) removeUpgradePlan(ctx context.Context) error {
701+
api, err := m.createAgencyAPI()
702+
if err != nil {
703+
return maskAny(err)
704+
}
705+
if err := api.RemoveKey(ctx, upgradePlanKey); err != nil {
706+
return maskAny(err)
707+
}
708+
return nil
709+
}
710+
628711
// RunWatchUpgradePlan keeps watching the upgrade plan in the agency.
629712
// Once it detects that this starter has to act, it does.
630713
func (m *upgradeManager) RunWatchUpgradePlan(ctx context.Context) {

upgrade.go

Lines changed: 78 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,25 @@ var (
5151
Short: "Retry a failed upgrade of an ArangoDB deployment to a new version",
5252
Run: cmdRetryUpgradeRun,
5353
}
54+
cmdAbort = &cobra.Command{
55+
Use: "abort",
56+
Short: "Abort an operation",
57+
Run: cmdShowUsage,
58+
}
59+
cmdAbortUpgrade = &cobra.Command{
60+
Use: "upgrade",
61+
Short: "Abort (or remove) an upgrade of an ArangoDB deployment to a new version",
62+
Run: cmdAbortUpgradeRun,
63+
}
5464
upgradeOptions struct {
5565
starterEndpoint string
5666
}
5767
retryUpgradeOptions struct {
5868
starterEndpoint string
5969
}
70+
abortUpgradeOptions struct {
71+
starterEndpoint string
72+
}
6073
)
6174

6275
func init() {
@@ -66,9 +79,14 @@ func init() {
6679
f = cmdRetryUpgrade.Flags()
6780
f.StringVar(&retryUpgradeOptions.starterEndpoint, "starter.endpoint", "", "The endpoint of the starter to connect to. E.g. http://localhost:8528")
6881

82+
f = cmdAbortUpgrade.Flags()
83+
f.StringVar(&abortUpgradeOptions.starterEndpoint, "starter.endpoint", "", "The endpoint of the starter to connect to. E.g. http://localhost:8528")
84+
6985
cmdMain.AddCommand(cmdUpgrade)
7086
cmdMain.AddCommand(cmdRetry)
7187
cmdRetry.AddCommand(cmdRetryUpgrade)
88+
cmdMain.AddCommand(cmdAbort)
89+
cmdAbort.AddCommand(cmdAbortUpgrade)
7290
}
7391

7492
func cmdUpgradeRun(cmd *cobra.Command, args []string) {
@@ -79,25 +97,30 @@ func cmdRetryUpgradeRun(cmd *cobra.Command, args []string) {
7997
runUpgrade(retryUpgradeOptions.starterEndpoint, false, true)
8098
}
8199

82-
func runUpgrade(starterEndpoint string, force, retry bool) {
100+
func cmdAbortUpgradeRun(cmd *cobra.Command, args []string) {
83101
// Setup logging
84102
consoleOnly := true
85103
configureLogging(consoleOnly)
86104

87-
// Check options
88-
if starterEndpoint == "" {
89-
log.Fatal().Msg("--starter.endpoint must be set")
90-
}
91-
ep, err := url.Parse(starterEndpoint)
92-
if err != nil {
93-
log.Fatal().Err(err).Msg("--starter.endpoint is invalid")
105+
// Create starter client
106+
c := mustCreateStarterClient(abortUpgradeOptions.starterEndpoint)
107+
ctx := context.Background()
108+
if err := c.AbortDatabaseUpgrade(ctx); client.IsNotFound(err) {
109+
log.Fatal().Msg("Database automatic upgrade plan does not exist")
110+
} else if err != nil {
111+
log.Fatal().Err(err).Msg("Failed to abort database automatic upgrade")
112+
} else {
113+
log.Info().Msg("Database automatic upgrade plan has been removed")
94114
}
115+
}
116+
117+
func runUpgrade(starterEndpoint string, force, retry bool) {
118+
// Setup logging
119+
consoleOnly := true
120+
configureLogging(consoleOnly)
95121

96122
// Create starter client
97-
c, err := client.NewArangoStarterClient(*ep)
98-
if err != nil {
99-
log.Fatal().Err(err).Msg("Failed to create Starter client")
100-
}
123+
c := mustCreateStarterClient(starterEndpoint)
101124
ctx := context.Background()
102125
if retry {
103126
if err := c.RetryDatabaseUpgrade(ctx); err != nil {
@@ -116,21 +139,30 @@ func runUpgrade(starterEndpoint string, force, retry bool) {
116139
finished := ""
117140
for {
118141
status, err := c.UpgradeStatus(ctx)
119-
if err != nil {
120-
log.Error().Err(err).Msg("Failed to fetch upgrade status")
121-
}
122-
if status.Failed {
123-
log.Error().Str("reason", status.Reason).Msg("Database upgrade has failed")
124-
return
125-
}
126-
if status.Ready {
127-
log.Info().Msg("Database upgrade has finished")
142+
if client.IsNotFound(err) {
143+
// Upgrade plan is gone
144+
log.Error().Msg("Upgrade plan is gone.")
128145
return
129-
}
130-
r, f := formatServerStatusList(status.ServersRemaining), formatServerStatusList(status.ServersUpgraded)
131-
if remaining != r || finished != f {
132-
remaining, finished = r, f
133-
log.Info().Msgf("Servers upgraded: %s, remaining servers: %s", finished, remaining)
146+
} else if err != nil {
147+
log.Error().Err(err).Msg("Failed to fetch upgrade status")
148+
} else {
149+
if status.Failed {
150+
log.Error().Str("reason", status.Reason).Msg("Database upgrade has failed")
151+
return
152+
}
153+
if status.Ready {
154+
log.Info().Msg("Database upgrade has finished")
155+
// Let's remove the plan now
156+
if err := c.AbortDatabaseUpgrade(ctx); err != nil {
157+
log.Warn().Err(err).Msg("Failed to remove upgrade plan")
158+
}
159+
return
160+
}
161+
r, f := formatServerStatusList(status.ServersRemaining), formatServerStatusList(status.ServersUpgraded)
162+
if remaining != r || finished != f {
163+
remaining, finished = r, f
164+
log.Info().Msgf("Servers upgraded: %s, remaining servers: %s", finished, remaining)
165+
}
134166
}
135167
time.Sleep(time.Second)
136168
}
@@ -161,3 +193,23 @@ func formatServerStatusList(list []client.UpgradeStatusServer) string {
161193
})
162194
return strings.Join(strList, ", ")
163195
}
196+
197+
// mustCreateStarterClient creates a client for a starter at the given endpoint.
198+
// Any errors cause the process to exit.
199+
func mustCreateStarterClient(endpoint string) client.API {
200+
// Check options
201+
if endpoint == "" {
202+
log.Fatal().Msg("--starter.endpoint must be set")
203+
}
204+
ep, err := url.Parse(endpoint)
205+
if err != nil {
206+
log.Fatal().Err(err).Msg("--starter.endpoint is invalid")
207+
}
208+
209+
// Create starter client
210+
c, err := client.NewArangoStarterClient(*ep)
211+
if err != nil {
212+
log.Fatal().Err(err).Msg("Failed to create Starter client")
213+
}
214+
return c
215+
}

0 commit comments

Comments
 (0)