Skip to content

Commit 4168df9

Browse files
committed
Consolidated waiting
1 parent 7841946 commit 4168df9

File tree

2 files changed

+12
-43
lines changed

2 files changed

+12
-43
lines changed

lib/hypervisor/qemu/process.go

Lines changed: 2 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ import (
1414
"syscall"
1515
"time"
1616

17-
"github.com/digitalocean/go-qemu/qmp/raw"
1817
"github.com/onkernel/hypeman/lib/hypervisor"
1918
"github.com/onkernel/hypeman/lib/logger"
2019
"github.com/onkernel/hypeman/lib/paths"
@@ -26,12 +25,9 @@ const (
2625
// socketWaitTimeout is how long to wait for QMP socket to become available after process start
2726
socketWaitTimeout = 10 * time.Second
2827

29-
// migrationTimeout is how long to wait for incoming migration to complete during restore
28+
// migrationTimeout is how long to wait for migration to complete
3029
migrationTimeout = 30 * time.Second
3130

32-
// migrationPollInterval is how often to poll migration status
33-
migrationPollInterval = 50 * time.Millisecond
34-
3531
// socketPollInterval is how often to check if socket is ready
3632
socketPollInterval = 50 * time.Millisecond
3733

@@ -310,7 +306,7 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
310306
// QEMU loads the migration data from the exec subprocess
311307
// After loading, VM is in paused state and ready for 'cont'
312308
migrationWaitStart := time.Now()
313-
if err := waitForMigrationComplete(hv.client, migrationTimeout); err != nil {
309+
if err := hv.client.WaitMigration(ctx, migrationTimeout); err != nil {
314310
return 0, nil, fmt.Errorf("wait for migration: %w", err)
315311
}
316312
log.DebugContext(ctx, "migration complete", "duration_ms", time.Since(migrationWaitStart).Milliseconds())
@@ -321,40 +317,6 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
321317
return pid, hv, nil
322318
}
323319

324-
// waitForMigrationComplete waits for incoming migration to finish loading
325-
func waitForMigrationComplete(client *Client, timeout time.Duration) error {
326-
deadline := time.Now().Add(timeout)
327-
for time.Now().Before(deadline) {
328-
info, err := client.QueryMigration()
329-
if err != nil {
330-
// Ignore errors during migration
331-
time.Sleep(migrationPollInterval)
332-
continue
333-
}
334-
335-
if info.Status == nil {
336-
// No migration status yet, might be loading
337-
time.Sleep(migrationPollInterval)
338-
continue
339-
}
340-
341-
switch *info.Status {
342-
case raw.MigrationStatusCompleted:
343-
return nil
344-
case raw.MigrationStatusFailed:
345-
return fmt.Errorf("migration failed")
346-
case raw.MigrationStatusCancelled:
347-
return fmt.Errorf("migration cancelled")
348-
case raw.MigrationStatusNone:
349-
// No active migration - incoming may have completed
350-
return nil
351-
}
352-
353-
time.Sleep(migrationPollInterval)
354-
}
355-
return fmt.Errorf("migration timeout")
356-
}
357-
358320
// vmConfigFile is the name of the file where VM config is saved for restore.
359321
const vmConfigFile = "qemu-config.json"
360322

lib/hypervisor/qemu/qmp.go

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ func (c *Client) QueryMigration() (raw.MigrationInfo, error) {
118118
}
119119

120120
// WaitMigration polls until migration completes or times out.
121+
// Works for both outgoing (snapshot) and incoming (restore) migrations.
121122
// Returns nil if migration completed successfully, error otherwise.
122123
func (c *Client) WaitMigration(ctx context.Context, timeout time.Duration) error {
123124
deadline := time.Now().Add(timeout)
@@ -131,7 +132,9 @@ func (c *Client) WaitMigration(ctx context.Context, timeout time.Duration) error
131132

132133
info, err := c.QueryMigration()
133134
if err != nil {
134-
return fmt.Errorf("query migration: %w", err)
135+
// Ignore transient errors during migration, keep polling
136+
time.Sleep(qmpMigrationPollInterval)
137+
continue
135138
}
136139

137140
// Check migration status (Status is a pointer in MigrationInfo)
@@ -144,14 +147,18 @@ func (c *Client) WaitMigration(ctx context.Context, timeout time.Duration) error
144147
switch *info.Status {
145148
case raw.MigrationStatusCompleted:
146149
return nil
150+
case raw.MigrationStatusNone:
151+
// No active migration - for incoming this means complete, for outgoing it transitions quickly
152+
return nil
147153
case raw.MigrationStatusFailed:
154+
if info.ErrorDesc != nil && *info.ErrorDesc != "" {
155+
return fmt.Errorf("migration failed: %s", *info.ErrorDesc)
156+
}
148157
return fmt.Errorf("migration failed")
149158
case raw.MigrationStatusCancelled:
150159
return fmt.Errorf("migration cancelled")
151160
case raw.MigrationStatusActive, raw.MigrationStatusSetup, raw.MigrationStatusPreSwitchover, raw.MigrationStatusDevice:
152161
// Still in progress, continue polling
153-
default:
154-
// Unknown or "none" status - might not have started yet
155162
}
156163

157164
time.Sleep(qmpMigrationPollInterval)

0 commit comments

Comments
 (0)