@@ -3,6 +3,7 @@ package qemu
33
44import (
55 "context"
6+ "encoding/json"
67 "fmt"
78 "net"
89 "os"
@@ -14,10 +15,26 @@ import (
1415 "time"
1516
1617 "github.com/onkernel/hypeman/lib/hypervisor"
18+ "github.com/onkernel/hypeman/lib/logger"
1719 "github.com/onkernel/hypeman/lib/paths"
1820 "gvisor.dev/gvisor/pkg/cleanup"
1921)
2022
23+ // Timeout constants for QEMU operations
24+ const (
25+ // socketWaitTimeout is how long to wait for QMP socket to become available after process start
26+ socketWaitTimeout = 10 * time .Second
27+
28+ // migrationTimeout is how long to wait for migration to complete
29+ migrationTimeout = 30 * time .Second
30+
31+ // socketPollInterval is how often to check if socket is ready
32+ socketPollInterval = 50 * time .Millisecond
33+
34+ // socketDialTimeout is timeout for individual socket connection attempts
35+ socketDialTimeout = 100 * time .Millisecond
36+ )
37+
2138func init () {
2239 hypervisor .RegisterSocketName (hypervisor .TypeQEMU , "qemu.sock" )
2340}
@@ -88,31 +105,34 @@ func (s *Starter) GetVersion(p *paths.Paths) (string, error) {
88105 return "" , fmt .Errorf ("could not parse QEMU version from: %s" , string (output ))
89106}
90107
91- // StartVM launches QEMU with the VM configuration and returns a Hypervisor client.
92- // QEMU receives all configuration via command-line arguments at process start.
93- func (s * Starter ) StartVM (ctx context.Context , p * paths.Paths , version string , socketPath string , config hypervisor.VMConfig ) (int , hypervisor.Hypervisor , error ) {
108+ // buildQMPArgs returns the base QMP socket arguments for QEMU.
109+ func buildQMPArgs (socketPath string ) []string {
110+ return []string {
111+ "-chardev" , fmt .Sprintf ("socket,id=qmp,path=%s,server=on,wait=off" , socketPath ),
112+ "-mon" , "chardev=qmp,mode=control" ,
113+ }
114+ }
115+
116+ // startQEMUProcess handles the common QEMU process startup logic.
117+ // Returns the PID, hypervisor client, and a cleanup function.
118+ // The cleanup function must be called on error; call cleanup.Release() on success.
119+ func (s * Starter ) startQEMUProcess (ctx context.Context , p * paths.Paths , version string , socketPath string , args []string ) (int , * QEMU , * cleanup.Cleanup , error ) {
120+ log := logger .FromContext (ctx )
121+
94122 // Get binary path
95123 binaryPath , err := s .GetBinaryPath (p , version )
96124 if err != nil {
97- return 0 , nil , fmt .Errorf ("get binary: %w" , err )
125+ return 0 , nil , nil , fmt .Errorf ("get binary: %w" , err )
98126 }
99127
100128 // Check if socket is already in use
101129 if isSocketInUse (socketPath ) {
102- return 0 , nil , fmt .Errorf ("socket already in use, QEMU may be running at %s" , socketPath )
130+ return 0 , nil , nil , fmt .Errorf ("socket already in use, QEMU may be running at %s" , socketPath )
103131 }
104132
105133 // Remove stale socket if exists
106134 os .Remove (socketPath )
107135
108- // Build command arguments: QMP socket + VM configuration
109- args := []string {
110- "-chardev" , fmt .Sprintf ("socket,id=qmp,path=%s,server=on,wait=off" , socketPath ),
111- "-mon" , "chardev=qmp,mode=control" ,
112- }
113- // Append VM configuration as command-line arguments
114- args = append (args , BuildArgs (config )... )
115-
116136 // Create command
117137 cmd := exec .Command (binaryPath , args ... )
118138
@@ -125,7 +145,7 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s
125145 instanceDir := filepath .Dir (socketPath )
126146 logsDir := filepath .Join (instanceDir , "logs" )
127147 if err := os .MkdirAll (logsDir , 0755 ); err != nil {
128- return 0 , nil , fmt .Errorf ("create logs directory: %w" , err )
148+ return 0 , nil , nil , fmt .Errorf ("create logs directory: %w" , err )
129149 }
130150
131151 vmmLogFile , err := os .OpenFile (
@@ -134,49 +154,148 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s
134154 0644 ,
135155 )
136156 if err != nil {
137- return 0 , nil , fmt .Errorf ("create vmm log: %w" , err )
157+ return 0 , nil , nil , fmt .Errorf ("create vmm log: %w" , err )
138158 }
139159 defer vmmLogFile .Close ()
140160
141161 cmd .Stdout = vmmLogFile
142162 cmd .Stderr = vmmLogFile
143163
164+ processStartTime := time .Now ()
144165 if err := cmd .Start (); err != nil {
145- return 0 , nil , fmt .Errorf ("start qemu: %w" , err )
166+ return 0 , nil , nil , fmt .Errorf ("start qemu: %w" , err )
146167 }
147168
148169 pid := cmd .Process .Pid
170+ log .DebugContext (ctx , "QEMU process started" , "pid" , pid , "duration_ms" , time .Since (processStartTime ).Milliseconds ())
149171
150172 // Setup cleanup to kill the process if subsequent steps fail
151173 cu := cleanup .Make (func () {
152174 syscall .Kill (pid , syscall .SIGKILL )
153175 })
154- defer cu .Clean ()
155176
156177 // Wait for socket to be ready
157- if err := waitForSocket (socketPath , 10 * time .Second ); err != nil {
178+ socketWaitStart := time .Now ()
179+ if err := waitForSocket (socketPath , socketWaitTimeout ); err != nil {
180+ cu .Clean ()
158181 vmmLogPath := filepath .Join (logsDir , "vmm.log" )
159182 if logData , readErr := os .ReadFile (vmmLogPath ); readErr == nil && len (logData ) > 0 {
160- return 0 , nil , fmt .Errorf ("%w; vmm.log: %s" , err , string (logData ))
183+ return 0 , nil , nil , fmt .Errorf ("%w; vmm.log: %s" , err , string (logData ))
161184 }
162- return 0 , nil , err
185+ return 0 , nil , nil , err
163186 }
187+ log .DebugContext (ctx , "QMP socket ready" , "duration_ms" , time .Since (socketWaitStart ).Milliseconds ())
164188
165189 // Create QMP client
166190 hv , err := New (socketPath )
167191 if err != nil {
168- return 0 , nil , fmt .Errorf ("create client: %w" , err )
192+ cu .Clean ()
193+ return 0 , nil , nil , fmt .Errorf ("create client: %w" , err )
194+ }
195+
196+ return pid , hv , & cu , nil
197+ }
198+
199+ // StartVM launches QEMU with the VM configuration and returns a Hypervisor client.
200+ // QEMU receives all configuration via command-line arguments at process start.
201+ func (s * Starter ) StartVM (ctx context.Context , p * paths.Paths , version string , socketPath string , config hypervisor.VMConfig ) (int , hypervisor.Hypervisor , error ) {
202+ log := logger .FromContext (ctx )
203+
204+ // Build command arguments: QMP socket + VM configuration
205+ args := buildQMPArgs (socketPath )
206+ args = append (args , BuildArgs (config )... )
207+
208+ pid , hv , cu , err := s .startQEMUProcess (ctx , p , version , socketPath , args )
209+ if err != nil {
210+ return 0 , nil , err
211+ }
212+ defer cu .Clean ()
213+
214+ // Save config for potential restore later
215+ // QEMU migration files only contain memory state, not device config
216+ instanceDir := filepath .Dir (socketPath )
217+ if err := saveVMConfig (instanceDir , config ); err != nil {
218+ // Non-fatal - restore just won't work
219+ log .WarnContext (ctx , "failed to save VM config for restore" , "error" , err )
169220 }
170221
171- // Success - release cleanup to prevent killing the process
172222 cu .Release ()
173223 return pid , hv , nil
174224}
175225
176226// RestoreVM starts QEMU and restores VM state from a snapshot.
177- // Not yet implemented for QEMU .
227+ // The VM is in paused state after restore; caller should call Resume() to continue execution .
178228func (s * Starter ) RestoreVM (ctx context.Context , p * paths.Paths , version string , socketPath string , snapshotPath string ) (int , hypervisor.Hypervisor , error ) {
179- return 0 , nil , fmt .Errorf ("restore not supported by QEMU implementation" )
229+ log := logger .FromContext (ctx )
230+ startTime := time .Now ()
231+
232+ // Load saved VM config from snapshot directory
233+ // QEMU requires exact same command-line args as when snapshot was taken
234+ configLoadStart := time .Now ()
235+ config , err := loadVMConfig (snapshotPath )
236+ if err != nil {
237+ return 0 , nil , fmt .Errorf ("load vm config from snapshot: %w" , err )
238+ }
239+ log .DebugContext (ctx , "loaded VM config from snapshot" , "duration_ms" , time .Since (configLoadStart ).Milliseconds ())
240+
241+ // Build command arguments: QMP socket + VM configuration + incoming migration
242+ args := buildQMPArgs (socketPath )
243+ args = append (args , BuildArgs (config )... )
244+
245+ // Add incoming migration flag to restore from snapshot
246+ // The "file:" protocol is deprecated in QEMU 7.2+, use "exec:cat < path" instead
247+ memoryFile := filepath .Join (snapshotPath , "memory" )
248+ incomingURI := "exec:cat < " + memoryFile
249+ args = append (args , "-incoming" , incomingURI )
250+
251+ pid , hv , cu , err := s .startQEMUProcess (ctx , p , version , socketPath , args )
252+ if err != nil {
253+ return 0 , nil , err
254+ }
255+ defer cu .Clean ()
256+
257+ // Wait for VM to be ready after loading migration data
258+ // QEMU transitions from "inmigrate" to "paused" when loading completes
259+ migrationWaitStart := time .Now ()
260+ if err := hv .client .WaitVMReady (ctx , migrationTimeout ); err != nil {
261+ return 0 , nil , fmt .Errorf ("wait for vm ready: %w" , err )
262+ }
263+ log .DebugContext (ctx , "VM ready" , "duration_ms" , time .Since (migrationWaitStart ).Milliseconds ())
264+
265+ cu .Release ()
266+ log .DebugContext (ctx , "QEMU restore complete" , "pid" , pid , "total_duration_ms" , time .Since (startTime ).Milliseconds ())
267+ return pid , hv , nil
268+ }
269+
270+ // vmConfigFile is the name of the file where VM config is saved for restore.
271+ const vmConfigFile = "qemu-config.json"
272+
273+ // saveVMConfig saves the VM configuration to a file in the instance directory.
274+ // This is needed for QEMU restore since migration files only contain memory state.
275+ func saveVMConfig (instanceDir string , config hypervisor.VMConfig ) error {
276+ configPath := filepath .Join (instanceDir , vmConfigFile )
277+ data , err := json .MarshalIndent (config , "" , " " )
278+ if err != nil {
279+ return fmt .Errorf ("marshal config: %w" , err )
280+ }
281+ if err := os .WriteFile (configPath , data , 0644 ); err != nil {
282+ return fmt .Errorf ("write config: %w" , err )
283+ }
284+ return nil
285+ }
286+
287+ // loadVMConfig loads the VM configuration from the instance directory.
288+ func loadVMConfig (instanceDir string ) (hypervisor.VMConfig , error ) {
289+ configPath := filepath .Join (instanceDir , vmConfigFile )
290+ data , err := os .ReadFile (configPath )
291+ if err != nil {
292+ return hypervisor.VMConfig {}, fmt .Errorf ("read config: %w" , err )
293+ }
294+ var config hypervisor.VMConfig
295+ if err := json .Unmarshal (data , & config ); err != nil {
296+ return hypervisor.VMConfig {}, fmt .Errorf ("unmarshal config: %w" , err )
297+ }
298+ return config , nil
180299}
181300
182301// qemuBinaryName returns the QEMU binary name for the host architecture.
@@ -205,7 +324,7 @@ func qemuInstallHint() string {
205324
206325// isSocketInUse checks if a Unix socket is actively being used
207326func isSocketInUse (socketPath string ) bool {
208- conn , err := net .DialTimeout ("unix" , socketPath , 100 * time . Millisecond )
327+ conn , err := net .DialTimeout ("unix" , socketPath , socketDialTimeout )
209328 if err != nil {
210329 return false
211330 }
@@ -217,12 +336,12 @@ func isSocketInUse(socketPath string) bool {
217336func waitForSocket (socketPath string , timeout time.Duration ) error {
218337 deadline := time .Now ().Add (timeout )
219338 for time .Now ().Before (deadline ) {
220- conn , err := net .DialTimeout ("unix" , socketPath , 100 * time . Millisecond )
339+ conn , err := net .DialTimeout ("unix" , socketPath , socketDialTimeout )
221340 if err == nil {
222341 conn .Close ()
223342 return nil
224343 }
225- time .Sleep (50 * time . Millisecond )
344+ time .Sleep (socketPollInterval )
226345 }
227346 return fmt .Errorf ("timeout waiting for socket" )
228347}
0 commit comments