Skip to content

Commit 0f37f89

Browse files
committed
Launch snapshotted vms in new shim
1 parent 1aca174 commit 0f37f89

File tree

2 files changed

+133
-51
lines changed

2 files changed

+133
-51
lines changed

firecracker-control/local.go

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -96,17 +96,45 @@ func newLocal(ic *plugin.InitContext) (*local, error) {
9696
func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest) (*proto.CreateVMResponse, error) {
9797
var err error
9898

99-
id := req.GetVMID()
100-
if err := identifiers.Validate(id); err != nil {
99+
// Create shim
100+
_, err = s.CreateShim(requestCtx, req.GetVMID())
101+
if err != nil {
102+
return nil, err
103+
}
104+
105+
client, err := s.shimFirecrackerClient(requestCtx, req.GetVMID())
106+
if err != nil {
107+
err = errors.Wrap(err, "failed to create firecracker shim client")
101108
s.logger.WithError(err).Error()
102109
return nil, err
103110
}
104111

112+
defer client.Close()
113+
114+
resp, err := client.CreateVM(requestCtx, req)
115+
if err != nil {
116+
s.logger.WithError(err).Error("shim CreateVM returned error")
117+
return nil, err
118+
}
119+
120+
return resp, nil
121+
}
122+
123+
func (s *local) CreateShim(requestCtx context.Context, id string) (codes.Code, error) {
124+
var err error
125+
126+
// Validate VM id
127+
if err := identifiers.Validate(id); err != nil {
128+
s.logger.WithError(err).Error()
129+
return codes.Unknown, err
130+
}
131+
132+
// Validate namespace
105133
ns, err := namespaces.NamespaceRequired(requestCtx)
106134
if err != nil {
107135
err = errors.Wrap(err, "error retrieving namespace of request")
108136
s.logger.WithError(err).Error()
109-
return nil, err
137+
return codes.Unknown, err
110138
}
111139

112140
s.logger.Debugf("using namespace: %s", ns)
@@ -118,36 +146,36 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
118146
if err != nil {
119147
err = errors.Wrap(err, "failed to obtain shim socket address")
120148
s.logger.WithError(err).Error()
121-
return nil, err
149+
return codes.Unknown, err
122150
}
123151

124152
shimSocket, err := shim.NewSocket(shimSocketAddress)
125153
if shim.SocketEaddrinuse(err) {
126-
return nil, status.Errorf(codes.AlreadyExists, "VM with ID %q already exists (socket: %q)", id, shimSocketAddress)
154+
return codes.AlreadyExists, status.Errorf(codes.AlreadyExists, "VM with ID %q already exists (socket: %q)", id, shimSocketAddress)
127155
} else if err != nil {
128156
err = errors.Wrapf(err, "failed to open shim socket at address %q", shimSocketAddress)
129157
s.logger.WithError(err).Error()
130-
return nil, err
158+
return codes.Unknown, err
131159
}
132160

133161
// If we're here, there is no pre-existing shim for this VMID, so we spawn a new one
134162
if err := os.Mkdir(s.config.ShimBaseDir, 0700); err != nil && !os.IsExist(err) {
135163
s.logger.WithError(err).Error()
136-
return nil, errors.Wrapf(err, "failed to make shim base directory: %s", s.config.ShimBaseDir)
164+
return codes.Unknown, errors.Wrapf(err, "failed to make shim base directory: %s", s.config.ShimBaseDir)
137165
}
138166

139167
shimDir, err := vm.ShimDir(s.config.ShimBaseDir, ns, id)
140168
if err != nil {
141169
err = errors.Wrapf(err, "failed to build shim path")
142170
s.logger.WithError(err).Error()
143-
return nil, err
171+
return codes.Unknown, err
144172
}
145173

146174
err = shimDir.Mkdir()
147175
if err != nil {
148176
err = errors.Wrapf(err, "failed to create VM dir %q", shimDir.RootPath())
149177
s.logger.WithError(err).Error()
150-
return nil, err
178+
return codes.Unknown, err
151179
}
152180

153181
defer func() {
@@ -167,19 +195,19 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
167195
if err != nil {
168196
err = errors.Wrap(err, "failed to obtain shim socket address")
169197
s.logger.WithError(err).Error()
170-
return nil, err
198+
return codes.Unknown, err
171199
}
172200

173201
fcSocket, err := shim.NewSocket(fcSocketAddress)
174202
if err != nil {
175203
err = errors.Wrapf(err, "failed to open fccontrol socket at address %q", fcSocketAddress)
176204
s.logger.WithError(err).Error()
177-
return nil, err
205+
return codes.Unknown, err
178206
}
179207

180208
cmd, err := s.newShim(ns, id, s.containerdAddress, shimSocket, fcSocket)
181209
if err != nil {
182-
return nil, err
210+
return codes.Unknown, err
183211
}
184212

185213
defer func() {
@@ -188,26 +216,12 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
188216
}
189217
}()
190218

191-
client, err := s.shimFirecrackerClient(requestCtx, id)
192-
if err != nil {
193-
err = errors.Wrap(err, "failed to create firecracker shim client")
194-
s.logger.WithError(err).Error()
195-
return nil, err
196-
}
197-
198-
defer client.Close()
199-
200-
resp, err := client.CreateVM(requestCtx, req)
201-
if err != nil {
202-
s.logger.WithError(err).Error("shim CreateVM returned error")
203-
return nil, err
204-
}
205-
206219
s.addShim(shimSocketAddress, cmd)
207220

208-
return resp, nil
221+
return codes.OK, nil
209222
}
210223

224+
211225
func (s *local) addShim(address string, cmd *exec.Cmd) {
212226
s.processesMu.Lock()
213227
defer s.processesMu.Unlock()
@@ -617,6 +631,14 @@ func (s *local) CreateSnapshot(ctx context.Context, req *proto.CreateSnapshotReq
617631

618632
// LoadSnapshot loads a snapshot of a VM.
619633
func (s *local) LoadSnapshot(ctx context.Context, req *proto.LoadSnapshotRequest) (*proto.LoadResponse, error) {
634+
var err error
635+
636+
// Create the shim if it does not exist yet
637+
code, err := s.CreateShim(ctx, req.GetVMID())
638+
if err != nil && code != codes.AlreadyExists {
639+
return nil, err
640+
}
641+
620642
client, err := s.shimFirecrackerClient(ctx, req.VMID)
621643
if err != nil {
622644
return nil, err
@@ -651,4 +673,4 @@ func (s *local) Offload(ctx context.Context, req *proto.OffloadRequest) (*empty.
651673
}
652674

653675
return resp, nil
654-
}
676+
}

runtime/service.go

Lines changed: 82 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -273,20 +273,22 @@ func (s *service) startEventForwarders(remotePublisher shim.Publisher) {
273273
go func() {
274274
<-s.vmReady
275275

276-
// Once the VM is ready, also start forwarding events from it to our exchange
277-
attachCh := eventbridge.Attach(ctx, s.eventBridgeClient, s.eventExchange)
276+
if ! s.snapLoaded {
277+
// Once the VM is ready, also start forwarding events from it to our exchange
278+
attachCh := eventbridge.Attach(ctx, s.eventBridgeClient, s.eventExchange)
278279

279-
err := <-attachCh
280-
if err != nil && err != context.Canceled {
281-
s.logger.WithError(err).Error("error while forwarding events from VM agent")
282-
}
280+
err := <-attachCh
281+
if err != nil && err != context.Canceled && !strings.Contains(err.Error(), "context canceled") {
282+
s.logger.WithError(err).Error("error while forwarding events from VM agent")
283+
}
283284

284-
err = <-republishCh
285-
if err != nil && err != context.Canceled {
286-
s.logger.WithError(err).Error("error while republishing events")
287-
}
285+
err = <-republishCh
286+
if err != nil && err != context.Canceled {
287+
s.logger.WithError(err).Error("error while republishing events")
288+
}
288289

289-
remotePublisher.Close()
290+
remotePublisher.Close()
291+
}
290292
}()
291293
}
292294

@@ -515,10 +517,8 @@ func (s *service) CreateVM(requestCtx context.Context, request *proto.CreateVMRe
515517
s.logger.WithError(err).Error("failed to publish start VM event")
516518
}
517519

518-
// Commented out because its execution cancels the shim, and
519-
// it would get executed on Offload if we leave it, killing the shim,
520-
// and making snapshots impossible.
521-
//go s.monitorVMExit()
520+
// Watch for the VM to exit in the background; the cleanup that follows cancels the shim
521+
go s.monitorVMExit()
522522

523523
// let all the other methods know that the VM is ready for tasks
524524
close(s.vmReady)
@@ -720,12 +720,51 @@ func (s *service) StopVM(requestCtx context.Context, request *proto.StopVMReques
720720
return nil, err
721721
}
722722

723-
if err = s.shutdown(requestCtx, timeout, &taskAPI.ShutdownRequest{Now: true}); err != nil {
723+
if ! s.snapLoaded {
724+
err = s.shutdown(requestCtx, timeout, &taskAPI.ShutdownRequest{Now: true})
725+
} else {
726+
err = s.shutdownSnapLoadedVm()
727+
}
728+
729+
if err != nil {
724730
return nil, err
725731
}
732+
726733
return &empty.Empty{}, nil
727734
}
728735

736+
// shutdownSnapLoadedVm shuts down a vm that has been loaded from a snapshot
737+
func (s *service) shutdownSnapLoadedVm() error {
738+
// Kill firecracker process and its child processes
739+
if err := syscall.Kill(-s.firecrackerPid, 9); err != nil {
740+
s.logger.WithError(err).Error("Failed to kill firecracker process")
741+
return err
742+
}
743+
744+
// Delete firecracker socket
745+
if err := os.RemoveAll(s.shimDir.FirecrackerSockPath()); err != nil {
746+
s.logger.WithError(err).Error("Failed to delete firecracker socket")
747+
return err
748+
}
749+
750+
// Delete firecracker vsock
751+
if err := os.RemoveAll(s.shimDir.FirecrackerVSockPath()); err != nil {
752+
s.logger.WithError(err).Error("Failed to delete firecracker vsock")
753+
return err
754+
}
755+
756+
// Delete firecracker upf sock
757+
if err := os.RemoveAll(s.shimDir.FirecrackerUPFSockPath()); err != nil {
758+
s.logger.WithError(err).Error("Failed to delete firecracker UPF socket")
759+
return err
760+
}
761+
762+
if err := s.cleanup(); err != nil {
763+
s.logger.WithError(err).Error("failed to clean up the VM")
764+
}
765+
return nil
766+
}
767+
729768
// GetVMInfo returns metadata for the VM being managed by this shim. If the VM has not been created yet, this
730769
// method will wait for up to a hardcoded timeout for it to exist, returning an error if the timeout is reached.
731770
func (s *service) GetVMInfo(requestCtx context.Context, request *proto.GetVMInfoRequest) (*proto.GetVMInfoResponse, error) {
@@ -1746,7 +1785,6 @@ func (s *service) cleanup() error {
17461785

17471786
// monitorVMExit watches the VM and cleans up resources when it terminates.
17481787
// It is started in its own goroutine by CreateVM.
1749-
/*
17501788
func (s *service) monitorVMExit() {
17511789
// Block until the VM exits
17521790
if err := s.machine.Wait(s.shimCtx); err != nil && err != context.Canceled {
@@ -1757,7 +1795,6 @@ func (s *service) monitorVMExit() {
17571795
s.logger.WithError(err).Error("failed to clean up the VM")
17581796
}
17591797
}
1760-
*/
17611798

17621799
func (s *service) createHTTPControlClient() {
17631800
u := &httpunix.Transport{
@@ -1911,6 +1948,9 @@ func (s *service) startFirecrackerProcess() error {
19111948
}
19121949
firecrackerCmd.Dir = s.shimDir.RootPath()
19131950

1951+
// Put firecracker in its own process group so that it and all its child processes can be killed together
1952+
firecrackerCmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
1953+
19141954
if err := firecrackerCmd.Start(); err != nil {
19151955
logrus.WithError(err).Error("Failed to start firecracker process")
19161956
}
@@ -2076,6 +2116,8 @@ func (s *service) LoadSnapshot(ctx context.Context, req *proto.LoadSnapshotReque
20762116
return nil, err
20772117
}
20782118

2119+
close(s.vmReady)
2120+
20792121
return &proto.LoadResponse{FirecrackerPID: strconv.Itoa(s.firecrackerPid)}, nil
20802122
}
20812123

@@ -2134,11 +2176,25 @@ func (s *service) SendCreateSnapRequest(createSnapReq *http.Request) error {
21342176
}
21352177

21362178
// Offload shuts down a VM and deletes the corresponding firecracker socket
2137-
// and vsock. All of the other resources will persist
2179+
// and vsock. All of the other resources will persist. Deprecated!
21382180
func (s *service) Offload(ctx context.Context, req *proto.OffloadRequest) (*empty.Empty, error) {
2139-
if err := syscall.Kill(s.firecrackerPid, 9); err != nil {
2140-
s.logger.WithError(err).Error("Failed to kill firecracker process")
2141-
return nil, err
2181+
2182+
if !s.snapLoaded {
2183+
_, err := s.agentClient.Shutdown(ctx, &taskAPI.ShutdownRequest{Now: true})
2184+
if err != nil {
2185+
return nil, err
2186+
}
2187+
2188+
if err := syscall.Kill(s.firecrackerPid, 9); err != nil {
2189+
s.logger.WithError(err).Error("Failed to kill firecracker process")
2190+
return nil, err
2191+
}
2192+
} else {
2193+
// Snap-loaded VM: signal the whole process group (negative PID) so child processes are killed too
2194+
if err := syscall.Kill(-s.firecrackerPid, 9); err != nil {
2195+
s.logger.WithError(err).Error("Failed to kill firecracker process")
2196+
return nil, err
2197+
}
21422198
}
21432199

21442200
if err := os.RemoveAll(s.shimDir.FirecrackerSockPath()); err != nil {
@@ -2156,5 +2212,9 @@ func (s *service) Offload(ctx context.Context, req *proto.OffloadRequest) (*empt
21562212
return nil, err
21572213
}
21582214

2215+
if err := s.cleanup(); err != nil {
2216+
s.logger.WithError(err).Error("failed to clean up the VM")
2217+
}
2218+
21592219
return &empty.Empty{}, nil
21602220
}

0 commit comments

Comments
 (0)