
Commit 898e3ae

LinuxContainer: Stop should be idempotent (#291)
I think stop should be able to be called more than once and not complain.
1 parent fe227c8 commit 898e3ae
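
The behavioral change: once a container is already stopping or stopped, calling stop() again simply returns instead of throwing an invalid-state error. Below is a minimal, self-contained sketch of the pattern the diff uses, where a private Idempotent marker error short-circuits repeated transitions; the Widget type and its states here are hypothetical stand-ins, not the real LinuxContainer API.

// Sketch only: a tiny state machine whose transition throws a private
// Idempotent marker when the work is already done, and whose public
// stop() swallows that marker so repeated calls are quiet no-ops.
final class Widget {
    private enum State {
        case started
        case stopping
        case stopped

        struct Idempotent: Swift.Error {}

        mutating func setStopping() throws {
            switch self {
            case .started:
                self = .stopping
            case .stopping, .stopped:
                // Already on the way down; signal "nothing to do".
                throw Idempotent()
            }
        }

        mutating func setStopped() {
            self = .stopped
        }
    }

    private var state = State.started

    func stop() throws {
        do {
            try state.setStopping()
        } catch _ as State.Idempotent {
            return  // a second (or third) stop() is a quiet no-op
        }
        // ...real teardown would run here...
        state.setStopped()
    }
}

do {
    let widget = Widget()
    try widget.stop()
    try widget.stop()  // second call does not throw
    print("stop() called twice without error")
} catch {
    print("unexpected error: \(error)")
}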

2 files changed: +134 -52 lines


Sources/Containerization/LinuxContainer.swift

Lines changed: 91 additions & 52 deletions
@@ -161,6 +161,8 @@ public final class LinuxContainer: Container, Sendable {
         /// The container is being resumed.
         case resuming(ResumingState)

+        struct Idempotent: Swift.Error {}
+
         struct CreatingState: Sendable {}

         struct CreatedState: Sendable {
@@ -244,6 +246,8 @@ public final class LinuxContainer: Container, Sendable {
             switch self {
             case .initialized, .stopped:
                 self = .creating(.init())
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -259,6 +263,8 @@ public final class LinuxContainer: Container, Sendable {
             switch self {
             case .creating:
                 self = .created(.init(vm: vm, relayManager: relayManager))
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -272,6 +278,8 @@ public final class LinuxContainer: Container, Sendable {
             case .created(let state):
                 self = .starting(.init(state))
                 return state.vm
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -284,6 +292,8 @@ public final class LinuxContainer: Container, Sendable {
             switch self {
             case .starting(let state):
                 self = .started(.init(state, process: process))
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -296,6 +306,8 @@ public final class LinuxContainer: Container, Sendable {
             switch self {
             case .resuming(let state):
                 self = .started(.init(state))
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -304,11 +316,15 @@ public final class LinuxContainer: Container, Sendable {
             }
         }

-        mutating func stopping() throws -> StartedState {
+        mutating func setStopping() throws -> StartedState {
             switch self {
             case .started(let state):
                 self = .stopping(.init(state))
                 return state
+            case .stopping(_), .stopped:
+                throw Idempotent()
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -321,6 +337,8 @@ public final class LinuxContainer: Container, Sendable {
             switch self {
             case .started(let state):
                 return state
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -334,6 +352,8 @@ public final class LinuxContainer: Container, Sendable {
             case .started(let state):
                 self = .pausing(.init(state))
                 return state
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -346,6 +366,8 @@ public final class LinuxContainer: Container, Sendable {
             switch self {
             case .pausing(let state):
                 self = .paused(.init(state))
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -359,6 +381,8 @@ public final class LinuxContainer: Container, Sendable {
             case .paused(let state):
                 self = .resuming(.init(state))
                 return state
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -367,10 +391,14 @@ public final class LinuxContainer: Container, Sendable {
             }
         }

-        mutating func stopped() throws {
+        mutating func setStopped() throws {
             switch self {
             case .stopping(_):
                 self = .stopped
+            case .stopped:
+                throw Idempotent()
+            case .errored(let err):
+                throw err
             default:
                 throw ContainerizationError(
                     .invalidState,
@@ -379,7 +407,7 @@ public final class LinuxContainer: Container, Sendable {
             }
         }

-        mutating func errored(error: Swift.Error) {
+        mutating func setErrored(error: Swift.Error) {
             self = .errored(error)
         }
     }
@@ -534,7 +562,7 @@ extension LinuxContainer {
             }
         } catch {
             try? await vm.stop()
-            self.state.withLock { $0.errored(error: error) }
+            self.state.withLock { $0.setErrored(error: error) }
             throw error
         }
     }
@@ -571,7 +599,7 @@ extension LinuxContainer {
         } catch {
             try? await agent.close()
             try? await vm.stop()
-            self.state.withLock { $0.errored(error: error) }
+            self.state.withLock { $0.setErrored(error: error) }
             throw error
         }
     }
@@ -616,61 +644,72 @@ extension LinuxContainer {
         )
     }

-    /// Stop the container from executing.
+    /// Stop the container from executing. This MUST be called even if wait() has returned,
+    /// as there are additional cleanup steps that wait() does not perform.
     public func stop() async throws {
-        let startedState = try self.state.withLock { try $0.stopping() }
-
-        try await startedState.relayManager.stopAll()
-
-        // It's possible the state of the vm is not in a great spot
-        // if the guest panicked or had any sort of bug/fault.
-        // First check if the vm is even still running, as trying to
-        // use a vsock handle like below here will cause NIO to
-        // fatalError because we'll get an EBADF.
-        if startedState.vm.state == .stopped {
-            try self.state.withLock { try $0.stopped() }
+        let startedState: State.StartedState
+        do {
+            startedState = try self.state.withLock { try $0.setStopping() }
+        } catch _ as State.Idempotent {
             return
         }

-        try await startedState.vm.withAgent { agent in
-            // First, we need to stop any unix socket relays as this will
-            // keep the rootfs from being able to umount (EBUSY).
-            let sockets = config.sockets
-            if !sockets.isEmpty {
-                guard let relayAgent = agent as? SocketRelayAgent else {
-                    throw ContainerizationError(
-                        .unsupported,
-                        message: "VirtualMachineAgent does not support relaySocket surface"
-                    )
-                }
-                for socket in sockets {
-                    try await relayAgent.stopSocketRelay(configuration: socket)
-                }
+        do {
+            try await startedState.relayManager.stopAll()
+
+            // It's possible the state of the vm is not in a great spot
+            // if the guest panicked or had any sort of bug/fault.
+            // First check if the vm is even still running, as trying to
+            // use a vsock handle like below here will cause NIO to
+            // fatalError because we'll get an EBADF.
+            if startedState.vm.state == .stopped {
+                try self.state.withLock { try $0.setStopped() }
+                return
             }

-            // Now lets ensure every process is donezo.
-            try await agent.kill(pid: -1, signal: SIGKILL)
+            try await startedState.vm.withAgent { agent in
+                // First, we need to stop any unix socket relays as this will
+                // keep the rootfs from being able to umount (EBUSY).
+                let sockets = config.sockets
+                if !sockets.isEmpty {
+                    guard let relayAgent = agent as? SocketRelayAgent else {
+                        throw ContainerizationError(
+                            .unsupported,
+                            message: "VirtualMachineAgent does not support relaySocket surface"
+                        )
+                    }
+                    for socket in sockets {
+                        try await relayAgent.stopSocketRelay(configuration: socket)
+                    }
+                }
+
+                // Now lets ensure every process is donezo.
+                try await agent.kill(pid: -1, signal: SIGKILL)

-            // Wait on init proc exit. Give it 5 seconds of leeway.
-            _ = try await agent.waitProcess(
-                id: self.id,
-                containerID: self.id,
-                timeoutInSeconds: 5
-            )
+                // Wait on init proc exit. Give it 5 seconds of leeway.
+                _ = try await agent.waitProcess(
+                    id: self.id,
+                    containerID: self.id,
+                    timeoutInSeconds: 5
+                )

-            // Today, we leave EBUSY looping and other fun logic up to the
-            // guest agent.
-            try await agent.umount(
-                path: Self.guestRootfsPath(self.id),
-                flags: 0
-            )
-        }
+                // Today, we leave EBUSY looping and other fun logic up to the
+                // guest agent.
+                try await agent.umount(
+                    path: Self.guestRootfsPath(self.id),
+                    flags: 0
+                )
+            }

-        // Lets free up the init procs resources, as this includes the open agent conn.
-        try? await startedState.process.delete()
+            // Lets free up the init procs resources, as this includes the open agent conn.
+            try? await startedState.process.delete()

-        try await startedState.vm.stop()
-        try self.state.withLock { try $0.stopped() }
+            try await startedState.vm.stop()
+            try self.state.withLock { try $0.setStopped() }
+        } catch {
+            self.state.withLock { $0.setErrored(error: error) }
+            throw error
+        }
     }

     /// Pause the container.
@@ -680,7 +719,7 @@ extension LinuxContainer {
             try await state.vm.pause()
             try self.state.withLock { try $0.setPaused() }
         } catch {
-            self.state.withLock { $0.errored(error: error) }
+            self.state.withLock { $0.setErrored(error: error) }
         }
     }

@@ -691,7 +730,7 @@ extension LinuxContainer {
             try await state.vm.resume()
             try self.state.withLock { try $0.setResumed() }
         } catch {
-            self.state.withLock { $0.errored(error: error) }
+            self.state.withLock { $0.setErrored(error: error) }
         }
     }
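
A second thread running through these hunks: every state transition gains a case .errored(let err): throw err arm, and stop() now records failures with setErrored(error:), so later calls surface the original teardown error instead of a generic invalid-state message. A small sketch of that idea, again with a hypothetical MachineState rather than the real nested State enum:

// Sketch only: once a transition records an error, later transitions rethrow
// that same error instead of a generic "invalid state" failure.
struct TeardownFailed: Swift.Error {}

enum MachineState {
    case running
    case stopped
    case errored(Swift.Error)

    mutating func setStopped() throws {
        switch self {
        case .running:
            self = .stopped
        case .stopped:
            return  // already stopped: idempotent no-op
        case .errored(let err):
            throw err  // surface the original failure to the caller
        }
    }

    mutating func setErrored(error: Swift.Error) {
        self = .errored(error)
    }
}

var machine = MachineState.running
machine.setErrored(error: TeardownFailed())
do {
    try machine.setStopped()
} catch is TeardownFailed {
    print("later transitions rethrow the recorded teardown failure")
} catch {
    print("unexpected error: \(error)")
}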

Sources/Integration/VMTests.swift

Lines changed: 43 additions & 0 deletions
@@ -206,6 +206,49 @@ extension IntegrationSuite {
         }
     }

+    func testContainerStopIdempotency() async throws {
+        let id = "test-container-stop-idempotency"
+
+        // Get the kernel from bootstrap
+        let bs = try await bootstrap()
+
+        // Create ContainerManager with kernel and initfs reference
+        var manager = try ContainerManager(vmm: bs.vmm)
+        defer {
+            try? manager.delete(id)
+        }
+
+        let buffer = BufferWriter()
+        let container = try await manager.create(
+            id,
+            image: bs.image,
+            rootfs: bs.rootfs
+        ) { config in
+            config.process.arguments = ["/bin/echo", "please stop me"]
+            config.process.stdout = buffer
+        }
+
+        // Start the container
+        try await container.create()
+        try await container.start()
+
+        // Wait for completion
+        let status = try await container.wait()
+        guard status == 0 else {
+            throw IntegrationError.assert(msg: "process status \(status) != 0")
+        }
+
+        try await container.stop()
+        // Second go around should return with no problems.
+        try await container.stop()
+
+        let output = String(data: buffer.data, encoding: .utf8)
+        guard output == "please stop me\n" else {
+            throw IntegrationError.assert(
+                msg: "process should have returned 'please stop me' != '\(output ?? "nil")'")
+        }
+    }
+
     func testContainerReuse() async throws {
         let id = "test-container-reuse"
