@@ -31,8 +31,17 @@ public final class LinuxContainer: Container, Sendable {
3131 public let id : String
3232
3333 /// Rootfs for the container.
34+ ///
35+ /// Note: The `destination` field of this mount is ignored as mounting is handled internally.
3436 public let rootfs : Mount
3537
38+ /// Optional writable layer for the container. When provided, the rootfs
39+ /// is mounted as the lower layer of an overlayfs, with this as the upper layer.
40+ /// All writes will go to this layer instead of the rootfs.
41+ ///
42+ /// Note: The `destination` field of this mount is ignored as mounting is handled internally.
43+ public let writableLayer : Mount ?
44+
3645 /// Configuration for the container.
3746 public let config : Configuration
3847
@@ -238,21 +247,27 @@ public final class LinuxContainer: Container, Sendable {
238247 /// - Parameters:
239248 /// - id: The identifier for the container.
240249 /// - rootfs: The root filesystem mount containing the container image contents.
250+ /// The `destination` field is ignored as mounting is handled internally.
251+ /// - writableLayer: Optional writable layer mount. When provided, an overlayfs is used with
252+ /// rootfs as the lower layer and this as the upper layer. Must be a block device.
253+ /// The `destination` field is ignored as mounting is handled internally.
241254 /// - vmm: The virtual machine manager that will handle launching the VM for the container.
242255 /// - logger: Optional logger for container operations.
243256 /// - configuration: A closure that configures the container by modifying the Configuration instance.
244257 public convenience init (
245258 _ id: String ,
246259 rootfs: Mount ,
260+ writableLayer: Mount ? = nil ,
247261 vmm: VirtualMachineManager ,
248262 logger: Logger ? = nil ,
249263 configuration: ( inout Configuration ) throws -> Void
250264 ) throws {
251265 var config = Configuration ( )
252266 try configuration ( & config)
253- self . init (
267+ try self . init (
254268 id,
255269 rootfs: rootfs,
270+ writableLayer: writableLayer,
256271 vmm: vmm,
257272 configuration: config,
258273 logger: logger
@@ -264,16 +279,29 @@ public final class LinuxContainer: Container, Sendable {
264279 /// - Parameters:
265280 /// - id: The identifier for the container.
266281 /// - rootfs: The root filesystem mount containing the container image contents.
282+ /// The `destination` field is ignored as mounting is handled internally.
283+ /// - writableLayer: Optional writable layer mount. When provided, an overlayfs is used with
284+ /// rootfs as the lower layer and this as the upper layer. Must be a block device.
285+ /// The `destination` field is ignored as mounting is handled internally.
267286 /// - vmm: The virtual machine manager that will handle launching the VM for the container.
268287 /// - configuration: The container configuration specifying process, resources, networking, and other settings.
269288 /// - logger: Optional logger for container operations.
270289 public init (
271290 _ id: String ,
272291 rootfs: Mount ,
292+ writableLayer: Mount ? = nil ,
273293 vmm: VirtualMachineManager ,
274294 configuration: LinuxContainer . Configuration ,
275295 logger: Logger ? = nil
276- ) {
296+ ) throws {
297+ if let writableLayer {
298+ guard writableLayer. isBlock else {
299+ throw ContainerizationError (
300+ . invalidArgument,
301+ message: " writableLayer must be a block device "
302+ )
303+ }
304+ }
277305 self . id = id
278306 self . vmm = vmm
279307 self . hostVsockPorts = Atomic < UInt32 > ( 0x1000_0000 )
@@ -282,6 +310,7 @@ public final class LinuxContainer: Container, Sendable {
282310 self . config = configuration
283311 self . state = AsyncMutex ( . initialized)
284312 self . rootfs = rootfs
313+ self . writableLayer = writableLayer
285314 }
286315
287316 private static func createDefaultRuntimeSpec( _ id: String ) -> Spec {
@@ -313,7 +342,8 @@ public final class LinuxContainer: Container, Sendable {
313342
314343 // If the rootfs was requested as read-only, set it in the OCI spec.
315344 // We let the OCI runtime remount as ro, instead of doing it originally.
316- spec. root? . readonly = self . rootfs. options. contains ( " ro " )
345+ // However, if we have a writable layer, the overlay allows writes so we don't mark it read-only.
346+ spec. root? . readonly = self . rootfs. options. contains ( " ro " ) && self . writableLayer == nil
317347
318348 // Resource limits.
319349 // CPU: quota/period model where period is 100ms (100,000µs) and quota is cpus * period
@@ -393,6 +423,67 @@ extension LinuxContainer {
393423 config. interfaces
394424 }
395425
/// Mounts the container's root filesystem inside the guest at `rootfsPath`.
///
/// The first entry of `attachments` is used as the rootfs. If the container has no
/// `writableLayer`, that entry is mounted directly at `rootfsPath`. Otherwise the
/// second entry is used as the writable layer and the two are combined with overlayfs:
///
/// 1. The rootfs is mounted read-only at `/run/container/<id>/lower`.
/// 2. The writable layer is mounted at `/run/container/<id>/upper`.
/// 3. `diff` and `work` directories are created inside the writable layer.
/// 4. An overlay using those directories is mounted at `rootfsPath`.
///
/// - Parameters:
///   - attachments: Filesystems attached to the VM for this container. The rootfs is
///     expected first, followed by the writable layer when one is configured.
///   - rootfsPath: Guest path at which the final root filesystem is mounted.
///   - agent: Agent used to perform the mounts and create directories.
/// - Throws: `ContainerizationError` with `.notFound` when the rootfs attachment, or the
///   writable layer attachment, is missing. Errors thrown by `agent` are propagated.
private func mountRootfs(
    attachments: [AttachedFilesystem],
    rootfsPath: String,
    agent: VirtualMachineAgent
) async throws {
    guard let imageAttachment = attachments.first else {
        throw ContainerizationError(.notFound, message: "rootfs mount not found")
    }

    // Without a writable layer the rootfs goes straight to its final location.
    if self.writableLayer == nil {
        var directMount = imageAttachment.to
        directMount.destination = rootfsPath
        try await agent.mount(directMount)
        return
    }

    // The writable layer is expected immediately after the rootfs.
    guard let scratchAttachment = attachments.dropFirst().first else {
        throw ContainerizationError(
            .notFound,
            message: "writable layer mount not found"
        )
    }

    let containerDir = "/run/container/\(self.id)"
    let lowerDir = "\(containerDir)/lower"
    let scratchDir = "\(containerDir)/upper"
    let upperDir = "\(scratchDir)/diff"
    let workDir = "\(scratchDir)/work"

    // Image contents become the lower layer and are always mounted read-only.
    var imageMount = imageAttachment.to
    imageMount.destination = lowerDir
    let alreadyReadOnly = imageMount.options.contains("ro")
    if !alreadyReadOnly {
        imageMount.options.append("ro")
    }
    try await agent.mount(imageMount)

    // The writable layer hosts both the overlay's upper and work directories.
    var scratchMount = scratchAttachment.to
    scratchMount.destination = scratchDir
    try await agent.mount(scratchMount)

    for directory in [upperDir, workDir] {
        try await agent.mkdir(path: directory, all: true, perms: 0o755)
    }

    // Combine both layers at the container's rootfs path.
    let overlayOptions = [
        "lowerdir=\(lowerDir)",
        "upperdir=\(upperDir)",
        "workdir=\(workDir)",
    ]
    try await agent.mount(
        ContainerizationOCI.Mount(
            type: "overlay",
            source: "overlay",
            destination: rootfsPath,
            options: overlayOptions
        )
    )
}
486+
396487 /// Create and start the underlying container's virtual machine
397488 /// and set up the runtime environment. The container's init process
398489 /// is NOT running afterwards.
@@ -428,11 +519,17 @@ extension LinuxContainer {
428519 // This is dumb, but alas.
429520 let fileMountContextHolder = Mutex < FileMountContext > ( fileMountContext)
430521
522+ // Build the list of mounts to attach to the VM.
523+ var containerMounts = [ modifiedRootfs] + fileMountContext. transformedMounts
524+ if let writableLayer = self . writableLayer {
525+ containerMounts. insert ( writableLayer, at: 1 )
526+ }
527+
431528 let vmConfig = VMConfiguration (
432529 cpus: self . cpus,
433530 memoryInBytes: vmMemory,
434531 interfaces: self . interfaces,
435- mountsByID: [ self . id: [ modifiedRootfs ] + fileMountContext . transformedMounts ] ,
532+ mountsByID: [ self . id: containerMounts ] ,
436533 bootLog: self . config. bootLog,
437534 nestedVirtualization: self . config. virtualization
438535 )
@@ -445,13 +542,11 @@ extension LinuxContainer {
445542 try await vm. withAgent { agent in
446543 try await agent. standardSetup ( )
447544
448- // Mount the rootfs.
449- guard let attachments = vm. mounts [ self . id] , let rootfsAttachment = attachments. first else {
545+ guard let attachments = vm. mounts [ self . id] else {
450546 throw ContainerizationError ( . notFound, message: " rootfs mount not found " )
451547 }
452- var rootfs = rootfsAttachment. to
453- rootfs. destination = Self . guestRootfsPath ( self . id)
454- try await agent. mount ( rootfs)
548+ let rootfsPath = Self . guestRootfsPath ( self . id)
549+ try await self . mountRootfs ( attachments: attachments, rootfsPath: rootfsPath, agent: agent)
455550
456551 // Mount file mount holding directories under /run.
457552 if fileMountContext. hasFileMounts {
@@ -493,10 +588,10 @@ extension LinuxContainer {
493588
494589 // Setup /etc/resolv.conf and /etc/hosts if asked for.
495590 if let dns = self . config. dns {
496- try await agent. configureDNS ( config: dns, location: rootfs . destination )
591+ try await agent. configureDNS ( config: dns, location: rootfsPath )
497592 }
498593 if let hosts = self . config. hosts {
499- try await agent. configureHosts ( config: hosts, location: rootfs . destination )
594+ try await agent. configureHosts ( config: hosts, location: rootfsPath )
500595 }
501596
502597 }
@@ -518,12 +613,14 @@ extension LinuxContainer {
518613 let agent = try await createdState. vm. dialAgent ( )
519614 do {
520615 var spec = self . generateRuntimeSpec ( )
521- // We don't need the rootfs, nor do OCI runtimes want it included.
616+ // We don't need the rootfs (or writable layer) , nor do OCI runtimes want it included.
522617 // Also filter out file mount holding directories. We'll mount those separately under /run.
523618 let containerMounts = createdState. vm. mounts [ self . id] ?? [ ]
524619 let holdingTags = createdState. fileMountContext. holdingDirectoryTags
620+ // Drop rootfs, and writable layer if present.
621+ let mountsToSkip = self . writableLayer != nil ? 2 : 1
525622 spec. mounts =
526- containerMounts. dropFirst ( )
623+ containerMounts. dropFirst ( mountsToSkip )
527624 . filter { !holdingTags. contains ( $0. source) }
528625 . map { $0. to }
529626 + createdState. fileMountContext. ociBindMounts ( )
@@ -666,6 +763,14 @@ extension LinuxContainer {
666763 flags: 0
667764 )
668765
766+ // If we have a writable layer, we also need to unmount the lower and upper layers.
767+ if self . writableLayer != nil {
768+ let upperPath = " /run/container/ \( self . id) /upper "
769+ let lowerPath = " /run/container/ \( self . id) /lower "
770+ try await agent. umount ( path: upperPath, flags: 0 )
771+ try await agent. umount ( path: lowerPath, flags: 0 )
772+ }
773+
669774 try await agent. sync ( )
670775 }
671776 } catch {
0 commit comments