Skip to content

Commit 8b39713

Browse files
authored
EXT4: Optimize unpack (#378)
Optimize unpack a little by reducing allocations in the hot path. Today, for every file, we read the entire file into memory and then pass the data blob to the ext4 writer, which eventually writes it to the sparse file. Before being written to the sparse file, the data is copied *again* into a temporary buffer before finally hitting write(2) in FileHandle. This change lets callers pass an optional buffer to the ext4 create() (so one buffer can be reused for all file writes), and it stops reading entire files into memory by passing the archive entry itself (wrapped in an `ArchiveEntryReader`, which conforms to the new `ReadableStream` protocol) down to the writer. Testing by unpacking every platform of `docker.io/jenkins/jenkins:lts` on an M1 Max: Old Avg (5 runs): 7.43s; New Avg (5 runs): 5.31s.
1 parent f3d9989 commit 8b39713

File tree

4 files changed

+92
-41
lines changed

4 files changed

+92
-41
lines changed

Sources/ContainerizationArchive/Reader.swift

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@
1717
import CArchive
1818
import Foundation
1919

20+
/// A protocol for reading data in chunks, compatible with both `InputStream` and zero-allocation archive readers.
///
/// Conforming types fill a caller-provided buffer on each call to `read(_:maxLength:)`.
21+
public protocol ReadableStream {
22+
/// Reads up to `maxLength` bytes into the provided buffer.
23+
/// Returns the number of bytes actually read, 0 for EOF, or -1 for error.
///
/// - Parameters:
///   - buffer: Destination that the bytes are written into.
///   - maxLength: Upper bound on the number of bytes to read in this call.
/// - Returns: The number of bytes read, `0` at end of stream, or `-1` on error.
24+
func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength: Int) -> Int
25+
}
26+
27+
extension InputStream: ReadableStream {}
28+
29+
/// Lightweight `ReadableStream` adapter that pulls entry data out of an `ArchiveReader`.
///
/// The reader is held weakly; once it has gone away every read reports an error.
public struct ArchiveEntryReader: ReadableStream {
    private weak var reader: ArchiveReader?

    init(reader: ArchiveReader) {
        self.reader = reader
    }

    /// Copies up to `maxLength` bytes of entry data into `buffer`.
    ///
    /// - Parameters:
    ///   - buffer: Destination that the bytes are written into.
    ///   - maxLength: Upper bound on the number of bytes to read in this call.
    /// - Returns: The number of bytes read, `0` for EOF, or `-1` for error
    ///   (including when the underlying archive is no longer available).
    public func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength: Int) -> Int {
        guard let handle = reader?.underlying else {
            return -1
        }
        let count = archive_read_data(handle, buffer, maxLength)
        // Collapse every negative libarchive status into the single -1 error value.
        if count < 0 {
            return -1
        }
        return count
    }
}
45+
2046
/// A class responsible for reading entries from an archive file.
2147
public final class ArchiveReader {
2248
/// A pointer to the underlying `archive` C structure.
@@ -99,6 +125,29 @@ extension ArchiveReader: Sequence {
99125
}
100126
}
101127

128+
/// Returns an iterator that yields archive entries together with a stream
/// for reading each entry's data, without loading the data into memory.
public func makeStreamingIterator() -> StreamingIterator {
    StreamingIterator(reader: self)
}

/// Iterates over the entries of an `ArchiveReader`, pairing each entry header with an
/// `ArchiveEntryReader` that reads that entry's data on demand.
public struct StreamingIterator: Sequence, IteratorProtocol {
    var reader: ArchiveReader

    public func makeIterator() -> StreamingIterator {
        self
    }

    /// Advances to the next entry header.
    ///
    /// - Returns: The entry and a stream for its data, or `nil` once the end of the
    ///   archive is reached or the next header cannot be read.
    public mutating func next() -> (WriteEntry, ArchiveEntryReader)? {
        let entry = WriteEntry()
        let status = archive_read_next_header2(reader.underlying, entry.underlying)
        // Only ARCHIVE_OK and ARCHIVE_WARN leave a usable header in `entry`.
        // Anything else (EOF, or an error such as ARCHIVE_FATAL) must end the
        // iteration: yielding after a failed header read would hand out a garbage
        // entry and could repeat forever on a corrupt archive.
        // `IteratorProtocol.next()` cannot throw, so an error is reported the same
        // way as end-of-archive.
        guard status == ARCHIVE_OK || status == ARCHIVE_WARN else {
            return nil
        }
        return (entry, ArchiveEntryReader(reader: reader))
    }
}
150+
102151
internal func readDataForEntry(_ entry: WriteEntry) -> Data {
103152
let bufferSize = Int(Swift.min(entry.size ?? 4096, 4096))
104153
var entry = Data()

Sources/ContainerizationEXT4/EXT4+Formatter.swift

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
// swiftlint: disable discouraged_direct_init shorthand_operator syntactic_sugar
1818

19+
import ContainerizationArchive
1920
import ContainerizationOS
2021
import Foundation
2122
import SystemPackage
@@ -24,7 +25,7 @@ extension EXT4 {
2425
/// The `EXT4.Formatter` class provides methods to format a block device with the ext4 filesystem.
2526
/// It allows customization of block size and maximum disk size.
2627
public class Formatter {
27-
private let blockSize: UInt32
28+
let blockSize: UInt32
2829
private var size: UInt64
2930
private let groupDescriptorSize: UInt32 = 32
3031

@@ -264,7 +265,7 @@ extension EXT4 {
264265
// - path: The FilePath representing the path where the file, directory, or symlink should be created.
265266
// - link: An optional FilePath representing the target path for a symlink. If `nil`, a regular file or directory will be created. Preceding '/' should be omitted
266267
// - mode: The permissions to set for the created file, directory, or symlink.
267-
// - buf: An `InputStream` object providing the contents for the created file. Ignored when creating directories or symlinks.
268+
// - buf: A `ReadableStream` object providing the contents for the created file. Ignored when creating directories or symlinks.
268269
//
269270
// - Note:
270271
// - This function recursively creates parent directories if they don't already exist. The `uid` and `gid` of the created parent directories are set to the values of their parent's `uid` and `gid`.
@@ -295,11 +296,12 @@ extension EXT4 {
295296
link: FilePath? = nil, // to create symbolic links
296297
mode: UInt16,
297298
ts: FileTimestamps = FileTimestamps(),
298-
buf: InputStream? = nil,
299+
buf: (any ReadableStream)? = nil,
299300
uid: UInt32? = nil,
300301
gid: UInt32? = nil,
301302
xattrs: [String: Data]? = nil,
302-
recursion: Bool = false
303+
recursion: Bool = false,
304+
fileBuffer: UnsafeMutableBufferPointer<UInt8>? = nil
303305
) throws {
304306
if let nodePtr = self.tree.lookup(path: path) {
305307
let node = nodePtr.pointee
@@ -539,14 +541,29 @@ extension EXT4 {
539541
if mode.isReg() {
540542
startBlock = self.currentBlock
541543
if let buf { // in case of empty files, this will be nil
542-
let tempBuf = Ptr<UInt8>.allocate(capacity: Int(self.blockSize))
543-
defer { tempBuf.deallocate() }
544-
while case let block = buf.read(tempBuf.underlying, maxLength: Int(self.blockSize)), block > 0 {
544+
let tempBuf: UnsafeMutablePointer<UInt8>
545+
let bufferSize: Int
546+
let shouldDeallocate: Bool
547+
if let fileBuffer {
548+
tempBuf = fileBuffer.baseAddress!
549+
bufferSize = fileBuffer.count
550+
shouldDeallocate = false
551+
} else {
552+
tempBuf = UnsafeMutablePointer<UInt8>.allocate(capacity: Int(self.blockSize))
553+
bufferSize = Int(self.blockSize)
554+
shouldDeallocate = true
555+
}
556+
defer {
557+
if shouldDeallocate {
558+
tempBuf.deallocate()
559+
}
560+
}
561+
while case let block = buf.read(tempBuf, maxLength: bufferSize), block > 0 {
545562
size += UInt64(block)
546563
if size > EXT4.MaxFileSize {
547564
throw Error.fileTooBig(size)
548565
}
549-
let data = UnsafeRawBufferPointer(start: tempBuf.underlying, count: block)
566+
let data = UnsafeRawBufferPointer(start: tempBuf, count: block)
550567
try withUnsafeLittleEndianBuffer(of: data) { b in
551568
try self.handle.write(contentsOf: b)
552569
}
@@ -565,29 +582,6 @@ extension EXT4 {
565582
throw Error.unsupportedFiletype
566583
}
567584

568-
/// Updates the owner and/or group stored on the inode at `path`.
///
/// - Parameters:
///   - path: Path of an existing node in the filesystem tree.
///   - uid: New owner id; the current value is kept when `nil`.
///   - gid: New group id; the current value is kept when `nil`.
///   - recursive: When `true`, the same change is applied to every descendant of `path`.
/// - Throws: `Error.notFound` if `path` is not present in the tree.
public func setOwner(path: FilePath, uid: UInt16? = nil, gid: UInt16? = nil, recursive: Bool = false) throws {
    // The target has to exist before anything is modified.
    guard let nodePtr = self.tree.lookup(path: path) else {
        throw Error.notFound(path)
    }
    let node = nodePtr.pointee

    // Inode numbers are 1-based, while the inode table is 0-based.
    let inodePtr = self.inodes[Int(node.inode) - 1]
    var updated = inodePtr.pointee
    updated.uid = uid ?? updated.uid
    updated.gid = gid ?? updated.gid
    inodePtr.initialize(to: updated)

    guard recursive else {
        return
    }
    for childPtr in node.children {
        let childPath = path.join(childPtr.pointee.name)
        try self.setOwner(path: childPath, uid: uid, gid: gid, recursive: true)
    }
}
590-
591585
// Completes the formatting of an ext4 filesystem after writing the necessary structures.
592586
//
593587
// This function is responsible for finalizing the formatting process of an ext4 filesystem

Sources/ContainerizationEXT4/Formatter+Unpack.swift

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,13 @@ extension EXT4.Formatter {
2727
/// Unpack the provided archive on to the ext4 filesystem.
2828
public func unpack(reader: ArchiveReader, progress: ProgressHandler? = nil) throws {
2929
var hardlinks: Hardlinks = [:]
30-
for (entry, data) in reader {
30+
// Allocate a single 128KiB reusable buffer for all files to minimize allocations
31+
// and reduce the number of read calls to libarchive.
32+
let bufferSize = 128 * 1024
33+
let reusableBuffer = UnsafeMutableBufferPointer<UInt8>.allocate(capacity: bufferSize)
34+
defer { reusableBuffer.deallocate() }
35+
36+
for (entry, streamReader) in reader.makeStreamingIterator() {
3137
try Task.checkCancellation()
3238
guard var pathEntry = entry.path else {
3339
continue
@@ -73,20 +79,16 @@ extension EXT4.Formatter {
7379
gid: entry.group,
7480
xattrs: entry.xattrs)
7581
case .regular:
76-
let inputStream = InputStream(data: data)
77-
inputStream.open()
7882
try self.create(
79-
path: path, mode: EXT4.Inode.Mode(.S_IFREG, entry.permissions), ts: ts, buf: inputStream,
83+
path: path, mode: EXT4.Inode.Mode(.S_IFREG, entry.permissions), ts: ts, buf: streamReader,
8084
uid: entry.owner,
81-
gid: entry.group, xattrs: entry.xattrs)
82-
inputStream.close()
85+
gid: entry.group, xattrs: entry.xattrs, fileBuffer: reusableBuffer)
8386

8487
// Count the size of files
85-
if let progress {
88+
if let progress, let size = entry.size {
8689
Task {
87-
let size = Int64(data.count)
8890
await progress([
89-
ProgressEvent(event: "add-size", value: size)
91+
ProgressEvent(event: "add-size", value: Int64(size))
9092
])
9193
}
9294
}

Sources/cctl/ImageCommand.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ extension Application {
123123
print("Reference resolved to \(reference.description)")
124124
}
125125

126+
var startTime = ContinuousClock.now
126127
let image = try await Images.withAuthentication(ref: normalizedReference) { auth in
127128
try await imageStore.pull(reference: normalizedReference, platform: platform, insecure: http, auth: auth)
128129
}
@@ -132,7 +133,9 @@ extension Application {
132133
Application.exit(withError: POSIXError(.EACCES))
133134
}
134135

135-
print("image pulled")
136+
var duration = ContinuousClock.now - startTime
137+
print("Image pull took: \(duration)\n")
138+
136139
guard let unpackPath else {
137140
return
138141
}
@@ -144,6 +147,7 @@ extension Application {
144147

145148
let unpacker = EXT4Unpacker.init(blockSizeInBytes: 2.gib())
146149

150+
startTime = ContinuousClock.now
147151
if let platform {
148152
let name = platform.description.replacingOccurrences(of: "/", with: "-")
149153
let _ = try await unpacker.unpack(image, for: platform, at: unpackUrl.appending(component: name))
@@ -160,6 +164,8 @@ extension Application {
160164
print("created snapshot for platform \(descPlatform.description)")
161165
}
162166
}
167+
duration = ContinuousClock.now - startTime
168+
print("\nUnpacking took: \(duration)")
163169
}
164170
}
165171

0 commit comments

Comments
 (0)