Skip to content

Commit 4be075c

Browse files
committed
Fix issues with AsyncBufferSequence.LineSequence
The main problem is that the internal buffer used by AsyncBufferSequence.LineSequence may end on the boundary of a line ending sequence -- this impacts primarily the UTF-8 encoding, but also impacts UTF-16 and UTF-32 with the \r\n sequence specifically. When this condition occurs, the range check prevents the peek-ahead to the next 1 or 2 bytes and prevents a complete line from being returned to the client. The buffer size is based on the page size by default, which on my macOS and Linux systems respectively are 16384 and 4096. This led to testLineSequence failing frequently on Linux due to the greater likelihood of a line ending being split across a multiple of the buffer size. To fix this, we always load more bytes into the buffer until the buffer no longer ends with a potential partial line ending sequence (unless we hit EOF which correctly causes an early return). Additionally, the testLineSequence test could generate empty lines, which meant that it was possible to have a line ending with \r followed by an (empty) line ending with \n, indistinguishable from a single line ending with \r\n. This is a problem for any line ending sequences where one is a prefix of another -- \r and \r\n are the only ones which meet this criteria. To fix that, prevent the test from ever generating an empty buffer, and since it does so by restricting the character set to Latin, will never again produce the problematic sequence. Also switch testTeardownSequence to use AsyncBufferSequence.LineSequence instead of its custom line splitting logic. This ensures the test works correctly regardless of buffer size, even with a contrived buffer size of 1. Closes #78
1 parent 1038151 commit 4be075c

File tree

2 files changed

+33
-14
lines changed

2 files changed

+33
-14
lines changed

Sources/Subprocess/AsyncBufferSequence.swift

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,35 @@ extension AsyncBufferSequence {
214214
}
215215

216216
while true {
217+
// Whether the buffer ends with a potential partial line ending sequence.
218+
// In this case we need to buffer more content until we have a complete line ending sequence so that peeking works properly.
219+
func bufferHasPotentialPartialLineSequence() -> Bool {
220+
guard Encoding.CodeUnit.self is UInt8.Type else {
221+
// Only the \r\n sequence requires multiple code units in the UTF-16 and UTF-32 encodings
222+
return self.buffer.last == carriageReturn
223+
}
224+
switch self.buffer.last {
225+
case carriageReturn, newLine1, lineSeparator1, paragraphSeparator1:
226+
return true
227+
default:
228+
switch self.buffer.suffix(2) {
229+
case [lineSeparator1, lineSeparator2], [paragraphSeparator1, paragraphSeparator2]:
230+
return true
231+
default:
232+
return false
233+
}
234+
}
235+
}
217236
// Step 1: Load more buffer if needed
218-
if self.startIndex >= self.buffer.count {
237+
var partialLineSequence = bufferHasPotentialPartialLineSequence()
238+
while partialLineSequence || self.startIndex >= self.buffer.count {
219239
guard let nextBuffer = try await loadBuffer() else {
220240
// We have no more data
221241
// Return the remaining data
222242
return yield(at: self.buffer.count)
223243
}
224244
self.buffer += nextBuffer
245+
partialLineSequence = bufferHasPotentialPartialLineSequence()
225246
}
226247
// Step 2: Iterate through buffer to find next line
227248
var currentIndex: Int = self.startIndex

Tests/SubprocessTests/SubprocessTests+Unix.swift

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -760,15 +760,8 @@ extension SubprocessUnixTests {
760760
}
761761
group.addTask {
762762
var outputs: [String] = []
763-
for try await bit in standardOutput {
764-
let bitString = bit.withUnsafeBytes { ptr in
765-
return String(decoding: ptr, as: UTF8.self)
766-
}.trimmingCharacters(in: .whitespacesAndNewlines)
767-
if bitString.contains("\n") {
768-
outputs.append(contentsOf: bitString.split(separator: "\n").map { String($0) })
769-
} else {
770-
outputs.append(bitString)
771-
}
763+
for try await line in standardOutput.lines() {
764+
outputs.append(line.trimmingCharacters(in: .newlines))
772765
}
773766
#expect(outputs == ["saw SIGQUIT", "saw SIGTERM", "saw SIGINT"])
774767
}
@@ -881,17 +874,21 @@ extension SubprocessUnixTests {
881874
let length: Int
882875
switch size {
883876
case .large:
884-
length = Int(Double.random(in: 1.0 ..< 2.0) * Double(readBufferSize))
877+
length = Int(Double.random(in: 1.0 ..< 2.0) * Double(readBufferSize)) + 1
885878
case .medium:
886-
length = Int(Double.random(in: 0.2 ..< 1.0) * Double(readBufferSize))
879+
length = Int(Double.random(in: 0.2 ..< 1.0) * Double(readBufferSize)) + 1
887880
case .small:
888-
length = Int.random(in: 0 ..< 16)
881+
length = Int.random(in: 1 ..< 16)
889882
}
890883

891884
var buffer: [UInt8] = Array(repeating: 0, count: length)
892885
for index in 0 ..< length {
893886
buffer[index] = UInt8.random(in: range)
894887
}
888+
// Buffer cannot be empty or a line with a \r ending followed by an empty one with a \n ending would be indistinguishable.
889+
// This matters for any line ending sequences where one line ending sequence is the prefix of another. \r and \r\n are the
890+
// only two which meet this criteria.
891+
precondition(!buffer.isEmpty)
895892
return buffer
896893
}
897894

@@ -954,6 +951,8 @@ extension SubprocessUnixTests {
954951
) { execution, standardOutput in
955952
var index = 0
956953
for try await line in standardOutput.lines(encoding: UTF8.self) {
954+
defer { index += 1 }
955+
try #require(index < testCases.count, "Received more lines than expected")
957956
#expect(
958957
line == testCases[index].value,
959958
"""
@@ -963,7 +962,6 @@ extension SubprocessUnixTests {
963962
Line Ending \(Array(testCases[index].newLine.utf8))
964963
"""
965964
)
966-
index += 1
967965
}
968966
}
969967
try FileManager.default.removeItem(at: testFilePath)

0 commit comments

Comments
 (0)