Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Sources/Subprocess/AsyncBufferSequence.swift
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ extension AsyncBufferSequence {
private var source: AsyncBufferSequence.AsyncIterator
private var buffer: [Encoding.CodeUnit]
private var underlyingBuffer: [Encoding.CodeUnit]
private var underlyingBufferIndex: Array<Encoding.CodeUnit>.Index
private var leftover: Encoding.CodeUnit?
private var eofReached: Bool
private let bufferingPolicy: BufferingPolicy
Expand All @@ -154,6 +155,7 @@ extension AsyncBufferSequence {
self.source = underlyingIterator
self.buffer = []
self.underlyingBuffer = []
self.underlyingBufferIndex = self.underlyingBuffer.startIndex
self.leftover = nil
self.eofReached = false
self.bufferingPolicy = bufferingPolicy
Expand Down Expand Up @@ -208,13 +210,16 @@ extension AsyncBufferSequence {
}

func nextFromSource() async throws -> Encoding.CodeUnit? {
if underlyingBuffer.isEmpty {
if underlyingBufferIndex >= underlyingBuffer.count {
guard let buf = try await loadBuffer() else {
return nil
}
underlyingBuffer = buf
underlyingBufferIndex = buf.startIndex
}
return underlyingBuffer.removeFirst()
let result = underlyingBuffer[underlyingBufferIndex]
underlyingBufferIndex = underlyingBufferIndex.advanced(by: 1)
return result
}

func nextCodeUnit() async throws -> Encoding.CodeUnit? {
Expand Down
70 changes: 64 additions & 6 deletions Sources/Subprocess/Configuration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -187,33 +187,73 @@ extension Configuration {
) throws {
var possibleError: (any Swift.Error)? = nil

// To avoid closing the same descriptor multiple times,
// keep track of the list of descriptors that we have
// already closed. If a `IODescriptor.Descriptor` is
// already closed, mark that `IODescriptor` as closed
// as opposed to actually try to close it.
var remainingSet: Set<IODescriptor.Descriptor> = Set(
optionalSequence: [
inputRead?.descriptor,
inputWrite?.descriptor,
outputRead?.descriptor,
outputWrite?.descriptor,
errorRead?.descriptor,
errorWrite?.descriptor,
]
)

do {
try inputRead?.safelyClose()
if remainingSet.tryRemove(inputRead?.descriptor) {
try inputRead?.safelyClose()
} else {
try inputRead?.markAsClosed()
}
} catch {
possibleError = error
}
do {
try inputWrite?.safelyClose()
if remainingSet.tryRemove(inputWrite?.descriptor) {
try inputWrite?.safelyClose()
} else {
try inputWrite?.markAsClosed()
}
} catch {
possibleError = error
}
do {
try outputRead?.safelyClose()
if remainingSet.tryRemove(outputRead?.descriptor) {
try outputRead?.safelyClose()
} else {
try outputRead?.markAsClosed()
}
} catch {
possibleError = error
}
do {
try outputWrite?.safelyClose()
if remainingSet.tryRemove(outputWrite?.descriptor) {
try outputWrite?.safelyClose()
} else {
try outputWrite?.markAsClosed()
}
} catch {
possibleError = error
}
do {
try errorRead?.safelyClose()
if remainingSet.tryRemove(errorRead?.descriptor) {
try errorRead?.safelyClose()
} else {
try errorRead?.markAsClosed()
}
} catch {
possibleError = error
}
do {
try errorWrite?.safelyClose()
if remainingSet.tryRemove(errorWrite?.descriptor) {
try errorWrite?.safelyClose()
} else {
try errorWrite?.markAsClosed()
}
} catch {
possibleError = error
}
Expand Down Expand Up @@ -733,6 +773,10 @@ internal struct IODescriptor: ~Copyable {
#endif
}

internal mutating func markAsClosed() throws {
self.closeWhenDone = false
}

deinit {
guard self.closeWhenDone else {
return
Expand Down Expand Up @@ -1081,3 +1125,17 @@ extension _OrderedSet: Sequence {
return self.elements.makeIterator()
}
}

extension Set {
init<S>(optionalSequence sequence: S) where S: Sequence, S.Element == Optional<Self.Element> {
let sequence: [Self.Element] = sequence.compactMap(\.self)
self.init(sequence)
}

mutating func tryRemove(_ element: Self.Element?) -> Bool {
guard let element else {
return false
}
return self.remove(element) != nil
}
}
64 changes: 30 additions & 34 deletions Sources/_SubprocessCShims/process_shims.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,14 +431,13 @@ static int _positive_int_parse(const char *str) {
}

#if defined(__linux__)
// Linux-specific version that uses syscalls directly and doesn't allocate heap memory.
// Safe to use after vfork() and before execve()
static int _highest_possibly_open_fd_dir_linux(const char *fd_dir) {
int highest_fd_so_far = 0;
/// Set `FD_CLOEXEC` on all open file descriptors listed under `fd_dir` so
/// they are automatically closed upon `execve()`.
/// Safe to use after `vfork()` and before `execve()`
static void _set_cloexec_to_open_fds(const char *fd_dir) {
int dir_fd = open(fd_dir, O_RDONLY);
if (dir_fd < 0) {
// errno set by `open`.
return -1;
return;
}

// Buffer for directory entries - allocated on stack, no heap allocation
Expand All @@ -450,49 +449,37 @@ static int _highest_possibly_open_fd_dir_linux(const char *fd_dir) {
if (errno == EINTR) {
continue;
} else {
// `errno` set by _getdents64.
highest_fd_so_far = -1;
close(dir_fd);
return highest_fd_so_far;
return;
}
}
if (bytes_read == 0) {
close(dir_fd);
return highest_fd_so_far;
return;
}
long offset = 0;
while (offset < bytes_read) {
struct linux_dirent64 *entry = (struct linux_dirent64 *)(buffer + offset);

// Skip "." and ".." entries
if (entry->d_name[0] != '.') {
int number = _positive_int_parse(entry->d_name);
if (number > highest_fd_so_far) {
highest_fd_so_far = number;
int fd = _positive_int_parse(entry->d_name);
if (fd > STDERR_FILENO && fd != dir_fd) {
int flags = fcntl(fd, F_GETFD);
if (flags >= 0) {
// Set FD_CLOEXEC on every open fd so they are closed after exec()
fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're doing a syscall anyway, why don't you just close it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For two reasons:

  1. I've searched around (close vs setting FD_CLOEXEC) and it seems in general FD_CLOEXEC is preferred for this situation:

https://utcc.utoronto.ca/~cks/space/blog/unix/ForkFDsAndRaces

https://docs.fedoraproject.org/en-US/defensive-coding/tasks/Tasks-Descriptors/#sect-Defensive_Coding-Tasks-Descriptors-Child_Processes

  1. A more practical reason is I don't think we can safely close() while traversing /dev/fd before close() modifies that directory.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've searched around (close vs setting FD_CLOEXEC) and it seems in general FD_CLOEXEC is preferred for this situation:

Both of those articles I think are really talking about the pre-fork case. I think in our specific situation where we've already called fork (and thus have exactly one thread and no concurrency considerations), and we can guarantee that we will not call fork again, and that we will call exec imminently, there's no practical difference (that I can see).

A more practical reason is I don't think we can safely close() while traversing /dev/fd before close() modifies that directory.

This seems like a great point though!

Copy link

@weissi weissi Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A more practical reason is I don't think we can safely close() while traversing /dev/fd before close() modifies that directory.

That should not happen and doesn't happen in the original code from swift-sdk-generator. How the original code works is:

  1. Get the maximum fd
  2. Loop from 3 to maximum fd and close

That's completely fine. What is not fine (but that's not what the original code does) is to close whilst iterating of course.

}
}
}

offset += entry->d_reclen;
}
}

close(dir_fd);
return highest_fd_so_far;
}
#endif

// This function is only used on systems with Linux kernel 5.9 or lower.
// On newer systems, `close_range` is used instead.
// This function is only used on non-Linux systems.
static int _highest_possibly_open_fd(void) {
#if defined(__linux__)
int hi = _highest_possibly_open_fd_dir_linux("/dev/fd");
if (hi < 0) {
hi = sysconf(_SC_OPEN_MAX);
}
#else
int hi = sysconf(_SC_OPEN_MAX);
#endif
return hi;
return sysconf(_SC_OPEN_MAX);
}

int _subprocess_fork_exec(
Expand Down Expand Up @@ -681,8 +668,8 @@ int _subprocess_fork_exec(
errno = ENOSYS;
#if (__has_include(<linux/close_range.h>) && (!defined(__ANDROID__) || __ANDROID_API__ >= 34)) || defined(__FreeBSD__)
// We must NOT close pipefd[1] for writing errors
rc = close_range(STDERR_FILENO + 1, pipefd[1] - 1, 0);
rc |= close_range(pipefd[1] + 1, ~0U, 0);
rc = close_range(STDERR_FILENO + 1, pipefd[1] - 1, CLOSE_RANGE_CLOEXEC);
rc |= close_range(pipefd[1] + 1, ~0U, CLOSE_RANGE_CLOEXEC);
#elif defined(__OpenBSD__)
// OpenBSD Supports closefrom, but not close_range
// See https://man.openbsd.org/closefrom
Expand All @@ -692,13 +679,22 @@ int _subprocess_fork_exec(
rc = closefrom(pipefd[1] + 1);
#endif
if (rc != 0) {
// close_range failed (or doesn't exist), fall back to close()
for (int fd = STDERR_FILENO + 1; fd <= _highest_possibly_open_fd(); fd++) {
#if defined(__linux__)
_set_cloexec_to_open_fds("/dev/fd");
#else
// close_range failed (or doesn't exist), fall back to setting FD_CLOEXEC
int highest_open_fd = _highest_possibly_open_fd();
for (int fd = STDERR_FILENO + 1; fd <= highest_open_fd; fd++) {
// We must NOT close pipefd[1] for writing errors
if (fd != pipefd[1]) {
close(fd);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

close feels better here and will be cheaper overall. Otherwise you need to do N syscalls to mark as CLOEXEC and later on there's more work necessary to close them. Why not just close?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto to above: it seems the general consensus is to use FD_CLOEXEC instead of close(). We would have to issue N close() calls here, so without close_range, it doesn't really save much.

int flags = fcntl(fd, F_GETFD);
if (flags >= 0) {
// Set FD_CLOEXEC on every open fd so they are closed after exec()
fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
}
}
#endif
}

// Finally, exec
Expand Down
27 changes: 12 additions & 15 deletions Tests/SubprocessTests/IntegrationTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1337,22 +1337,19 @@ extension SubprocessIntegrationTests {
options: .create,
permissions: [.ownerReadWrite, .groupReadWrite]
)
let echoResult = try await outputFile.closeAfter {
let echoResult = try await _run(
setup,
input: .none,
output: .fileDescriptor(
outputFile,
closeAfterSpawningProcess: false
),
error: .fileDescriptor(
outputFile,
closeAfterSpawningProcess: false
)
let echoResult = try await _run(
setup,
input: .none,
output: .fileDescriptor(
outputFile,
closeAfterSpawningProcess: true
),
error: .fileDescriptor(
outputFile,
closeAfterSpawningProcess: true
)
#expect(echoResult.terminationStatus.isSuccess)
return echoResult
}
)
#expect(echoResult.terminationStatus.isSuccess)
let outputData: Data = try Data(
contentsOf: URL(filePath: outputFilePath.string)
)
Expand Down