Skip to content

Commit b5afb60

Browse files
committed
rewrite to runc implementation
1 parent dcb99a8 commit b5afb60

File tree

6 files changed

+135
-40
lines changed

6 files changed

+135
-40
lines changed

vminitd/Sources/vmexec/ExecCommand.swift

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import Foundation
2323
import LCShim
2424
import Logging
2525
import Musl
26+
import Glibc
2627

2728
struct ExecCommand: ParsableCommand {
2829
static let configuration = CommandConfiguration(
@@ -96,11 +97,44 @@ struct ExecCommand: ParsableCommand {
9697
throw App.Errno(stage: "setsid()")
9798
}
9899

100+
var hostFd: Int32 = -1
101+
102+
if process.terminal {
103+
hostFd = 0
104+
var containerFd: Int32 = 0
105+
var ws = winsize(ws_row: 40, ws_col: 120, ws_xpixel: 0, ws_ypixel: 0)
106+
guard openpty(&hostFd, &containerFd, nil, nil, &ws) == 0 else {
107+
throw App.Errno(stage: "openpty()")
108+
}
109+
110+
guard dup3(containerFd, 0, 0) != -1 else {
111+
throw App.Errno(stage: "dup3(slave->stdin)")
112+
}
113+
guard dup3(containerFd, 1, 0) != -1 else {
114+
throw App.Errno(stage: "dup3(slave->stdout)")
115+
}
116+
guard dup3(containerFd, 2, 0) != -1 else {
117+
throw App.Errno(stage: "dup3(slave->stderr)")
118+
}
119+
_ = close(containerFd)
120+
121+
guard ioctl(0, UInt(TIOCSCTTY), 0) != -1 else {
122+
throw App.Errno(stage: "setctty()")
123+
}
124+
}
125+
126+
var fdCopy = hostFd
127+
var fdData = Data(bytes: &fdCopy, count: MemoryLayout.size(ofValue: fdCopy))
128+
try childPipe.fileHandleForWriting.write(contentsOf: fdData)
129+
try childPipe.fileHandleForWriting.close()
130+
99131
// Apply O_CLOEXEC to all file descriptors except stdio.
100132
// This ensures that all unwanted fds we may have accidentally
101133
// inherited are marked close-on-exec so they stay out of the
102134
// container.
103-
try App.applyCloseExecOnFDs()
135+
let preserve: Set<Int32> = hostFd >= 0 ? [hostFd] : []
136+
try App.applyCloseExecOnFDs(preserve: preserve)
137+
104138
try App.setRLimits(rlimits: process.rlimits)
105139

106140
// set uid, gid, and supplementary groups
@@ -120,9 +154,9 @@ struct ExecCommand: ParsableCommand {
120154
_ = try childPipe.fileHandleForReading.readToEnd()
121155
try childPipe.fileHandleForReading.close()
122156

123-
// send our child's pid to our parent before we exit.
124-
var childPid = processID
125-
let data = Data(bytes: &childPid, count: MemoryLayout.size(ofValue: childPid))
157+
// send our child's pid and the host fd to our parent before we exit.
158+
var payload = [Int32(processID), hostFd]
159+
let data = Data(bytes: &payload, count: MemoryLayout<Int32>.size * 2)
126160

127161
try syncfd.write(contentsOf: data)
128162
try syncfd.close()

vminitd/Sources/vmexec/Mount.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ struct ContainerMount {
6060
if access(console, F_OK) != 0 {
6161
let fd = open(console, O_RDWR | O_CREAT, mode_t(UInt16(0o600)))
6262
if fd == -1 {
63-
throw App.erno(stage: "open(/dev/console)")
63+
throw App.Errno(stage: "open(/dev/console)")
6464
}
6565
close(fd)
6666
}

vminitd/Sources/vmexec/RunCommand.swift

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import Foundation
2323
import LCShim
2424
import Logging
2525
import Musl
26+
import Glibc
2627

2728
struct RunCommand: ParsableCommand {
2829
static let configuration = CommandConfiguration(
@@ -95,6 +96,32 @@ struct RunCommand: ParsableCommand {
9596

9697
try childRootSetup(rootfs: root, mounts: spec.mounts, log: log, process: process)
9798

99+
var hostFd: Int32 = -1
100+
101+
if process.terminal {
102+
hostFd = 0
103+
var containerFd: Int32 = 0
104+
var ws = winsize(ws_row: 40, ws_col: 120, ws_xpixel: 0, ws_ypixel: 0)
105+
guard openpty(&hostFd, &containerFd, nil, nil, &ws) == 0 else {
106+
throw App.Errno(stage: "openpty()")
107+
}
108+
109+
guard dup3(containerFd, 0, 0) != -1 else {
110+
throw App.Errno(stage: "dup3(slave->stdin)")
111+
}
112+
guard dup3(containerFd, 1, 0) != -1 else {
113+
throw App.Errno(stage: "dup3(slave->stdout)")
114+
}
115+
guard dup3(containerFd, 2, 0) != -1 else {
116+
throw App.Errno(stage: "dup3(slave->stderr)")
117+
}
118+
_ = close(containerFd)
119+
120+
guard ioctl(0, UInt(TIOCSCTTY), 0) != -1 else {
121+
throw App.Errno(stage: "setctty()")
122+
}
123+
}
124+
98125
if !spec.hostname.isEmpty {
99126
let errCode = spec.hostname.withCString { ptr in
100127
Musl.sethostname(ptr, spec.hostname.count)
@@ -104,11 +131,17 @@ struct RunCommand: ParsableCommand {
104131
}
105132
}
106133

134+
var fdCopy = hostFd
135+
var fdData = Data(bytes: &fdCopy, count: MemoryLayout.size(ofValue: fdCopy))
136+
try childPipe.fileHandleForWriting.write(contentsOf: fdData)
137+
try childPipe.fileHandleForWriting.close()
138+
107139
// Apply O_CLOEXEC to all file descriptors except stdio.
108140
// This ensures that all unwanted fds we may have accidentally
109141
// inherited are marked close-on-exec so they stay out of the
110142
// container.
111-
try App.applyCloseExecOnFDs()
143+
let preserve: Set<Int32> = hostFd >= 0 ? [hostFd] : []
144+
try App.applyCloseExecOnFDs(preserve: preserve)
112145

113146
try App.setRLimits(rlimits: process.rlimits)
114147

@@ -129,9 +162,9 @@ struct RunCommand: ParsableCommand {
129162
_ = try childPipe.fileHandleForReading.readToEnd()
130163
try childPipe.fileHandleForReading.close()
131164

132-
// send our child's pid to our parent before we exit.
133-
var childPid = processID
134-
let data = Data(bytes: &childPid, count: MemoryLayout.size(ofValue: childPid))
165+
// send our child's pid and the host fd to our parent before we exit.
166+
var payload = [Int32(processID), hostFd]
167+
let data = Data(bytes: &payload, count: MemoryLayout<Int32>.size * 2)
135168

136169
try syncfd.write(contentsOf: data)
137170
try syncfd.close()

vminitd/Sources/vmexec/vmexec.swift

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ struct App: ParsableCommand {
5151
}
5252

5353
extension App {
54-
/// Applies O_CLOEXEC to all file descriptors currently open for
55-
/// the process except the stdio fd values
56-
static func applyCloseExecOnFDs() throws {
54+
/// Applies O_CLOEXEC to all file descriptors currently open for the
55+
/// process except the stdio fd values and those in the preserve set
56+
static func applyCloseExecOnFDs(preserve: Set<Int32> = []) throws {
5757
let minFD = 2 // stdin, stdout, stderr should be preserved
5858

5959
let fdList = try FileManager.default.contentsOfDirectory(atPath: "/proc/self/fd")
@@ -65,6 +65,9 @@ extension App {
6565
if fd <= minFD {
6666
continue
6767
}
68+
if preserve.contains(Int32(fd)) {
69+
continue
70+
}
6871

6972
_ = fcntl(Int32(fd), F_SETFD, FD_CLOEXEC)
7073
}

vminitd/Sources/vminitd/ManagedProcess.swift

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,6 @@ final class ManagedProcess: Sendable {
118118
)
119119
}
120120

121-
log.info("starting io")
122-
123-
// Setup IO early. We expect the host to be listening already.
124-
try io.start()
125-
126121
self.process = process
127122
self.lock = Mutex(State(io: io))
128123
}
@@ -133,6 +128,10 @@ extension ManagedProcess {
133128
try self.lock.withLock {
134129
log.debug("starting managed process")
135130

131+
if !($0.io is TerminalIO) {
132+
try $0.io.start()
133+
}
134+
136135
// Start the underlying process.
137136
try process.start()
138137

@@ -144,13 +143,28 @@ extension ManagedProcess {
144143
throw ContainerizationError(.internalError, message: "no pid data from sync pipe")
145144
}
146145

146+
guard piddata.count >= MemoryLayout<Int32>.size else {
147+
throw ContainerizationError(.internalError, message: "invalid payload")
148+
}
149+
147150
let i = piddata.withUnsafeBytes { ptr in
148151
ptr.load(as: Int32.self)
149152
}
150153

151-
log.info("got back pid data \(i)")
154+
var fd: Int32 = -1
155+
if piddata.count >= MemoryLayout<Int32>.size * 2 {
156+
fd = piddata.withUnsafeBytes { ptr in
157+
ptr.load(fromByteOffset: MemoryLayout<Int32>.size, as: Int32.self)
158+
}
159+
}
160+
161+
log.info("got back pid data \(i), fd \(fd)")
152162
$0.pid = i
153163

164+
if let terminalIO = $0.io as? TerminalIO, fd != -1 {
165+
try terminalIO.attach(pid: i, fd: fd)
166+
}
167+
154168
log.debug(
155169
"started managed process",
156170
metadata: [

vminitd/Sources/vminitd/TerminalIO.swift

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ import ContainerizationOS
1919
import Foundation
2020
import Logging
2121
import SendableProperty
22+
import Glibc
2223

2324
final class TerminalIO: ManagedProcess.IO & Sendable {
24-
private let parent: Terminal
25-
private let child: Terminal
25+
private let parent: Terminal? = nil
2626
private let log: Logger?
2727

2828
private let stdio: HostStdio
@@ -36,30 +36,43 @@ final class TerminalIO: ManagedProcess.IO & Sendable {
3636
stdio: HostStdio,
3737
log: Logger?
3838
) throws {
39-
let pair = try Terminal.create()
40-
self.parent = pair.parent
41-
self.child = pair.child
4239
self.stdio = stdio
4340
self.log = log
4441

45-
let ptyHandle = child.handle
46-
let useHandles = stdio.stdin != nil || stdio.stdout != nil
47-
// We currently set stdin to the controlling terminal always, so
48-
// it must be a valid pty descriptor.
49-
process.stdin = useHandles ? ptyHandle : nil
50-
51-
let stdoutHandle = useHandles ? ptyHandle : nil
52-
process.stdout = stdoutHandle
53-
process.stderr = stdoutHandle
42+
process.stdin = nil
43+
process.stdout = nil
44+
process.stderr = nil
5445
}
5546

5647
func resize(size: Terminal.Size) throws {
5748
if self.stdio.stdin != nil {
58-
try parent.resize(size: size)
49+
try parent?.resize(size: size)
5950
}
6051
}
6152

62-
func start() throws {
53+
/// Intentionally empty: ManagedProcess skips calling `start()` on a
/// `TerminalIO`, and the relays are wired up by `attach(pid:fd:)` instead.
func start() throws {}
54+
55+
/// Attaches this IO to the pty host descriptor opened by the container process.
///
/// Opens a pidfd for `pid`, duplicates descriptor `fd` out of that process
/// with `pidfd_getfd`, and starts the stdio relays on the duplicate.
///
/// - Parameters:
///   - pid: Process that owns the pty host descriptor.
///   - fd: Descriptor number of the pty host side, as numbered inside `pid`.
/// - Throws: `POSIXError.fromErrno()` if `pidfd_open` or `pidfd_getfd` fails,
///   plus anything thrown by `Terminal(descriptor:setInitState:)` or
///   `setupRelays(fd:)`.
func attach(pid: Int32, fd: Int32) throws {
    // NOTE(review): Swift normally marks C variadic functions such as
    // syscall(2) unavailable — confirm this builds or route it through a C shim.
    let pidFd = Glibc.syscall(Int(SYS_pidfd_open), pid, 0)
    guard pidFd != -1 else {
        throw POSIXError.fromErrno()
    }

    // pidfd_getfd takes (pidfd, targetfd, flags): the second argument is the
    // descriptor number inside the target process, not its pid.
    let hostFd = Glibc.syscall(Int(SYS_pidfd_getfd), pidFd, fd, 0)
    guard hostFd != -1 else {
        // Capture errno before close() has a chance to overwrite it.
        let error = POSIXError.fromErrno()
        _ = Foundation.close(Int32(pidFd))
        throw error
    }

    // The pidfd is only needed for the duplication; failing to close it is
    // logged but does not abort the attach.
    if Foundation.close(Int32(pidFd)) == -1 {
        self.log?.error("failed to close pidfd: \(POSIXError.fromErrno())")
    }

    let fdDup = Int32(hostFd)
    // NOTE(review): no `terminal` property is visible in this diff; the class
    // only declares an immutable `parent: Terminal? = nil`. Confirm where the
    // terminal is stored so resize()/close() can reach it.
    self.terminal = try Terminal(descriptor: fdDup, setInitState: false)
    try self.setupRelays(fd: fdDup)
}
74+
75+
private func setupRelays(fd: Int32) throws {
6376
if let stdinPort = self.stdio.stdin {
6477
let type = VsockType(
6578
port: stdinPort,
@@ -71,7 +84,7 @@ final class TerminalIO: ManagedProcess.IO & Sendable {
7184

7285
try relay(
7386
readFromFd: stdinSocket.fileDescriptor,
74-
writeToFd: self.parent.handle.fileDescriptor
87+
writeToFd: fd
7588
)
7689
}
7790

@@ -85,7 +98,7 @@ final class TerminalIO: ManagedProcess.IO & Sendable {
8598
self.stdoutSocket = stdoutSocket
8699

87100
try relay(
88-
readFromFd: self.parent.handle.fileDescriptor,
101+
readFromFd: fd,
89102
writeToFd: stdoutSocket.fileDescriptor
90103
)
91104
}
@@ -157,10 +170,8 @@ final class TerminalIO: ManagedProcess.IO & Sendable {
157170
}
158171

159172
func close() throws {
160-
try parent.close()
173+
try parent?.close()
161174
}
162175

163-
func closeAfterExec() throws {
164-
try child.close()
165-
}
176+
/// Intentionally empty: this type's initializer sets the process stdio handles
/// to nil and creates no pty pair, so there is nothing to close after exec.
func closeAfterExec() throws {}
166177
}

0 commit comments

Comments
 (0)