Skip to content

Commit 4a8f945

Browse files
authored
vminitd: Add memory threshold monitoring (#427)
Add a small bit of logic to monitor whether vminitd goes over a (somewhat arbitrary) memory threshold. On average, when running one container, its usage seems to hover around 25 MiB, so this is mostly meant to catch cases where it exceeds what we consider normal.
1 parent 35ebe36 commit 4a8f945

File tree

4 files changed

+288
-22
lines changed

4 files changed

+288
-22
lines changed

Sources/Containerization/Vminitd.swift

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,24 +54,14 @@ extension Vminitd: VirtualMachineAgent {
5454

5555
try await setenv(key: "PATH", value: LinuxProcessConfiguration.defaultPath)
5656

57+
// Vminitd mounts /proc, /sys, /sys/fs/cgroup and /run automatically.
5758
let mounts: [ContainerizationOCI.Mount] = [
58-
.init(type: "sysfs", source: "sysfs", destination: "/sys"),
5959
.init(type: "tmpfs", source: "tmpfs", destination: "/tmp"),
6060
.init(type: "devpts", source: "devpts", destination: "/dev/pts", options: ["gid=5", "mode=620", "ptmxmode=666"]),
61-
.init(type: "cgroup2", source: "none", destination: "/sys/fs/cgroup"),
6261
]
6362
for mount in mounts {
6463
try await self.mount(mount)
6564
}
66-
67-
// Setup root cg subtree_control.
68-
let data = "+memory +pids +io +cpu +cpuset +hugetlb".data(using: .utf8)!
69-
try await writeFile(
70-
path: "/sys/fs/cgroup/cgroup.subtree_control",
71-
data: data,
72-
flags: .init(),
73-
mode: 0
74-
)
7565
}
7666

7767
public func writeFile(path: String, data: Data, flags: WriteFileFlags, mode: UInt32) async throws {

vminitd/Sources/Cgroup/Cgroup2Manager.swift

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,38 @@ package struct Cgroup2Manager: Sendable {
234234
}
235235
}
236236

237+
/// Writes `bytes` to this cgroup's `memory.high` file.
///
/// A debug message with the cgroup path and the requested value is emitted
/// (when a logger is configured) before the write is attempted.
///
/// - Parameter bytes: The value to write, rendered as a decimal string.
/// - Throws: Any error raised by `Self.writeValue` while writing the file.
package func setMemoryHigh(bytes: UInt64) throws {
    let renderedLimit = String(bytes)

    self.logger?.debug(
        "setting memory.high",
        metadata: [
            "path": "\(self.path.path)",
            "bytes": "\(bytes)",
        ])

    try Self.writeValue(path: self.path, value: renderedLimit, fileName: "memory.high")
}
251+
252+
/// Reads this cgroup's `memory.events` file and returns its counters.
///
/// Keys that are absent from the file are reported as `0`.
///
/// - Returns: A `MemoryEvents` value populated from the `low`, `high`, `max`,
///   `oom` and `oom_kill` keys.
/// - Throws: Any error raised by `readFileContent` while reading the file.
package func getMemoryEvents() throws -> MemoryEvents {
    let counters = parseKeyValuePairs(try readFileContent(fileName: "memory.events"))

    var events = MemoryEvents()
    events.low = counters["low"] ?? 0
    events.high = counters["high"] ?? 0
    events.max = counters["max"] ?? 0
    events.oom = counters["oom"] ?? 0
    events.oomKill = counters["oom_kill"] ?? 0
    return events
}
264+
265+
/// Returns the filesystem path of this cgroup's `memory.events` file.
///
/// The path is built by appending `memory.events` to the cgroup's own path;
/// the file is not opened or checked for existence here.
package func getMemoryEventsPath() -> String {
    let eventsFile = self.path.appending(path: "memory.events")
    return eventsFile.path
}
268+
237269
package func kill() throws {
238270
try Self.writeValue(
239271
path: self.path,
@@ -608,6 +640,28 @@ package struct IOEntry: Sendable {
608640
}
609641
}
610642

643+
/// Counters parsed from a cgroup's `memory.events` file.
///
/// Every counter defaults to zero, so a value can be built from whichever
/// keys were actually present in the file.
package struct MemoryEvents: Sendable {
    /// Value of the `low` key.
    package var low: UInt64
    /// Value of the `high` key.
    package var high: UInt64
    /// Value of the `max` key.
    package var max: UInt64
    /// Value of the `oom` key.
    package var oom: UInt64
    /// Value of the `oom_kill` key.
    package var oomKill: UInt64

    /// Creates a set of counters; any counter left out is zero.
    package init(low: UInt64 = 0, high: UInt64 = 0, max: UInt64 = 0, oom: UInt64 = 0, oomKill: UInt64 = 0) {
        self.oomKill = oomKill
        self.oom = oom
        self.max = max
        self.high = high
        self.low = low
    }
}
664+
611665
extension Cgroup2Manager {
612666
package enum Error: Swift.Error, CustomStringConvertible {
613667
case notCgroup

vminitd/Sources/vminitd/Application.swift

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
// limitations under the License.
1515
//===----------------------------------------------------------------------===//
1616

17+
import Cgroup
1718
import Containerization
1819
import ContainerizationError
1920
import ContainerizationOS
@@ -91,23 +92,86 @@ struct Application {
9192
CZ_set_sub_reaper()
9293
#endif
9394

95+
log.logLevel = .debug
96+
9497
signal(SIGPIPE, SIG_IGN)
9598

96-
// Because the sysctl rpc wouldn't make sense if this didn't always exist, we
97-
// ALWAYS mount /proc.
98-
guard Musl.mount("proc", "/proc", "proc", 0, "") == 0 else {
99-
log.error("failed to mount /proc")
100-
exit(1)
101-
}
102-
guard Musl.mount("tmpfs", "/run", "tmpfs", 0, "") == 0 else {
103-
log.error("failed to mount /run")
104-
exit(1)
99+
log.info("vminitd booting")
100+
101+
// Set of mounts necessary to be mounted prior to taking any RPCs.
102+
// 1. /proc as the sysctl rpc wouldn't make sense if it wasn't there.
103+
// 2. /run as that is where we store container state.
104+
// 3. /sys as we need it for /sys/fs/cgroup
105+
// 4. /sys/fs/cgroup to add the agent to a cgroup, as well as containers later.
106+
let mounts = [
107+
ContainerizationOS.Mount(
108+
type: "proc",
109+
source: "proc",
110+
target: "/proc",
111+
options: []
112+
),
113+
ContainerizationOS.Mount(
114+
type: "tmpfs",
115+
source: "tmpfs",
116+
target: "/run",
117+
options: []
118+
),
119+
ContainerizationOS.Mount(
120+
type: "sysfs",
121+
source: "sysfs",
122+
target: "/sys",
123+
options: []
124+
),
125+
ContainerizationOS.Mount(
126+
type: "cgroup2",
127+
source: "none",
128+
target: "/sys/fs/cgroup",
129+
options: []
130+
),
131+
]
132+
133+
for mnt in mounts {
134+
log.info("mounting \(mnt.target)")
135+
136+
try mnt.mount(createWithPerms: 0o755)
105137
}
106138
try Binfmt.mount()
107139

108-
log.logLevel = .debug
140+
let cgManager = Cgroup2Manager(
141+
group: URL(filePath: "/vminitd"),
142+
logger: log
143+
)
144+
try cgManager.create()
145+
try cgManager.toggleAllAvailableControllers(enable: true)
146+
147+
// Set memory.high threshold to 75 MiB
148+
let threshold: UInt64 = 75 * 1024 * 1024
149+
try cgManager.setMemoryHigh(bytes: threshold)
150+
try cgManager.addProcess(pid: getpid())
151+
152+
let memoryMonitor = try MemoryMonitor(
153+
cgroupManager: cgManager,
154+
threshold: threshold,
155+
logger: log
156+
) { [log] (currentUsage, highMark) in
157+
log.warning(
158+
"vminitd memory threshold exceeded",
159+
metadata: [
160+
"threshold_bytes": "\(threshold)",
161+
"current_bytes": "\(currentUsage)",
162+
"high_events_total": "\(highMark)",
163+
])
164+
}
165+
166+
let t = Thread { [log] in
167+
do {
168+
try memoryMonitor.run()
169+
} catch {
170+
log.error("memory monitor failed: \(error)")
171+
}
172+
}
173+
t.start()
109174

110-
log.info("vminitd booting")
111175
let eg = MultiThreadedEventLoopGroup(numberOfThreads: System.coreCount)
112176
let server = Initd(log: log, group: eg)
113177

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
//===----------------------------------------------------------------------===//
2+
// Copyright © 2025 Apple Inc. and the Containerization project authors.
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// https://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
//===----------------------------------------------------------------------===//
16+
17+
#if os(Linux)
18+
19+
import Cgroup
20+
import Foundation
21+
import Logging
22+
23+
#if canImport(Musl)
24+
import Musl
25+
#elseif canImport(Glibc)
26+
import Glibc
27+
#endif
28+
29+
/// Watches a cgroup's `memory.events` file with inotify and invokes a callback
/// whenever the `high` counter grows.
///
/// The inotify descriptor and its watch are created in `init` and released in
/// `deinit`. `run()` blocks on `read`, so it is meant to be driven from a
/// dedicated thread.
package final class MemoryMonitor: Sendable {
    /// Per-event size, in bytes, used to size the inotify read buffer.
    private static let inotifyEventSize = 0x10

    private let cgroupManager: Cgroup2Manager
    private let threshold: UInt64
    private let logger: Logger
    private let inotifyFd: Int32
    private let watchDescriptor: Int32
    private let onThresholdExceeded: @Sendable (UInt64, UInt64) -> Void

    /// Creates a monitor and registers an inotify watch on the cgroup's
    /// `memory.events` file.
    ///
    /// - Parameters:
    ///   - cgroupManager: The cgroup whose `memory.events` file is watched.
    ///   - threshold: The threshold in bytes; only used for logging here.
    ///   - logger: Logger used for status and OOM messages.
    ///   - onThresholdExceeded: Called with the cgroup's current memory usage
    ///     and the total `high` event count each time that count increases.
    /// - Throws: `Error.inotifyInit` if the inotify instance cannot be created,
    ///   or `Error.inotifyAddWatch` if the watch cannot be registered.
    package init(
        cgroupManager: Cgroup2Manager,
        threshold: UInt64,
        logger: Logger,
        onThresholdExceeded: @escaping @Sendable (UInt64, UInt64) -> Void
    ) throws {
        self.cgroupManager = cgroupManager
        self.threshold = threshold
        self.logger = logger
        self.onThresholdExceeded = onThresholdExceeded

        let fd = inotify_init()
        guard fd != -1 else {
            throw Error.inotifyInit(errno: errno)
        }
        self.inotifyFd = fd

        let eventsPath = cgroupManager.getMemoryEventsPath()
        let wd = inotify_add_watch(
            fd,
            eventsPath,
            UInt32(IN_MODIFY)
        )
        guard wd != -1 else {
            // Save errno first: close() may overwrite it, and the error must
            // describe the inotify_add_watch failure.
            let addWatchErrno = errno
            close(fd)
            throw Error.inotifyAddWatch(errno: addWatchErrno, path: eventsPath)
        }
        self.watchDescriptor = wd
    }

    /// Run the monitoring loop. Call this from a dedicated thread.
    ///
    /// This function blocks until an error occurs. Each time the watched file
    /// is modified it re-reads the counters, invokes `onThresholdExceeded` if
    /// the `high` count grew, and logs an error if the OOM counters grew.
    ///
    /// - Throws: `Error.readMemoryEvents` if the cgroup counters or stats
    ///   cannot be read, or `Error.readFailed` if reading from the inotify
    ///   descriptor fails with anything other than `EINTR`.
    package func run() throws {
        let eventsPath = cgroupManager.getMemoryEventsPath()

        logger.info(
            "Started memory monitoring",
            metadata: [
                "threshold_bytes": "\(threshold)",
                "events_path": "\(eventsPath)",
            ])

        // Baseline for `high`: only increases past the initial count are reported.
        var highCountMax: UInt64
        do {
            highCountMax = try cgroupManager.getMemoryEvents().high
        } catch {
            throw Error.readMemoryEvents(error: error)
        }

        // Last OOM counts that were logged. Starting at zero means OOM events
        // that already exist are reported once on the first wake-up, and
        // afterwards only when a counter grows.
        var reportedOom: UInt64 = 0
        var reportedOomKill: UInt64 = 0

        let bufSize = Self.inotifyEventSize * 10
        var buffer = [UInt8](repeating: 0, count: bufSize)
        while true {
            // Capture errno inside the closure, immediately after read(), so
            // nothing that runs afterwards can change it.
            let (bytesRead, readErrno) = buffer.withUnsafeMutableBytes { ptr -> (Int, Int32) in
                // The buffer is never empty, so baseAddress is non-nil.
                let count = read(inotifyFd, ptr.baseAddress!, bufSize)
                return (count, errno)
            }

            if bytesRead < 0 {
                if readErrno == EINTR {
                    continue
                }
                throw Error.readFailed(errno: readErrno)
            }

            // The inotify payload itself is not inspected; a successful read
            // is only the signal to re-read the counters.
            do {
                let events = try cgroupManager.getMemoryEvents()

                if events.high > highCountMax {
                    highCountMax = events.high

                    let stats = try cgroupManager.stats()
                    let currentUsage = stats.memory?.usage ?? 0

                    onThresholdExceeded(currentUsage, events.high)
                }

                if events.oom > reportedOom || events.oomKill > reportedOomKill {
                    reportedOom = events.oom
                    reportedOomKill = events.oomKill

                    logger.error(
                        "OOM events detected",
                        metadata: [
                            "oom_events": "\(events.oom)",
                            "oom_kill_events": "\(events.oomKill)",
                        ])
                }
            } catch {
                throw Error.readMemoryEvents(error: error)
            }
        }
    }

    deinit {
        inotify_rm_watch(inotifyFd, watchDescriptor)
        close(inotifyFd)
    }
}
135+
136+
extension MemoryMonitor {
    /// Failures raised while setting up or running the memory monitor.
    package enum Error: Swift.Error, CustomStringConvertible {
        /// Creating the inotify instance failed with the given errno.
        case inotifyInit(errno: Int32)
        /// Adding the inotify watch on `path` failed with the given errno.
        case inotifyAddWatch(errno: Int32, path: String)
        /// Reading from the inotify descriptor failed with the given errno.
        case readFailed(errno: Int32)
        /// Reading the cgroup's memory counters failed with the wrapped error.
        case readMemoryEvents(error: Swift.Error)

        /// A human-readable description of the failure.
        package var description: String {
            // Bindings are named `code`/`underlying` so they do not shadow the
            // global `errno`.
            switch self {
            case let .inotifyInit(code):
                return "failed to initialize inotify: errno \(code)"
            case let .inotifyAddWatch(code, watchedPath):
                return "failed to add inotify watch on \(watchedPath): errno \(code)"
            case let .readFailed(code):
                return "failed to read inotify events: errno \(code)"
            case let .readMemoryEvents(underlying):
                return "failed to read memory events: \(underlying)"
            }
        }
    }
}
157+
158+
#endif

0 commit comments

Comments
 (0)