Skip to content

Commit e8aff29

Browse files
authored
Cgroup2Manager: Fix cgroup deletions (#439)
If there's any nested cgroups in the one we made for the container (commonly seen for systemd images) removeItem didn't seem to be having a grand time, even though it states it should do recursive removals. Lets roll our own, and have a small EBUSY/EAGAIN retry loop as well. This fixes LinuxContainer.stop() for any containers with nested cgs. Context: apple/container#928
1 parent bb0cd39 commit e8aff29

File tree

2 files changed

+78
-2
lines changed

2 files changed

+78
-2
lines changed

vminitd/Sources/Cgroup/Cgroup2Manager.swift

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,44 @@ package struct Cgroup2Manager: Sendable {
285285
if force {
286286
try self.kill()
287287
}
288-
try FileManager.default.removeItem(at: self.path)
288+
289+
// Recursively remove child cgroups first
290+
try removeChildCgroups(at: self.path, force: force)
291+
292+
let result = rmdir(self.path.path)
293+
if result != 0 {
294+
throw Error.errno(errno: errno, message: "failed to remove cgroup directory \(self.path.path)")
295+
}
296+
}
297+
298+
private func removeChildCgroups(at path: URL, force: Bool) throws {
299+
let fileManager = FileManager.default
300+
301+
guard let contents = try? fileManager.contentsOfDirectory(atPath: path.path) else {
302+
return
303+
}
304+
305+
// Remove child directories (potential nested cgroups) first
306+
for item in contents {
307+
let childPath = path.appending(path: item)
308+
var isDirectory: ObjCBool = false
309+
310+
if fileManager.fileExists(atPath: childPath.path, isDirectory: &isDirectory) && isDirectory.boolValue {
311+
if force {
312+
try Self.writeValue(
313+
path: childPath,
314+
value: "1",
315+
fileName: Self.killFile
316+
)
317+
}
318+
319+
try removeChildCgroups(at: childPath, force: force)
320+
let result = rmdir(childPath.path)
321+
if result != 0 {
322+
throw Error.errno(errno: errno, message: "failed to remove child cgroup \(childPath.path)")
323+
}
324+
}
325+
}
289326
}
290327

291328
package func stats() throws -> Cgroup2Stats {

vminitd/Sources/vminitd/ManagedContainer.swift

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,45 @@ actor ManagedContainer {
110110
}
111111

112112
extension ManagedContainer {
113+
// removeCgroupWithRetry will remove a cgroup path handling EAGAIN and EBUSY errors and
114+
// retrying the remove after an exponential timeout
115+
private func removeCgroupWithRetry() async throws {
116+
var delay = 10 // 10ms
117+
let maxRetries = 5
118+
119+
for i in 0..<maxRetries {
120+
if i != 0 {
121+
try await Task.sleep(for: .milliseconds(delay))
122+
delay *= 2
123+
}
124+
125+
do {
126+
try self.cgroupManager.delete(force: true)
127+
return
128+
} catch let error as Cgroup2Manager.Error {
129+
guard case .errno(let errnoValue, let message) = error,
130+
errnoValue == EBUSY || errnoValue == EAGAIN
131+
else {
132+
throw error
133+
}
134+
self.log.warning(
135+
"cgroup deletion failed with EBUSY/EAGAIN, retrying",
136+
metadata: [
137+
"attempt": "\(i + 1)",
138+
"delay": "\(delay)",
139+
"errno": "\(errnoValue)",
140+
"context": "\(message)",
141+
])
142+
continue
143+
}
144+
}
145+
146+
throw ContainerizationError(
147+
.internalError,
148+
message: "cgroups: unable to remove cgroup after \(maxRetries) retries"
149+
)
150+
}
151+
113152
private func ensureExecExists(_ id: String) throws {
114153
if self.execs[id] == nil {
115154
throw ContainerizationError(
@@ -184,7 +223,7 @@ extension ManagedContainer {
184223
// Delete the bundle and cgroup
185224
try self.bundle.delete()
186225
if self.needsCgroupCleanup {
187-
try self.cgroupManager.delete(force: true)
226+
try await self.removeCgroupWithRetry()
188227
}
189228
}
190229

0 commit comments

Comments
 (0)