Skip to content

Commit dd842c4

Browse files
sercher authored and Paul Hohensee committed
8349988: Change cgroup version detection logic to not depend on /proc/cgroups
8347811: Container detection code for cgroups v2 should use cgroup.controllers
Reviewed-by: fitzsim
Backport-of: 9c5ed23eac7470f56d498e9c4d3c51c2f80fd571
1 parent 7ef2dfd commit dd842c4

File tree

6 files changed

+362
-94
lines changed

6 files changed

+362
-94
lines changed

src/hotspot/os/linux/cgroupSubsystem_linux.cpp

Lines changed: 131 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <string.h>
2626
#include <math.h>
2727
#include <errno.h>
28+
#include <sys/vfs.h>
2829
#include "cgroupSubsystem_linux.hpp"
2930
#include "cgroupV1Subsystem_linux.hpp"
3031
#include "cgroupV2Subsystem_linux.hpp"
@@ -36,8 +37,26 @@
3637
#include "runtime/os.hpp"
3738
#include "utilities/globalDefinitions.hpp"
3839

40+
// Inlined from <linux/magic.h> for portability.
41+
#ifndef CGROUP2_SUPER_MAGIC
42+
# define CGROUP2_SUPER_MAGIC 0x63677270
43+
#endif
44+
3945
// controller names have to match the *_IDX indices
4046
static const char* cg_controller_name[] = { "cpuset", "cpu", "cpuacct", "memory", "pids" };
47+
// Maps a controller name to its *_IDX constant.
//
// The name is compared with strcmp, so only an exact match on "cpuset",
// "cpu", "memory" or "pids" is recognized; every other string returns -1.
// "cpuacct" is listed in cg_controller_name but has no branch here, so it
// also returns -1.
//
// name: NUL-terminated controller name; it is passed straight to strcmp,
//       so it must not be null.
// Returns CPUSET_IDX, CPU_IDX, MEMORY_IDX or PIDS_IDX on a match, -1 otherwise.
static inline int cg_v2_controller_index(const char* name) {
48+
if (strcmp(name, "cpuset") == 0) {
49+
return CPUSET_IDX;
50+
} else if (strcmp(name, "cpu") == 0) {
51+
return CPU_IDX;
52+
} else if (strcmp(name, "memory") == 0) {
53+
return MEMORY_IDX;
54+
} else if (strcmp(name, "pids") == 0) {
55+
return PIDS_IDX;
56+
} else {
57+
return -1;
58+
}
59+
}
4160

4261
CgroupSubsystem* CgroupSubsystemFactory::create() {
4362
CgroupV1MemoryController* memory = nullptr;
@@ -48,10 +67,25 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
4867
CgroupInfo cg_infos[CG_INFO_LENGTH];
4968
u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
5069
const char* proc_cgroups = "/proc/cgroups";
70+
const char* sys_fs_cgroup_cgroup_controllers = "/sys/fs/cgroup/cgroup.controllers";
71+
const char* controllers_file = proc_cgroups;
5172
const char* proc_self_cgroup = "/proc/self/cgroup";
5273
const char* proc_self_mountinfo = "/proc/self/mountinfo";
74+
const char* sys_fs_cgroup = "/sys/fs/cgroup";
75+
struct statfs fsstat = {};
76+
bool cgroups_v2_enabled = false;
77+
78+
// Assume cgroups v2 is usable by the JDK iff /sys/fs/cgroup has the cgroup v2
79+
// file system magic. If it does not then heuristics are required to determine
80+
// if cgroups v1 is usable or not.
81+
if (statfs(sys_fs_cgroup, &fsstat) != -1) {
82+
cgroups_v2_enabled = (fsstat.f_type == CGROUP2_SUPER_MAGIC);
83+
if (cgroups_v2_enabled) {
84+
controllers_file = sys_fs_cgroup_cgroup_controllers;
85+
}
86+
}
5387

54-
bool valid_cgroup = determine_type(cg_infos, proc_cgroups, proc_self_cgroup, proc_self_mountinfo, &cg_type_flags);
88+
bool valid_cgroup = determine_type(cg_infos, cgroups_v2_enabled, controllers_file, proc_self_cgroup, proc_self_mountinfo, &cg_type_flags);
5589

5690
if (!valid_cgroup) {
5791
// Could not detect cgroup type
@@ -216,84 +250,118 @@ static inline bool match_mount_info_line(char* line,
216250
}
217251

218252
bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
219-
const char* proc_cgroups,
253+
bool cgroups_v2_enabled,
254+
const char* controllers_file,
220255
const char* proc_self_cgroup,
221256
const char* proc_self_mountinfo,
222257
u1* flags) {
223258
FILE *mntinfo = nullptr;
224-
FILE *cgroups = nullptr;
259+
FILE* controllers = nullptr;
225260
FILE *cgroup = nullptr;
226261
char buf[MAXPATHLEN+1];
227262
char *p;
228-
bool is_cgroupsV2;
229263
// true iff all required controllers, memory, cpu, cpuacct are enabled
230264
// at the kernel level.
231265
// pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
232266
// cpuset might not be enabled on newer Linux distros (Fedora 41)
233-
bool all_required_controllers_enabled;
267+
bool all_required_controllers_enabled = true;
234268

235-
/*
236-
* Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
237-
*
238-
* For cgroups v1 hierarchy (hybrid or legacy), cpu, cpuacct, cpuset, memory controllers
239-
* must have non-zero for the hierarchy ID field and relevant controllers mounted.
240-
* Conversely, for cgroups v2 (unified hierarchy), cpu, cpuacct, cpuset, memory
241-
* controllers must have hierarchy ID 0 and the unified controller mounted.
242-
*/
243-
cgroups = os::fopen(proc_cgroups, "r");
244-
if (cgroups == nullptr) {
245-
log_debug(os, container)("Can't open %s, %s", proc_cgroups, os::strerror(errno));
269+
// If cgroups v2 is enabled, open /sys/fs/cgroup/cgroup.controllers. If not, open /proc/cgroups.
270+
controllers = os::fopen(controllers_file, "r");
271+
if (controllers == nullptr) {
272+
log_debug(os, container)("Can't open %s, %s", controllers_file, os::strerror(errno));
246273
*flags = INVALID_CGROUPS_GENERIC;
247274
return false;
248275
}
249276

250-
while ((p = fgets(buf, MAXPATHLEN, cgroups)) != nullptr) {
251-
char name[MAXPATHLEN+1];
252-
int hierarchy_id;
253-
int enabled;
254-
255-
// Format of /proc/cgroups documented via man 7 cgroups
256-
if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
257-
continue;
277+
if (cgroups_v2_enabled) {
278+
/*
279+
* cgroups v2 is enabled. For cgroups v2 (unified hierarchy), the cpu and memory
280+
* controllers must be enabled.
281+
*/
282+
if ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) {
283+
char* controller = nullptr;
284+
#define ISSPACE_CHARS " \n\t\r\f\v"
285+
while ((controller = strsep(&p, ISSPACE_CHARS)) != nullptr) {
286+
int i;
287+
if ((i = cg_v2_controller_index(controller)) != -1) {
288+
cg_infos[i]._name = os::strdup(controller);
289+
cg_infos[i]._enabled = true;
290+
if (i == PIDS_IDX || i == CPUSET_IDX) {
291+
log_debug(os, container)("Detected optional %s controller entry in %s",
292+
controller, controllers_file);
293+
}
294+
}
295+
}
296+
#undef ISSPACE_CHARS
297+
} else {
298+
log_debug(os, container)("Can't read %s, %s", controllers_file, os::strerror(errno));
299+
*flags = INVALID_CGROUPS_V2;
300+
return false;
258301
}
259-
if (strcmp(name, "memory") == 0) {
260-
cg_infos[MEMORY_IDX]._name = os::strdup(name);
261-
cg_infos[MEMORY_IDX]._hierarchy_id = hierarchy_id;
262-
cg_infos[MEMORY_IDX]._enabled = (enabled == 1);
263-
} else if (strcmp(name, "cpuset") == 0) {
264-
log_debug(os, container)("Detected optional cpuset controller entry in %s", proc_cgroups);
265-
cg_infos[CPUSET_IDX]._name = os::strdup(name);
266-
cg_infos[CPUSET_IDX]._hierarchy_id = hierarchy_id;
267-
cg_infos[CPUSET_IDX]._enabled = (enabled == 1);
268-
} else if (strcmp(name, "cpu") == 0) {
269-
cg_infos[CPU_IDX]._name = os::strdup(name);
270-
cg_infos[CPU_IDX]._hierarchy_id = hierarchy_id;
271-
cg_infos[CPU_IDX]._enabled = (enabled == 1);
272-
} else if (strcmp(name, "cpuacct") == 0) {
273-
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
274-
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
275-
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
276-
} else if (strcmp(name, "pids") == 0) {
277-
log_debug(os, container)("Detected optional pids controller entry in %s", proc_cgroups);
278-
cg_infos[PIDS_IDX]._name = os::strdup(name);
279-
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
280-
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
302+
for (int i = 0; i < CG_INFO_LENGTH; i++) {
303+
// cgroups v2 does not have cpuacct.
304+
if (i == CPUACCT_IDX) {
305+
continue;
306+
}
307+
// For cgroups v2, cpuacct is rolled into cpu, and the pids and cpuset controllers
308+
// are optional; the remaining controllers, cpu and memory, are required.
309+
if (i == CPU_IDX || i == MEMORY_IDX) {
310+
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
311+
}
312+
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
313+
log_debug(os, container)("controller %s is not enabled", cg_controller_name[i]);
314+
}
281315
}
282-
}
283-
fclose(cgroups);
316+
} else {
317+
/*
318+
* The /sys/fs/cgroup filesystem magic hint suggests we have cg v1. Read /proc/cgroups; for
319+
* cgroups v1 hierarchy (hybrid or legacy), cpu, cpuacct, cpuset, and memory controllers must
320+
* have non-zero for the hierarchy ID field and relevant controllers mounted.
321+
*/
322+
while ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) {
323+
char name[MAXPATHLEN+1];
324+
int hierarchy_id;
325+
int enabled;
284326

285-
is_cgroupsV2 = true;
286-
all_required_controllers_enabled = true;
287-
for (int i = 0; i < CG_INFO_LENGTH; i++) {
288-
// pids and cpuset controllers are optional. All other controllers are required
289-
if (i != PIDS_IDX && i != CPUSET_IDX) {
290-
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
291-
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
327+
// Format of /proc/cgroups documented via man 7 cgroups
328+
if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
329+
continue;
330+
}
331+
if (strcmp(name, "memory") == 0) {
332+
cg_infos[MEMORY_IDX]._name = os::strdup(name);
333+
cg_infos[MEMORY_IDX]._hierarchy_id = hierarchy_id;
334+
cg_infos[MEMORY_IDX]._enabled = (enabled == 1);
335+
} else if (strcmp(name, "cpuset") == 0) {
336+
cg_infos[CPUSET_IDX]._name = os::strdup(name);
337+
cg_infos[CPUSET_IDX]._hierarchy_id = hierarchy_id;
338+
cg_infos[CPUSET_IDX]._enabled = (enabled == 1);
339+
} else if (strcmp(name, "cpu") == 0) {
340+
cg_infos[CPU_IDX]._name = os::strdup(name);
341+
cg_infos[CPU_IDX]._hierarchy_id = hierarchy_id;
342+
cg_infos[CPU_IDX]._enabled = (enabled == 1);
343+
} else if (strcmp(name, "cpuacct") == 0) {
344+
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
345+
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
346+
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
347+
} else if (strcmp(name, "pids") == 0) {
348+
log_debug(os, container)("Detected optional pids controller entry in %s", controllers_file);
349+
cg_infos[PIDS_IDX]._name = os::strdup(name);
350+
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
351+
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
352+
}
292353
}
293-
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
294-
log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
354+
for (int i = 0; i < CG_INFO_LENGTH; i++) {
355+
// pids controller is optional. All other controllers are required
356+
if (i != PIDS_IDX) {
357+
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
358+
}
359+
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
360+
log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
361+
}
295362
}
296363
}
364+
fclose(controllers);
297365

298366
if (!all_required_controllers_enabled) {
299367
// one or more required controllers disabled, disable container support
@@ -335,7 +403,7 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
335403
continue;
336404
}
337405

338-
while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != nullptr) {
406+
while (!cgroups_v2_enabled && (token = strsep(&controllers, ",")) != nullptr) {
339407
if (strcmp(token, "memory") == 0) {
340408
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
341409
cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
@@ -346,15 +414,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
346414
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
347415
cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
348416
} else if (strcmp(token, "cpuacct") == 0) {
349-
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacc");
417+
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacct");
350418
cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
351419
} else if (strcmp(token, "pids") == 0) {
352420
assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
353421
cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
354422
cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
355423
}
356424
}
357-
if (is_cgroupsV2) {
425+
if (cgroups_v2_enabled) {
358426
// On some systems we have mixed cgroups v1 and cgroups v2 controllers (e.g. freezer on cg1 and
359427
// all relevant controllers on cg2). Only set the cgroup path when we see a hierarchy id of 0.
360428
if (hierarchy_id != 0) {
@@ -390,14 +458,14 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
390458
char *cptr = tmpcgroups;
391459
char *token;
392460

393-
/* Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so
461+
/* Cgroup v2 relevant info. We only look for the _mount_path iff cgroups_v2_enabled so
394462
* as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1
395463
* block in the hybrid case.
396464
*
397465
* We collect the read only mount option in the cgroup infos so as to have that
398466
* info ready when determining is_containerized().
399467
*/
400-
if (is_cgroupsV2 && match_mount_info_line(p,
468+
if (cgroups_v2_enabled && match_mount_info_line(p,
401469
tmproot,
402470
tmpmount,
403471
mount_opts,
@@ -476,7 +544,7 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
476544
return false;
477545
}
478546

479-
if (is_cgroupsV2) {
547+
if (cgroups_v2_enabled) {
480548
if (!cgroupv2_mount_point_found) {
481549
log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
482550
cleanup(cg_infos);

src/hotspot/os/linux/cgroupSubsystem_linux.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,8 @@ class CgroupSubsystemFactory: AllStatic {
322322
// Determine the cgroup type (version 1 or version 2), given
323323
// relevant paths to files. Sets 'flags' accordingly.
324324
static bool determine_type(CgroupInfo* cg_infos,
325-
const char* proc_cgroups,
325+
bool cgroups_v2_enabled,
326+
const char* controllers_file,
326327
const char* proc_self_cgroup,
327328
const char* proc_self_mountinfo,
328329
u1* flags);

src/hotspot/share/prims/whitebox.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,12 +1038,13 @@ WB_ENTRY(jboolean, WB_TestSetForceInlineMethod(JNIEnv* env, jobject o, jobject m
10381038
WB_END
10391039

10401040
#ifdef LINUX
1041-
bool WhiteBox::validate_cgroup(const char* proc_cgroups,
1041+
bool WhiteBox::validate_cgroup(bool cgroups_v2_enabled,
1042+
const char* controllers_file,
10421043
const char* proc_self_cgroup,
10431044
const char* proc_self_mountinfo,
10441045
u1* cg_flags) {
10451046
CgroupInfo cg_infos[CG_INFO_LENGTH];
1046-
return CgroupSubsystemFactory::determine_type(cg_infos, proc_cgroups,
1047+
return CgroupSubsystemFactory::determine_type(cg_infos, cgroups_v2_enabled, controllers_file,
10471048
proc_self_cgroup,
10481049
proc_self_mountinfo, cg_flags);
10491050
}
@@ -2419,23 +2420,24 @@ WB_END
24192420

24202421
WB_ENTRY(jint, WB_ValidateCgroup(JNIEnv* env,
24212422
jobject o,
2422-
jstring proc_cgroups,
2423+
jboolean cgroups_v2_enabled,
2424+
jstring controllers_file,
24232425
jstring proc_self_cgroup,
24242426
jstring proc_self_mountinfo))
24252427
jint ret = 0;
24262428
#ifdef LINUX
24272429
ThreadToNativeFromVM ttnfv(thread);
2428-
const char* p_cgroups = env->GetStringUTFChars(proc_cgroups, nullptr);
2430+
const char* c_file = env->GetStringUTFChars(controllers_file, nullptr);
24292431
CHECK_JNI_EXCEPTION_(env, 0);
24302432
const char* p_s_cgroup = env->GetStringUTFChars(proc_self_cgroup, nullptr);
24312433
CHECK_JNI_EXCEPTION_(env, 0);
24322434
const char* p_s_mountinfo = env->GetStringUTFChars(proc_self_mountinfo, nullptr);
24332435
CHECK_JNI_EXCEPTION_(env, 0);
24342436
u1 cg_type_flags = 0;
24352437
// This sets cg_type_flags
2436-
WhiteBox::validate_cgroup(p_cgroups, p_s_cgroup, p_s_mountinfo, &cg_type_flags);
2438+
WhiteBox::validate_cgroup(cgroups_v2_enabled, c_file, p_s_cgroup, p_s_mountinfo, &cg_type_flags);
24372439
ret = (jint)cg_type_flags;
2438-
env->ReleaseStringUTFChars(proc_cgroups, p_cgroups);
2440+
env->ReleaseStringUTFChars(controllers_file, c_file);
24392441
env->ReleaseStringUTFChars(proc_self_cgroup, p_s_cgroup);
24402442
env->ReleaseStringUTFChars(proc_self_mountinfo, p_s_mountinfo);
24412443
#endif
@@ -2853,7 +2855,7 @@ static JNINativeMethod methods[] = {
28532855
(void*)&WB_CheckLibSpecifiesNoexecstack},
28542856
{CC"isContainerized", CC"()Z", (void*)&WB_IsContainerized },
28552857
{CC"validateCgroup",
2856-
CC"(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
2858+
CC"(ZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;)I",
28572859
(void*)&WB_ValidateCgroup },
28582860
{CC"hostPhysicalMemory", CC"()J", (void*)&WB_HostPhysicalMemory },
28592861
{CC"hostPhysicalSwap", CC"()J", (void*)&WB_HostPhysicalSwap },

src/hotspot/share/prims/whitebox.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class WhiteBox : public AllStatic {
6969
static void register_extended(JNIEnv* env, jclass wbclass, JavaThread* thread);
7070
static bool compile_method(Method* method, int comp_level, int bci, JavaThread* THREAD);
7171
#ifdef LINUX
72-
static bool validate_cgroup(const char* proc_cgroups, const char* proc_self_cgroup, const char* proc_self_mountinfo, u1* cg_flags);
72+
static bool validate_cgroup(bool cgroups_v2_enabled, const char* controllers_file, const char* proc_self_cgroup, const char* proc_self_mountinfo, u1* cg_flags);
7373
#endif
7474
// provide info about enabling of Address Sanitizer / Undefined Behavior Sanitizer
7575
static bool is_asan_enabled();

0 commit comments

Comments
 (0)