From 860a7f5fc06d32fc4d6f5fd45e740eabb5b3bd3f Mon Sep 17 00:00:00 2001 From: Tobias Werth Date: Sun, 5 Oct 2025 17:17:37 +0200 Subject: [PATCH] Remove support for cgroup v1. All relevant environments (Debian, Ubuntu, RedHat, ArchLinux, WSL2, Fedora) have supported and defaulted to cgroup v2 for years, and dropping v1 simplifies runguard, which is a precursor to merging runpipe and runguard. --- doc/manual/install-judgehost.rst | 12 +-- judge/create_cgroups.in | 29 +----- judge/runguard.cc | 147 +++++-------------------------- 3 files changed, 28 insertions(+), 160 deletions(-) diff --git a/doc/manual/install-judgehost.rst b/doc/manual/install-judgehost.rst index 6010cdf608..0b1f63301e 100644 --- a/doc/manual/install-judgehost.rst +++ b/doc/manual/install-judgehost.rst @@ -169,9 +169,9 @@ no separate measures are necessary, and they allow running :ref:`multiple judgedaemons ` on a multi-core machine by using CPU binding. -The judgedaemon needs to run a recent Linux kernel (at least 3.2.0). The -following steps configure cgroups on Debian. Instructions for other -distributions may be different (send us your feedback!). +The judgedaemon needs to run a recent Linux kernel (at least 5.19 or 6.0 or +later). The following steps configure cgroups v2 on Debian. Instructions for +other distributions may be different (send us your feedback!). Edit grub config to add cgroup memory and swap accounting to the boot options. Edit ``/etc/default/grub`` and change the default @@ -181,12 +181,6 @@ Optionally the timings can be made more stable by not letting the OS schedule any other tasks on the same CPU core the judgedaemon is using: ``GRUB_CMDLINE_LINUX_DEFAULT="quiet cgroup_enable=memory swapaccount=1 isolcpus=2"`` -On modern systems where cgroup v2 is available, DOMjudge will try to -use that. This requires kernel versions 5.19 or 6.0 or later to -support reporting peak memory usage. 
If not found, the system will try -to fall back to cgroup v1, but this might require you to add -``systemd.unified_cgroup_hierarchy=0`` to the boot options as well. - You have now configured the system to use cgroups. To create the actual cgroups that DOMjudge will use you need to run:: diff --git a/judge/create_cgroups.in b/judge/create_cgroups.in index 8e3ed7ad2f..9a5a339665 100755 --- a/judge/create_cgroups.in +++ b/judge/create_cgroups.in @@ -25,8 +25,7 @@ if [ "$fs_type" = "cgroup2" ]; then major=$(echo "$kernel_version" | cut -d '.' -f 1) minor=$(echo "$kernel_version" | cut -d '.' -f 2) if [ "$major" -lt 5 ] || { [ "$major" -eq 5 ] && [ "$minor" -lt 19 ]; }; then - cgroup_error_and_usage "Error: kernel ($kernel_version) is too old to record peak RAM usage with cgroup V2. -You can try using cgroup V1 by adding systemd.unified_cgroup_hierarchy=0 to the kernel params." + cgroup_error_and_usage "Error: kernel ($kernel_version) is too old to record peak RAM usage with cgroup V2." fi if ! echo "+memory" >> /sys/fs/cgroup/cgroup.subtree_control; then cgroup_error_and_usage "Error: Cannot add +memory to cgroup.subtree_control; check kernel params." @@ -37,26 +36,6 @@ You can try using cgroup V1 by adding systemd.unified_cgroup_hierarchy=0 to the if grep -q ":/$" /proc/self/cgroup; then cgroup_error_and_usage "Error: Cgroups not configured properly, missing cgroup hierarchy prefix under /proc/self/cgroup. If running in a container, make sure to set cgroupns=host." fi - -else # Trying cgroup V1: - - for i in cpuset memory; do - mkdir -p $CGROUPBASE/$i - if [ ! -d $CGROUPBASE/$i/ ]; then - if ! mount -t cgroup -o$i $i $CGROUPBASE/$i/; then - cgroup_error_and_usage "Error: Can not mount $i cgroup. Probably cgroup support is missing from running kernel." - fi - fi - mkdir -p $CGROUPBASE/$i/domjudge - done - - if [ ! -f $CGROUPBASE/memory/memory.limit_in_bytes ] || [ ! 
-f $CGROUPBASE/memory/memory.memsw.limit_in_bytes ]; then - cgroup_error_and_usage "Error: cgroup support missing memory features in running kernel." - fi - - chown -R $JUDGEHOSTUSER $CGROUPBASE/*/domjudge - - cat $CGROUPBASE/cpuset/cpuset.cpus > $CGROUPBASE/cpuset/domjudge/cpuset.cpus - cat $CGROUPBASE/cpuset/cpuset.mems > $CGROUPBASE/cpuset/domjudge/cpuset.mems - -fi # cgroup V1 +else + cgroup_error_and_usage "Error: Cgroups not configured properly, did not find cgroup v2 in /sys/fs/cgroup but '$fs_type'." +fi diff --git a/judge/runguard.cc b/judge/runguard.cc index 582e4dd60f..e66ea637f6 100644 --- a/judge/runguard.cc +++ b/judge/runguard.cc @@ -145,8 +145,6 @@ int show_version; int in_error_handling = 0; pid_t runpipe_pid = -1; -bool is_cgroup_v2 = false; - double walltimelimit[2], cputimelimit[2]; /* in seconds, soft and hard limits */ int walllimit_reached, cpulimit_reached; /* 1=soft, 2=hard, 3=both limits reached */ rlim_t memsize; @@ -514,11 +512,7 @@ std::set read_cpuset(const char *path) void check_remaining_procs() { char path[1024]; - if (is_cgroup_v2) { - snprintf(path, 1023, "/sys/fs/cgroup/%s/cgroup.procs", cgroupname); - } else { - snprintf(path, 1023, "/sys/fs/cgroup/cpuacct/%s/cgroup.procs", cgroupname); - } + snprintf(path, 1023, "/sys/fs/cgroup/%s/cgroup.procs", cgroupname); FILE *file = fopen(path, "r"); if (file == nullptr) { @@ -532,33 +526,8 @@ void check_remaining_procs() if (fclose(file) != 0) error(errno, "closing file `%s'", path); } -void output_cgroup_stats_v1(double *cputime) -{ - struct cgroup *cg; - if ( (cg = cgroup_new_cgroup(cgroupname))==nullptr ) error(0,"cgroup_new_cgroup"); - - int ret; - if ((ret = cgroup_get_cgroup(cg)) != 0) error(ret,"get cgroup information"); - - int64_t max_usage = 0; - struct cgroup_controller *cg_controller = cgroup_get_controller(cg, "memory"); - ret = cgroup_get_value_int64(cg_controller, "memory.memsw.max_usage_in_bytes", &max_usage); - if ( ret!=0 ) error(ret,"get cgroup value 
memory.memsw.max_usage_in_bytes"); - - verbose("total memory used: %" PRId64 " kB", max_usage/1024); - write_meta("memory-bytes","%" PRId64, max_usage); - - int64_t cpu_time_int; - cg_controller = cgroup_get_controller(cg, "cpuacct"); - ret = cgroup_get_value_int64(cg_controller, "cpuacct.usage", &cpu_time_int); - if ( ret!=0 ) error(ret,"get cgroup value cpuacct.usage"); - *cputime = (double) cpu_time_int / 1.e9; - - cgroup_free(&cg); -} - -void output_cgroup_stats_v2(double *cputime) +void output_cgroup_stats(double *cputime) { struct cgroup *cg; if ( (cg = cgroup_new_cgroup(cgroupname))==nullptr ) error(0,"cgroup_new_cgroup"); @@ -615,18 +584,13 @@ void cgroup_create() } int ret; - if (is_cgroup_v2) { - // TODO: do we want to set cpu.weight here as well? - if (memsize != RLIM_INFINITY) { - cgroup_add_value(uint64, "memory.max", memsize); - cgroup_add_value(uint64, "memory.swap.max", 0); - } else { - cgroup_add_value(string, "memory.max", "max"); - cgroup_add_value(string, "memory.swap.max", "max"); - } + // TODO: do we want to set cpu.weight here as well? 
+ if (memsize != RLIM_INFINITY) { + cgroup_add_value(uint64, "memory.max", memsize); + cgroup_add_value(uint64, "memory.swap.max", 0); } else { - cgroup_add_value(uint64, "memory.limit_in_bytes", memsize); - cgroup_add_value(uint64, "memory.memsw.limit_in_bytes", memsize); + cgroup_add_value(string, "memory.max", "max"); + cgroup_add_value(string, "memory.swap.max", "max"); } /* Set up cpu restrictions; we pin the task to a specific set of @@ -645,15 +609,6 @@ void cgroup_create() verbose("cpuset undefined"); } - if (!is_cgroup_v2) { - if ( (cg_controller = cgroup_add_controller(cg, "cpu"))==nullptr ) { - error(0,"cgroup_add_controller cpu"); - } - if ((cg_controller = cgroup_add_controller(cg, "cpuacct")) == nullptr) { - error(0, "cgroup_add_controller cpuacct"); - } - } - /* Perform the actual creation of the cgroup */ if ( (ret = cgroup_create_cgroup(cg, 1))!=0 ) error(ret,"creating cgroup"); @@ -663,46 +618,21 @@ void cgroup_create() #undef cgroup_setval -void cgroup_attach() -{ - struct cgroup *cg; - cg = cgroup_new_cgroup(cgroupname); - if (!cg) error(0,"cgroup_new_cgroup"); - - int ret; - if ( (ret = cgroup_get_cgroup(cg))!=0 ) error(ret,"get cgroup information"); - - /* Attach task to the cgroup */ - if ( (ret = cgroup_attach_task(cg))!=0 ) error(ret,"attach task to cgroup"); - - cgroup_free(&cg); -} void cgroup_kill() { /* kill any remaining tasks, and wait for them to be gone */ char mem_controller[10] = "memory"; - if (is_cgroup_v2) { - int size; - do { - pid_t* pids; - int ret = cgroup_get_procs(cgroupname, mem_controller, &pids, &size); - if (ret != 0) error(ret, "cgroup_get_procs"); - for(int i = 0; i < size; i++) { - kill(pids[i], SIGKILL); - } - free(pids); - } while (size > 0); - } else { - while(1) { - void *handle = nullptr; - pid_t pid; - int ret = cgroup_get_task_begin(cgroupname, mem_controller, &handle, &pid); - cgroup_get_task_end(&handle); - if (ret == ECGEOF) break; - kill(pid, SIGKILL); + int size; + do { + pid_t* pids; + int ret = 
cgroup_get_procs(cgroupname, mem_controller, &pids, &size); + if (ret != 0) error(ret, "cgroup_get_procs"); + for(int i = 0; i < size; i++) { + kill(pids[i], SIGKILL); } - } + free(pids); + } while (size > 0); } void cgroup_delete() @@ -712,9 +642,6 @@ void cgroup_delete() if (!cg) error(0,"cgroup_new_cgroup"); if (cgroup_add_controller(cg, "cpu") == nullptr) error(0, "cgroup_add_controller cpu"); - if (!is_cgroup_v2) { - if (cgroup_add_controller(cg, "cpuacct") == nullptr) error(0, "cgroup_add_controller cpuacct"); - } if ( cgroup_add_controller(cg, "memory")==nullptr ) error(0,"cgroup_add_controller memory"); if ( cpuset!=nullptr && strlen(cpuset)>0 ) { @@ -958,13 +885,9 @@ void setrestrictions() } /* Put the child process in the cgroup */ - if (is_cgroup_v2) { - const char *controllers[] = { "memory", nullptr }; - if (cgroup_change_cgroup_path(cgroupname, getpid(), controllers) != 0) { - error(0, "Failed to move the process to the cgroup"); - } - } else { - cgroup_attach(); + const char *controllers[] = { "memory", nullptr }; + if (cgroup_change_cgroup_path(cgroupname, getpid(), controllers) != 0) { + error(0, "Failed to move the process to the cgroup"); } /* Run the command in a separate process group so that the command @@ -1109,28 +1032,6 @@ void pump_pipes(fd_set* readfds, size_t data_read[], size_t data_passed[]) } -bool cgroup_is_v2() { - bool ret = false; - FILE *fp = setmntent("/proc/mounts", "r"); - if (!fp) { - perror("Error opening /proc/mounts"); - return false; - } - - struct mntent *entry; - while ((entry = getmntent(fp)) != nullptr) { - if (strcmp(entry->mnt_dir, "/sys/fs/cgroup") == 0) { - if (strcmp(entry->mnt_type, "cgroup2") == 0) { - ret = true; - } - break; - } - } - - endmntent(fp); - - return ret; -} int main(int argc, char **argv) { @@ -1306,8 +1207,6 @@ int main(int argc, char **argv) cmdname = argv[optind]; cmdargs = argv+optind; - is_cgroup_v2 = cgroup_is_v2(); - if ( outputmeta && (metafile = fopen(metafilename,"w"))==nullptr ) { 
error(errno,"cannot open `%s'",metafilename); } @@ -1630,11 +1529,7 @@ int main(int argc, char **argv) check_remaining_procs(); double cputime = -1; - if (is_cgroup_v2) { - output_cgroup_stats_v2(&cputime); - } else { - output_cgroup_stats_v1(&cputime); - } + output_cgroup_stats(&cputime); cgroup_kill(); cgroup_delete();