Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions doc/manual/install-judgehost.rst
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,9 @@ no separate measures are necessary, and they allow running
:ref:`multiple judgedaemons <multiple-judgedaemons>`
on a multi-core machine by using CPU binding.

The judgedaemon needs to run a recent Linux kernel (at least 3.2.0). The
following steps configure cgroups on Debian. Instructions for other
distributions may be different (send us your feedback!).
The judgedaemon needs to run a recent Linux kernel (version 5.19 or later,
including any 6.x kernel). The following steps configure cgroups v2 on Debian.
Instructions for other distributions may be different (send us your feedback!).

Edit grub config to add cgroup memory and swap accounting to the boot
options. Edit ``/etc/default/grub`` and change the default
Expand All @@ -181,12 +181,6 @@ Optionally the timings can be made more stable by not letting the OS schedule
any other tasks on the same CPU core the judgedaemon is using:
``GRUB_CMDLINE_LINUX_DEFAULT="quiet cgroup_enable=memory swapaccount=1 isolcpus=2"``

On modern systems where cgroup v2 is available, DOMjudge will try to
use that. This requires kernel versions 5.19 or 6.0 or later to
support reporting peak memory usage. If not found, the system will try
to fall back to cgroup v1, but this might require you to add
``systemd.unified_cgroup_hierarchy=0`` to the boot options as well.

You have now configured the system to use cgroups. To create
the actual cgroups that DOMjudge will use you need to run::

Expand Down
29 changes: 4 additions & 25 deletions judge/create_cgroups.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ if [ "$fs_type" = "cgroup2" ]; then
major=$(echo "$kernel_version" | cut -d '.' -f 1)
minor=$(echo "$kernel_version" | cut -d '.' -f 2)
if [ "$major" -lt 5 ] || { [ "$major" -eq 5 ] && [ "$minor" -lt 19 ]; }; then
cgroup_error_and_usage "Error: kernel ($kernel_version) is too old to record peak RAM usage with cgroup V2.
You can try using cgroup V1 by adding systemd.unified_cgroup_hierarchy=0 to the kernel params."
cgroup_error_and_usage "Error: kernel ($kernel_version) is too old to record peak RAM usage with cgroup V2."
fi
if ! echo "+memory" >> /sys/fs/cgroup/cgroup.subtree_control; then
cgroup_error_and_usage "Error: Cannot add +memory to cgroup.subtree_control; check kernel params."
Expand All @@ -37,26 +36,6 @@ You can try using cgroup V1 by adding systemd.unified_cgroup_hierarchy=0 to the
if grep -q ":/$" /proc/self/cgroup; then
cgroup_error_and_usage "Error: Cgroups not configured properly, missing cgroup hierarchy prefix under /proc/self/cgroup. If running in a container, make sure to set cgroupns=host."
fi

else # Trying cgroup V1:

for i in cpuset memory; do
mkdir -p $CGROUPBASE/$i
if [ ! -d $CGROUPBASE/$i/ ]; then
if ! mount -t cgroup -o$i $i $CGROUPBASE/$i/; then
cgroup_error_and_usage "Error: Can not mount $i cgroup. Probably cgroup support is missing from running kernel."
fi
fi
mkdir -p $CGROUPBASE/$i/domjudge
done

if [ ! -f $CGROUPBASE/memory/memory.limit_in_bytes ] || [ ! -f $CGROUPBASE/memory/memory.memsw.limit_in_bytes ]; then
cgroup_error_and_usage "Error: cgroup support missing memory features in running kernel."
fi

chown -R $JUDGEHOSTUSER $CGROUPBASE/*/domjudge

cat $CGROUPBASE/cpuset/cpuset.cpus > $CGROUPBASE/cpuset/domjudge/cpuset.cpus
cat $CGROUPBASE/cpuset/cpuset.mems > $CGROUPBASE/cpuset/domjudge/cpuset.mems

fi # cgroup V1
else
cgroup_error_and_usage "Error: Cgroups not configured properly, did not find cgroup v2 in /sys/fs/cgroup but '$fs_type'."
fi
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The message about cgroup v1 on line 29 should also be removed.

Is there maybe also documentation referring to cgroup v1/2?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Good call on the documentation as well; I have fixed that too.

147 changes: 21 additions & 126 deletions judge/runguard.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,6 @@ int show_version;
int in_error_handling = 0;
pid_t runpipe_pid = -1;

bool is_cgroup_v2 = false;

double walltimelimit[2], cputimelimit[2]; /* in seconds, soft and hard limits */
int walllimit_reached, cpulimit_reached; /* 1=soft, 2=hard, 3=both limits reached */
rlim_t memsize;
Expand Down Expand Up @@ -514,11 +512,7 @@ std::set<unsigned> read_cpuset(const char *path)
void check_remaining_procs()
{
char path[1024];
if (is_cgroup_v2) {
snprintf(path, 1023, "/sys/fs/cgroup/%s/cgroup.procs", cgroupname);
} else {
snprintf(path, 1023, "/sys/fs/cgroup/cpuacct/%s/cgroup.procs", cgroupname);
}
snprintf(path, 1023, "/sys/fs/cgroup/%s/cgroup.procs", cgroupname);

FILE *file = fopen(path, "r");
if (file == nullptr) {
Expand All @@ -532,33 +526,8 @@ void check_remaining_procs()
if (fclose(file) != 0) error(errno, "closing file `%s'", path);
}

void output_cgroup_stats_v1(double *cputime)
{
struct cgroup *cg;
if ( (cg = cgroup_new_cgroup(cgroupname))==nullptr ) error(0,"cgroup_new_cgroup");

int ret;
if ((ret = cgroup_get_cgroup(cg)) != 0) error(ret,"get cgroup information");

int64_t max_usage = 0;
struct cgroup_controller *cg_controller = cgroup_get_controller(cg, "memory");
ret = cgroup_get_value_int64(cg_controller, "memory.memsw.max_usage_in_bytes", &max_usage);
if ( ret!=0 ) error(ret,"get cgroup value memory.memsw.max_usage_in_bytes");

verbose("total memory used: %" PRId64 " kB", max_usage/1024);
write_meta("memory-bytes","%" PRId64, max_usage);

int64_t cpu_time_int;
cg_controller = cgroup_get_controller(cg, "cpuacct");
ret = cgroup_get_value_int64(cg_controller, "cpuacct.usage", &cpu_time_int);
if ( ret!=0 ) error(ret,"get cgroup value cpuacct.usage");

*cputime = (double) cpu_time_int / 1.e9;

cgroup_free(&cg);
}

void output_cgroup_stats_v2(double *cputime)
void output_cgroup_stats(double *cputime)
{
struct cgroup *cg;
if ( (cg = cgroup_new_cgroup(cgroupname))==nullptr ) error(0,"cgroup_new_cgroup");
Expand Down Expand Up @@ -615,18 +584,13 @@ void cgroup_create()
}

int ret;
if (is_cgroup_v2) {
// TODO: do we want to set cpu.weight here as well?
if (memsize != RLIM_INFINITY) {
cgroup_add_value(uint64, "memory.max", memsize);
cgroup_add_value(uint64, "memory.swap.max", 0);
} else {
cgroup_add_value(string, "memory.max", "max");
cgroup_add_value(string, "memory.swap.max", "max");
}
// TODO: do we want to set cpu.weight here as well?
if (memsize != RLIM_INFINITY) {
cgroup_add_value(uint64, "memory.max", memsize);
cgroup_add_value(uint64, "memory.swap.max", 0);
} else {
cgroup_add_value(uint64, "memory.limit_in_bytes", memsize);
cgroup_add_value(uint64, "memory.memsw.limit_in_bytes", memsize);
cgroup_add_value(string, "memory.max", "max");
cgroup_add_value(string, "memory.swap.max", "max");
}

/* Set up cpu restrictions; we pin the task to a specific set of
Expand All @@ -645,15 +609,6 @@ void cgroup_create()
verbose("cpuset undefined");
}

if (!is_cgroup_v2) {
if ( (cg_controller = cgroup_add_controller(cg, "cpu"))==nullptr ) {
error(0,"cgroup_add_controller cpu");
}
if ((cg_controller = cgroup_add_controller(cg, "cpuacct")) == nullptr) {
error(0, "cgroup_add_controller cpuacct");
}
}

/* Perform the actual creation of the cgroup */
if ( (ret = cgroup_create_cgroup(cg, 1))!=0 ) error(ret,"creating cgroup");

Expand All @@ -663,46 +618,21 @@ void cgroup_create()

#undef cgroup_setval

void cgroup_attach()
{
struct cgroup *cg;
cg = cgroup_new_cgroup(cgroupname);
if (!cg) error(0,"cgroup_new_cgroup");

int ret;
if ( (ret = cgroup_get_cgroup(cg))!=0 ) error(ret,"get cgroup information");

/* Attach task to the cgroup */
if ( (ret = cgroup_attach_task(cg))!=0 ) error(ret,"attach task to cgroup");

cgroup_free(&cg);
}

void cgroup_kill()
{
/* kill any remaining tasks, and wait for them to be gone */
char mem_controller[10] = "memory";
if (is_cgroup_v2) {
int size;
do {
pid_t* pids;
int ret = cgroup_get_procs(cgroupname, mem_controller, &pids, &size);
if (ret != 0) error(ret, "cgroup_get_procs");
for(int i = 0; i < size; i++) {
kill(pids[i], SIGKILL);
}
free(pids);
} while (size > 0);
} else {
while(1) {
void *handle = nullptr;
pid_t pid;
int ret = cgroup_get_task_begin(cgroupname, mem_controller, &handle, &pid);
cgroup_get_task_end(&handle);
if (ret == ECGEOF) break;
kill(pid, SIGKILL);
int size;
do {
pid_t* pids;
int ret = cgroup_get_procs(cgroupname, mem_controller, &pids, &size);
if (ret != 0) error(ret, "cgroup_get_procs");
for(int i = 0; i < size; i++) {
kill(pids[i], SIGKILL);
}
}
free(pids);
} while (size > 0);
}

void cgroup_delete()
Expand All @@ -712,9 +642,6 @@ void cgroup_delete()
if (!cg) error(0,"cgroup_new_cgroup");

if (cgroup_add_controller(cg, "cpu") == nullptr) error(0, "cgroup_add_controller cpu");
if (!is_cgroup_v2) {
if (cgroup_add_controller(cg, "cpuacct") == nullptr) error(0, "cgroup_add_controller cpuacct");
}
if ( cgroup_add_controller(cg, "memory")==nullptr ) error(0,"cgroup_add_controller memory");

if ( cpuset!=nullptr && strlen(cpuset)>0 ) {
Expand Down Expand Up @@ -958,13 +885,9 @@ void setrestrictions()
}

/* Put the child process in the cgroup */
if (is_cgroup_v2) {
const char *controllers[] = { "memory", nullptr };
if (cgroup_change_cgroup_path(cgroupname, getpid(), controllers) != 0) {
error(0, "Failed to move the process to the cgroup");
}
} else {
cgroup_attach();
const char *controllers[] = { "memory", nullptr };
if (cgroup_change_cgroup_path(cgroupname, getpid(), controllers) != 0) {
error(0, "Failed to move the process to the cgroup");
}

/* Run the command in a separate process group so that the command
Expand Down Expand Up @@ -1109,28 +1032,6 @@ void pump_pipes(fd_set* readfds, size_t data_read[], size_t data_passed[])

}

/*
 * Tell whether the filesystem mounted on /sys/fs/cgroup is of type cgroup2.
 *
 * Scans /proc/mounts and stops at the first entry whose mount directory
 * is exactly "/sys/fs/cgroup"; the result is true only when that entry's
 * type is "cgroup2". Returns false when no such entry exists or when
 * /proc/mounts cannot be opened (a message is then printed via perror()).
 */
bool cgroup_is_v2() {
    FILE *mounts = setmntent("/proc/mounts", "r");
    if (mounts == nullptr) {
        perror("Error opening /proc/mounts");
        return false;
    }

    bool found_v2 = false;
    for (struct mntent *mnt = getmntent(mounts); mnt != nullptr; mnt = getmntent(mounts)) {
        if (strcmp(mnt->mnt_dir, "/sys/fs/cgroup") != 0) {
            continue;
        }
        /* Only the first matching mount point decides the outcome. */
        found_v2 = (strcmp(mnt->mnt_type, "cgroup2") == 0);
        break;
    }

    endmntent(mounts);

    return found_v2;
}

int main(int argc, char **argv)
{
Expand Down Expand Up @@ -1306,8 +1207,6 @@ int main(int argc, char **argv)
cmdname = argv[optind];
cmdargs = argv+optind;

is_cgroup_v2 = cgroup_is_v2();

if ( outputmeta && (metafile = fopen(metafilename,"w"))==nullptr ) {
error(errno,"cannot open `%s'",metafilename);
}
Expand Down Expand Up @@ -1630,11 +1529,7 @@ int main(int argc, char **argv)
check_remaining_procs();

double cputime = -1;
if (is_cgroup_v2) {
output_cgroup_stats_v2(&cputime);
} else {
output_cgroup_stats_v1(&cputime);
}
output_cgroup_stats(&cputime);
cgroup_kill();
cgroup_delete();

Expand Down
Loading