Skip to content

Commit 4d559ae

Browse files
committed
Add prototype impl for cgroupv2
1 parent 40ecf1e commit 4d559ae

File tree

1 file changed

+136
-22
lines changed

1 file changed

+136
-22
lines changed

judge/runguard.cc

Lines changed: 136 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <fcntl.h>
4646
#include <csignal>
4747
#include <cstdlib>
48+
#include <mntent.h>
4849
#include <unistd.h>
4950
#include <cstring>
5051
#include <cstdarg>
@@ -138,6 +139,8 @@ int show_help;
138139
int show_version;
139140
pid_t runpipe_pid = -1;
140141

142+
bool is_cgroup_v2 = false;
143+
141144
double walltimelimit[2], cputimelimit[2]; /* in seconds, soft and hard limits */
142145
int walllimit_reached, cpulimit_reached; /* 1=soft, 2=hard, 3=both limits reached */
143146
int64_t memsize;
@@ -433,7 +436,11 @@ void output_exit_time(int exitcode, double cpudiff)
433436
void check_remaining_procs()
434437
{
435438
char path[1024];
436-
snprintf(path, 1023, "/sys/fs/cgroup/cpuacct%scgroup.procs", cgroupname);
439+
if (is_cgroup_v2) {
440+
snprintf(path, 1023, "/sys/fs/cgroup/%scgroup.procs", cgroupname);
441+
} else {
442+
snprintf(path, 1023, "/sys/fs/cgroup/cpuacct%scgroup.procs", cgroupname);
443+
}
437444

438445
FILE *file = fopen(path, "r");
439446
if (file == nullptr) {
@@ -447,7 +454,7 @@ void check_remaining_procs()
447454
if (fclose(file) != 0) error(errno, "closing file `%s'", path);
448455
}
449456

450-
void output_cgroup_stats(double *cputime)
457+
void output_cgroup_stats_v1(double *cputime)
451458
{
452459
struct cgroup *cg;
453460
if ( (cg = cgroup_new_cgroup(cgroupname))==nullptr ) error(0,"cgroup_new_cgroup");
@@ -474,6 +481,45 @@ void output_cgroup_stats(double *cputime)
474481
cgroup_free(&cg);
475482
}
476483

484+
void output_cgroup_stats_v2(double *cputime)
485+
{
486+
struct cgroup *cg;
487+
if ( (cg = cgroup_new_cgroup(cgroupname))==NULL ) error(0,"cgroup_new_cgroup");
488+
489+
int ret;
490+
if ((ret = cgroup_get_cgroup(cg)) != 0) error(ret,"get cgroup information");
491+
492+
struct cgroup_controller *cg_controller = cgroup_get_controller(cg, "memory");
493+
int64_t max_usage = 0;
494+
ret = cgroup_get_value_int64(cg_controller, "memory.peak", &max_usage);
495+
if ( ret == ECGROUPVALUENOTEXIST ) {
496+
write_meta("internal-warning", "Kernel too old and does not support memory.peak");
497+
} else if ( ret!=0 ) {
498+
error(ret,"get cgroup value memory.peak");
499+
}
500+
501+
// There is no need to check swap usage, as we limit it to 0.
502+
verbose("total memory used: %" PRId64 " kB", max_usage/1024);
503+
write_meta("memory-bytes","%" PRId64, max_usage);
504+
505+
struct cgroup_stat stat;
506+
void *handle;
507+
ret = cgroup_read_stats_begin("cpu", cgroupname, &handle, &stat);
508+
while (ret == 0) {
509+
if (strcmp(stat.name, "usage_usec") == 0) {
510+
long long usec = strtoll(stat.value, NULL, 10);
511+
*cputime = usec / 1e6;
512+
break;
513+
}
514+
ret = cgroup_read_stats_next(&handle, &stat);
515+
}
516+
if ( ret!=ECGEOF ) error(ret,"get cgroup value cpu.stat");
517+
cgroup_read_stats_end(&handle);
518+
519+
cgroup_free(&cg);
520+
521+
}
522+
477523
/* Temporary shorthand define for error handling. */
478524
#define cgroup_add_value(type,name,value) \
479525
ret = cgroup_add_value_ ## type(cg_controller, name, value); \
@@ -492,9 +538,21 @@ void cgroup_create()
492538
error(0,"cgroup_add_controller memory");
493539
}
494540

495-
int ret;
496-
cgroup_add_value(int64, "memory.limit_in_bytes", memsize);
497-
cgroup_add_value(int64, "memory.memsw.limit_in_bytes", memsize);
541+
int ret;
542+
if (is_cgroup_v2) {
543+
// TODO: do we want to set cpu.weight here as well?
544+
if (memsize != RLIM_INFINITY) {
545+
cgroup_add_value(int64, "memory.max", memsize);
546+
// TODO: Is this the behavior change that JohnB mentioned?
547+
cgroup_add_value(int64, "memory.swap.max", 0);
548+
} else {
549+
cgroup_add_value(string, "memory.max", "max");
550+
cgroup_add_value(string, "memory.swap.max", "max");
551+
}
552+
} else {
553+
cgroup_add_value(int64, "memory.limit_in_bytes", memsize);
554+
cgroup_add_value(int64, "memory.memsw.limit_in_bytes", memsize);
555+
}
498556

499557
/* Set up cpu restrictions; we pin the task to a specific set of
500558
cpus. We also give it exclusive access to those cores, and set
@@ -512,9 +570,14 @@ void cgroup_create()
512570
verbose("cpuset undefined");
513571
}
514572

515-
if ( (cg_controller = cgroup_add_controller(cg, "cpuacct"))==nullptr ) {
516-
error(0,"cgroup_add_controller cpuacct");
573+
if ( (cg_controller = cgroup_add_controller(cg, "cpu"))==nullptr ) {
574+
error(0,"cgroup_add_controller cpu");
517575
}
576+
if (!is_cgroup_v2) {
577+
if ((cg_controller = cgroup_add_controller(cg, "cpuacct")) == nullptr) {
578+
error(0, "cgroup_add_controller cpuacct");
579+
}
580+
}
518581

519582
/* Perform the actual creation of the cgroup */
520583
if ( (ret = cgroup_create_cgroup(cg, 1))!=0 ) error(ret,"creating cgroup");
@@ -542,16 +605,28 @@ void cgroup_attach()
542605

543606
void cgroup_kill()
544607
{
545-
void *handle = nullptr;
546-
pid_t pid;
547-
548-
/* kill any remaining tasks, and wait for them to be gone */
549-
while(1) {
550-
int ret = cgroup_get_task_begin(cgroupname, "memory", &handle, &pid);
551-
cgroup_get_task_end(&handle);
552-
if (ret == ECGEOF) break;
553-
kill(pid, SIGKILL);
554-
}
608+
/* kill any remaining tasks, and wait for them to be gone */
609+
if (is_cgroup_v2) {
610+
int size;
611+
do {
612+
pid_t* pids;
613+
int ret = cgroup_get_procs(cgroupname, NULL, &pids, &size);
614+
if (ret != 0) error(ret, "cgroup_get_procs");
615+
for(int i = 0; i < size; i++) {
616+
kill(pids[i], SIGKILL);
617+
}
618+
free(pids);
619+
} while (size > 0);
620+
} else {
621+
while(1) {
622+
void *handle = nullptr;
623+
pid_t pid;
624+
int ret = cgroup_get_task_begin(cgroupname, "memory", &handle, &pid);
625+
cgroup_get_task_end(&handle);
626+
if (ret == ECGEOF) break;
627+
kill(pid, SIGKILL);
628+
}
629+
}
555630
}
556631

557632
void cgroup_delete()
@@ -560,7 +635,10 @@ void cgroup_delete()
560635
cg = cgroup_new_cgroup(cgroupname);
561636
if (!cg) error(0,"cgroup_new_cgroup");
562637

563-
if ( cgroup_add_controller(cg, "cpuacct")==nullptr ) error(0,"cgroup_add_controller cpuacct");
638+
if (cgroup_add_controller(cg, "cpu") == nullptr) error(0, "cgroup_add_controller cpu");
639+
if (!is_cgroup_v2) {
640+
if (cgroup_add_controller(cg, "cpuacct") == nullptr) error(0, "cgroup_add_controller cpuacct");
641+
}
564642
if ( cgroup_add_controller(cg, "memory")==nullptr ) error(0,"cgroup_add_controller memory");
565643

566644
if ( cpuset!=nullptr && strlen(cpuset)>0 ) {
@@ -783,7 +861,13 @@ void setrestrictions()
783861
}
784862

785863
/* Put the child process in the cgroup */
786-
cgroup_attach();
864+
if (is_cgroup_v2) {
865+
if (cgroup_change_cgroup_path(cgroupname, getpid(), NULL) != 0) {
866+
error(0, "Failed to move the process to the cgroup");
867+
}
868+
} else {
869+
cgroup_attach();
870+
}
787871

788872
/* Run the command in a separate process group so that the command
789873
and all its children can be killed off with one signal. */
@@ -929,6 +1013,29 @@ void pump_pipes(fd_set* readfds, size_t data_read[], size_t data_passed[])
9291013

9301014
}
9311015

1016+
/*
 * Determine whether /sys/fs/cgroup is mounted as a cgroup v2 hierarchy.
 *
 * Scans /proc/mounts for the first entry whose mount point is exactly
 * "/sys/fs/cgroup" and returns true iff its filesystem type is "cgroup2".
 * Returns false when no such entry exists or /proc/mounts cannot be
 * opened (in which case a message is printed via perror).
 */
bool cgroup_is_v2() {
	FILE *mounts = setmntent("/proc/mounts", "r");
	if (mounts == nullptr) {
		perror("Error opening /proc/mounts");
		return false;
	}

	bool found_v2 = false;
	for (struct mntent *m = getmntent(mounts); m != nullptr; m = getmntent(mounts)) {
		if (strcmp(m->mnt_dir, "/sys/fs/cgroup") != 0) continue;

		/* Only the first matching mount point is considered. */
		found_v2 = (strcmp(m->mnt_type, "cgroup2") == 0);
		break;
	}

	endmntent(mounts);
	return found_v2;
}
1038+
9321039
int main(int argc, char **argv)
9331040
{
9341041
int ret;
@@ -1103,6 +1210,8 @@ int main(int argc, char **argv)
11031210
cmdname = argv[optind];
11041211
cmdargs = argv+optind;
11051212

1213+
is_cgroup_v2 = cgroup_is_v2();
1214+
11061215
if ( outputmeta && (metafile = fopen(metafilename,"w"))==nullptr ) {
11071216
error(errno,"cannot open `%s'",metafilename);
11081217
}
@@ -1162,6 +1271,7 @@ int main(int argc, char **argv)
11621271
}
11631272
}
11641273
}
1274+
11651275
/* Make libcgroup ready for use */
11661276
ret = cgroup_init();
11671277
if ( ret!=0 ) {
@@ -1175,8 +1285,8 @@ int main(int argc, char **argv)
11751285
} else {
11761286
str[0] = 0;
11771287
}
1178-
snprintf(cgroupname, 255, "/domjudge/dj_cgroup_%d_%.16s_%d.%06d/",
1179-
getpid(), str, (int)progstarttime.tv_sec, (int)progstarttime.tv_usec);
1288+
snprintf(cgroupname, 255, "/domjudge/dj_cgroup_%d_%.16s_%d.%06d/",
1289+
getpid(), str, (int) progstarttime.tv_sec, (int) progstarttime.tv_usec);
11801290

11811291
cgroup_create();
11821292

@@ -1422,7 +1532,11 @@ int main(int argc, char **argv)
14221532
check_remaining_procs();
14231533

14241534
double cputime = -1;
1425-
output_cgroup_stats(&cputime);
1535+
if (is_cgroup_v2) {
1536+
output_cgroup_stats_v2(&cputime);
1537+
} else {
1538+
output_cgroup_stats_v1(&cputime);
1539+
}
14261540
cgroup_kill();
14271541
cgroup_delete();
14281542

0 commit comments

Comments
 (0)