Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
void cgroup_file_show(struct cgroup_file *cfile, bool show);

/*
 * Write @nbytes bytes from @buf to the cgroup core interface file named
 * @name__str, addressing the cgroup by its kernfs directory node @kn rather
 * than by an open file.
 *
 * Returns 0 for a zero-length write, -ENOTDIR when @kn is not a directory,
 * -EOPNOTSUPP when no write handler is registered for @name__str, and
 * otherwise whatever the interface's write handler returns.
 */
ssize_t cgroup_kn_interface_write(struct kernfs_node *kn, const char *name__str,
const char *buf, size_t nbytes, loff_t off);

int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk);
Expand Down
45 changes: 45 additions & 0 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -2605,6 +2605,50 @@ bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
return NULL;
return cgrp;
}

/*
 * Largest payload, in bytes, that bpf_cgroup_write_interface() accepts
 * (2^24 - 1); longer values are rejected with -E2BIG.
 */
#define BPF_CGROUP_MAX_WRITE ((1UL << 24) - 1)

/**
 * bpf_cgroup_write_interface - Write to a cgroup core interface file.
 * @cgrp: The target cgroup
 * @name__str: name of the cgroup core interface file
 * @value_p: dynptr holding the value to write
 * @off: offset, forwarded unchanged to the interface's write handler
 *
 * An empty dynptr is a no-op that returns 0. Values larger than
 * BPF_CGROUP_MAX_WRITE bytes are rejected with -E2BIG, and a dynptr whose
 * data cannot be obtained is rejected with -EINVAL.
 *
 * Return: number of bytes written on success, a negative value on error.
 */
__bpf_kfunc int
bpf_cgroup_write_interface(struct cgroup *cgrp, const char *name__str,
			   const struct bpf_dynptr *value_p, loff_t off)
{
	struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
	struct kernfs_node *kn;
	const void *value;
	u32 value_len;
	int ret;

	value_len = __bpf_dynptr_size(value_ptr);
	if (!value_len)
		return 0;

	if (value_len > BPF_CGROUP_MAX_WRITE)
		return -E2BIG;

	value = __bpf_dynptr_data(value_ptr, value_len);
	if (!value)
		return -EINVAL;

	/*
	 * Take the kernfs reference while still inside the RCU read-side
	 * section. Loading the pointer under RCU and pinning it only after
	 * rcu_read_unlock() would leave a window in which nothing keeps the
	 * node alive.
	 */
	rcu_read_lock();
	kn = cgrp->kn;
	kernfs_get(kn);
	rcu_read_unlock();

	/* A successful result is at most value_len, which fits in an int. */
	ret = cgroup_kn_interface_write(kn, name__str, value, value_len, off);
	kernfs_put(kn);

	return ret;
}

#endif /* CONFIG_CGROUPS */

/**
Expand Down Expand Up @@ -3736,6 +3780,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_write_interface, KF_TRUSTED_ARGS | KF_SLEEPABLE)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_from_vpid, KF_ACQUIRE | KF_RET_NULL)
Expand Down
102 changes: 96 additions & 6 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,24 @@ static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];
static struct cftype cgroup_psi_files[];

/*
 * Describes one cgroup core interface file that can be written by addressing
 * the cgroup through its kernfs node instead of through an open file.
 * Instances live in the kn_cfts[] table.
 */
struct cgroup_kn_cftype {
/* Interface file name, e.g. CGROUP_CORE_INTERFACE_FREEZE. */
char name[MAX_CFTYPE_NAME];
/* Length of @name; the kn_cfts[] initializer excludes the trailing NUL. */
unsigned int namelen;

/*
* write() is the write operation on a kernfs node.
* It receives the node, the data and its length, the offset, and a
* revalidate flag; cgroup_kn_interface_write() always passes true for
* the flag.
*/
ssize_t (*write)(struct kernfs_node *kn, const char *buf, size_t nbytes,
loff_t off, bool revalidate);
};

/*
 * Name of the freeze core interface file, built from the common prefix and
 * the file-specific suffix ("cgroup." "freeze"), plus its length without the
 * terminating NUL.
 */
#define CGROUP_PREFIX "cgroup."
#define CGROUP_CORE_INTERFACE_FREEZE_SUFFIX "freeze"
#define CGROUP_CORE_INTERFACE_FREEZE (CGROUP_PREFIX CGROUP_CORE_INTERFACE_FREEZE_SUFFIX)
#define CGROUP_CORE_INTERFACE_FREEZE_LEN (sizeof(CGROUP_CORE_INTERFACE_FREEZE) - 1)

/* Forward declaration; the table itself is defined further down in this file. */
static struct cgroup_kn_cftype kn_cfts[];

/* cgroup optional features */
enum cgroup_opt_features {
#ifdef CONFIG_PSI
Expand Down Expand Up @@ -4030,29 +4048,58 @@ static int cgroup_freeze_show(struct seq_file *seq, void *v)
return 0;
}

static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
/*
 * Decide whether @cgrp is an acceptable target for a write issued through a
 * kernfs node: both the cgroup_on_dfl() check and the cgroup_parent() check
 * must pass. The second check is skipped when the first one fails.
 */
static bool cgroup_kn_revalidate(struct cgroup *cgrp)
{
	return cgroup_on_dfl(cgrp) && cgroup_parent(cgrp);
}

/*
 * cgroup_kn_freeze - freeze or thaw the cgroup behind a kernfs node
 * @kn: kernfs node of the target cgroup
 * @buf: value to parse; only the first @nbytes bytes are read, so the buffer
 *	 does not have to be NUL-terminated
 * @nbytes: number of valid bytes in @buf
 * @off: unused
 * @revalidate: when true, the cgroup must also pass cgroup_kn_revalidate()
 *
 * The value must parse as the integer 0 or 1 and, together with any
 * surrounding whitespace, fit in three characters.
 *
 * Return: the number of bytes consumed from @buf on success; -E2BIG when the
 * value does not fit the local buffer; the kstrtoint() error when it is not
 * an integer; -ERANGE when it is neither 0 nor 1; -ENOENT when
 * cgroup_kn_lock_live() returns no cgroup; -EOPNOTSUPP when revalidation
 * fails.
 */
static ssize_t cgroup_kn_freeze(struct kernfs_node *kn,
				const char *buf, size_t nbytes, loff_t off,
				bool revalidate)
{
	struct cgroup *cgrp;
	const char *nul;
	size_t len;
	ssize_t ret;
	int freeze;
	char b[4] = {0};

	/*
	 * Handle userspace writes +(0|1)\n and fail otherwise.
	 *
	 * Copy by hand instead of using strscpy(): callers may pass a buffer
	 * that holds exactly @nbytes bytes with no terminator, so nothing past
	 * @nbytes may be read. For NUL-terminated input the outcome is the
	 * same as strscpy(b, buf, sizeof(b)): the string length on success,
	 * -E2BIG when it does not fit.
	 */
	len = nbytes < sizeof(b) ? nbytes : sizeof(b);
	nul = memchr(buf, '\0', len);
	if (nul)
		len = nul - buf;
	if (len >= sizeof(b))
		return -E2BIG;
	memcpy(b, buf, len);	/* b[] is zero-initialised, so it stays terminated */

	ret = kstrtoint(strstrip(b), 0, &freeze);
	if (ret)
		return ret;

	if (freeze < 0 || freeze > 1)
		return -ERANGE;

	cgrp = cgroup_kn_lock_live(kn, false);
	if (!cgrp)
		return -ENOENT;

	if (revalidate && !cgroup_kn_revalidate(cgrp)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	cgroup_freeze(cgrp, freeze);

	ret = len;
out:
	cgroup_kn_unlock(kn);
	return ret;
}

/*
 * Write handler for an open kernfs file: forwards the request to
 * cgroup_kn_freeze() on the file's node, with revalidation disabled.
 */
static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	struct kernfs_node *node = of->kn;

	return cgroup_kn_freeze(node, buf, nbytes, off, false);
}

static void __cgroup_kill(struct cgroup *cgrp)
Expand Down Expand Up @@ -4601,6 +4648,49 @@ void cgroup_file_show(struct cgroup_file *cfile, bool show)
kernfs_put(kn);
}

/*
 * Interface files that can be written through cgroup_kn_interface_write().
 * The trailing empty entry terminates the table: cgroup_kn_cft() stops at the
 * first entry whose name is empty.
 */
static struct cgroup_kn_cftype kn_cfts[] = {
{
.name = CGROUP_CORE_INTERFACE_FREEZE,
.namelen = CGROUP_CORE_INTERFACE_FREEZE_LEN,
.write = cgroup_kn_freeze,
},
{ },
};

static const struct cgroup_kn_cftype *cgroup_kn_cft(const char *name__str)
{
struct cgroup_kn_cftype *kn_cft;

for (kn_cft = kn_cfts; kn_cft && kn_cft->name[0] != '\0'; kn_cft++) {
if (!strncmp(name__str, kn_cft->name, kn_cft->namelen))
return kn_cft;
}

return ERR_PTR(-EOPNOTSUPP);
}

/*
 * cgroup_kn_interface_write - write to a core interface file via a kernfs node
 * @kn: kernfs node to operate on; it must be a directory
 * @name__str: name of the interface file
 * @buf: data to write
 * @nbytes: number of bytes in @buf
 * @off: offset handed to the write handler
 *
 * Return: 0 for a zero-length write, -ENOTDIR when @kn is not a directory,
 * the lookup error when cgroup_kn_cft() fails, -EOPNOTSUPP when the entry
 * has no write handler, otherwise the handler's result. The handler is
 * always invoked with revalidation enabled.
 */
ssize_t cgroup_kn_interface_write(struct kernfs_node *kn, const char *name__str,
				  const char *buf, size_t nbytes, loff_t off)
{
	const struct cgroup_kn_cftype *entry;

	/* Zero-length writes succeed before any other validation. */
	if (nbytes == 0)
		return 0;

	if (kernfs_type(kn) != KERNFS_DIR)
		return -ENOTDIR;

	entry = cgroup_kn_cft(name__str);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	if (unlikely(!entry->write))
		return -EOPNOTSUPP;

	return entry->write(kn, buf, nbytes, off, true);
}

/**
* css_next_child - find the next child of a given css
* @pos: the current position (%NULL to initiate traversal)
Expand Down
172 changes: 172 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/task_freeze_cgroup.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// SPDX-License-Identifier: GPL-2.0

#include <signal.h>
#include <sys/syscall.h>
#include <test_progs.h>
#include <cgroup_helpers.h>
#include <unistd.h>
#include "test_task_freeze_cgroup.skel.h"

/*
 * Relative path of the cgroup the child process creates and joins; the parent
 * appends it to the child's work directory to locate cgroup.freeze.
 */
#define CGROUP_PATH "/test-task-freeze-cgroup"

/*
 * Child-side half of the test.
 *
 * Creates and joins CGROUP_PATH, loads the skeleton, attaches the LSM
 * program, and then attempts a tracing attach that is expected to fail with
 * EPERM. After that it attaches the tracing program for real, forks once,
 * and checks that the BPF side recorded the pid of that fork.
 *
 * @skel is ignored on entry: the function opens, uses and destroys its own
 * skeleton.
 *
 * Return: 0 on success and a non-zero value on any failure. The caller turns
 * this into the process exit status, so every failure path has to yield a
 * non-zero value.
 */
static int bpf_sleepable(struct test_task_freeze_cgroup *skel)
{
	int err, cgroup_fd, attach_errno;
	pid_t new_pid2;

	/*
	 * errno is read after the ASSERT_* macro has logged and may have
	 * been cleared by then, so fall back to -EINVAL rather than
	 * reporting success.
	 */
	cgroup_fd = cgroup_setup_and_join(CGROUP_PATH);
	if (!ASSERT_OK(cgroup_fd < 0, "cgroup_setup_and_join"))
		return errno ? -errno : -EINVAL;

	skel = test_task_freeze_cgroup__open();
	if (!ASSERT_OK_PTR(skel, "test_task_freeze_cgroup__open")) {
		err = errno ? -errno : -EINVAL;
		goto cleanup_cgroup;
	}

	skel->rodata->parent_pid = getppid();
	skel->rodata->monitor_pid = getpid();
	skel->rodata->cgid = get_cgroup_id(CGROUP_PATH);
	skel->bss->new_pid = getpid();
	skel->bss->freeze = 1;

	/* On failure err already holds the non-zero load result; keep it. */
	err = test_task_freeze_cgroup__load(skel);
	if (!ASSERT_OK(err, "test_task_freeze_cgroup__load"))
		goto cleanup_skel;

	/* First attach the LSM Program that is triggered on bpf() calls
	 * especially on TP_BTF programs when attached.
	 */
	skel->links.lsm_freeze_cgroup =
		bpf_program__attach_lsm(skel->progs.lsm_freeze_cgroup);
	if (!ASSERT_OK_PTR(skel->links.lsm_freeze_cgroup, "attach_lsm")) {
		err = errno ? -errno : -EINVAL;
		goto cleanup_detach;
	}

	/*
	 * Attaching this must fail with -EPERM and freeze current task.
	 * Clear errno first and sample it right after the call, so that a
	 * stale EPERM from an earlier call cannot satisfy the check.
	 */
	errno = 0;
	skel->links.tp_newchild =
		bpf_program__attach_trace(skel->progs.tp_newchild);
	attach_errno = errno;
	if (!ASSERT_EQ(attach_errno, EPERM, "attach_trace() must fail here")) {
		err = -EINVAL;
		goto cleanup_detach;
	}

	/* Continue */

	/* Attach again now with success */
	skel->links.tp_newchild =
		bpf_program__attach_trace(skel->progs.tp_newchild);
	if (!ASSERT_OK_PTR(skel->links.tp_newchild, "attach_trace")) {
		err = -EINVAL;
		goto cleanup_detach;
	}

	/* Fork, update vars from BPF and assert the unfrozen state */
	new_pid2 = fork();
	if (new_pid2 == 0)
		exit(0);

	/* Without a child there is nothing to verify below. */
	err = new_pid2 == -1 ? -errno : 0;
	if (!ASSERT_OK(err, "fork process"))
		goto cleanup_detach;
	wait(NULL);

	/*
	 * Run all three checks so every mismatch is logged, and fold any
	 * failure into the return value.
	 */
	/* Now assert that new_pid2 reflects this new child */
	if (!ASSERT_NEQ(0, skel->bss->new_pid,
			"test task_freeze_cgroup failed at new_pid != 0"))
		err = -EINVAL;
	if (!ASSERT_NEQ(skel->rodata->monitor_pid, skel->bss->new_pid,
			"test task_freeze_cgroup failed at old monitor_pid != new_pid"))
		err = -EINVAL;
	/* Assert that bpf sets new_pid to new forked child new_pid2 */
	if (!ASSERT_EQ(skel->bss->new_pid, new_pid2,
		       "test task_freeze_cgroup failed first child new_pid == new_pid2"))
		err = -EINVAL;

cleanup_detach:
	test_task_freeze_cgroup__detach(skel);
cleanup_skel:
	test_task_freeze_cgroup__destroy(skel);
cleanup_cgroup:
	close(cgroup_fd);
	cleanup_cgroup_environment();
	return err;
}

/*
 * Parent-side half of the test.
 *
 * Forks a child that runs bpf_sleepable(), polls the child's cgroup.freeze
 * file (up to 11 attempts, one second apart) until it reads non-zero, then
 * loads the skeleton and attaches the lsm_task_free program, and finally
 * waits for the child and checks that it exited normally with status 0.
 *
 * Whatever path is taken, the child is reaped before returning so that a
 * failed run leaves no stray process behind.
 */
void test_task_freeze_cgroup(void)
{
	pid_t pid, result;
	char buf[512] = {0};
	char path[PATH_MAX] = {0};
	int ret, status, attempts, frozen = 0, fd;
	int child_reaped = 0;
	struct test_task_freeze_cgroup *skel = NULL;

	pid = fork();
	ret = (pid == -1);
	if (!ASSERT_OK(ret, "fork process"))
		return;

	if (pid == 0) {
		ret = bpf_sleepable(skel);
		ASSERT_EQ(0, ret, "child bpf_sleepable failed");
		/*
		 * Only the low 8 bits of the argument reach the parent, so
		 * map any failure to a fixed non-zero status.
		 */
		exit(ret ? EXIT_FAILURE : EXIT_SUCCESS);
	}

	skel = test_task_freeze_cgroup__open();
	if (!ASSERT_OK_PTR(skel, "test_task_freeze_cgroup__open"))
		goto out;

	snprintf(path, sizeof(path),
		 "/sys/fs/cgroup/cgroup-test-work-dir%d%s/cgroup.freeze",
		 pid, CGROUP_PATH);

	for (attempts = 10; attempts >= 0; attempts--) {
		/* 0 is a valid descriptor; only a negative value is a failure. */
		fd = open(path, O_RDONLY);
		if (fd >= 0) {
			ret = read(fd, buf, sizeof(buf) - 1);
			close(fd);
			if (ret > 0) {
				/* Do not parse leftovers of a longer, earlier read. */
				buf[ret] = '\0';
				errno = 0;
				frozen = strtol(buf, NULL, 10);
				if (errno)
					frozen = 0;
			}
		}

		if (frozen)
			break;
		sleep(1);
	}

	/* Assert that child cgroup is frozen */
	if (!ASSERT_EQ(1, frozen, "child cgroup not frozen"))
		goto out;

	ret = test_task_freeze_cgroup__load(skel);
	if (!ASSERT_OK(ret, "test_task_freeze_cgroup__load"))
		goto out;

	/* Trigger the unthaw child cgroup from parent */
	skel->links.lsm_task_free =
		bpf_program__attach_lsm(skel->progs.lsm_task_free);
	if (!ASSERT_OK_PTR(skel->links.lsm_task_free, "attach_lsm"))
		goto out;

	result = waitpid(pid, &status, WUNTRACED);
	if (!ASSERT_NEQ(result, -1, "waitpid"))
		goto detach;

	/* WUNTRACED also reports stops; only a terminated child is reaped. */
	child_reaped = WIFEXITED(status) || WIFSIGNALED(status);

	result = WIFEXITED(status);
	if (!ASSERT_EQ(result, 1, "forked process did not terminate normally"))
		goto detach;

	result = WEXITSTATUS(status);
	if (!ASSERT_EQ(result, 0, "forked process did not exit successfully"))
		goto detach;

detach:
	test_task_freeze_cgroup__detach(skel);

out:
	/* Reap the child on every path where the wait above did not. */
	if (!child_reaped) {
		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
	}
	if (skel)
		test_task_freeze_cgroup__destroy(skel);
}
Loading
Loading