Skip to content

Commit a87a108

Browse files
committed
Merge feature/memory-reclaim/5.15 into v5.15.153
* commit 'fed46d1f99d22a5a9efd06da0bf5baf6a04045d8': selftests: cgroup: add a selftest for memory.reclaim selftests: cgroup: fix unsigned comparison with less than zero selftests: cgroup: fix alloc_anon_noexit() instantly freeing memory selftests: cgroup: return -errno from cg_read()/cg_write() on failure memcg: introduce per-memcg reclaim interface
2 parents 3b7adb8 + fed46d1 commit a87a108

File tree

4 files changed

+198
-27
lines changed

4 files changed

+198
-27
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,6 +1200,27 @@ PAGE_SIZE multiple when read back.
12001200
high limit is used and monitored properly, this limit's
12011201
utility is limited to providing the final safety net.
12021202

1203+
memory.reclaim
1204+
A write-only nested-keyed file which exists for all cgroups.
1205+
1206+
This is a simple interface to trigger memory reclaim in the
1207+
target cgroup.
1208+
1209+
This file accepts a single key, the number of bytes to reclaim.
1210+
No nested keys are currently supported.
1211+
1212+
Example::
1213+
1214+
echo "1G" > memory.reclaim
1215+
1216+
The interface can be later extended with nested keys to
1217+
configure the reclaim behavior. For example, specify the
1218+
type of memory to reclaim from (anon, file, ..).
1219+
1220+
Please note that the kernel can over or under reclaim from
1221+
the target cgroup. If fewer bytes are reclaimed than the
1222+
specified amount, -EAGAIN is returned.
1223+
12031224
memory.oom.group
12041225
A read-write single value file which exists on non-root
12051226
cgroups. The default value is "0".

mm/memcontrol.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6483,6 +6483,46 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
64836483
return nbytes;
64846484
}
64856485

6486+
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
6487+
size_t nbytes, loff_t off)
6488+
{
6489+
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
6490+
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
6491+
unsigned long nr_to_reclaim, nr_reclaimed = 0;
6492+
int err;
6493+
6494+
buf = strstrip(buf);
6495+
err = page_counter_memparse(buf, "", &nr_to_reclaim);
6496+
if (err)
6497+
return err;
6498+
6499+
while (nr_reclaimed < nr_to_reclaim) {
6500+
unsigned long reclaimed;
6501+
6502+
if (signal_pending(current))
6503+
return -EINTR;
6504+
6505+
/*
6506+
* This is the final attempt, drain percpu lru caches in the
6507+
* hope of introducing more evictable pages for
6508+
* try_to_free_mem_cgroup_pages().
6509+
*/
6510+
if (!nr_retries)
6511+
lru_add_drain_all();
6512+
6513+
reclaimed = try_to_free_mem_cgroup_pages(memcg,
6514+
nr_to_reclaim - nr_reclaimed,
6515+
GFP_KERNEL, true);
6516+
6517+
if (!reclaimed && !nr_retries--)
6518+
return -EAGAIN;
6519+
6520+
nr_reclaimed += reclaimed;
6521+
}
6522+
6523+
return nbytes;
6524+
}
6525+
64866526
static struct cftype memory_files[] = {
64876527
{
64886528
.name = "current",
@@ -6541,6 +6581,11 @@ static struct cftype memory_files[] = {
65416581
.seq_show = memory_oom_group_show,
65426582
.write = memory_oom_group_write,
65436583
},
6584+
{
6585+
.name = "reclaim",
6586+
.flags = CFTYPE_NS_DELEGATABLE,
6587+
.write = memory_reclaim,
6588+
},
65446589
{ } /* terminate */
65456590
};
65466591

tools/testing/selftests/cgroup/cgroup_util.c

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,37 @@
1919
#include "cgroup_util.h"
2020
#include "../clone3/clone3_selftests.h"
2121

22+
/*
 * Read the contents of @path into @buf as a NUL-terminated string.
 *
 * At most @max_len - 1 bytes are read so that the terminator always fits;
 * a zero-sized buffer cannot hold the terminator and is rejected.
 *
 * Returns read len on success, or -errno on failure.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	/* max_len - 1 would wrap around to a huge read size. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* save it: close() below may overwrite errno */
	else
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
4040

41+
/*
 * Append @len bytes from @buf to the existing file at @path.
 *
 * The file is opened with O_WRONLY | O_APPEND and is not created if it
 * does not exist.
 *
 * Returns written len on success, or -errno on failure.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	len = write(fd, buf, len);
	if (len < 0)
		err = errno;	/* save it: close() below may overwrite errno */

	close(fd);
	return len < 0 ? -err : len;
}
5954

6055
char *cg_name(const char *root, const char *name)
@@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control)
8782
return ret;
8883
}
8984

85+
/* Returns 0 on success, or -errno on failure. */
9086
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
9187
{
9288
char path[PATH_MAX];
89+
ssize_t ret;
9390

9491
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
9592

96-
if (read_text(path, buf, len) >= 0)
97-
return 0;
98-
99-
return -1;
93+
ret = read_text(path, buf, len);
94+
return ret >= 0 ? 0 : ret;
10095
}
10196

10297
int cg_read_strcmp(const char *cgroup, const char *control,
@@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control)
177172
return cnt;
178173
}
179174

175+
/* Returns 0 on success, or -errno on failure. */
180176
int cg_write(const char *cgroup, const char *control, char *buf)
181177
{
182178
char path[PATH_MAX];
183-
ssize_t len = strlen(buf);
179+
ssize_t len = strlen(buf), ret;
184180

185181
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
186-
187-
if (write_text(path, buf, len) == len)
188-
return 0;
189-
190-
return -1;
182+
ret = write_text(path, buf, len);
183+
return ret == len ? 0 : ret;
191184
}
192185

193186
int cg_find_unified_root(char *root, size_t len)
@@ -538,14 +531,16 @@ int set_oom_adj_score(int pid, int score)
538531
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
539532
{
540533
char path[PATH_MAX];
534+
ssize_t ret;
541535

542536
if (!pid)
543537
snprintf(path, sizeof(path), "/proc/%s/%s",
544538
thread ? "thread-self" : "self", item);
545539
else
546540
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
547541

548-
return read_text(path, buf, size);
542+
ret = read_text(path, buf, size);
543+
return ret < 0 ? -1 : ret;
549544
}
550545

551546
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)

tools/testing/selftests/cgroup/test_memcontrol.c

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,13 +210,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
210210
static int alloc_anon_noexit(const char *cgroup, void *arg)
211211
{
212212
int ppid = getppid();
213+
size_t size = (unsigned long)arg;
214+
char *buf, *ptr;
213215

214-
if (alloc_anon(cgroup, arg))
215-
return -1;
216+
buf = malloc(size);
217+
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
218+
*ptr = 0;
216219

217220
while (getppid() == ppid)
218221
sleep(1);
219222

223+
free(buf);
220224
return 0;
221225
}
222226

@@ -679,6 +683,111 @@ static int test_memcg_max(const char *root)
679683
return ret;
680684
}
681685

686+
/*
687+
* This test checks that memory.reclaim reclaims the given
688+
* amount of memory (from both anon and file, if possible).
689+
*/
690+
static int test_memcg_reclaim(const char *root)
691+
{
692+
int ret = KSFT_FAIL, fd, retries;
693+
char *memcg;
694+
long current, expected_usage, to_reclaim;
695+
char buf[64];
696+
697+
memcg = cg_name(root, "memcg_test");
698+
if (!memcg)
699+
goto cleanup;
700+
701+
if (cg_create(memcg))
702+
goto cleanup;
703+
704+
current = cg_read_long(memcg, "memory.current");
705+
if (current != 0)
706+
goto cleanup;
707+
708+
fd = get_temp_fd();
709+
if (fd < 0)
710+
goto cleanup;
711+
712+
cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
713+
714+
/*
715+
* If swap is enabled, try to reclaim from both anon and file, else try
716+
* to reclaim from file only.
717+
*/
718+
if (is_swap_enabled()) {
719+
cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
720+
expected_usage = MB(100);
721+
} else
722+
expected_usage = MB(50);
723+
724+
/*
725+
* Wait until current usage reaches the expected usage (or we run out of
726+
* retries).
727+
*/
728+
retries = 5;
729+
while (!values_close(cg_read_long(memcg, "memory.current"),
730+
expected_usage, 10)) {
731+
if (retries--) {
732+
sleep(1);
733+
continue;
734+
} else {
735+
fprintf(stderr,
736+
"failed to allocate %ld for memcg reclaim test\n",
737+
expected_usage);
738+
goto cleanup;
739+
}
740+
}
741+
742+
/*
743+
* Reclaim until current reaches 30M, this makes sure we hit both anon
744+
* and file if swap is enabled.
745+
*/
746+
retries = 5;
747+
while (true) {
748+
int err;
749+
750+
current = cg_read_long(memcg, "memory.current");
751+
to_reclaim = current - MB(30);
752+
753+
/*
754+
* We only keep looping if we get EAGAIN, which means we could
755+
* not reclaim the full amount.
756+
*/
757+
if (to_reclaim <= 0)
758+
goto cleanup;
759+
760+
761+
snprintf(buf, sizeof(buf), "%ld", to_reclaim);
762+
err = cg_write(memcg, "memory.reclaim", buf);
763+
if (!err) {
764+
/*
765+
* If writing succeeds, then the written amount should have been
766+
* fully reclaimed (and maybe more).
767+
*/
768+
current = cg_read_long(memcg, "memory.current");
769+
if (!values_close(current, MB(30), 3) && current > MB(30))
770+
goto cleanup;
771+
break;
772+
}
773+
774+
/* The kernel could not reclaim the full amount, try again. */
775+
if (err == -EAGAIN && retries--)
776+
continue;
777+
778+
/* We got an unexpected error or ran out of retries. */
779+
goto cleanup;
780+
}
781+
782+
ret = KSFT_PASS;
783+
cleanup:
784+
cg_destroy(memcg);
785+
free(memcg);
786+
close(fd);
787+
788+
return ret;
789+
}
790+
682791
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
683792
{
684793
long mem_max = (long)arg;
@@ -1181,6 +1290,7 @@ struct memcg_test {
11811290
T(test_memcg_low),
11821291
T(test_memcg_high),
11831292
T(test_memcg_max),
1293+
T(test_memcg_reclaim),
11841294
T(test_memcg_oom_events),
11851295
T(test_memcg_swap_max),
11861296
T(test_memcg_sock),

0 commit comments

Comments
 (0)