Skip to content

Commit 43b0536

Browse files
sjp38 authored and torvalds committed
mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM)
This implements a new kernel subsystem that finds cold memory regions using DAMON and reclaims those immediately. It is intended to be used as proactive lightweight reclamation logic for light memory pressure. For heavy memory pressure, it could be inactivated and fall back to the traditional page-scanning based reclamation. It's implemented on top of DAMON framework to use the DAMON-based Operation Schemes (DAMOS) feature. It utilizes all the DAMOS features including speed limit, prioritization, and watermarks. It could be enabled and tuned in boot time via the kernel boot parameter, and in run time via its module parameters ('/sys/module/damon_reclaim/parameters/') interface. [[email protected]: fix error return code in damon_reclaim_turn()] Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: SeongJae Park <[email protected]> Signed-off-by: Yang Yingliang <[email protected]> Cc: Amit Shah <[email protected]> Cc: Benjamin Herrenschmidt <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: David Rientjes <[email protected]> Cc: David Woodhouse <[email protected]> Cc: Greg Thelen <[email protected]> Cc: Jonathan Cameron <[email protected]> Cc: Jonathan Corbet <[email protected]> Cc: Leonard Foerster <[email protected]> Cc: Marco Elver <[email protected]> Cc: Markus Boehme <[email protected]> Cc: Shakeel Butt <[email protected]> Cc: Shuah Khan <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 1dc90cc commit 43b0536

File tree

3 files changed

+369
-0
lines changed

3 files changed

+369
-0
lines changed

mm/damon/Kconfig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,16 @@ config DAMON_DBGFS_KUNIT_TEST
7373

7474
If unsure, say N.
7575

76+
config DAMON_RECLAIM
77+
bool "Build DAMON-based reclaim (DAMON_RECLAIM)"
78+
depends on DAMON_PADDR
79+
help
80+
This builds the DAMON-based reclamation subsystem. It finds pages
81+
that are not accessed for a long time (cold) using DAMON and reclaims
82+
those.
83+
84+
This is suggested to be used as a proactive and lightweight
85+
reclamation under light memory pressure, while the traditional page
86+
scanning-based reclamation is used for heavy pressure.
87+
7688
endmenu

mm/damon/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ obj-$(CONFIG_DAMON) := core.o
44
obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o
55
obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o
66
obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o
7+
obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o

mm/damon/reclaim.c

Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* DAMON-based page reclamation
4+
*
5+
* Author: SeongJae Park <[email protected]>
6+
*/
7+
8+
#define pr_fmt(fmt) "damon-reclaim: " fmt
9+
10+
#include <linux/damon.h>
11+
#include <linux/ioport.h>
12+
#include <linux/module.h>
13+
#include <linux/sched.h>
14+
#include <linux/workqueue.h>
15+
16+
#ifdef MODULE_PARAM_PREFIX
17+
#undef MODULE_PARAM_PREFIX
18+
#endif
19+
#define MODULE_PARAM_PREFIX "damon_reclaim."
20+
21+
/*
22+
* Enable or disable DAMON_RECLAIM.
23+
*
24+
* You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``.
25+
* Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could
26+
* do no real monitoring and reclamation due to the watermarks-based activation
27+
* condition. Refer to below descriptions for the watermarks parameter for
28+
* this.
29+
*/
30+
static bool enabled __read_mostly;
31+
module_param(enabled, bool, 0600);
32+
33+
/*
34+
* Time threshold for cold memory regions identification in microseconds.
35+
*
36+
* If a memory region is not accessed for this or longer time, DAMON_RECLAIM
37+
* identifies the region as cold, and reclaims. 120 seconds by default.
38+
*/
39+
static unsigned long min_age __read_mostly = 120000000;
40+
module_param(min_age, ulong, 0600);
41+
42+
/*
43+
* Limit of time for trying the reclamation in milliseconds.
44+
*
45+
* DAMON_RECLAIM tries to use only up to this time within a time window
46+
* (quota_reset_interval_ms) for trying reclamation of cold pages. This can be
47+
* used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero,
48+
* the limit is disabled.
49+
*
50+
* 10 ms by default.
51+
*/
52+
static unsigned long quota_ms __read_mostly = 10;
53+
module_param(quota_ms, ulong, 0600);
54+
55+
/*
56+
* Limit of size of memory for the reclamation in bytes.
57+
*
58+
* DAMON_RECLAIM charges amount of memory which it tried to reclaim within a
59+
* time window (quota_reset_interval_ms) and ensures that no more than this limit is
60+
* tried. This can be used for limiting consumption of CPU and IO. If this
61+
* value is zero, the limit is disabled.
62+
*
63+
* 128 MiB by default.
64+
*/
65+
static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024;
66+
module_param(quota_sz, ulong, 0600);
67+
68+
/*
69+
* The time/size quota charge reset interval in milliseconds.
70+
*
71+
* The charge reset interval for the quota of time (quota_ms) and size
72+
* (quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than
73+
* quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms
74+
* milliseconds.
75+
*
76+
* 1 second by default.
77+
*/
78+
static unsigned long quota_reset_interval_ms __read_mostly = 1000;
79+
module_param(quota_reset_interval_ms, ulong, 0600);
80+
81+
/*
82+
* The watermarks check time interval in microseconds.
83+
*
84+
* Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is
85+
* enabled but inactive due to its watermarks rule. 5 seconds by default.
86+
*/
87+
static unsigned long wmarks_interval __read_mostly = 5000000;
88+
module_param(wmarks_interval, ulong, 0600);
89+
90+
/*
91+
* Free memory rate (per thousand) for the high watermark.
92+
*
93+
* If free memory of the system in bytes per thousand bytes is higher than
94+
* this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically
95+
* checks the watermarks. 500 (50%) by default.
96+
*/
97+
static unsigned long wmarks_high __read_mostly = 500;
98+
module_param(wmarks_high, ulong, 0600);
99+
100+
/*
101+
* Free memory rate (per thousand) for the middle watermark.
102+
*
103+
* If free memory of the system in bytes per thousand bytes is between this and
104+
* the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring
105+
* and the reclaiming. 400 (40%) by default.
106+
*/
107+
static unsigned long wmarks_mid __read_mostly = 400;
108+
module_param(wmarks_mid, ulong, 0600);
109+
110+
/*
111+
* Free memory rate (per thousand) for the low watermark.
112+
*
113+
* If free memory of the system in bytes per thousand bytes is lower than this,
114+
* DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks
115+
* the watermarks. In the case, the system falls back to the LRU-based page
116+
* granularity reclamation logic. 200 (20%) by default.
117+
*/
118+
static unsigned long wmarks_low __read_mostly = 200;
119+
module_param(wmarks_low, ulong, 0600);
120+
121+
/*
122+
* Sampling interval for the monitoring in microseconds.
123+
*
124+
* The sampling interval of DAMON for the cold memory monitoring. Please refer
125+
* to the DAMON documentation for more detail. 5 ms by default.
126+
*/
127+
static unsigned long sample_interval __read_mostly = 5000;
128+
module_param(sample_interval, ulong, 0600);
129+
130+
/*
131+
* Aggregation interval for the monitoring in microseconds.
132+
*
133+
* The aggregation interval of DAMON for the cold memory monitoring. Please
134+
* refer to the DAMON documentation for more detail. 100 ms by default.
135+
*/
136+
static unsigned long aggr_interval __read_mostly = 100000;
137+
module_param(aggr_interval, ulong, 0600);
138+
139+
/*
140+
* Minimum number of monitoring regions.
141+
*
142+
* The minimal number of monitoring regions of DAMON for the cold memory
143+
* monitoring. This can be used to set lower-bound of the monitoring quality.
144+
* But, setting this too high could result in increased monitoring overhead.
145+
* Please refer to the DAMON documentation for more detail. 10 by default.
146+
*/
147+
static unsigned long min_nr_regions __read_mostly = 10;
148+
module_param(min_nr_regions, ulong, 0600);
149+
150+
/*
151+
* Maximum number of monitoring regions.
152+
*
153+
* The maximum number of monitoring regions of DAMON for the cold memory
154+
* monitoring. This can be used to set upper-bound of the monitoring overhead.
155+
* However, setting this too low could result in bad monitoring quality.
156+
* Please refer to the DAMON documentation for more detail. 1000 by default.
157+
*/
158+
static unsigned long max_nr_regions __read_mostly = 1000;
159+
module_param(max_nr_regions, ulong, 0600);
160+
161+
/*
162+
* Start of the target memory region in physical address.
163+
*
164+
* The start physical address of memory region that DAMON_RECLAIM will do work
165+
* against. By default, biggest System RAM is used as the region.
166+
*/
167+
static unsigned long monitor_region_start __read_mostly;
168+
module_param(monitor_region_start, ulong, 0600);
169+
170+
/*
171+
* End of the target memory region in physical address.
172+
*
173+
* The end physical address of memory region that DAMON_RECLAIM will do work
174+
* against. By default, biggest System RAM is used as the region.
175+
*/
176+
static unsigned long monitor_region_end __read_mostly;
177+
module_param(monitor_region_end, ulong, 0600);
178+
179+
/*
180+
* PID of the DAMON thread
181+
*
182+
* If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
183+
* Else, -1.
184+
*/
185+
static int kdamond_pid __read_mostly = -1;
186+
module_param(kdamond_pid, int, 0400);
187+
188+
static struct damon_ctx *ctx;
189+
static struct damon_target *target;
190+
191+
/*
 * Accumulator passed as the opaque argument to walk_system_ram().  It holds
 * the bounds copied from the widest resource that callback has seen so far.
 */
struct damon_reclaim_ram_walk_arg {
192+
unsigned long start;	/* copied from res->start of the widest resource */
193+
unsigned long end;	/* copied from res->end of the widest resource */
194+
};
195+
196+
static int walk_system_ram(struct resource *res, void *arg)
197+
{
198+
struct damon_reclaim_ram_walk_arg *a = arg;
199+
200+
if (a->end - a->start < res->end - res->start) {
201+
a->start = res->start;
202+
a->end = res->end;
203+
}
204+
return 0;
205+
}
206+
207+
/*
208+
* Find biggest 'System RAM' resource and store its start and end address in
209+
* @start and @end, respectively. If no System RAM is found, returns false.
210+
*/
211+
static bool get_monitoring_region(unsigned long *start, unsigned long *end)
212+
{
213+
struct damon_reclaim_ram_walk_arg arg = {};
214+
215+
walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
216+
if (arg.end <= arg.start)
217+
return false;
218+
219+
*start = arg.start;
220+
*end = arg.end;
221+
return true;
222+
}
223+
224+
static struct damos *damon_reclaim_new_scheme(void)
225+
{
226+
struct damos_watermarks wmarks = {
227+
.metric = DAMOS_WMARK_FREE_MEM_RATE,
228+
.interval = wmarks_interval,
229+
.high = wmarks_high,
230+
.mid = wmarks_mid,
231+
.low = wmarks_low,
232+
};
233+
struct damos_quota quota = {
234+
/*
235+
* Do not try reclamation for more than quota_ms milliseconds
236+
* or quota_sz bytes within quota_reset_interval_ms.
237+
*/
238+
.ms = quota_ms,
239+
.sz = quota_sz,
240+
.reset_interval = quota_reset_interval_ms,
241+
/* Within the quota, page out older regions first. */
242+
.weight_sz = 0,
243+
.weight_nr_accesses = 0,
244+
.weight_age = 1
245+
};
246+
struct damos *scheme = damon_new_scheme(
247+
/* Find regions having PAGE_SIZE or larger size */
248+
PAGE_SIZE, ULONG_MAX,
249+
/* and not accessed at all */
250+
0, 0,
251+
/* for min_age or more micro-seconds, and */
252+
min_age / aggr_interval, UINT_MAX,
253+
/* page out those, as soon as found */
254+
DAMOS_PAGEOUT,
255+
/* under the quota. */
256+
&quota,
257+
/* (De)activate this according to the watermarks. */
258+
&wmarks);
259+
260+
return scheme;
261+
}
262+
263+
/*
 * Start or stop DAMON_RECLAIM.
 *
 * @on: true to configure and start the monitoring context, false to stop it.
 *
 * When turning on, the current module parameters are applied: the monitoring
 * attributes are set, the target region is installed (falling back to the
 * biggest System RAM resource when both region parameters are zero), the
 * reclamation scheme is built and registered, and the context is started.
 * On success kdamond_pid is set to the worker thread's PID.  When turning
 * off, kdamond_pid is reset to -1 once the context has been stopped.
 *
 * Returns 0 on success.  Returns -EINVAL for an invalid parameter
 * combination, -ENOMEM if the region or scheme cannot be allocated, or the
 * error code of the failing DAMON call otherwise.
 */
static int damon_reclaim_turn(bool on)
{
	struct damon_region *region;
	struct damos *scheme;
	int err;

	if (!on) {
		err = damon_stop(&ctx, 1);
		if (!err)
			kdamond_pid = -1;
		return err;
	}

	/*
	 * aggr_interval is writable at run time and is used as the divisor
	 * when the scheme's age threshold is computed.  Reject zero here,
	 * before anything is allocated, instead of dividing by it.
	 */
	if (!aggr_interval)
		return -EINVAL;

	err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
			min_nr_regions, max_nr_regions);
	if (err)
		return err;

	if (monitor_region_start > monitor_region_end)
		return -EINVAL;
	/* No region given by the user: use the biggest System RAM. */
	if (!monitor_region_start && !monitor_region_end &&
			!get_monitoring_region(&monitor_region_start,
				&monitor_region_end))
		return -EINVAL;
	/* DAMON will free this on its own when finish monitoring */
	region = damon_new_region(monitor_region_start, monitor_region_end);
	if (!region)
		return -ENOMEM;
	damon_add_region(region, target);

	/* Will be freed by 'damon_set_schemes()' below */
	scheme = damon_reclaim_new_scheme();
	if (!scheme) {
		err = -ENOMEM;
		goto free_region_out;
	}
	err = damon_set_schemes(ctx, &scheme, 1);
	if (err)
		goto free_scheme_out;

	err = damon_start(&ctx, 1);
	if (!err) {
		kdamond_pid = ctx->kdamond->pid;
		return 0;
	}

	/* Starting failed: fall through and undo the setup above. */
free_scheme_out:
	damon_destroy_scheme(scheme);
free_region_out:
	damon_destroy_region(region, target);
	return err;
}
315+
316+
#define ENABLE_CHECK_INTERVAL_MS 1000
317+
static struct delayed_work damon_reclaim_timer;
318+
static void damon_reclaim_timer_fn(struct work_struct *work)
319+
{
320+
static bool last_enabled;
321+
bool now_enabled;
322+
323+
now_enabled = enabled;
324+
if (last_enabled != now_enabled) {
325+
if (!damon_reclaim_turn(now_enabled))
326+
last_enabled = now_enabled;
327+
else
328+
enabled = last_enabled;
329+
}
330+
331+
schedule_delayed_work(&damon_reclaim_timer,
332+
msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS));
333+
}
334+
static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
335+
336+
/*
 * Module initialization: create the DAMON context and its single target,
 * then queue the work that polls the 'enabled' parameter.
 *
 * Returns 0 on success, or -ENOMEM if the context or the target cannot be
 * allocated.
 */
static int __init damon_reclaim_init(void)
{
	ctx = damon_new_ctx();
	if (!ctx)
		return -ENOMEM;

	damon_pa_set_primitives(ctx);

	/* 4242 means nothing but fun */
	target = damon_new_target(4242);
	if (target) {
		damon_add_target(ctx, target);
		/* Run the first 'enabled' check without delay. */
		schedule_delayed_work(&damon_reclaim_timer, 0);
		return 0;
	}

	damon_destroy_ctx(ctx);
	return -ENOMEM;
}
355+
356+
module_init(damon_reclaim_init);

0 commit comments

Comments
 (0)