|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +/* |
| 3 | + * DAMON-based page reclamation |
| 4 | + * |
| 5 | + * Author: SeongJae Park <[email protected]> |
| 6 | + */ |
| 7 | + |
| 8 | +#define pr_fmt(fmt) "damon-reclaim: " fmt |
| 9 | + |
| 10 | +#include <linux/damon.h> |
| 11 | +#include <linux/ioport.h> |
| 12 | +#include <linux/module.h> |
| 13 | +#include <linux/sched.h> |
| 14 | +#include <linux/workqueue.h> |
| 15 | + |
| 16 | +#ifdef MODULE_PARAM_PREFIX |
| 17 | +#undef MODULE_PARAM_PREFIX |
| 18 | +#endif |
| 19 | +#define MODULE_PARAM_PREFIX "damon_reclaim." |
| 20 | + |
| 21 | +/* |
| 22 | + * Enable or disable DAMON_RECLAIM. |
| 23 | + * |
| 24 | + * You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``. |
| 25 | + * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could |
| 26 | + * do no real monitoring and reclamation due to the watermarks-based activation |
| 27 | + * condition. Refer to below descriptions for the watermarks parameter for |
| 28 | + * this. |
| 29 | + */ |
| 30 | +static bool enabled __read_mostly; |
| 31 | +module_param(enabled, bool, 0600); |
| 32 | + |
| 33 | +/* |
| 34 | + * Time threshold for cold memory regions identification in microseconds. |
| 35 | + * |
| 36 | + * If a memory region is not accessed for this or longer time, DAMON_RECLAIM |
| 37 | + * identifies the region as cold, and reclaims. 120 seconds by default. |
| 38 | + */ |
| 39 | +static unsigned long min_age __read_mostly = 120000000; |
| 40 | +module_param(min_age, ulong, 0600); |
| 41 | + |
| 42 | +/* |
| 43 | + * Limit of time for trying the reclamation in milliseconds. |
| 44 | + * |
| 45 | + * DAMON_RECLAIM tries to use only up to this time within a time window |
| 46 | + * (quota_reset_interval_ms) for trying reclamation of cold pages. This can be |
| 47 | + * used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, |
| 48 | + * the limit is disabled. |
| 49 | + * |
| 50 | + * 10 ms by default. |
| 51 | + */ |
| 52 | +static unsigned long quota_ms __read_mostly = 10; |
| 53 | +module_param(quota_ms, ulong, 0600); |
| 54 | + |
| 55 | +/* |
| 56 | + * Limit of size of memory for the reclamation in bytes. |
| 57 | + * |
| 58 | + * DAMON_RECLAIM charges the amount of memory that it tried to reclaim within a |
| 59 | + * time window (quota_reset_interval_ms) and ensures that no more than this |
| 60 | + * limit is tried. This can be used for limiting consumption of CPU and IO. If this |
| 61 | + * value is zero, the limit is disabled. |
| 62 | + * |
| 63 | + * 128 MiB by default. |
| 64 | + */ |
| 65 | +static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024; |
| 66 | +module_param(quota_sz, ulong, 0600); |
| 67 | + |
| 68 | +/* |
| 69 | + * The time/size quota charge reset interval in milliseconds. |
| 70 | + * |
| 71 | + * The charge reset interval for the quota of time (quota_ms) and size |
| 72 | + * (quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than |
| 73 | + * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms |
| 74 | + * milliseconds. |
| 75 | + * |
| 76 | + * 1 second by default. |
| 77 | + */ |
| 78 | +static unsigned long quota_reset_interval_ms __read_mostly = 1000; |
| 79 | +module_param(quota_reset_interval_ms, ulong, 0600); |
| 80 | + |
| 81 | +/* |
| 82 | + * The watermarks check time interval in microseconds. |
| 83 | + * |
| 84 | + * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is |
| 85 | + * enabled but inactive due to its watermarks rule. 5 seconds by default. |
| 86 | + */ |
| 87 | +static unsigned long wmarks_interval __read_mostly = 5000000; |
| 88 | +module_param(wmarks_interval, ulong, 0600); |
| 89 | + |
| 90 | +/* |
| 91 | + * Free memory rate (per thousand) for the high watermark. |
| 92 | + * |
| 93 | + * If free memory of the system in bytes per thousand bytes is higher than |
| 94 | + * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically |
| 95 | + * checks the watermarks. 500 (50%) by default. |
| 96 | + */ |
| 97 | +static unsigned long wmarks_high __read_mostly = 500; |
| 98 | +module_param(wmarks_high, ulong, 0600); |
| 99 | + |
| 100 | +/* |
| 101 | + * Free memory rate (per thousand) for the middle watermark. |
| 102 | + * |
| 103 | + * If free memory of the system in bytes per thousand bytes is between this and |
| 104 | + * the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring |
| 105 | + * and the reclaiming. 400 (40%) by default. |
| 106 | + */ |
| 107 | +static unsigned long wmarks_mid __read_mostly = 400; |
| 108 | +module_param(wmarks_mid, ulong, 0600); |
| 109 | + |
| 110 | +/* |
| 111 | + * Free memory rate (per thousand) for the low watermark. |
| 112 | + * |
| 113 | + * If free memory of the system in bytes per thousand bytes is lower than this, |
| 114 | + * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks |
| 115 | + * the watermarks. In the case, the system falls back to the LRU-based page |
| 116 | + * granularity reclamation logic. 200 (20%) by default. |
| 117 | + */ |
| 118 | +static unsigned long wmarks_low __read_mostly = 200; |
| 119 | +module_param(wmarks_low, ulong, 0600); |
| 120 | + |
| 121 | +/* |
| 122 | + * Sampling interval for the monitoring in microseconds. |
| 123 | + * |
| 124 | + * The sampling interval of DAMON for the cold memory monitoring. Please refer |
| 125 | + * to the DAMON documentation for more detail. 5 ms by default. |
| 126 | + */ |
| 127 | +static unsigned long sample_interval __read_mostly = 5000; |
| 128 | +module_param(sample_interval, ulong, 0600); |
| 129 | + |
| 130 | +/* |
| 131 | + * Aggregation interval for the monitoring in microseconds. |
| 132 | + * |
| 133 | + * The aggregation interval of DAMON for the cold memory monitoring. Please |
| 134 | + * refer to the DAMON documentation for more detail. 100 ms by default. |
| 135 | + */ |
| 136 | +static unsigned long aggr_interval __read_mostly = 100000; |
| 137 | +module_param(aggr_interval, ulong, 0600); |
| 138 | + |
| 139 | +/* |
| 140 | + * Minimum number of monitoring regions. |
| 141 | + * |
| 142 | + * The minimal number of monitoring regions of DAMON for the cold memory |
| 143 | + * monitoring. This can be used to set lower-bound of the monitoring quality. |
| 144 | + * But, setting this too high could result in increased monitoring overhead. |
| 145 | + * Please refer to the DAMON documentation for more detail. 10 by default. |
| 146 | + */ |
| 147 | +static unsigned long min_nr_regions __read_mostly = 10; |
| 148 | +module_param(min_nr_regions, ulong, 0600); |
| 149 | + |
| 150 | +/* |
| 151 | + * Maximum number of monitoring regions. |
| 152 | + * |
| 153 | + * The maximum number of monitoring regions of DAMON for the cold memory |
| 154 | + * monitoring. This can be used to set upper-bound of the monitoring overhead. |
| 155 | + * However, setting this too low could result in bad monitoring quality. |
| 156 | + * Please refer to the DAMON documentation for more detail. 1000 by default. |
| 157 | + */ |
| 158 | +static unsigned long max_nr_regions __read_mostly = 1000; |
| 159 | +module_param(max_nr_regions, ulong, 0600); |
| 160 | + |
| 161 | +/* |
| 162 | + * Start of the target memory region in physical address. |
| 163 | + * |
| 164 | + * The start physical address of memory region that DAMON_RECLAIM will do work |
| 165 | + * against. By default, biggest System RAM is used as the region. |
| 166 | + */ |
| 167 | +static unsigned long monitor_region_start __read_mostly; |
| 168 | +module_param(monitor_region_start, ulong, 0600); |
| 169 | + |
| 170 | +/* |
| 171 | + * End of the target memory region in physical address. |
| 172 | + * |
| 173 | + * The end physical address of memory region that DAMON_RECLAIM will do work |
| 174 | + * against. By default, biggest System RAM is used as the region. |
| 175 | + */ |
| 176 | +static unsigned long monitor_region_end __read_mostly; |
| 177 | +module_param(monitor_region_end, ulong, 0600); |
| 178 | + |
| 179 | +/* |
| 180 | + * PID of the DAMON thread |
| 181 | + * |
| 182 | + * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. |
| 183 | + * Else, -1. |
| 184 | + */ |
| 185 | +static int kdamond_pid __read_mostly = -1; |
| 186 | +module_param(kdamond_pid, int, 0400); |
| 187 | + |
| 188 | +static struct damon_ctx *ctx; |
| 189 | +static struct damon_target *target; |
| 190 | + |
| 191 | +struct damon_reclaim_ram_walk_arg { |
| 192 | + unsigned long start; |
| 193 | + unsigned long end; |
| 194 | +}; |
| 195 | + |
| 196 | +static int walk_system_ram(struct resource *res, void *arg) |
| 197 | +{ |
| 198 | + struct damon_reclaim_ram_walk_arg *a = arg; |
| 199 | + |
| 200 | + if (a->end - a->start < res->end - res->start) { |
| 201 | + a->start = res->start; |
| 202 | + a->end = res->end; |
| 203 | + } |
| 204 | + return 0; |
| 205 | +} |
| 206 | + |
| 207 | +/* |
| 208 | + * Find biggest 'System RAM' resource and store its start and end address in |
| 209 | + * @start and @end, respectively. If no System RAM is found, returns false. |
| 210 | + */ |
| 211 | +static bool get_monitoring_region(unsigned long *start, unsigned long *end) |
| 212 | +{ |
| 213 | + struct damon_reclaim_ram_walk_arg arg = {}; |
| 214 | + |
| 215 | + walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); |
| 216 | + if (arg.end <= arg.start) |
| 217 | + return false; |
| 218 | + |
| 219 | + *start = arg.start; |
| 220 | + *end = arg.end; |
| 221 | + return true; |
| 222 | +} |
| 223 | + |
| 224 | +static struct damos *damon_reclaim_new_scheme(void) |
| 225 | +{ |
| 226 | + struct damos_watermarks wmarks = { |
| 227 | + .metric = DAMOS_WMARK_FREE_MEM_RATE, |
| 228 | + .interval = wmarks_interval, |
| 229 | + .high = wmarks_high, |
| 230 | + .mid = wmarks_mid, |
| 231 | + .low = wmarks_low, |
| 232 | + }; |
| 233 | + struct damos_quota quota = { |
| 234 | + /* |
| 235 | + * Do not try reclamation for more than quota_ms milliseconds |
| 236 | + * or quota_sz bytes within quota_reset_interval_ms. |
| 237 | + */ |
| 238 | + .ms = quota_ms, |
| 239 | + .sz = quota_sz, |
| 240 | + .reset_interval = quota_reset_interval_ms, |
| 241 | + /* Within the quota, page out older regions first. */ |
| 242 | + .weight_sz = 0, |
| 243 | + .weight_nr_accesses = 0, |
| 244 | + .weight_age = 1 |
| 245 | + }; |
| 246 | + struct damos *scheme = damon_new_scheme( |
| 247 | + /* Find regions having PAGE_SIZE or larger size */ |
| 248 | + PAGE_SIZE, ULONG_MAX, |
| 249 | + /* and not accessed at all */ |
| 250 | + 0, 0, |
| 251 | + /* for min_age or more micro-seconds, and */ |
| 252 | + min_age / aggr_interval, UINT_MAX, |
| 253 | + /* page out those, as soon as found */ |
| 254 | + DAMOS_PAGEOUT, |
| 255 | + /* under the quota. */ |
| 256 | + "a, |
| 257 | + /* (De)activate this according to the watermarks. */ |
| 258 | + &wmarks); |
| 259 | + |
| 260 | + return scheme; |
| 261 | +} |
| 262 | + |
| 263 | +static int damon_reclaim_turn(bool on) |
| 264 | +{ |
| 265 | + struct damon_region *region; |
| 266 | + struct damos *scheme; |
| 267 | + int err; |
| 268 | + |
| 269 | + if (!on) { |
| 270 | + err = damon_stop(&ctx, 1); |
| 271 | + if (!err) |
| 272 | + kdamond_pid = -1; |
| 273 | + return err; |
| 274 | + } |
| 275 | + |
| 276 | + err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, |
| 277 | + min_nr_regions, max_nr_regions); |
| 278 | + if (err) |
| 279 | + return err; |
| 280 | + |
| 281 | + if (monitor_region_start > monitor_region_end) |
| 282 | + return -EINVAL; |
| 283 | + if (!monitor_region_start && !monitor_region_end && |
| 284 | + !get_monitoring_region(&monitor_region_start, |
| 285 | + &monitor_region_end)) |
| 286 | + return -EINVAL; |
| 287 | + /* DAMON will free this on its own when finish monitoring */ |
| 288 | + region = damon_new_region(monitor_region_start, monitor_region_end); |
| 289 | + if (!region) |
| 290 | + return -ENOMEM; |
| 291 | + damon_add_region(region, target); |
| 292 | + |
| 293 | + /* Will be freed by 'damon_set_schemes()' below */ |
| 294 | + scheme = damon_reclaim_new_scheme(); |
| 295 | + if (!scheme) { |
| 296 | + err = -ENOMEM; |
| 297 | + goto free_region_out; |
| 298 | + } |
| 299 | + err = damon_set_schemes(ctx, &scheme, 1); |
| 300 | + if (err) |
| 301 | + goto free_scheme_out; |
| 302 | + |
| 303 | + err = damon_start(&ctx, 1); |
| 304 | + if (!err) { |
| 305 | + kdamond_pid = ctx->kdamond->pid; |
| 306 | + return 0; |
| 307 | + } |
| 308 | + |
| 309 | +free_scheme_out: |
| 310 | + damon_destroy_scheme(scheme); |
| 311 | +free_region_out: |
| 312 | + damon_destroy_region(region, target); |
| 313 | + return err; |
| 314 | +} |
| 315 | + |
| 316 | +#define ENABLE_CHECK_INTERVAL_MS 1000 |
| 317 | +static struct delayed_work damon_reclaim_timer; |
| 318 | +static void damon_reclaim_timer_fn(struct work_struct *work) |
| 319 | +{ |
| 320 | + static bool last_enabled; |
| 321 | + bool now_enabled; |
| 322 | + |
| 323 | + now_enabled = enabled; |
| 324 | + if (last_enabled != now_enabled) { |
| 325 | + if (!damon_reclaim_turn(now_enabled)) |
| 326 | + last_enabled = now_enabled; |
| 327 | + else |
| 328 | + enabled = last_enabled; |
| 329 | + } |
| 330 | + |
| 331 | + schedule_delayed_work(&damon_reclaim_timer, |
| 332 | + msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS)); |
| 333 | +} |
| 334 | +static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); |
| 335 | + |
| 336 | +static int __init damon_reclaim_init(void) |
| 337 | +{ |
| 338 | + ctx = damon_new_ctx(); |
| 339 | + if (!ctx) |
| 340 | + return -ENOMEM; |
| 341 | + |
| 342 | + damon_pa_set_primitives(ctx); |
| 343 | + |
| 344 | + /* 4242 means nothing but fun */ |
| 345 | + target = damon_new_target(4242); |
| 346 | + if (!target) { |
| 347 | + damon_destroy_ctx(ctx); |
| 348 | + return -ENOMEM; |
| 349 | + } |
| 350 | + damon_add_target(ctx, target); |
| 351 | + |
| 352 | + schedule_delayed_work(&damon_reclaim_timer, 0); |
| 353 | + return 0; |
| 354 | +} |
| 355 | + |
| 356 | +module_init(damon_reclaim_init); |
0 commit comments