Commit 5095a2b

Yu Zhao authored and akpm00 committed
mm/mglru: try to stop at high watermarks
The initial MGLRU patchset didn't include the memcg LRU support, and it
relied on should_abort_scan(), added by commit f76c833 ("mm: multi-gen
LRU: optimize multiple memcgs"), to "backoff to avoid overshooting their
aggregate reclaim target by too much".

Later on when the memcg LRU was added, should_abort_scan() was deemed
unnecessary, and the test results [1] showed no side effects after it
was removed by commit a579086 ("mm: multi-gen LRU: remove eviction
fairness safeguard").

However, that test used memory.reclaim, which sets nr_to_reclaim to
SWAP_CLUSTER_MAX. So it can overshoot only by SWAP_CLUSTER_MAX-1 pages,
i.e., from nr_reclaimed=nr_to_reclaim-1 to
nr_reclaimed=nr_to_reclaim+SWAP_CLUSTER_MAX-1. Compared with the batch
size kswapd sets to nr_to_reclaim, SWAP_CLUSTER_MAX is tiny. Therefore
that test isn't able to reproduce the worst case scenario, i.e., kswapd
overshooting GBs on large systems and "consuming 100% CPU" (see the
Closes tag).

Bring back a simplified version of should_abort_scan() on top of the
memcg LRU, so that kswapd stops when all eligible zones are above their
respective high watermarks plus a small delta to lower the chance of
KSWAPD_HIGH_WMARK_HIT_QUICKLY. Note that this only applies to order-0
reclaim, meaning compaction-induced reclaim can still run wild (which
is a different problem).

On Android, launching 55 apps sequentially:

             Before     After      Change
  pgpgin     838377172  802955040  -4%
  pgpgout    38037080   34336300   -10%

[1] https://lore.kernel.org/[email protected]/

Link: https://lkml.kernel.org/r/[email protected]
Fixes: a579086 ("mm: multi-gen LRU: remove eviction fairness safeguard")
Signed-off-by: Yu Zhao <[email protected]>
Reported-by: Charan Teja Kalla <[email protected]>
Reported-by: Jaroslav Pulchart <[email protected]>
Closes: https://lore.kernel.org/CAK8fFZ4DY+GtBA40Pm7Nn5xCHy+51w3sfxPqkqpqakSXYyX+Wg@mail.gmail.com/
Tested-by: Jaroslav Pulchart <[email protected]>
Tested-by: Kalesh Singh <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Kairui Song <[email protected]>
Cc: T.J. Mercier <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
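For intuition, here is a minimal standalone sketch of the stop condition
the restored should_abort_scan() applies to order-0 kswapd reclaim. The
struct zone_info type, the all_zones_safe() helper, and the WMARK_DELTA
value are hypothetical stand-ins for illustration only; the real check in
the diff below walks node_zones and uses zone_watermark_ok() with the
kernel's MIN_LRU_BATCH as the delta.

#include <stdbool.h>

/* Hypothetical stand-in for the small delta above the watermark; it
 * plays the role the kernel's MIN_LRU_BATCH plays in the real check. */
#define WMARK_DELTA 64

struct zone_info {
        bool managed;                   /* zone has managed pages */
        unsigned long free_pages;       /* free pages in the zone */
        unsigned long high_wmark;       /* high watermark, in pages */
};

/*
 * kswapd may stop once every eligible zone (index 0..reclaim_idx) sits
 * above its high watermark plus a small delta; the delta lowers the
 * chance of KSWAPD_HIGH_WMARK_HIT_QUICKLY right after kswapd sleeps.
 */
static bool all_zones_safe(const struct zone_info *zones, int reclaim_idx)
{
        for (int i = 0; i <= reclaim_idx; i++) {
                const struct zone_info *z = &zones[i];

                if (z->managed && z->free_pages < z->high_wmark + WMARK_DELTA)
                        return false;
        }
        return true;
}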
1 parent 0814880 commit 5095a2b

File tree

1 file changed: +28 -8 lines changed

mm/vmscan.c

Lines changed: 28 additions & 8 deletions
@@ -4648,20 +4648,41 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
 	return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
 }
 
-static unsigned long get_nr_to_reclaim(struct scan_control *sc)
+static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
 {
+	int i;
+	enum zone_watermarks mark;
+
 	/* don't abort memcg reclaim to ensure fairness */
 	if (!root_reclaim(sc))
-		return -1;
+		return false;
+
+	if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order)))
+		return true;
+
+	/* check the order to exclude compaction-induced reclaim */
+	if (!current_is_kswapd() || sc->order)
+		return false;
 
-	return max(sc->nr_to_reclaim, compact_gap(sc->order));
+	mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ?
+	       WMARK_PROMO : WMARK_HIGH;
+
+	for (i = 0; i <= sc->reclaim_idx; i++) {
+		struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
+		unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH;
+
+		if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0))
+			return false;
+	}
+
+	/* kswapd should abort if all eligible zones are safe */
+	return true;
 }
 
 static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 	long nr_to_scan;
 	unsigned long scanned = 0;
-	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
 	int swappiness = get_swappiness(lruvec, sc);
 
 	/* clean file folios are more likely to exist */
@@ -4683,7 +4704,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 		if (scanned >= nr_to_scan)
 			break;
 
-		if (sc->nr_reclaimed >= nr_to_reclaim)
+		if (should_abort_scan(lruvec, sc))
 			break;
 
 		cond_resched();
@@ -4744,7 +4765,6 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
 	struct lru_gen_folio *lrugen;
 	struct mem_cgroup *memcg;
 	const struct hlist_nulls_node *pos;
-	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
 
 	bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
 restart:
@@ -4777,7 +4797,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
 
 		rcu_read_lock();
 
-		if (sc->nr_reclaimed >= nr_to_reclaim)
+		if (should_abort_scan(lruvec, sc))
 			break;
 	}
 
@@ -4788,7 +4808,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
 
 	mem_cgroup_put(memcg);
 
-	if (sc->nr_reclaimed >= nr_to_reclaim)
+	if (!is_a_nulls(pos))
 		return;
 
 	/* restart if raced with lru_gen_rotate_memcg() */
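A note on the last hunk: with the reclaim-target check folded into
should_abort_scan(), shrink_many() now returns early when the memcg bin
walk was broken out of, and falls through to the restart logic otherwise.
The hlist_nulls iterator makes that detectable: such a list ends in a
"nulls" marker rather than NULL, so the cursor pos still refers to a real
node only if the loop exited via break. Below is a minimal sketch of that
convention, assuming the usual low-bit marker encoding that is_a_nulls()
in <linux/list_nulls.h> relies on; nulls_marker() is a hypothetical
stand-in, not the kernel helper.

/* Sketch: hlist_nulls lists terminate in an odd-valued marker, so a
 * cursor that is still an even-aligned (real) pointer at loop exit
 * means the walk stopped early via break. */
static inline int nulls_marker(const void *pos)
{
        return (unsigned long)pos & 1;  /* odd => end-of-list marker */
}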
