
Commit e26733e

cdown authored and torvalds committed
mm, memcg: throttle allocators based on ancestral memory.high
Prior to this commit, we only directly check the affected cgroup's
memory.high against its usage. However, it's possible that we are being
reclaimed as a result of hitting an ancestor memory.high and should be
penalised based on that, instead.

This patch changes memory.high overage throttling to use the largest
overage in its ancestors when considering how many penalty jiffies to
charge. This makes sure that we penalise poorly behaving cgroups in the
same way regardless of at what level of the hierarchy memory.high was
breached.

Fixes: 0e4b01d ("mm, memcg: throttle allocators when failing reclaim over memory.high")
Reported-by: Johannes Weiner <[email protected]>
Signed-off-by: Chris Down <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Nathan Chancellor <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: <[email protected]> [5.4.x+]
Link: http://lkml.kernel.org/r/8cd132f84bd7e16cdb8fde3378cdbf05ba00d387.1584036142.git.chris@chrisdown.name
Signed-off-by: Linus Torvalds <[email protected]>
1 parent d397a45 commit e26733e
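For readers skimming the change, here is a minimal userspace sketch of the new behaviour: walk from the charged cgroup up through its ancestors, keep the largest fixed-point overage of usage over memory.high, and turn that into a quadratic penalty in jiffies. This is illustrative only, not the kernel implementation: the cgroup chain is modelled as a plain array, HZ, MEMCG_DELAY_PRECISION_SHIFT and the MEMCG_MAX_HIGH_DELAY_JIFFIES cap are assumed values (only MEMCG_DELAY_SCALING_SHIFT = 14 appears in this diff), levels under their limit are simply skipped, and the per-task nr_pages scaling the kernel applies before the cap is omitted.

/*
 * Userspace sketch of the ancestral overage throttling, NOT kernel code.
 * Build with: cc -Wall -o overage_sketch overage_sketch.c
 */
#include <stdint.h>
#include <stdio.h>

#define HZ				1000	/* assumed tick rate */
#define MEMCG_DELAY_PRECISION_SHIFT	20	/* assumed; not shown in this hunk */
#define MEMCG_DELAY_SCALING_SHIFT	14	/* defined earlier in memcontrol.c */
#define MEMCG_MAX_HIGH_DELAY_JIFFIES	(2UL * HZ)	/* assumed cap */

struct fake_memcg {
	unsigned long usage;	/* pages currently charged */
	unsigned long high;	/* memory.high, in pages */
};

/* Walk the "hierarchy" (leaf first) and keep the largest fixed-point overage. */
static unsigned long calculate_high_delay_sketch(const struct fake_memcg *chain,
						 int depth)
{
	uint64_t max_overage = 0;
	uint64_t penalty_jiffies;
	int i;

	for (i = 0; i < depth; i++) {
		/* Act as if memory.high were at least 1 page to avoid div by 0. */
		unsigned long high = chain[i].high ? chain[i].high : 1;
		uint64_t overage;

		/* Sketch-only guard: skip levels that are under their limit. */
		if (chain[i].usage <= high)
			continue;

		overage = (uint64_t)(chain[i].usage - high)
			  << MEMCG_DELAY_PRECISION_SHIFT;
		overage /= high;

		if (overage > max_overage)
			max_overage = overage;
	}

	if (!max_overage)
		return 0;

	/* Quadratic penalty: lenient on small overages, harsh on large ones. */
	penalty_jiffies = max_overage * max_overage * HZ;
	penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
	penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;

	if (penalty_jiffies > MEMCG_MAX_HIGH_DELAY_JIFFIES)
		penalty_jiffies = MEMCG_MAX_HIGH_DELAY_JIFFIES;

	return (unsigned long)penalty_jiffies;
}

int main(void)
{
	/* The leaf is under its own limit, but an ancestor is 10% over. */
	struct fake_memcg chain[] = {
		{ .usage = 1000, .high = 2000 },	/* leaf */
		{ .usage = 5500, .high = 5000 },	/* ancestor, 10% over */
	};

	printf("penalty: %lu jiffies\n",
	       calculate_high_delay_sketch(chain, 2));
	return 0;
}

With the assumed constants above, the example prints roughly 640 jiffies (about 0.64 s at HZ = 1000) even though the leaf itself is under its limit, because the ancestor is 10% over — exactly the case this commit starts penalising.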

File tree

1 file changed: +58 additions, -35 deletions

mm/memcontrol.c

Lines changed: 58 additions & 35 deletions
@@ -2297,53 +2297,51 @@ static void high_work_func(struct work_struct *work)
 #define MEMCG_DELAY_SCALING_SHIFT 14
 
 /*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
  */
-void mem_cgroup_handle_over_high(void)
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+					  unsigned int nr_pages)
 {
-	unsigned long usage, high, clamped_high;
-	unsigned long pflags;
-	unsigned long penalty_jiffies, overage;
-	unsigned int nr_pages = current->memcg_nr_pages_over_high;
-	struct mem_cgroup *memcg;
+	unsigned long penalty_jiffies;
+	u64 max_overage = 0;
 
-	if (likely(!nr_pages))
-		return;
+	do {
+		unsigned long usage, high;
+		u64 overage;
 
-	memcg = get_mem_cgroup_from_mm(current->mm);
-	reclaim_high(memcg, nr_pages, GFP_KERNEL);
-	current->memcg_nr_pages_over_high = 0;
+		usage = page_counter_read(&memcg->memory);
+		high = READ_ONCE(memcg->high);
+
+		/*
+		 * Prevent division by 0 in overage calculation by acting as if
+		 * it was a threshold of 1 page
+		 */
+		high = max(high, 1UL);
+
+		overage = usage - high;
+		overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+		overage = div64_u64(overage, high);
+
+		if (overage > max_overage)
+			max_overage = overage;
+	} while ((memcg = parent_mem_cgroup(memcg)) &&
+		 !mem_cgroup_is_root(memcg));
+
+	if (!max_overage)
+		return 0;
 
 	/*
-	 * memory.high is breached and reclaim is unable to keep up. Throttle
-	 * allocators proactively to slow down excessive growth.
-	 *
 	 * We use overage compared to memory.high to calculate the number of
 	 * jiffies to sleep (penalty_jiffies). Ideally this value should be
 	 * fairly lenient on small overages, and increasingly harsh when the
 	 * memcg in question makes it clear that it has no intention of stopping
 	 * its crazy behaviour, so we exponentially increase the delay based on
 	 * overage amount.
 	 */
-
-	usage = page_counter_read(&memcg->memory);
-	high = READ_ONCE(memcg->high);
-
-	if (usage <= high)
-		goto out;
-
-	/*
-	 * Prevent division by 0 in overage calculation by acting as if it was a
-	 * threshold of 1 page
-	 */
-	clamped_high = max(high, 1UL);
-
-	overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
-			    clamped_high);
-
-	penalty_jiffies = ((u64)overage * overage * HZ)
-		>> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+	penalty_jiffies = max_overage * max_overage * HZ;
+	penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
+	penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;
 
 	/*
 	 * Factor in the task's own contribution to the overage, such that four
@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void)
 	 * application moving forwards and also permit diagnostics, albeit
 	 * extremely slowly.
 	 */
-	penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+	return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+	unsigned long penalty_jiffies;
+	unsigned long pflags;
+	unsigned int nr_pages = current->memcg_nr_pages_over_high;
+	struct mem_cgroup *memcg;
+
+	if (likely(!nr_pages))
+		return;
+
+	memcg = get_mem_cgroup_from_mm(current->mm);
+	reclaim_high(memcg, nr_pages, GFP_KERNEL);
+	current->memcg_nr_pages_over_high = 0;
+
+	/*
+	 * memory.high is breached and reclaim is unable to keep up. Throttle
+	 * allocators proactively to slow down excessive growth.
+	 */
+	penalty_jiffies = calculate_high_delay(memcg, nr_pages);
 
 	/*
 	 * Don't sleep if the amount of jiffies this memcg owes us is so low
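For intuition on the shifts above (a rough worked example, assuming MEMCG_DELAY_PRECISION_SHIFT is 20 as defined elsewhere in memcontrol.c; only MEMCG_DELAY_SCALING_SHIFT = 14 is visible in this hunk): max_overage stores the fractional overage of usage over memory.high in 2^20 fixed point, so the penalty comes out to roughly overage_fraction^2 * 2^(20 - 14) * HZ, i.e. about 64 * overage_fraction^2 seconds' worth of jiffies, before the task's-own-contribution scaling mentioned in the comment above and the MEMCG_MAX_HIGH_DELAY_JIFFIES clamp. A cgroup (or ancestor) 10% over its limit therefore accrues on the order of 0.64 s of delay per pass through mem_cgroup_handle_over_high(), while a 100% overage saturates the clamp.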
