Skip to content

Commit c68ed79

Ingo Molnar authored and torvalds committed
mm/vmstat: protect per cpu variables with preempt disable on RT
Disable preemption on -RT for the vmstat code. On vanilla the code runs
in IRQ-off regions while on -RT it may not when stats are updated under
a local_lock. "preempt_disable" ensures that the same resource is not
updated in parallel due to preemption.

This patch differs from the preempt-rt version where __count_vm_event and
__count_vm_events are also protected. The counters are explicitly "allowed
to be racy" so there is no need to protect them from preemption. Only the
accurate page stats that are updated by a read-modify-write need protection.
This patch also differs in that a preempt_[en|dis]able_rt helper is not used.
As vmstat is the only user of the helper, it was suggested that it be
open-coded in vmstat.c instead of risking the helper being used in
unnecessary contexts.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Hugh Dickins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 2d33820 commit c68ed79
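
For orientation, here is a minimal, self-contained userspace sketch of the pattern the patch applies: a per-CPU-style counter is updated with a read-modify-write and folded into a global total once it crosses a threshold, and on PREEMPT_RT that RMW is bracketed by preempt_disable()/preempt_enable(). Everything in the sketch is an illustrative stand-in (simplified names, no-op preempt helpers), not the kernel's implementation; the real change is in the mm/vmstat.c diff below.

/*
 * Illustrative, standalone userspace sketch -- NOT kernel code. The names
 * (mod_state, vm_stat_diff, stat_threshold, global_counter) and the no-op
 * preempt helpers are stand-ins invented for this example.
 */
#include <stdio.h>

#define PREEMPT_RT 1			/* pretend this is an RT build */

static long vm_stat_diff;		/* stands in for the per-CPU diff */
static long stat_threshold = 32;	/* stands in for pcp->stat_threshold */
static long global_counter;		/* stands in for the zone/node total */

static void preempt_disable(void) { }	/* no-op stand-ins for the real APIs */
static void preempt_enable(void)  { }

static void mod_state(long delta)
{
	long x;

	/*
	 * On PREEMPT_RT the caller's local_lock no longer disables
	 * preemption, so the read-modify-write below could be interleaved
	 * with another task on the same CPU; bracket it with
	 * preempt_disable()/preempt_enable(), as the patch does.
	 */
	if (PREEMPT_RT)
		preempt_disable();

	x = delta + vm_stat_diff;		/* read-modify ... */
	if (x > stat_threshold || x < -stat_threshold) {
		global_counter += x;		/* fold into the global total */
		x = 0;
	}
	vm_stat_diff = x;			/* ... write */

	if (PREEMPT_RT)
		preempt_enable();
}

int main(void)
{
	mod_state(5);		/* stays in the per-CPU diff */
	mod_state(40);		/* crosses the threshold, gets folded */
	printf("diff=%ld global=%ld\n", vm_stat_diff, global_counter);
	return 0;
}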

1 file changed: +48 −0 lines changed

mm/vmstat.c

Lines changed: 48 additions & 0 deletions
@@ -319,6 +319,16 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 	long x;
 	long t;
 
+	/*
+	 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
+	 * atomicity is provided by IRQs being disabled -- either explicitly
+	 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
+	 * CPU migrations and preemption potentially corrupts a counter so
+	 * disable preemption.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	x = delta + __this_cpu_read(*p);
 
 	t = __this_cpu_read(pcp->stat_threshold);
@@ -328,6 +338,9 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
@@ -350,6 +363,10 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
 		delta >>= PAGE_SHIFT;
 	}
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	x = delta + __this_cpu_read(*p);
 
 	t = __this_cpu_read(pcp->stat_threshold);
@@ -359,6 +376,9 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 EXPORT_SYMBOL(__mod_node_page_state);
 
@@ -391,6 +411,10 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_inc_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v > t)) {
@@ -399,6 +423,9 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 		zone_page_state_add(v + overstep, zone, item);
 		__this_cpu_write(*p, -overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -409,6 +436,10 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 
 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_inc_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v > t)) {
@@ -417,6 +448,9 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 		node_page_state_add(v + overstep, pgdat, item);
 		__this_cpu_write(*p, -overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -437,6 +471,10 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_dec_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v < - t)) {
@@ -445,6 +483,9 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 		zone_page_state_add(v - overstep, zone, item);
 		__this_cpu_write(*p, overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -455,6 +496,10 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 
 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_dec_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v < - t)) {
@@ -463,6 +508,9 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 		node_page_state_add(v - overstep, pgdat, item);
 		__this_cpu_write(*p, overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
