@@ -150,6 +150,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 static void invoke_rcu_core(void);
 static void rcu_report_exp_rdp(struct rcu_data *rdp);
 static void sync_sched_exp_online_cleanup(int cpu);
+static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
 
 /* rcuc/rcub kthread realtime priority */
 static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
@@ -410,10 +411,15 @@ static long blimit = DEFAULT_RCU_BLIMIT;
 static long qhimark = DEFAULT_RCU_QHIMARK;
 #define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
 static long qlowmark = DEFAULT_RCU_QLOMARK;
+#define DEFAULT_RCU_QOVLD_MULT 2
+#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
+static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
+static long qovld_calc = -1;      /* No pre-initialization lock acquisitions! */
 
 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
+module_param(qovld, long, 0444);
 
 static ulong jiffies_till_first_fqs = ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
@@ -1072,7 +1078,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
         rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu);
         if (!READ_ONCE(*rnhqp) &&
             (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
-             time_after(jiffies, rcu_state.jiffies_resched))) {
+             time_after(jiffies, rcu_state.jiffies_resched) ||
+             rcu_state.cbovld)) {
                 WRITE_ONCE(*rnhqp, true);
                 /* Store rcu_need_heavy_qs before rcu_urgent_qs. */
                 smp_store_release(ruqp, true);
@@ -1089,8 +1096,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
          * So hit them over the head with the resched_cpu() hammer!
          */
         if (tick_nohz_full_cpu(rdp->cpu) &&
-            time_after(jiffies,
-                       READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) {
+            (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||
+             rcu_state.cbovld)) {
                 WRITE_ONCE(*ruqp, true);
                 resched_cpu(rdp->cpu);
                 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
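
Side note (annotation, not part of the patch): both urgency escalations in rcu_implicit_dynticks_qs() now fire as soon as rcu_state.cbovld is set, rather than waiting out the jiffies-based patience intervals. A minimal userspace sketch of the reshaped first test, with hypothetical names and plain comparisons standing in for time_after():

    #include <stdbool.h>

    /*
     * Hypothetical model of the patched condition: request a heavy
     * quiescent state once the grace period has run long enough, the
     * resched deadline has passed, or callbacks are overloaded.
     */
    static bool want_heavy_qs(unsigned long now, unsigned long gp_start,
                              unsigned long jiffies_resched,
                              unsigned long jtsq, bool cbovld)
    {
            return now > gp_start + jtsq * 2 ||  /* long-running grace period */
                   now > jiffies_resched ||      /* past the resched deadline */
                   cbovld;                       /* overloaded: do not wait */
    }
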
@@ -1704,8 +1711,9 @@ static void rcu_gp_fqs_loop(void)
  */
 static void rcu_gp_cleanup(void)
 {
-        unsigned long gp_duration;
+        int cpu;
         bool needgp = false;
+        unsigned long gp_duration;
         unsigned long new_gp_seq;
         bool offloaded;
         struct rcu_data *rdp;
@@ -1751,6 +1759,12 @@ static void rcu_gp_cleanup(void)
                 needgp = __note_gp_changes(rnp, rdp) || needgp;
                 /* smp_mb() provided by prior unlock-lock pair. */
                 needgp = rcu_future_gp_cleanup(rnp) || needgp;
+                // Reset overload indication for CPUs no longer overloaded.
+                if (rcu_is_leaf_node(rnp))
+                        for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) {
+                                rdp = per_cpu_ptr(&rcu_data, cpu);
+                                check_cb_ovld_locked(rdp, rnp);
+                        }
                 sq = rcu_nocb_gp_get(rnp);
                 raw_spin_unlock_irq_rcu_node(rnp);
                 rcu_nocb_gp_cleanup(sq);
@@ -2299,10 +2313,13 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
         struct rcu_data *rdp;
         struct rcu_node *rnp;
 
+        rcu_state.cbovld = rcu_state.cbovldnext;
+        rcu_state.cbovldnext = false;
         rcu_for_each_leaf_node(rnp) {
                 cond_resched_tasks_rcu_qs();
                 mask = 0;
                 raw_spin_lock_irqsave_rcu_node(rnp, flags);
+                rcu_state.cbovldnext |= !!rnp->cbovldmask;
                 if (rnp->qsmask == 0) {
                         if (!IS_ENABLED(CONFIG_PREEMPT_RCU) ||
                             rcu_preempt_blocked_readers_cgp(rnp)) {
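
Side note (annotation, not part of the patch): rcu_state.cbovld and rcu_state.cbovldnext form a two-phase latch. Each force-quiescent-state scan ORs "does this leaf rcu_node still have overloaded CPUs?" into cbovldnext, and the following scan consumes that answer through cbovld, so the overload flag always reflects one complete prior scan rather than a partially finished current one. A standalone sketch of the pattern, with hypothetical names:

    #include <stdbool.h>
    #include <stddef.h>

    struct leaf { unsigned long cbovldmask; };   /* bit per overloaded CPU */

    static bool cbovld;       /* overload seen during the previous full scan */
    static bool cbovldnext;   /* overload accumulated by the current scan */

    /* One scan over the leaves, mirroring the force_qs_rnp() bookkeeping. */
    static void scan(struct leaf *leaves, size_t n)
    {
            cbovld = cbovldnext;    /* publish the previous scan's verdict */
            cbovldnext = false;
            for (size_t i = 0; i < n; i++) {
                    cbovldnext |= !!leaves[i].cbovldmask;
                    /* ... per-leaf quiescent-state forcing would go here ... */
            }
    }
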
@@ -2583,6 +2600,48 @@ static void rcu_leak_callback(struct rcu_head *rhp)
 {
 }
 
+/*
+ * Check and if necessary update the leaf rcu_node structure's
+ * ->cbovldmask bit corresponding to the current CPU based on that CPU's
+ * number of queued RCU callbacks.  The caller must hold the leaf rcu_node
+ * structure's ->lock.
+ */
+static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp)
+{
+        raw_lockdep_assert_held_rcu_node(rnp);
+        if (qovld_calc <= 0)
+                return; // Early boot and wildcard value set.
+        if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc)
+                WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask);
+        else
+                WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask);
+}
+
+/*
+ * Check and if necessary update the leaf rcu_node structure's
+ * ->cbovldmask bit corresponding to the current CPU based on that CPU's
+ * number of queued RCU callbacks.  No locks need be held, but the
+ * caller must have disabled interrupts.
+ *
+ * Note that this function ignores the possibility that there are a lot
+ * of callbacks all of which have already seen the end of their respective
+ * grace periods.  This omission is due to the need for no-CBs CPUs to
+ * be holding ->nocb_lock to do this check, which is too heavy for a
+ * common-case operation.
+ */
+static void check_cb_ovld(struct rcu_data *rdp)
+{
+        struct rcu_node *const rnp = rdp->mynode;
+
+        if (qovld_calc <= 0 ||
+            ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) ==
+             !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask)))
+                return; // Early boot wildcard value or already set correctly.
+        raw_spin_lock_rcu_node(rnp);
+        check_cb_ovld_locked(rdp, rnp);
+        raw_spin_unlock_rcu_node(rnp);
+}
+
 /*
  * Helper function for call_rcu() and friends.  The cpu argument will
  * normally be -1, indicating "currently running CPU".  It may specify
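
Side note (annotation, not part of the patch): each leaf rcu_node tracks its overloaded CPUs as bits in ->cbovldmask, and the unlocked check_cb_ovld() takes the node lock only when the CPU's bit actually needs to flip, which keeps the common call_rcu() enqueue path cheap (the kernel uses READ_ONCE()/WRITE_ONCE() where this sketch does plain accesses). A compilable userspace model with hypothetical names and a pthread mutex standing in for the rcu_node ->lock:

    /* build: cc -pthread overload_sketch.c   (hypothetical file name) */
    #include <pthread.h>
    #include <stdbool.h>

    #define QOVLD_THRESHOLD 20000   /* stand-in for qovld_calc */

    struct node {
            pthread_mutex_t lock;
            unsigned long overload_mask;   /* bit per overloaded CPU, like ->cbovldmask */
    };

    struct cpu_data {
            struct node *node;
            unsigned long bit;      /* this CPU's bit, like ->grpmask */
            unsigned long n_cbs;    /* queued callbacks, like rcu_segcblist_n_cbs() */
    };

    /* Caller holds node->lock; set or clear this CPU's overload bit. */
    static void overload_update_locked(struct cpu_data *cd)
    {
            if (cd->n_cbs >= QOVLD_THRESHOLD)
                    cd->node->overload_mask |= cd->bit;
            else
                    cd->node->overload_mask &= ~cd->bit;
    }

    /* Lock-free fast path: only take the lock if the bit must change. */
    static void overload_check(struct cpu_data *cd)
    {
            bool want = cd->n_cbs >= QOVLD_THRESHOLD;
            bool have = !!(cd->node->overload_mask & cd->bit);

            if (want == have)
                    return;         /* already correct; nothing to do */
            pthread_mutex_lock(&cd->node->lock);
            overload_update_locked(cd);
            pthread_mutex_unlock(&cd->node->lock);
    }

    int main(void)
    {
            struct node n = { .lock = PTHREAD_MUTEX_INITIALIZER, .overload_mask = 0 };
            struct cpu_data cd = { .node = &n, .bit = 1UL << 3, .n_cbs = 25000 };

            overload_check(&cd);    /* sets the bit: 25000 >= threshold */
            cd.n_cbs = 100;
            overload_check(&cd);    /* clears it again */
            return 0;
    }
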
@@ -2626,6 +2685,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                 rcu_segcblist_init(&rdp->cblist);
         }
 
+        check_cb_ovld(rdp);
         if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
                 return; // Enqueued onto ->nocb_bypass, so just leave.
         /* If we get here, rcu_nocb_try_bypass() acquired ->nocb_lock. */
@@ -3814,6 +3874,13 @@ void __init rcu_init(void)
         rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
         WARN_ON(!rcu_par_gp_wq);
         srcu_init();
+
+        /* Fill in default value for rcutree.qovld boot parameter. */
+        /* -After- the rcu_node ->lock fields are initialized! */
+        if (qovld < 0)
+                qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark;
+        else
+                qovld_calc = qovld;
 }
 
 #include "tree_stall.h"
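
Side note (annotation, not part of the patch): qovld_calc starts out at -1, which makes the new overload checks bail out early and therefore never acquire an rcu_node ->lock before rcu_init() has run. Only then is the effective threshold resolved: an explicit rcutree.qovld value is used as-is, while a negative value asks for a default derived from the possibly tuned qhimark. A small sketch of that resolution, with hypothetical names:

    #define QHIMARK_DEFAULT 10000            /* stand-in for DEFAULT_RCU_QHIMARK */
    #define QOVLD_MULT 2                     /* stand-in for DEFAULT_RCU_QOVLD_MULT */

    static long qhimark = QHIMARK_DEFAULT;   /* callback-flood threshold (tunable) */
    static long qovld = QOVLD_MULT * QHIMARK_DEFAULT;  /* negative = "derive it" */
    static long qovld_calc = -1;             /* stays -1 until init, disabling checks */

    /* Mirrors the late initialization added to rcu_init(). */
    static void resolve_qovld(void)
    {
            if (qovld < 0)
                    qovld_calc = QOVLD_MULT * qhimark;   /* track a tuned qhimark */
            else
                    qovld_calc = qovld;                  /* explicit value wins */
    }
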