@@ -3028,9 +3028,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u32 divider = get_pelt_divider(&se->avg);
 	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+	sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+				      cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
 }
 #else
 static inline void
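The hunk above is the template for the whole series: instead of re-deriving the parent's `load_sum` as `load_avg * divider`, the entity's own contribution is subtracted from the sum and the result is clamped so it never drops below `load_avg * PELT_MIN_DIVIDER`. A minimal, self-contained sketch of that sub-and-clamp pattern follows; it is illustrative only — `pelt_signal` and `pelt_detach()` are made-up names, the constants are assumed values (`PELT_MIN_DIVIDER` is taken to be `LOAD_AVG_MAX - 1024`, the smallest divider `___update_load_avg()` can use), and the real `sub_positive()` additionally uses READ_ONCE()/WRITE_ONCE().

```c
/* Illustrative sketch, not kernel code: the sub-and-clamp pattern. */
#include <stdint.h>

#define LOAD_AVG_MAX		47742			/* assumed kernel value */
#define PELT_MIN_DIVIDER	(LOAD_AVG_MAX - 1024)	/* smallest PELT divider */

struct pelt_signal {			/* hypothetical stand-in for sched_avg */
	unsigned long avg;		/* e.g. cfs_rq->avg.util_avg */
	uint32_t sum;			/* e.g. cfs_rq->avg.util_sum */
};

/* Subtract but never underflow, mirroring the kernel's sub_positive(). */
#define sub_positive(ptr, val) do {					\
	typeof(*(ptr)) __sub = (val);					\
	*(ptr) = (*(ptr) > __sub) ? *(ptr) - __sub : 0;			\
} while (0)

/*
 * Remove one entity's contribution: subtract from both avg and sum, then
 * clamp sum so it never falls below avg * PELT_MIN_DIVIDER, the smallest
 * sum that the remaining avg can legitimately correspond to.
 */
void pelt_detach(struct pelt_signal *sig,
		 unsigned long avg_contrib, uint32_t sum_contrib)
{
	sub_positive(&sig->avg, avg_contrib);
	sub_positive(&sig->sum, sum_contrib);
	if (sig->sum < sig->avg * PELT_MIN_DIVIDER)
		sig->sum = (uint32_t)(sig->avg * PELT_MIN_DIVIDER);
}
```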
@@ -3381,7 +3383,6 @@ void set_task_rq_fair(struct sched_entity *se,
 	se->avg.last_update_time = n_last_update_time;
 }
 
-
 /*
  * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
  * propagate its contribution. The key to this propagation is the invariant
@@ -3449,15 +3450,14 @@ void set_task_rq_fair(struct sched_entity *se,
  * XXX: only do this for the part of runnable > running ?
  *
  */
-
 static inline void
 update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
-	u32 divider;
+	long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg;
+	u32 new_sum, divider;
 
 	/* Nothing to update */
-	if (!delta)
+	if (!delta_avg)
 		return;
 
 	/*
@@ -3466,23 +3466,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	 */
 	divider = get_pelt_divider(&cfs_rq->avg);
 
+
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
-	se->avg.util_sum = se->avg.util_avg * divider;
+	new_sum = se->avg.util_avg * divider;
+	delta_sum = (long)new_sum - (long)se->avg.util_sum;
+	se->avg.util_sum = new_sum;
 
 	/* Update parent cfs_rq utilization */
-	add_positive(&cfs_rq->avg.util_avg, delta);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+	add_positive(&cfs_rq->avg.util_avg, delta_avg);
+	add_positive(&cfs_rq->avg.util_sum, delta_sum);
+
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+				      cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
-	u32 divider;
+	long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+	u32 new_sum, divider;
 
 	/* Nothing to update */
-	if (!delta)
+	if (!delta_avg)
 		return;
 
 	/*
@@ -3493,19 +3500,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
 	/* Set new sched_entity's runnable */
 	se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
-	se->avg.runnable_sum = se->avg.runnable_avg * divider;
+	new_sum = se->avg.runnable_avg * divider;
+	delta_sum = (long)new_sum - (long)se->avg.runnable_sum;
+	se->avg.runnable_sum = new_sum;
 
 	/* Update parent cfs_rq runnable */
-	add_positive(&cfs_rq->avg.runnable_avg, delta);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+	add_positive(&cfs_rq->avg.runnable_avg, delta_avg);
+	add_positive(&cfs_rq->avg.runnable_sum, delta_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+					  cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void
 update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
 	unsigned long load_avg;
 	u64 load_sum = 0;
+	s64 delta_sum;
 	u32 divider;
 
 	if (!runnable_sum)
@@ -3532,7 +3545,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	 * assuming all tasks are equally runnable.
 	 */
 	if (scale_load_down(gcfs_rq->load.weight)) {
-		load_sum = div_s64(gcfs_rq->avg.load_sum,
+		load_sum = div_u64(gcfs_rq->avg.load_sum,
 			scale_load_down(gcfs_rq->load.weight));
 	}
 
@@ -3549,19 +3562,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
 	runnable_sum = max(runnable_sum, running_sum);
 
-	load_sum = (s64)se_weight(se) * runnable_sum;
-	load_avg = div_s64(load_sum, divider);
-
-	se->avg.load_sum = runnable_sum;
+	load_sum = se_weight(se) * runnable_sum;
+	load_avg = div_u64(load_sum, divider);
 
-	delta = load_avg - se->avg.load_avg;
-	if (!delta)
+	delta_avg = load_avg - se->avg.load_avg;
+	if (!delta_avg)
 		return;
 
-	se->avg.load_avg = load_avg;
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
 
-	add_positive(&cfs_rq->avg.load_avg, delta);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+				      cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
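The subtlest line above is `delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;`: the entity's `load_sum` is stored unweighted, while both the freshly computed `load_sum` and the parent's `cfs_rq->avg.load_sum` are weighted, so the old contribution has to be re-scaled by `se_weight(se)` before the delta is taken. A small standalone sketch of that arithmetic with invented numbers (the divider and weight values are assumptions, not taken from a real run):

```c
/* Standalone illustration of the delta_avg/delta_sum arithmetic above. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t divider = 46718;		/* assumed PELT divider */
	const int64_t weight = 1024;		/* se_weight(se) of a nice-0 task */

	uint64_t se_load_sum = 20000;		/* old, unweighted entity sum */
	int64_t se_load_avg = weight * (int64_t)se_load_sum / (int64_t)divider;

	uint64_t runnable_sum = 25000;		/* new unweighted sum from the group */

	/* load_sum = se_weight(se) * runnable_sum; load_avg = div_u64(load_sum, divider) */
	uint64_t load_sum = (uint64_t)weight * runnable_sum;
	int64_t load_avg = (int64_t)(load_sum / divider);

	int64_t delta_avg = load_avg - se_load_avg;
	/* delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum */
	int64_t delta_sum = (int64_t)load_sum - weight * (int64_t)se_load_sum;

	/* Both deltas are then applied to the parent with add_positive(). */
	printf("delta_avg=%lld delta_sum=%lld\n",
	       (long long)delta_avg, (long long)delta_sum);
	return 0;
}
```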
@@ -3652,7 +3668,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
  *
  * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
  *
- * Returns true if the load decayed or we removed load.
+ * Return: true if the load decayed or we removed load.
  *
  * Since both these conditions indicate a changed cfs_rq->avg.load we should
  * call update_tg_load_avg() when this function returns true.
@@ -3677,15 +3693,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
 		r = removed_load;
 		sub_positive(&sa->load_avg, r);
-		sa->load_sum = sa->load_avg * divider;
+		sub_positive(&sa->load_sum, r * divider);
+		/* See sa->util_sum below */
+		sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);
 
 		r = removed_util;
 		sub_positive(&sa->util_avg, r);
-		sa->util_sum = sa->util_avg * divider;
+		sub_positive(&sa->util_sum, r * divider);
+		/*
+		 * Because of rounding, se->util_sum might end up being +1 more than
+		 * cfs->util_sum. Although this is not a problem by itself, detaching
+		 * a lot of tasks with the rounding problem between 2 updates of
+		 * util_avg (~1ms) can make cfs->util_sum become null whereas
+		 * cfs->util_avg is not.
+		 * Check that util_sum is still above its lower bound for the new
+		 * util_avg. Given that period_contrib might have moved since the last
+		 * sync, we are only sure that util_sum must be above or equal to
+		 * util_avg * minimum possible divider.
+		 */
+		sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
 
 		r = removed_runnable;
 		sub_positive(&sa->runnable_avg, r);
-		sa->runnable_sum = sa->runnable_avg * divider;
+		sub_positive(&sa->runnable_sum, r * divider);
+		/* See sa->util_sum above */
+		sa->runnable_sum = max_t(u32, sa->runnable_sum,
+					 sa->runnable_avg * PELT_MIN_DIVIDER);
 
 		/*
 		 * removed_runnable is the unweighted version of removed_load so we
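The comment added in this hunk is the rationale for the whole clamp: every detached entity can carry a `util_sum` slightly larger than the share actually accounted in the cfs_rq (rounding, plus a possibly larger `period_contrib`-based divider), so subtracting the raw sums can drive the cfs_rq's `util_sum` below what `util_avg` implies, and with enough detaches between two PELT updates even to zero. The rough model below shows the drift and how the `PELT_MIN_DIVIDER` floor repairs it; the per-task over-estimate of 1024 and all other numbers are invented for the illustration, and `PELT_MIN_DIVIDER` is again assumed to be `LOAD_AVG_MAX - 1024`.

```c
/* Rough model of the drift described in the comment above; not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX		47742			/* assumed kernel value */
#define PELT_MIN_DIVIDER	(LOAD_AVG_MAX - 1024)

int main(void)
{
	uint64_t util_avg = 1024;			 /* cfs_rq-level average */
	uint64_t util_sum = util_avg * PELT_MIN_DIVIDER; /* matching sum */
	uint64_t clamped_sum = util_sum;

	/* Detach 512 tasks of util_avg 1; each sum is over-estimated by ~1024. */
	for (int i = 0; i < 512; i++) {
		uint64_t task_avg = 1;
		uint64_t task_sum = task_avg * PELT_MIN_DIVIDER + 1024;

		util_avg -= task_avg;

		/* Without the clamp, sum drifts below what util_avg implies. */
		util_sum = (util_sum > task_sum) ? util_sum - task_sum : 0;

		/* With the clamp (what the patch does), the invariant holds. */
		clamped_sum = (clamped_sum > task_sum) ? clamped_sum - task_sum : 0;
		if (clamped_sum < util_avg * PELT_MIN_DIVIDER)
			clamped_sum = util_avg * PELT_MIN_DIVIDER;
	}

	printf("util_avg=%llu floor=%llu unclamped=%llu clamped=%llu\n",
	       (unsigned long long)util_avg,
	       (unsigned long long)(util_avg * PELT_MIN_DIVIDER),
	       (unsigned long long)util_sum,
	       (unsigned long long)clamped_sum);
	return 0;
}
```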
@@ -3772,17 +3805,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
  */
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	/*
-	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
-	 * See ___update_load_avg() for details.
-	 */
-	u32 divider = get_pelt_divider(&cfs_rq->avg);
-
 	dequeue_load_avg(cfs_rq, se);
 	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+				      cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
+
 	sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+	sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+					 cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
 
 	add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
 
@@ -8539,6 +8573,8 @@ group_type group_classify(unsigned int imbalance_pct,
  *
  * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
  * of @dst_cpu are idle and @sg has lower priority.
+ *
+ * Return: true if @dst_cpu can pull tasks, false otherwise.
  */
 static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
 				    struct sg_lb_stats *sgs,
@@ -8614,6 +8650,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
+ * @sds: Load-balancing data with statistics of the local group.
  * @group: sched_group whose statistics are to be updated.
  * @sgs: variable to hold the statistics for this group.
  * @sg_status: Holds flag indicating the status of the sched_group
@@ -9421,12 +9458,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 /**
  * find_busiest_group - Returns the busiest group within the sched_domain
  * if there is an imbalance.
+ * @env: The load balancing environment.
  *
  * Also calculates the amount of runnable load which should be moved
  * to restore balance.
  *
- * @env: The load balancing environment.
- *
  * Return: - The busiest group if imbalance exists.
  */
 static struct sched_group *find_busiest_group(struct lb_env *env)