Commit aef6987

Peter Zijlstra authored and committed
sched/eevdf: Propagate min_slice up the cgroup hierarchy
In the absence of an explicit cgroup slice configuration, make mixed slice
lengths work with cgroups by propagating the min_slice up the hierarchy.

This ensures the cgroup entity gets timely service so that it can, in turn,
service its entities that have this timing constraint set on them.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Valentin Schneider <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
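For readers skimming the diff below, here is a minimal, self-contained user-space sketch of the idea being applied: the smallest slice requested anywhere below an entity is pushed up through its parent groups, so every ancestor knows it must be scheduled often enough to serve that request. This is an illustration only, not the kernel implementation (which maintains min_slice as an augmented-rbtree field and applies it on the enqueue/dequeue paths); struct toy_entity, propagate_min_slice, and the numeric values are hypothetical.

/* toy_min_slice.c - illustrative sketch, not kernel code */
#include <stdint.h>
#include <stdio.h>

struct toy_entity {
	uint64_t slice;              /* slice this entity asks for itself        */
	uint64_t min_slice;          /* smallest slice anywhere in its subtree   */
	struct toy_entity *parent;   /* enclosing group, NULL at the root        */
};

/* Push a (possibly smaller) slice request up towards the root. */
static void propagate_min_slice(struct toy_entity *se, uint64_t child_min)
{
	for (; se; se = se->parent) {
		uint64_t new_min = se->slice < child_min ? se->slice : child_min;

		if (new_min == se->min_slice)
			break;               /* ancestors are already up to date */
		se->min_slice = new_min;
		child_min = new_min;
	}
}

int main(void)
{
	struct toy_entity root  = { .slice = 3000, .min_slice = 3000, .parent = NULL   };
	struct toy_entity group = { .slice = 3000, .min_slice = 3000, .parent = &root  };
	struct toy_entity task  = { .slice = 100,  .min_slice = 100,  .parent = &group };

	/* a task with a short custom slice joins the group ... */
	propagate_min_slice(task.parent, task.min_slice);

	/* ... and the whole chain now advertises the short request */
	printf("group min_slice=%llu root min_slice=%llu\n",
	       (unsigned long long)group.min_slice,
	       (unsigned long long)root.min_slice);
	return 0;
}

With these (arbitrary) values, enqueueing the short-slice task leaves both group and root advertising min_slice = 100, which is what allows the ancestors to be given CPU time frequently enough to serve the constrained task.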
1 parent 857b158 commit aef6987

File tree: 2 files changed (+57 −1 lines)

include/linux/sched.h
kernel/sched/fair.c

include/linux/sched.h

Lines changed: 1 addition & 0 deletions
@@ -542,6 +542,7 @@ struct sched_entity {
 	struct rb_node			run_node;
 	u64				deadline;
 	u64				min_vruntime;
+	u64				min_slice;
 
 	struct list_head		group_node;
 	unsigned char			on_rq;

kernel/sched/fair.c

Lines changed: 56 additions & 1 deletion
@@ -782,6 +782,21 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 	cfs_rq->min_vruntime = __update_min_vruntime(cfs_rq, vruntime);
 }
 
+static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *root = __pick_root_entity(cfs_rq);
+	struct sched_entity *curr = cfs_rq->curr;
+	u64 min_slice = ~0ULL;
+
+	if (curr && curr->on_rq)
+		min_slice = curr->slice;
+
+	if (root)
+		min_slice = min(min_slice, root->min_slice);
+
+	return min_slice;
+}
+
 static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
 {
 	return entity_before(__node_2_se(a), __node_2_se(b));
@@ -798,19 +813,34 @@ static inline void __min_vruntime_update(struct sched_entity *se, struct rb_node
 	}
 }
 
+static inline void __min_slice_update(struct sched_entity *se, struct rb_node *node)
+{
+	if (node) {
+		struct sched_entity *rse = __node_2_se(node);
+		if (rse->min_slice < se->min_slice)
+			se->min_slice = rse->min_slice;
+	}
+}
+
 /*
  * se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime)
  */
 static inline bool min_vruntime_update(struct sched_entity *se, bool exit)
 {
 	u64 old_min_vruntime = se->min_vruntime;
+	u64 old_min_slice = se->min_slice;
 	struct rb_node *node = &se->run_node;
 
 	se->min_vruntime = se->vruntime;
 	__min_vruntime_update(se, node->rb_right);
 	__min_vruntime_update(se, node->rb_left);
 
-	return se->min_vruntime == old_min_vruntime;
+	se->min_slice = se->slice;
+	__min_slice_update(se, node->rb_right);
+	__min_slice_update(se, node->rb_left);
+
+	return se->min_vruntime == old_min_vruntime &&
+	       se->min_slice == old_min_slice;
 }
 
 RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
@@ -823,6 +853,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	avg_vruntime_add(cfs_rq, se);
 	se->min_vruntime = se->vruntime;
+	se->min_slice = se->slice;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
 				__entity_less, &min_vruntime_cb);
 }
@@ -6911,6 +6942,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	int idle_h_nr_running = task_has_idle_policy(p);
 	int task_new = !(flags & ENQUEUE_WAKEUP);
 	int rq_h_nr_running = rq->cfs.h_nr_running;
+	u64 slice = 0;
 
 	if (flags & ENQUEUE_DELAYED) {
 		requeue_delayed_entity(se);
@@ -6940,7 +6972,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 			break;
 		}
 		cfs_rq = cfs_rq_of(se);
+
+		/*
+		 * Basically set the slice of group entries to the min_slice of
+		 * their respective cfs_rq. This ensures the group can service
+		 * its entities in the desired time-frame.
+		 */
+		if (slice) {
+			se->slice = slice;
+			se->custom_slice = 1;
+		}
 		enqueue_entity(cfs_rq, se, flags);
+		slice = cfs_rq_min_slice(cfs_rq);
 
 		cfs_rq->h_nr_running++;
 		cfs_rq->idle_h_nr_running += idle_h_nr_running;
@@ -6962,6 +7005,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		se_update_runnable(se);
 		update_cfs_group(se);
 
+		se->slice = slice;
+		slice = cfs_rq_min_slice(cfs_rq);
+
 		cfs_rq->h_nr_running++;
 		cfs_rq->idle_h_nr_running += idle_h_nr_running;
 
@@ -7027,11 +7073,15 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 	int idle_h_nr_running = 0;
 	int h_nr_running = 0;
 	struct cfs_rq *cfs_rq;
+	u64 slice = 0;
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
 		h_nr_running = 1;
 		idle_h_nr_running = task_has_idle_policy(p);
+	} else {
+		cfs_rq = group_cfs_rq(se);
+		slice = cfs_rq_min_slice(cfs_rq);
 	}
 
 	for_each_sched_entity(se) {
@@ -7056,6 +7106,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			slice = cfs_rq_min_slice(cfs_rq);
+
 			/* Avoid re-evaluating load for this entity: */
 			se = parent_entity(se);
 			/*
@@ -7077,6 +7129,9 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 		se_update_runnable(se);
 		update_cfs_group(se);
 
+		se->slice = slice;
+		slice = cfs_rq_min_slice(cfs_rq);
+
 		cfs_rq->h_nr_running -= h_nr_running;
 		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 