@@ -190,7 +190,6 @@ static void group_init(struct psi_group *group)
190
190
INIT_DELAYED_WORK (& group -> avgs_work , psi_avgs_work );
191
191
mutex_init (& group -> avgs_lock );
192
192
/* Init trigger-related members */
193
- atomic_set (& group -> poll_scheduled , 0 );
194
193
mutex_init (& group -> trigger_lock );
195
194
INIT_LIST_HEAD (& group -> triggers );
196
195
memset (group -> nr_triggers , 0 , sizeof (group -> nr_triggers ));
@@ -199,7 +198,7 @@ static void group_init(struct psi_group *group)
199
198
memset (group -> polling_total , 0 , sizeof (group -> polling_total ));
200
199
group -> polling_next_update = ULLONG_MAX ;
201
200
group -> polling_until = 0 ;
202
- rcu_assign_pointer (group -> poll_kworker , NULL );
201
+ rcu_assign_pointer (group -> poll_task , NULL );
203
202
}
204
203
205
204
void __init psi_init (void )
@@ -547,47 +546,38 @@ static u64 update_triggers(struct psi_group *group, u64 now)
547
546
return now + group -> poll_min_period ;
548
547
}
549
548
550
- /*
551
- * Schedule polling if it's not already scheduled. It's safe to call even from
552
- * hotpath because even though kthread_queue_delayed_work takes worker->lock
553
- * spinlock that spinlock is never contended due to poll_scheduled atomic
554
- * preventing such competition.
555
- */
549
+ /* Schedule polling if it's not already scheduled. */
556
550
static void psi_schedule_poll_work (struct psi_group * group , unsigned long delay )
557
551
{
558
- struct kthread_worker * kworker ;
552
+ struct task_struct * task ;
559
553
560
- /* Do not reschedule if already scheduled */
561
- if (atomic_cmpxchg (& group -> poll_scheduled , 0 , 1 ) != 0 )
554
+ /*
555
+ * Do not reschedule if already scheduled.
556
+ * Possible race with a timer scheduled after this check but before
557
+ * mod_timer below can be tolerated because group->polling_next_update
558
+ * will keep updates on schedule.
559
+ */
560
+ if (timer_pending (& group -> poll_timer ))
562
561
return ;
563
562
564
563
rcu_read_lock ();
565
564
566
- kworker = rcu_dereference (group -> poll_kworker );
565
+ task = rcu_dereference (group -> poll_task );
567
566
/*
568
567
* poll_task might be NULL in case psi_trigger_destroy races with
569
568
* psi_task_change (hotpath) which can't use locks
570
569
*/
571
- if (likely (kworker ))
572
- kthread_queue_delayed_work (kworker , & group -> poll_work , delay );
573
- else
574
- atomic_set (& group -> poll_scheduled , 0 );
570
+ if (likely (task ))
571
+ mod_timer (& group -> poll_timer , jiffies + delay );
575
572
576
573
rcu_read_unlock ();
577
574
}
578
575
579
- static void psi_poll_work (struct kthread_work * work )
576
+ static void psi_poll_work (struct psi_group * group )
580
577
{
581
- struct kthread_delayed_work * dwork ;
582
- struct psi_group * group ;
583
578
u32 changed_states ;
584
579
u64 now ;
585
580
586
- dwork = container_of (work , struct kthread_delayed_work , work );
587
- group = container_of (dwork , struct psi_group , poll_work );
588
-
589
- atomic_set (& group -> poll_scheduled , 0 );
590
-
591
581
mutex_lock (& group -> trigger_lock );
592
582
593
583
now = sched_clock ();
@@ -623,6 +613,35 @@ static void psi_poll_work(struct kthread_work *work)
623
613
mutex_unlock (& group -> trigger_lock );
624
614
}
625
615
616
+ static int psi_poll_worker (void * data ) /* Thread function for the "psimon" task: data is the struct psi_group to poll; returns 0 once a stop has been requested. */
617
+ {
618
+ struct psi_group * group = (struct psi_group * )data ;
619
+ struct sched_param param = {
620
+ .sched_priority = 1 ,
621
+ };
622
+
623
+ sched_setscheduler_nocheck (current , SCHED_FIFO , & param ); /* the thread switches itself to SCHED_FIFO, priority 1 */
624
+
625
+ while (true) {
626
+ wait_event_interruptible (group -> poll_wait , /* sleep on poll_wait until a wakeup is flagged or a stop is requested */
627
+ atomic_cmpxchg (& group -> poll_wakeup , 1 , 0 ) || /* consume the wakeup flag: atomically reset poll_wakeup from 1 to 0 */
628
+ kthread_should_stop ());
629
+ if (kthread_should_stop ()) /* a stop request wins over a pending wakeup */
630
+ break ;
631
+
632
+ psi_poll_work (group ); /* run one polling pass for this group */
633
+ }
634
+ return 0 ;
635
+ }
636
+
637
+ static void poll_timer_fn (struct timer_list * t ) /* Callback for group->poll_timer: flags a wakeup and wakes whoever sleeps on poll_wait. */
638
+ {
639
+ struct psi_group * group = from_timer (group , t , poll_timer ); /* recover the psi_group that embeds timer t as poll_timer */
640
+
641
+ atomic_set (& group -> poll_wakeup , 1 ); /* set the flag first so the woken waiter's condition check sees it */
642
+ wake_up_interruptible (& group -> poll_wait );
643
+ }
644
+
626
645
static void record_times (struct psi_group_cpu * groupc , int cpu ,
627
646
bool memstall_tick )
628
647
{
@@ -1099,22 +1118,20 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
1099
1118
1100
1119
mutex_lock (& group -> trigger_lock );
1101
1120
1102
- if (!rcu_access_pointer (group -> poll_kworker )) {
1103
- struct sched_param param = {
1104
- .sched_priority = 1 ,
1105
- };
1106
- struct kthread_worker * kworker ;
1121
+ if (!rcu_access_pointer (group -> poll_task )) {
1122
+ struct task_struct * task ;
1107
1123
1108
- kworker = kthread_create_worker ( 0 , "psimon" );
1109
- if (IS_ERR (kworker )) {
1124
+ task = kthread_create ( psi_poll_worker , group , "psimon" );
1125
+ if (IS_ERR (task )) {
1110
1126
kfree (t );
1111
1127
mutex_unlock (& group -> trigger_lock );
1112
- return ERR_CAST (kworker );
1128
+ return ERR_CAST (task );
1113
1129
}
1114
- sched_setscheduler_nocheck (kworker -> task , SCHED_FIFO , & param );
1115
- kthread_init_delayed_work (& group -> poll_work ,
1116
- psi_poll_work );
1117
- rcu_assign_pointer (group -> poll_kworker , kworker );
1130
+ atomic_set (& group -> poll_wakeup , 0 );
1131
+ init_waitqueue_head (& group -> poll_wait );
1132
+ wake_up_process (task );
1133
+ timer_setup (& group -> poll_timer , poll_timer_fn , 0 );
1134
+ rcu_assign_pointer (group -> poll_task , task );
1118
1135
}
1119
1136
1120
1137
list_add (& t -> node , & group -> triggers );
@@ -1132,7 +1149,7 @@ static void psi_trigger_destroy(struct kref *ref)
1132
1149
{
1133
1150
struct psi_trigger * t = container_of (ref , struct psi_trigger , refcount );
1134
1151
struct psi_group * group = t -> group ;
1135
- struct kthread_worker * kworker_to_destroy = NULL ;
1152
+ struct task_struct * task_to_destroy = NULL ;
1136
1153
1137
1154
if (static_branch_likely (& psi_disabled ))
1138
1155
return ;
@@ -1158,39 +1175,37 @@ static void psi_trigger_destroy(struct kref *ref)
1158
1175
period = min (period , div_u64 (tmp -> win .size ,
1159
1176
UPDATES_PER_WINDOW ));
1160
1177
group -> poll_min_period = period ;
1161
- /* Destroy poll_kworker when the last trigger is destroyed */
1178
+ /* Destroy poll_task when the last trigger is destroyed */
1162
1179
if (group -> poll_states == 0 ) {
1163
1180
group -> polling_until = 0 ;
1164
- kworker_to_destroy = rcu_dereference_protected (
1165
- group -> poll_kworker ,
1181
+ task_to_destroy = rcu_dereference_protected (
1182
+ group -> poll_task ,
1166
1183
lockdep_is_held (& group -> trigger_lock ));
1167
- rcu_assign_pointer (group -> poll_kworker , NULL );
1184
+ rcu_assign_pointer (group -> poll_task , NULL );
1168
1185
}
1169
1186
}
1170
1187
1171
1188
mutex_unlock (& group -> trigger_lock );
1172
1189
1173
1190
/*
1174
1191
* Wait for both *trigger_ptr from psi_trigger_replace and
1175
- * poll_kworker RCUs to complete their read-side critical sections
1176
- * before destroying the trigger and optionally the poll_kworker
1192
+ * poll_task RCUs to complete their read-side critical sections
1193
+ * before destroying the trigger and optionally the poll_task
1177
1194
*/
1178
1195
synchronize_rcu ();
1179
1196
/*
1180
1197
* Destroy the poll_task after releasing trigger_lock to prevent a
1181
1198
* deadlock while waiting for psi_poll_work to acquire trigger_lock
1182
1199
*/
1183
- if (kworker_to_destroy ) {
1200
+ if (task_to_destroy ) {
1184
1201
/*
1185
1202
* After the RCU grace period has expired, the worker
1186
- * can no longer be found through group->poll_kworker .
1203
+ * can no longer be found through group->poll_task .
1187
1204
* But it might have been already scheduled before
1188
1205
* that - deschedule it cleanly before destroying it.
1189
1206
*/
1190
- kthread_cancel_delayed_work_sync (& group -> poll_work );
1191
- atomic_set (& group -> poll_scheduled , 0 );
1192
-
1193
- kthread_destroy_worker (kworker_to_destroy );
1207
+ del_timer_sync (& group -> poll_timer );
1208
+ kthread_stop (task_to_destroy );
1194
1209
}
1195
1210
kfree (t );
1196
1211
}
0 commit comments