 #include "../workqueue_internal.h"
 #include "../../io_uring/io-wq.h"
 #include "../smpboot.h"
+#include "../locking/mutex.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
@@ -2933,8 +2934,15 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
 	struct set_affinity_pending my_pending = { }, *pending = NULL;
 	bool stop_pending, complete = false;
 
-	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+	/*
+	 * Can the task run on the task's current CPU? If so, we're done.
+	 *
+	 * We are also done if the task is the current donor, boosting a lock-
+	 * holding proxy (and potentially has been migrated outside its
+	 * current or previous affinity mask).
+	 */
+	if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask) ||
+	    (task_current_donor(rq, p) && !task_current(rq, p))) {
 		struct task_struct *push_task = NULL;
 
 		if ((flags & SCA_MIGRATE_ENABLE) &&
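
Side note on the new condition above: it relies on the proxy-execution split between the scheduler-context task (rq->donor) and the task the CPU is actually executing (rq->curr). The sketch below is illustrative only; the helper definitions are assumptions about the proxy-execution tree (they live in kernel/sched/sched.h there) and may differ in detail.

/* Illustrative sketch, not the literal kernel definitions. */
static inline bool task_current(struct rq *rq, struct task_struct *p)
{
	return rq->curr == p;		/* p is what the CPU is executing */
}

static inline bool task_current_donor(struct rq *rq, struct task_struct *p)
{
	return rq->donor == p;		/* p is donating its scheduler context */
}

Read that way, the added check says: "p is a donor whose context is boosting some other (lock-holding) task", in which case p may legitimately sit on a CPU outside p->cpus_mask and affine_move_task() can treat it as already done.
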
@@ -6573,11 +6581,12 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
 }
 
 #ifdef CONFIG_SCHED_PROXY_EXEC
-static inline void proxy_resched_idle(struct rq *rq)
+static inline struct task_struct *proxy_resched_idle(struct rq *rq)
 {
 	put_prev_set_next_task(rq, rq->donor, rq->idle);
 	rq_set_donor(rq, rq->idle);
 	set_tsk_need_resched(rq->idle);
+	return rq->idle;
 }
 
 static bool __proxy_deactivate(struct rq *rq, struct task_struct *donor)
@@ -6614,36 +6623,124 @@ static struct task_struct *proxy_deactivate(struct rq *rq, struct task_struct *d
 }
 
 /*
- * Initial simple sketch that just deactivates the blocked task
- * chosen by pick_next_task() so we can then pick something that
- * isn't blocked.
+ * Find runnable lock owner to proxy for mutex blocked donor
+ *
+ * Follow the blocked-on relation:
+ *   task->blocked_on -> mutex->owner -> task...
+ *
+ * Lock order:
+ *
+ *   p->pi_lock
+ *     rq->lock
+ *       mutex->wait_lock
+ *
+ * Returns the task that is going to be used as execution context (the one
+ * that is actually going to be run on cpu_of(rq)).
 */
 static struct task_struct *
 find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
 {
+	struct task_struct *owner = NULL;
+	int this_cpu = cpu_of(rq);
+	struct task_struct *p;
 	struct mutex *mutex;
 
-	mutex = donor->blocked_on;
-	/* Something changed in the chain, so pick again */
-	if (!mutex)
-		return NULL;
-	/*
-	 * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
-	 * and ensure @owner sticks around.
-	 */
-	guard(raw_spinlock)(&mutex->wait_lock);
+	/* Follow blocked_on chain. */
+	for (p = donor; task_is_blocked(p); p = owner) {
+		mutex = p->blocked_on;
+		/* Something changed in the chain, so pick again */
+		if (!mutex)
+			return NULL;
+		/*
+		 * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
+		 * and ensure @owner sticks around.
+		 */
+		guard(raw_spinlock)(&mutex->wait_lock);
 
-	/* Check again that donor is blocked with blocked_lock held */
-	if (!task_is_blocked(donor) || mutex != __get_task_blocked_on(donor)) {
+		/* Check again that p is blocked with wait_lock held */
+		if (mutex != __get_task_blocked_on(p)) {
+			/*
+			 * Something changed in the blocked_on chain and
+			 * we don't know if only at this level. So, let's
+			 * just bail out completely and let __schedule()
+			 * figure things out (pick_again loop).
+			 */
+			return NULL;
+		}
+
+		owner = __mutex_owner(mutex);
+		if (!owner) {
+			__clear_task_blocked_on(p, mutex);
+			return p;
+		}
+
+		if (!READ_ONCE(owner->on_rq) || owner->se.sched_delayed) {
+			/* XXX Don't handle blocked owners/delayed dequeue yet */
+			return proxy_deactivate(rq, donor);
+		}
+
+		if (task_cpu(owner) != this_cpu) {
+			/* XXX Don't handle migrations yet */
+			return proxy_deactivate(rq, donor);
+		}
+
+		if (task_on_rq_migrating(owner)) {
+			/*
+			 * One of the chain of mutex owners is currently migrating to this
+			 * CPU, but has not yet been enqueued because we are holding the
+			 * rq lock. As a simple solution, just schedule rq->idle to give
+			 * the migration a chance to complete. Much like the migrate_task
+			 * case we should end up back in find_proxy_task(), this time
+			 * hopefully with all relevant tasks already enqueued.
+			 */
+			return proxy_resched_idle(rq);
+		}
+
+		/*
+		 * It's possible to race where, after we check owner->on_rq
+		 * but before we check (owner_cpu != this_cpu), the task on
+		 * another cpu was migrated back to this cpu. In that case
+		 * it could slip by our checks. So double check we are still
+		 * on this cpu and not migrating. If we get inconsistent
+		 * results, try again.
+		 */
+		if (!task_on_rq_queued(owner) || task_cpu(owner) != this_cpu)
+			return NULL;
+
+		if (owner == p) {
+			/*
+			 * It's possible we interleave with mutex_unlock like:
+			 *
+			 *				lock(&rq->lock);
+			 *				  find_proxy_task()
+			 *	mutex_unlock()
+			 *	  lock(&wait_lock);
+			 *	  donor(owner) = current->blocked_donor;
+			 *	  unlock(&wait_lock);
+			 *
+			 *	  wake_up_q();
+			 *	    ...
+			 *	      ttwu_runnable()
+			 *	        __task_rq_lock()
+			 *				  lock(&wait_lock);
+			 *				  owner == p
+			 *
+			 * Which leaves us to finish the ttwu_runnable() and make it go.
+			 *
+			 * So schedule rq->idle so that ttwu_runnable() can get the rq
+			 * lock and mark owner as running.
			 */
+			return proxy_resched_idle(rq);
+		}
 		/*
-		 * Something changed in the blocked_on chain and
-		 * we don't know if only at this level. So, let's
-		 * just bail out completely and let __schedule()
-		 * figure things out (pick_again loop).
+		 * OK, now we're absolutely sure @owner is on this
+		 * rq, therefore holding @rq->lock is sufficient to
+		 * guarantee its existence, as per ttwu_remote().
 		 */
-		return NULL; /* do pick_next_task() again */
 	}
-	return proxy_deactivate(rq, donor);
+
+	WARN_ON_ONCE(owner && !owner->on_rq);
+	return owner;
 }
 #else /* SCHED_PROXY_EXEC */
 static struct task_struct *
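
Stripped of the race handling, the new find_proxy_task() is a walk down the blocked_on chain until a runnable lock owner is found. The sketch below is a simplification for illustration, not the patch itself: the function name find_proxy_task_sketch() is hypothetical, and the migration, delayed-dequeue, and owner-on-another-CPU cases are all collapsed into "bail out so __schedule() picks again"; only helpers that appear in the patch are used.

/* Simplified illustration of the chain walk; error paths collapsed. */
static struct task_struct *find_proxy_task_sketch(struct rq *rq,
						  struct task_struct *donor)
{
	struct task_struct *owner = NULL;
	struct task_struct *p;

	for (p = donor; task_is_blocked(p); p = owner) {
		struct mutex *mutex = p->blocked_on;

		if (!mutex)
			return NULL;			/* chain changed, pick again */

		guard(raw_spinlock)(&mutex->wait_lock);

		if (mutex != __get_task_blocked_on(p))
			return NULL;			/* chain changed, pick again */

		owner = __mutex_owner(mutex);
		if (!owner) {
			__clear_task_blocked_on(p, mutex);
			return p;			/* lock is free: p can run */
		}

		if (!task_on_rq_queued(owner) || task_cpu(owner) != cpu_of(rq))
			return NULL;			/* not handled here: retry or deactivate */
	}

	return owner;	/* runnable owner to run in place of the blocked donor */
}
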
@@ -6801,10 +6898,13 @@ static void __sched notrace __schedule(int sched_mode)
 		next = find_proxy_task(rq, next, &rf);
 		if (!next)
 			goto pick_again;
+		if (next == rq->idle)
+			goto keep_resched;
 	}
 picked:
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
+keep_resched:
 	rq->last_seen_need_resched_ns = 0;
 
 	is_switch = prev != next;
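
The new keep_resched label only makes sense next to the existing pick_again/picked flow. Below is a rough control-flow sketch of the result, assembled from the lines in this diff; the pick_next_task() arguments and the exact surrounding __schedule() body are assumptions and may differ in the actual tree.

	/* Rough control-flow sketch, not the literal __schedule() body. */
pick_again:
	next = pick_next_task(rq, prev, &rf);
	if (unlikely(task_is_blocked(next))) {
		next = find_proxy_task(rq, next, &rf);
		if (!next)
			goto pick_again;	/* blocked_on chain changed, pick again */
		if (next == rq->idle)
			goto keep_resched;	/* placeholder pick: keep need_resched set */
	}
picked:
	clear_tsk_need_resched(prev);
	clear_preempt_need_resched();
keep_resched:
	rq->last_seen_need_resched_ns = 0;

The point of skipping the two clear_*() calls when rq->idle was returned as a placeholder is that TIF_NEED_RESCHED stays set, so the CPU reschedules promptly and re-enters find_proxy_task() once the in-flight migration or wakeup has completed.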