@@ -78,6 +78,7 @@
 #include <linux/task_work.h>
 #include <linux/pagemap.h>
 #include <linux/io_uring.h>
+#include <linux/tracehook.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -1499,7 +1500,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	all_flushed = list_empty(&ctx->cq_overflow_list);
 	if (all_flushed) {
 		clear_bit(0, &ctx->check_cq_overflow);
-		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
+		WRITE_ONCE(ctx->rings->sq_flags,
+			   ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
 	}
 
 	if (posted)
@@ -1578,7 +1580,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 	}
 	if (list_empty(&ctx->cq_overflow_list)) {
 		set_bit(0, &ctx->check_cq_overflow);
-		ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
+		WRITE_ONCE(ctx->rings->sq_flags,
+			   ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
+
 	}
 	ocqe->cqe.user_data = user_data;
 	ocqe->cqe.res = res;
@@ -2222,9 +2226,9 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
 
 static inline bool io_run_task_work(void)
 {
-	if (current->task_works) {
+	if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
 		__set_current_state(TASK_RUNNING);
-		task_work_run();
+		tracehook_notify_signal();
 		return true;
 	}
 
@@ -6803,14 +6807,16 @@ static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
 {
 	/* Tell userspace we may need a wakeup call */
 	spin_lock_irq(&ctx->completion_lock);
-	ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
+	WRITE_ONCE(ctx->rings->sq_flags,
+		   ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
 	spin_unlock_irq(&ctx->completion_lock);
 }
 
 static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
 {
 	spin_lock_irq(&ctx->completion_lock);
-	ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+	WRITE_ONCE(ctx->rings->sq_flags,
+		   ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
 	spin_unlock_irq(&ctx->completion_lock);
 }
 
@@ -7132,16 +7138,6 @@ static void **io_alloc_page_table(size_t size)
 	return table;
 }
 
-static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
-{
-	spin_lock_bh(&ctx->rsrc_ref_lock);
-}
-
-static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
-{
-	spin_unlock_bh(&ctx->rsrc_ref_lock);
-}
-
 static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
 {
 	percpu_ref_exit(&ref_node->refs);
@@ -7158,9 +7154,9 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
 		struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
 
 		rsrc_node->rsrc_data = data_to_kill;
-		io_rsrc_ref_lock(ctx);
+		spin_lock_irq(&ctx->rsrc_ref_lock);
 		list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
-		io_rsrc_ref_unlock(ctx);
+		spin_unlock_irq(&ctx->rsrc_ref_lock);
 
 		atomic_inc(&data_to_kill->refs);
 		percpu_ref_kill(&rsrc_node->refs);
@@ -7199,17 +7195,19 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
 		/* kill initial ref, already quiesced if zero */
 		if (atomic_dec_and_test(&data->refs))
 			break;
+		mutex_unlock(&ctx->uring_lock);
 		flush_delayed_work(&ctx->rsrc_put_work);
 		ret = wait_for_completion_interruptible(&data->done);
-		if (!ret)
+		if (!ret) {
+			mutex_lock(&ctx->uring_lock);
 			break;
+		}
 
 		atomic_inc(&data->refs);
 		/* wait for all works potentially completing data->done */
 		flush_delayed_work(&ctx->rsrc_put_work);
 		reinit_completion(&data->done);
 
-		mutex_unlock(&ctx->uring_lock);
 		ret = io_run_task_work_sig();
 		mutex_lock(&ctx->uring_lock);
 	} while (ret >= 0);
@@ -7668,9 +7666,10 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
 {
 	struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
 	struct io_ring_ctx *ctx = node->rsrc_data->ctx;
+	unsigned long flags;
 	bool first_add = false;
 
-	io_rsrc_ref_lock(ctx);
+	spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
 	node->done = true;
 
 	while (!list_empty(&ctx->rsrc_ref_list)) {
@@ -7682,7 +7681,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
 		list_del(&node->node);
 		first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
 	}
-	io_rsrc_ref_unlock(ctx);
+	spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
 
 	if (first_add)
 		mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
@@ -8653,13 +8652,10 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
-static bool io_wait_rsrc_data(struct io_rsrc_data *data)
+static void io_wait_rsrc_data(struct io_rsrc_data *data)
 {
-	if (!data)
-		return false;
-	if (!atomic_dec_and_test(&data->refs))
+	if (data && !atomic_dec_and_test(&data->refs))
 		wait_for_completion(&data->done);
-	return true;
 }
 
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
@@ -8671,10 +8667,14 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		ctx->mm_account = NULL;
 	}
 
+	/* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
+	io_wait_rsrc_data(ctx->buf_data);
+	io_wait_rsrc_data(ctx->file_data);
+
 	mutex_lock(&ctx->uring_lock);
-	if (io_wait_rsrc_data(ctx->buf_data))
+	if (ctx->buf_data)
 		__io_sqe_buffers_unregister(ctx);
-	if (io_wait_rsrc_data(ctx->file_data))
+	if (ctx->file_data)
 		__io_sqe_files_unregister(ctx);
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true);