@@ -231,7 +231,6 @@ struct io_ring_ctx {
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
-	unsigned		sqo_stop;
 
 	struct {
 		/* CQ ring */
@@ -329,9 +328,8 @@ struct io_kiocb {
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
-#define REQ_F_PREPPED		16	/* prep already done */
-#define REQ_F_IO_DRAIN		32	/* drain existing IO first */
-#define REQ_F_IO_DRAINED	64	/* drain done */
+#define REQ_F_IO_DRAIN		16	/* drain existing IO first */
+#define REQ_F_IO_DRAINED	32	/* drain done */
 	u64			user_data;
 	u32			error;	/* iopoll result from callback */
 	u32			sequence;
@@ -490,7 +488,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
-				 long res, unsigned ev_flags)
+				 long res)
 {
 	struct io_uring_cqe *cqe;
 
@@ -503,7 +501,7 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 	if (cqe) {
 		WRITE_ONCE(cqe->user_data, ki_user_data);
 		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, ev_flags);
+		WRITE_ONCE(cqe->flags, 0);
 	} else {
 		unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
 
@@ -522,12 +520,12 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
-				long res, unsigned ev_flags)
+				long res)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
@@ -629,7 +627,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
-		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
+		io_cqring_fill_event(ctx, req->user_data, req->error);
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
@@ -777,7 +775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 
 	kiocb_end_write(kiocb);
 
-	io_cqring_add_event(req->ctx, req->user_data, res, 0);
+	io_cqring_add_event(req->ctx, req->user_data, res);
 	io_put_req(req);
 }
 
@@ -896,9 +894,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 
 	if (!req->file)
 		return -EBADF;
-	/* For -EAGAIN retry, everything is already prepped */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (force_nonblock && !io_file_supports_async(req->file))
 		force_nonblock = false;
@@ -941,7 +936,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1216,7 +1210,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	io_cqring_add_event(ctx, user_data, err, 0);
+	io_cqring_add_event(ctx, user_data, err);
 	io_put_req(req);
 	return 0;
 }
@@ -1227,16 +1221,12 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1265,7 +1255,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1277,16 +1267,12 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return ret;
 }
 
@@ -1313,7 +1299,7 @@ static int io_sync_file_range(struct io_kiocb *req,
 
 	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1371,7 +1357,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	}
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1380,7 +1366,7 @@ static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			     __poll_t mask)
 {
 	req->poll.done = true;
-	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask));
 	io_commit_cqring(ctx);
 }
 
@@ -1700,7 +1686,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 			io_put_req(req);
 
 		if (ret) {
-			io_cqring_add_event(ctx, sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, sqe->user_data, ret);
 			io_put_req(req);
 		}
 
@@ -2005,7 +1991,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 			continue;
 		}
 
-		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0);
+		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
 	}
 
 	if (statep)
@@ -2028,7 +2014,7 @@ static int io_sq_thread(void *data)
 	set_fs(USER_DS);
 
 	timeout = inflight = 0;
-	while (!kthread_should_stop() && !ctx->sqo_stop) {
+	while (!kthread_should_park()) {
 		bool all_fixed, mm_fault = false;
 		int i;
 
@@ -2090,7 +2076,7 @@ static int io_sq_thread(void *data)
 			smp_mb();
 
 			if (!io_get_sqring(ctx, &sqes[0])) {
-				if (kthread_should_stop()) {
+				if (kthread_should_park()) {
 					finish_wait(&ctx->sqo_wait, &wait);
 					break;
 				}
@@ -2140,8 +2126,7 @@ static int io_sq_thread(void *data)
 		mmput(cur_mm);
 	}
 
-	if (kthread_should_park())
-		kthread_parkme();
+	kthread_parkme();
 
 	return 0;
 }
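
Note: taken together, the three io_sq_thread() hunks above replace the ad-hoc ctx->sqo_stop flag with the kthread parking API, and kthread_parkme() is now called unconditionally on the way out. A minimal sketch of the resulting loop shape, assuming this parking scheme; the kthread_* calls are the real API, everything else is simplified and the function name is illustrative:

	static int sq_thread_fn(void *data)
	{
		/* Run until the creator asks us to park (see the teardown hunk below). */
		while (!kthread_should_park()) {
			/* ... pick up SQ entries and submit them, or sleep on sqo_wait ... */
		}

		/* Wait here, parked, until the creator unparks or stops the thread. */
		kthread_parkme();
		return 0;
	}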
@@ -2170,7 +2155,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret)
-			io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, s.sqe->user_data, ret);
 	}
 	io_commit_sqring(ctx);
 
@@ -2182,6 +2167,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 static unsigned io_cqring_events(struct io_cq_ring *ring)
 {
+	/* See comment at the top of this file */
+	smp_rmb();
 	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
 }
 
@@ -2194,11 +2181,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
 	sigset_t ksigmask, sigsaved;
-	DEFINE_WAIT(wait);
 	int ret;
 
-	/* See comment at the top of this file */
-	smp_rmb();
 	if (io_cqring_events(ring) >= min_events)
 		return 0;
@@ -2216,23 +2200,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	do {
-		prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE);
-
-		ret = 0;
-		/* See comment at the top of this file */
-		smp_rmb();
-		if (io_cqring_events(ring) >= min_events)
-			break;
-
-		schedule();
-
+	ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
+	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
-		if (signal_pending(current))
-			break;
-	} while (1);
-
-	finish_wait(&ctx->wait, &wait);
 
 	if (sig)
 		restore_user_sigmask(sig, &sigsaved);
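
Note: the io_cqring_wait() hunk above folds the open-coded prepare_to_wait()/schedule()/finish_wait() loop into wait_event_interruptible(), which already performs the same check/sleep/recheck sequence and returns -ERESTARTSYS when a signal arrives before the condition holds; the caller then maps that to -EINTR. The smp_rmb() now lives inside io_cqring_events() (previous hunk), so the condition re-evaluated on every wakeup still issues the barrier. Roughly, and only as an illustrative expansion rather than the exact kernel macro, wait_event_interruptible(wq, cond) behaves like:

	DEFINE_WAIT(__wait);

	ret = 0;
	for (;;) {
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);
		if (cond)				/* re-checked after every wakeup */
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;		/* mapped to -EINTR by the caller above */
			break;
		}
		schedule();
	}
	finish_wait(&wq, &__wait);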
@@ -2273,8 +2243,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		ctx->sqo_stop = 1;
-		mb();
+		/*
+		 * The park is a bit of a work-around, without it we get
+		 * warning spews on shutdown with SQPOLL set and affinity
+		 * set to a single CPU.
+		 */
 		kthread_park(ctx->sqo_thread);
 		kthread_stop(ctx->sqo_thread);
 		ctx->sqo_thread = NULL;
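
Note: this is the teardown half of the parking change. Instead of raising ctx->sqo_stop behind a full barrier, the context first parks the SQ thread, which the thread acknowledges in kthread_parkme() (see the io_sq_thread() hunks above), and then stops it; per the new comment, parking first avoids warning spew on shutdown when SQPOLL is combined with single-CPU affinity. Reduced to a sketch with an illustrative helper name:

	static void stop_sq_thread(struct task_struct *thread)
	{
		kthread_park(thread);	/* returns once the thread sits in kthread_parkme() */
		kthread_stop(thread);	/* wakes it back up, asks it to exit, waits for it */
	}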
@@ -2467,10 +2440,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	ctx->sq_thread_idle = HZ;
 
 	if (p->flags & IORING_SETUP_SQ_AFF) {
-		int cpu = array_index_nospec(p->sq_thread_cpu,
-						nr_cpu_ids);
+		int cpu = p->sq_thread_cpu;
 
 		ret = -EINVAL;
+		if (cpu >= nr_cpu_ids)
+			goto err;
 		if (!cpu_online(cpu))
 			goto err;
 