@@ -312,80 +312,85 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 }
 
 /*
- * Figure out the bio's operation flags from the dio request, the
- * mapping, and whether or not we want FUA. Note that we can end up
- * clearing the WRITE_THROUGH flag in the dio request.
+ * Use a FUA write if we need datasync semantics and this is a pure data I/O
+ * that doesn't require any metadata updates (including after I/O completion
+ * such as unwritten extent conversion) and the underlying device either
+ * doesn't have a volatile write cache or supports FUA.
+ * This allows us to avoid cache flushes on I/O completion.
  */
-static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
-                const struct iomap *iomap, bool use_fua, bool atomic_hw)
+static inline bool iomap_dio_can_use_fua(const struct iomap *iomap,
+                struct iomap_dio *dio)
 {
-        blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
-
-        if (!(dio->flags & IOMAP_DIO_WRITE))
-                return REQ_OP_READ;
-
-        opflags |= REQ_OP_WRITE;
-        if (use_fua)
-                opflags |= REQ_FUA;
-        else
-                dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
-        if (atomic_hw)
-                opflags |= REQ_ATOMIC;
-
-        return opflags;
+        if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_DIRTY))
+                return false;
+        if (!(dio->flags & IOMAP_DIO_WRITE_THROUGH))
+                return false;
+        return !bdev_write_cache(iomap->bdev) || bdev_fua(iomap->bdev);
 }
 
 static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
 {
         const struct iomap *iomap = &iter->iomap;
         struct inode *inode = iter->inode;
         unsigned int fs_block_size = i_blocksize(inode), pad;
-        bool atomic_hw = iter->flags & IOMAP_ATOMIC_HW;
         const loff_t length = iomap_length(iter);
         loff_t pos = iter->pos;
-        blk_opf_t bio_opf;
+        blk_opf_t bio_opf = REQ_SYNC | REQ_IDLE;
         struct bio *bio;
         bool need_zeroout = false;
-        bool use_fua = false;
         int nr_pages, ret = 0;
         u64 copied = 0;
         size_t orig_count;
 
-        if (atomic_hw && length != iter->len)
-                return -EINVAL;
-
         if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
             !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
                 return -EINVAL;
 
-        if (iomap->type == IOMAP_UNWRITTEN) {
-                dio->flags |= IOMAP_DIO_UNWRITTEN;
-                need_zeroout = true;
-        }
+        if (dio->flags & IOMAP_DIO_WRITE) {
+                bio_opf |= REQ_OP_WRITE;
+
+                if (iomap->flags & IOMAP_F_ATOMIC_BIO) {
+                        /*
+                         * Ensure that the mapping covers the full write
+                         * length, otherwise it won't be submitted as a single
+                         * bio, which is required to use hardware atomics.
+                         */
+                        if (length != iter->len)
+                                return -EINVAL;
+                        bio_opf |= REQ_ATOMIC;
+                }
 
-        if (iomap->flags & IOMAP_F_SHARED)
-                dio->flags |= IOMAP_DIO_COW;
+                if (iomap->type == IOMAP_UNWRITTEN) {
+                        dio->flags |= IOMAP_DIO_UNWRITTEN;
+                        need_zeroout = true;
+                }
+
+                if (iomap->flags & IOMAP_F_SHARED)
+                        dio->flags |= IOMAP_DIO_COW;
+
+                if (iomap->flags & IOMAP_F_NEW) {
+                        need_zeroout = true;
+                } else if (iomap->type == IOMAP_MAPPED) {
+                        if (iomap_dio_can_use_fua(iomap, dio))
+                                bio_opf |= REQ_FUA;
+                        else
+                                dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
+                }
 
-        if (iomap->flags & IOMAP_F_NEW) {
-                need_zeroout = true;
-        } else if (iomap->type == IOMAP_MAPPED) {
                 /*
-                 * Use a FUA write if we need datasync semantics, this is a pure
-                 * data IO that doesn't require any metadata updates (including
-                 * after IO completion such as unwritten extent conversion) and
-                 * the underlying device either supports FUA or doesn't have
-                 * a volatile write cache. This allows us to avoid cache flushes
-                 * on IO completion. If we can't use writethrough and need to
-                 * sync, disable in-task completions as dio completion will
-                 * need to call generic_write_sync() which will do a blocking
-                 * fsync / cache flush call.
+                 * We can only do deferred completion for pure overwrites that
+                 * don't require additional I/O at completion time.
+                 *
+                 * This rules out writes that need zeroing or extent conversion,
+                 * extend the file size, or issue metadata I/O or cache flushes
+                 * during completion processing.
                  */
-                if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-                    (dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
-                    (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
-                        use_fua = true;
-                else if (dio->flags & IOMAP_DIO_NEED_SYNC)
+                if (need_zeroout || (pos >= i_size_read(inode)) ||
+                    ((dio->flags & IOMAP_DIO_NEED_SYNC) &&
+                     !(bio_opf & REQ_FUA)))
                         dio->flags &= ~IOMAP_DIO_CALLER_COMP;
+        } else {
+                bio_opf |= REQ_OP_READ;
         }
 
         /*
@@ -399,18 +404,6 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
         if (!iov_iter_count(dio->submit.iter))
                 goto out;
 
-        /*
-         * We can only do deferred completion for pure overwrites that
-         * don't require additional IO at completion. This rules out
-         * writes that need zeroing or extent conversion, extend
-         * the file size, or issue journal IO or cache flushes
-         * during completion processing.
-         */
-        if (need_zeroout ||
-            ((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) ||
-            ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
-                dio->flags &= ~IOMAP_DIO_CALLER_COMP;
-
         /*
          * The rules for polled IO completions follow the guidelines as the
          * ones we set for inline and deferred completions. If none of those
@@ -428,8 +421,6 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
                         goto out;
         }
 
-        bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_hw);
-
         nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
         do {
                 size_t n;
@@ -461,9 +452,9 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
                 }
 
                 n = bio->bi_iter.bi_size;
-                if (WARN_ON_ONCE(atomic_hw && n != length)) {
+                if (WARN_ON_ONCE((bio_opf & REQ_ATOMIC) && n != length)) {
                         /*
-                         * This bio should have covered the complete length,
+                         * An atomic write bio must cover the complete length,
                          * which it doesn't, so error. We may need to zero out
                          * the tail (complete FS block), similar to when
                          * bio_iov_iter_get_pages() returns an error, above.
@@ -686,10 +677,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                         iomi.flags |= IOMAP_OVERWRITE_ONLY;
                 }
 
-                if (dio_flags & IOMAP_DIO_ATOMIC_SW)
-                        iomi.flags |= IOMAP_ATOMIC_SW;
-                else if (iocb->ki_flags & IOCB_ATOMIC)
-                        iomi.flags |= IOMAP_ATOMIC_HW;
+                if (iocb->ki_flags & IOCB_ATOMIC)
+                        iomi.flags |= IOMAP_ATOMIC;
 
                 /* for data sync or sync, we need sync completion processing */
                 if (iocb_is_dsync(iocb)) {
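
To make the FUA decision introduced by the new iomap_dio_can_use_fua() helper in the first hunk easier to follow, here is a minimal standalone sketch of the same condition. It is plain userspace C, not kernel code; the structs and names below (mapping_state, dio_state, can_use_fua, and the boolean stand-ins for bdev_write_cache(), bdev_fua(), IOMAP_F_SHARED/IOMAP_F_DIRTY and IOMAP_DIO_WRITE_THROUGH) are hypothetical and exist only for illustration.

/*
 * Standalone illustration only -- not kernel code.  Mirrors the shape of
 * iomap_dio_can_use_fua() using simplified stand-in state.
 */
#include <stdbool.h>
#include <stdio.h>

struct mapping_state {
        bool shared_or_dirty;   /* stands in for IOMAP_F_SHARED | IOMAP_F_DIRTY */
        bool dev_write_cache;   /* stands in for bdev_write_cache() */
        bool dev_fua;           /* stands in for bdev_fua() */
};

struct dio_state {
        bool write_through;     /* stands in for IOMAP_DIO_WRITE_THROUGH */
};

/*
 * FUA is only worth using for a pure data overwrite that was requested as
 * write-through, on a device that either has no volatile write cache or
 * supports FUA; anything else falls back to a cache flush at completion.
 */
static bool can_use_fua(const struct mapping_state *m, const struct dio_state *d)
{
        if (m->shared_or_dirty)
                return false;
        if (!d->write_through)
                return false;
        return !m->dev_write_cache || m->dev_fua;
}

int main(void)
{
        struct mapping_state m = {
                .shared_or_dirty = false,
                .dev_write_cache = true,
                .dev_fua = true,
        };
        struct dio_state d = { .write_through = true };

        printf("FUA-capable device: use FUA = %d\n", can_use_fua(&m, &d));

        /* Volatile cache but no FUA: fall back to a post-I/O cache flush. */
        m.dev_fua = false;
        printf("no FUA support:     use FUA = %d\n", can_use_fua(&m, &d));
        return 0;
}

Pulling the decision into a helper like this is what lets the patch turn iomap_dio_bio_iter() into a single linear pass that builds bio_opf as it classifies the write, instead of recomputing the same conditions later in iomap_dio_bio_opflags().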