3030
3131#define NFSDBG_FACILITY NFSDBG_VFS
3232
33+ #define NFSLOCAL_MAX_IOS 3
34+
3335struct nfs_local_kiocb {
3436 struct kiocb kiocb ;
3537 struct bio_vec * bvec ;
3638 struct nfs_pgio_header * hdr ;
3739 struct work_struct work ;
3840 void (* aio_complete_work )(struct work_struct * );
39- struct iov_iter iter ____cacheline_aligned ;
4041 struct nfsd_file * localio ;
42+ /* Begin mostly DIO-specific members */
43+ size_t end_len ;
44+ short int end_iter_index ;
45+ short int n_iters ;
46+ bool iter_is_dio_aligned [NFSLOCAL_MAX_IOS ];
47+ loff_t offset [NFSLOCAL_MAX_IOS ] ____cacheline_aligned ;
48+ struct iov_iter iters [NFSLOCAL_MAX_IOS ];
49+ /* End mostly DIO-specific members */
4150};
4251
4352struct nfs_local_fsync_ctx {
@@ -291,7 +300,7 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
291300{
292301 struct nfs_local_kiocb * iocb ;
293302
294- iocb = kmalloc (sizeof (* iocb ), flags );
303+ iocb = kzalloc (sizeof (* iocb ), flags );
295304 if (iocb == NULL )
296305 return NULL ;
297306
@@ -303,25 +312,72 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
303312 }
304313
305314 init_sync_kiocb (& iocb -> kiocb , file );
306- if (test_bit (NFS_IOHDR_ODIRECT , & hdr -> flags ))
307- iocb -> kiocb .ki_flags = IOCB_DIRECT ;
308315
309- iocb -> kiocb .ki_pos = hdr -> args .offset ;
310316 iocb -> hdr = hdr ;
311317 iocb -> kiocb .ki_flags &= ~IOCB_APPEND ;
312318 iocb -> aio_complete_work = NULL ;
313319
320+ iocb -> end_iter_index = -1 ;
321+
314322 return iocb ;
315323}
316324
325+ struct nfs_local_dio {
326+ u32 mem_align ;
327+ u32 offset_align ;
328+ loff_t middle_offset ;
329+ loff_t end_offset ;
330+ ssize_t start_len ; /* Length for misaligned first extent */
331+ ssize_t middle_len ; /* Length for DIO-aligned middle extent */
332+ ssize_t end_len ; /* Length for misaligned last extent */
333+ };
334+
335+ static bool
336+ nfs_is_local_dio_possible (struct nfs_local_kiocb * iocb , int rw ,
337+ size_t len , struct nfs_local_dio * local_dio )
338+ {
339+ struct nfs_pgio_header * hdr = iocb -> hdr ;
340+ loff_t offset = hdr -> args .offset ;
341+ u32 nf_dio_mem_align , nf_dio_offset_align , nf_dio_read_offset_align ;
342+ loff_t start_end , orig_end , middle_end ;
343+
344+ nfs_to -> nfsd_file_dio_alignment (iocb -> localio , & nf_dio_mem_align ,
345+ & nf_dio_offset_align , & nf_dio_read_offset_align );
346+ if (rw == ITER_DEST )
347+ nf_dio_offset_align = nf_dio_read_offset_align ;
348+
349+ if (unlikely (!nf_dio_mem_align || !nf_dio_offset_align ))
350+ return false;
351+ if (unlikely (nf_dio_offset_align > PAGE_SIZE ))
352+ return false;
353+ if (unlikely (len < nf_dio_offset_align ))
354+ return false;
355+
356+ local_dio -> mem_align = nf_dio_mem_align ;
357+ local_dio -> offset_align = nf_dio_offset_align ;
358+
359+ start_end = round_up (offset , nf_dio_offset_align );
360+ orig_end = offset + len ;
361+ middle_end = round_down (orig_end , nf_dio_offset_align );
362+
363+ local_dio -> middle_offset = start_end ;
364+ local_dio -> end_offset = middle_end ;
365+
366+ local_dio -> start_len = start_end - offset ;
367+ local_dio -> middle_len = middle_end - start_end ;
368+ local_dio -> end_len = orig_end - middle_end ;
369+
370+ return true;
371+ }
372+
317373static bool nfs_iov_iter_aligned_bvec (const struct iov_iter * i ,
318- loff_t offset , unsigned int addr_mask , unsigned int len_mask )
374+ unsigned int addr_mask , unsigned int len_mask )
319375{
320376 const struct bio_vec * bvec = i -> bvec ;
321377 size_t skip = i -> iov_offset ;
322378 size_t size = i -> count ;
323379
324- if (( offset | size ) & len_mask )
380+ if (size & len_mask )
325381 return false;
326382 do {
327383 size_t len = bvec -> bv_len ;
@@ -338,8 +394,68 @@ static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
338394 return true;
339395}
340396
341- static void
342- nfs_local_iter_init (struct iov_iter * i , struct nfs_local_kiocb * iocb , int rw )
397+ /*
398+ * Set up as many as 3 iov_iter based on the extents described by @local_dio.
399+ * Returns the number of iov_iter set up, or 0 if the middle is not DIO-aligned.
400+ */
401+ static int
402+ nfs_local_iters_setup_dio (struct nfs_local_kiocb * iocb , int rw ,
403+ unsigned int nvecs , size_t len ,
404+ struct nfs_local_dio * local_dio )
405+ {
406+ int n_iters = 0 ;
407+ struct iov_iter * iters = iocb -> iters ;
408+
409+ /* Setup misaligned start? */
410+ if (local_dio -> start_len ) {
411+ iov_iter_bvec (& iters [n_iters ], rw , iocb -> bvec , nvecs , len );
412+ iters [n_iters ].count = local_dio -> start_len ;
413+ iocb -> offset [n_iters ] = iocb -> hdr -> args .offset ;
414+ iocb -> iter_is_dio_aligned [n_iters ] = false;
415+ ++ n_iters ;
416+ }
417+
418+ /* Set up misaligned end?
419+ * If so, the end is purposely set up to be issued using buffered IO
420+ * before the middle (which will use DIO, if DIO-aligned, with AIO).
421+ * This creates problems if/when the end results in a partial write,
422+ * so the index and length of the end must be saved for this corner case.
423+ */
424+ if (local_dio -> end_len ) {
425+ iov_iter_bvec (& iters [n_iters ], rw , iocb -> bvec , nvecs , len );
426+ iocb -> offset [n_iters ] = local_dio -> end_offset ;
427+ iov_iter_advance (& iters [n_iters ],
428+ local_dio -> start_len + local_dio -> middle_len );
429+ iocb -> iter_is_dio_aligned [n_iters ] = false;
430+ /* Save index and length of end */
431+ iocb -> end_iter_index = n_iters ;
432+ iocb -> end_len = local_dio -> end_len ;
433+ ++ n_iters ;
434+ }
435+
436+ /* Setup DIO-aligned middle to be issued last, to allow for
437+ * DIO with AIO completion (see nfs_local_call_{read,write}).
438+ */
439+ iov_iter_bvec (& iters [n_iters ], rw , iocb -> bvec , nvecs , len );
440+ if (local_dio -> start_len )
441+ iov_iter_advance (& iters [n_iters ], local_dio -> start_len );
442+ iters [n_iters ].count -= local_dio -> end_len ;
443+ iocb -> offset [n_iters ] = local_dio -> middle_offset ;
444+
445+ iocb -> iter_is_dio_aligned [n_iters ] =
446+ nfs_iov_iter_aligned_bvec (& iters [n_iters ],
447+ local_dio -> mem_align - 1 , local_dio -> offset_align - 1 );
448+
449+ if (unlikely (!iocb -> iter_is_dio_aligned [n_iters ]))
450+ return 0 ; /* no DIO-aligned IO possible */
451+ ++ n_iters ;
452+
453+ iocb -> n_iters = n_iters ;
454+ return n_iters ;
455+ }
456+
457+ static noinline_for_stack void
458+ nfs_local_iters_init (struct nfs_local_kiocb * iocb , int rw )
343459{
344460 struct nfs_pgio_header * hdr = iocb -> hdr ;
345461 struct page * * pagevec = hdr -> page_array .pagevec ;
@@ -360,26 +476,18 @@ nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int rw)
360476 }
361477 len = hdr -> args .count - total ;
362478
363- iov_iter_bvec (i , rw , iocb -> bvec , v , len );
364-
365- if (iocb -> kiocb .ki_flags & IOCB_DIRECT ) {
366- u32 nf_dio_mem_align , nf_dio_offset_align , nf_dio_read_offset_align ;
367- /* Verify the IO is DIO-aligned as required */
368- nfs_to -> nfsd_file_dio_alignment (iocb -> localio , & nf_dio_mem_align ,
369- & nf_dio_offset_align ,
370- & nf_dio_read_offset_align );
371- if (rw == ITER_DEST )
372- nf_dio_offset_align = nf_dio_read_offset_align ;
373-
374- if (nf_dio_mem_align && nf_dio_offset_align &&
375- nfs_iov_iter_aligned_bvec (i , hdr -> args .offset ,
376- nf_dio_mem_align - 1 ,
377- nf_dio_offset_align - 1 ))
378- return ; /* is DIO-aligned */
479+ if (test_bit (NFS_IOHDR_ODIRECT , & hdr -> flags )) {
480+ struct nfs_local_dio local_dio ;
379481
380- /* Fallback to using buffered for this misaligned IO */
381- iocb -> kiocb .ki_flags &= ~IOCB_DIRECT ;
482+ if (nfs_is_local_dio_possible (iocb , rw , len , & local_dio ) &&
483+ nfs_local_iters_setup_dio (iocb , rw , v , len , & local_dio ) != 0 )
484+ return ; /* is DIO-aligned */
382485 }
486+
487+ /* Use buffered IO */
488+ iocb -> offset [0 ] = hdr -> args .offset ;
489+ iov_iter_bvec (& iocb -> iters [0 ], rw , iocb -> bvec , v , len );
490+ iocb -> n_iters = 1 ;
383491}
384492
385493static void
@@ -402,10 +510,12 @@ nfs_local_pgio_init(struct nfs_pgio_header *hdr,
402510static void
403511nfs_local_pgio_done (struct nfs_pgio_header * hdr , long status )
404512{
513+ /* Must handle partial completions */
405514 if (status >= 0 ) {
406- hdr -> res .count = status ;
407- hdr -> res .op_status = NFS4_OK ;
408- hdr -> task .tk_status = 0 ;
515+ hdr -> res .count += status ;
516+ /* @hdr was initialized to 0 (zeroed during allocation) */
517+ if (hdr -> task .tk_status == 0 )
518+ hdr -> res .op_status = NFS4_OK ;
409519 } else {
410520 hdr -> res .op_status = nfs_localio_errno_to_nfs4_stat (status );
411521 hdr -> task .tk_status = status ;
@@ -451,8 +561,6 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
451561 pr_info_ratelimited ("nfs: Unexpected direct I/O read alignment failure\n" );
452562 }
453563
454- nfs_local_pgio_done (hdr , status );
455-
456564 /*
457565 * Must clear replen otherwise NFSv3 data corruption will occur
458566 * if/when switching from LOCALIO back to using normal RPC.
@@ -480,6 +588,7 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
480588 struct nfs_local_kiocb * iocb =
481589 container_of (kiocb , struct nfs_local_kiocb , kiocb );
482590
591+ nfs_local_pgio_done (iocb -> hdr , ret );
483592 nfs_local_read_done (iocb , ret );
484593 nfs_local_pgio_aio_complete (iocb ); /* Calls nfs_local_read_aio_complete_work */
485594}
@@ -494,12 +603,21 @@ static void nfs_local_call_read(struct work_struct *work)
494603
495604 save_cred = override_creds (filp -> f_cred );
496605
497- if (iocb -> kiocb .ki_flags & IOCB_DIRECT ) {
498- iocb -> kiocb .ki_complete = nfs_local_read_aio_complete ;
499- iocb -> aio_complete_work = nfs_local_read_aio_complete_work ;
500- }
606+ for (int i = 0 ; i < iocb -> n_iters ; i ++ ) {
607+ if (iocb -> iter_is_dio_aligned [i ]) {
608+ iocb -> kiocb .ki_flags |= IOCB_DIRECT ;
609+ iocb -> kiocb .ki_complete = nfs_local_read_aio_complete ;
610+ iocb -> aio_complete_work = nfs_local_read_aio_complete_work ;
611+ }
501612
502- status = filp -> f_op -> read_iter (& iocb -> kiocb , & iocb -> iter );
613+ iocb -> kiocb .ki_pos = iocb -> offset [i ];
614+ status = filp -> f_op -> read_iter (& iocb -> kiocb , & iocb -> iters [i ]);
615+ if (status != - EIOCBQUEUED ) {
616+ nfs_local_pgio_done (iocb -> hdr , status );
617+ if (iocb -> hdr -> task .tk_status )
618+ break ;
619+ }
620+ }
503621
504622 revert_creds (save_cred );
505623
@@ -635,18 +753,19 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
635753 }
636754
637755 /* Handle short writes as if they are ENOSPC */
756+ status = hdr -> res .count ;
638757 if (status > 0 && status < hdr -> args .count ) {
639758 hdr -> mds_offset += status ;
640759 hdr -> args .offset += status ;
641760 hdr -> args .pgbase += status ;
642761 hdr -> args .count -= status ;
643762 nfs_set_pgio_error (hdr , - ENOSPC , hdr -> args .offset );
644763 status = - ENOSPC ;
764+ /* record -ENOSPC in terms of nfs_local_pgio_done */
765+ nfs_local_pgio_done (hdr , status );
645766 }
646- if (status < 0 )
767+ if (hdr -> task . tk_status < 0 )
647768 nfs_reset_boot_verifier (inode );
648-
649- nfs_local_pgio_done (hdr , status );
650769}
651770
652771static void nfs_local_write_aio_complete_work (struct work_struct * work )
@@ -663,6 +782,7 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
663782 struct nfs_local_kiocb * iocb =
664783 container_of (kiocb , struct nfs_local_kiocb , kiocb );
665784
785+ nfs_local_pgio_done (iocb -> hdr , ret );
666786 nfs_local_write_done (iocb , ret );
667787 nfs_local_pgio_aio_complete (iocb ); /* Calls nfs_local_write_aio_complete_work */
668788}
@@ -679,13 +799,48 @@ static void nfs_local_call_write(struct work_struct *work)
679799 current -> flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO ;
680800 save_cred = override_creds (filp -> f_cred );
681801
682- if (iocb -> kiocb .ki_flags & IOCB_DIRECT ) {
683- iocb -> kiocb .ki_complete = nfs_local_write_aio_complete ;
684- iocb -> aio_complete_work = nfs_local_write_aio_complete_work ;
685- }
686-
687802 file_start_write (filp );
688- status = filp -> f_op -> write_iter (& iocb -> kiocb , & iocb -> iter );
803+ for (int i = 0 ; i < iocb -> n_iters ; i ++ ) {
804+ if (iocb -> iter_is_dio_aligned [i ]) {
805+ iocb -> kiocb .ki_flags |= IOCB_DIRECT ;
806+ iocb -> kiocb .ki_complete = nfs_local_write_aio_complete ;
807+ iocb -> aio_complete_work = nfs_local_write_aio_complete_work ;
808+ }
809+ retry :
810+ iocb -> kiocb .ki_pos = iocb -> offset [i ];
811+ status = filp -> f_op -> write_iter (& iocb -> kiocb , & iocb -> iters [i ]);
812+ if (status != - EIOCBQUEUED ) {
813+ if (unlikely (status >= 0 && status < iocb -> iters [i ].count )) {
814+ /* partial write */
815+ if (i == iocb -> end_iter_index ) {
816+ /* Must not account for a partial end write: because
817+ * the end is issued before the middle, the partial
818+ * write accounting in nfs_local_write_done() would
819+ * otherwise incorrectly advance hdr->args.offset.
820+ */
821+ status = 0 ;
822+ } else {
823+ /* Partial write at start or buffered middle,
824+ * exit early.
825+ */
826+ nfs_local_pgio_done (iocb -> hdr , status );
827+ break ;
828+ }
829+ } else if (unlikely (status == - ENOTBLK &&
830+ (iocb -> kiocb .ki_flags & IOCB_DIRECT ))) {
831+ /* VFS will return -ENOTBLK if DIO WRITE fails to
832+ * invalidate the page cache. Retry using buffered IO.
833+ */
834+ iocb -> kiocb .ki_flags &= ~IOCB_DIRECT ;
835+ iocb -> kiocb .ki_complete = NULL ;
836+ iocb -> aio_complete_work = NULL ;
837+ goto retry ;
838+ }
839+ nfs_local_pgio_done (iocb -> hdr , status );
840+ if (iocb -> hdr -> task .tk_status )
841+ break ;
842+ }
843+ }
689844 file_end_write (filp );
690845
691846 revert_creds (save_cred );
@@ -754,7 +909,7 @@ nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
754909 iocb -> hdr = hdr ;
755910 iocb -> localio = localio ;
756911
757- nfs_local_iter_init ( & iocb -> iter , iocb , rw );
912+ nfs_local_iters_init ( iocb , rw );
758913
759914 return iocb ;
760915}
0 commit comments