/*
 * gfs2_file_direct_read - O_DIRECT read path for gfs2.
 * @iocb: the kiocb describing the read (file, position, flags)
 * @to: destination iov_iter; @to->nofault is toggled around the dio
 * @gh: caller-provided glock holder, initialized and released here
 *
 * Returns the number of bytes read across all retries, or a negative
 * errno if nothing was read and the last attempt failed.
 *
 * NOTE(review): the second parameter line of the signature falls outside
 * the visible diff hunk; the @gh parameter is reconstructed from its use
 * in the body — confirm against the full file.
 */
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
				     struct gfs2_holder *gh)
{
	struct file *file = iocb->ki_filp;
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	/* Bookkeeping for should_fault_in_pages(): previous residual count
	 * and the window of pages to fault in manually on retry. */
	size_t prev_count = 0, window_size = 0;
	/* Bytes transferred so far, accumulated across partial dio retries. */
	size_t written = 0;
	ssize_t ret;

	/*
	 * In this function, we disable page faults when we're holding the
	 * inode glock while doing I/O.  If a page fault occurs, we indicate
	 * that the inode glock may be dropped, fault in the pages manually,
	 * and retry.
	 *
	 * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
	 * physical as well as manual page faults, and we need to disable both
	 * kinds.
	 *
	 * For direct I/O, gfs2 takes the inode glock in deferred mode.  This
	 * locking mode is compatible with other deferred holders, so multiple
	 * processes and nodes can do direct I/O to a file at the same time.
	 * There's no guarantee that reads or writes will be atomic.  Any
	 * coordination among readers and writers needs to happen externally.
	 */

	if (!iov_iter_count(to))
		return 0; /* skip atime */

	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
	ret = gfs2_glock_nq(gh);
	if (ret)
		goto out_uninit;
retry_under_glock:
	/* Disable both physical and manual page faults while the glock is
	 * held; a fault here could otherwise deadlock against the glock. */
	pagefault_disable();
	to->nofault = true;
	/* IOMAP_DIO_PARTIAL + 'written' lets iomap resume a partially
	 * completed dio after we fault pages in and retry. */
	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
			   IOMAP_DIO_PARTIAL, written);
	to->nofault = false;
	pagefault_enable();
	if (ret > 0)
		written = ret;

	if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
		size_t leftover;

		/* Allow the glock to be demoted (possibly dropped) while we
		 * fault the destination pages in without holding it hard. */
		gfs2_holder_allow_demote(gh);
		leftover = fault_in_iov_iter_writeable(to, window_size);
		gfs2_holder_disallow_demote(gh);
		/* leftover != window_size means at least one page was faulted
		 * in, so retrying can make progress. */
		if (leftover != window_size) {
			if (!gfs2_holder_queued(gh))
				goto retry;		/* glock was dropped: re-acquire */
			goto retry_under_glock;		/* glock still held: retry dio */
		}
	}
	if (gfs2_holder_queued(gh))
		gfs2_glock_dq(gh);
out_uninit:
	gfs2_holder_uninit(gh);
	/* Report accumulated progress; only surface an error if no bytes
	 * were transferred at all. */
	if (ret < 0)
		return ret;
	return written;
}
/*
 * gfs2_file_direct_write - O_DIRECT write path for gfs2.
 * @iocb: the kiocb describing the write (file, position, flags)
 * @from: source iov_iter; @from->nofault is toggled around the dio
 * @gh: caller-provided glock holder, initialized and released here
 *
 * Returns the number of bytes written across all retries, or a negative
 * errno if nothing was written and the last attempt failed.  Writes that
 * extend beyond EOF silently return 0 so the caller falls back to
 * buffered I/O.
 *
 * NOTE(review): the second parameter line of the signature falls outside
 * the visible diff hunk; the @gh parameter is reconstructed from its use
 * in the body — confirm against the full file.
 */
static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
				      struct gfs2_holder *gh)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	/* Bookkeeping for should_fault_in_pages(): previous residual count
	 * and the window of pages to fault in manually on retry. */
	size_t prev_count = 0, window_size = 0;
	/* Bytes transferred so far, accumulated across partial dio retries.
	 * Named 'read' because iomap reads the source pages of a write. */
	size_t read = 0;
	ssize_t ret;

	/*
	 * In this function, we disable page faults when we're holding the
	 * inode glock while doing I/O.  If a page fault occurs, we indicate
	 * that the inode glock may be dropped, fault in the pages manually,
	 * and retry.
	 *
	 * For writes, iomap_dio_rw only triggers manual page faults, so we
	 * don't need to disable physical ones.
	 */

	/*
	 * Deferred lock, even if its a write, since we do no allocation on
	 * this path. All we need to change is the atime, and this lock mode
	 * [middle of this comment falls between diff hunks and is not
	 *  visible here — restore it from the full file]
	 * VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
	ret = gfs2_glock_nq(gh);
	if (ret)
		goto out_uninit;
retry_under_glock:
	/* Silently fall back to buffered I/O when writing beyond EOF */
	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
		goto out;

	/* Suppress manual page faults on the source pages while the glock
	 * is held; IOMAP_DIO_PARTIAL + 'read' lets iomap resume after we
	 * fault the pages in and retry. */
	from->nofault = true;
	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
			   IOMAP_DIO_PARTIAL, read);
	from->nofault = false;

	/* -ENOTBLK means the dio could not proceed; treat as "0 written"
	 * so the caller falls back to buffered I/O. */
	if (ret == -ENOTBLK)
		ret = 0;
	if (ret > 0)
		read = ret;

	if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
		size_t leftover;

		/* Allow the glock to be demoted (possibly dropped) while we
		 * fault the source pages in without holding it hard. */
		gfs2_holder_allow_demote(gh);
		leftover = fault_in_iov_iter_readable(from, window_size);
		gfs2_holder_disallow_demote(gh);
		/* leftover != window_size means at least one page was faulted
		 * in, so retrying can make progress. */
		if (leftover != window_size) {
			if (!gfs2_holder_queued(gh))
				goto retry;		/* glock was dropped: re-acquire */
			goto retry_under_glock;		/* glock still held: retry dio */
		}
	}
out:
	if (gfs2_holder_queued(gh))
		gfs2_glock_dq(gh);
out_uninit:
	gfs2_holder_uninit(gh);
	/* Report accumulated progress; only surface an error if no bytes
	 * were transferred at all. */
	if (ret < 0)
		return ret;
	return read;
}
static ssize_t gfs2_file_read_iter (struct kiocb * iocb , struct iov_iter * to )
0 commit comments