24
24
25
25
#include "zonefs.h"
26
26
27
+ static inline int zonefs_zone_mgmt (struct inode * inode ,
28
+ enum req_opf op )
29
+ {
30
+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
31
+ int ret ;
32
+
33
+ lockdep_assert_held (& zi -> i_truncate_mutex );
34
+
35
+ ret = blkdev_zone_mgmt (inode -> i_sb -> s_bdev , op , zi -> i_zsector ,
36
+ zi -> i_zone_size >> SECTOR_SHIFT , GFP_NOFS );
37
+ if (ret ) {
38
+ zonefs_err (inode -> i_sb ,
39
+ "Zone management operation %s at %llu failed %d\n" ,
40
+ blk_op_str (op ), zi -> i_zsector , ret );
41
+ return ret ;
42
+ }
43
+
44
+ return 0 ;
45
+ }
46
+
47
+ static inline void zonefs_i_size_write (struct inode * inode , loff_t isize )
48
+ {
49
+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
50
+
51
+ i_size_write (inode , isize );
52
+ /*
53
+ * A full zone is no longer open/active and does not need
54
+ * explicit closing.
55
+ */
56
+ if (isize >= zi -> i_max_size )
57
+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
58
+ }
59
+
27
60
static int zonefs_iomap_begin (struct inode * inode , loff_t offset , loff_t length ,
28
61
unsigned int flags , struct iomap * iomap ,
29
62
struct iomap * srcmap )
@@ -301,6 +334,17 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
301
334
}
302
335
}
303
336
337
+ /*
338
+ * If the filesystem is mounted with the explicit-open mount option, we
339
+ * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
340
+ * the read-only or offline condition, to avoid attempting an explicit
341
+ * close of the zone when the inode file is closed.
342
+ */
343
+ if ((sbi -> s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN ) &&
344
+ (zone -> cond == BLK_ZONE_COND_OFFLINE ||
345
+ zone -> cond == BLK_ZONE_COND_READONLY ))
346
+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
347
+
304
348
/*
305
349
* If error=remount-ro was specified, any error results in remounting
306
350
* the volume as read-only.
@@ -315,7 +359,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
315
359
* invalid data.
316
360
*/
317
361
zonefs_update_stats (inode , data_size );
318
- i_size_write (inode , data_size );
362
+ zonefs_i_size_write (inode , data_size );
319
363
zi -> i_wpoffset = data_size ;
320
364
321
365
return 0 ;
@@ -328,7 +372,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
328
372
* eventually correct the file size and zonefs inode write pointer offset
329
373
* (which can be out of sync with the drive due to partial write failures).
330
374
*/
331
- static void zonefs_io_error (struct inode * inode , bool write )
375
+ static void __zonefs_io_error (struct inode * inode , bool write )
332
376
{
333
377
struct zonefs_inode_info * zi = ZONEFS_I (inode );
334
378
struct super_block * sb = inode -> i_sb ;
@@ -342,8 +386,6 @@ static void zonefs_io_error(struct inode *inode, bool write)
342
386
};
343
387
int ret ;
344
388
345
- mutex_lock (& zi -> i_truncate_mutex );
346
-
347
389
/*
348
390
* Memory allocations in blkdev_report_zones() can trigger a memory
349
391
* reclaim which may in turn cause a recursion into zonefs as well as
@@ -359,7 +401,14 @@ static void zonefs_io_error(struct inode *inode, bool write)
359
401
zonefs_err (sb , "Get inode %lu zone information failed %d\n" ,
360
402
inode -> i_ino , ret );
361
403
memalloc_noio_restore (noio_flag );
404
+ }
362
405
406
+ static void zonefs_io_error (struct inode * inode , bool write )
407
+ {
408
+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
409
+
410
+ mutex_lock (& zi -> i_truncate_mutex );
411
+ __zonefs_io_error (inode , write );
363
412
mutex_unlock (& zi -> i_truncate_mutex );
364
413
}
365
414
@@ -397,13 +446,27 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
397
446
if (isize == old_isize )
398
447
goto unlock ;
399
448
400
- ret = blkdev_zone_mgmt (inode -> i_sb -> s_bdev , op , zi -> i_zsector ,
401
- zi -> i_zone_size >> SECTOR_SHIFT , GFP_NOFS );
402
- if (ret ) {
403
- zonefs_err (inode -> i_sb ,
404
- "Zone management operation at %llu failed %d" ,
405
- zi -> i_zsector , ret );
449
+ ret = zonefs_zone_mgmt (inode , op );
450
+ if (ret )
406
451
goto unlock ;
452
+
453
+ /*
454
+ * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
455
+ * take care of open zones.
456
+ */
457
+ if (zi -> i_flags & ZONEFS_ZONE_OPEN ) {
458
+ /*
459
+ * Truncating a zone to EMPTY or FULL is the equivalent of
460
+ * closing the zone. For a truncation to 0, we need to
461
+ * re-open the zone to ensure new writes can be processed.
462
+ * For a truncation to the maximum file size, the zone is
463
+ * closed and writes cannot be accepted anymore, so clear
464
+ * the open flag.
465
+ */
466
+ if (!isize )
467
+ ret = zonefs_zone_mgmt (inode , REQ_OP_ZONE_OPEN );
468
+ else
469
+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
407
470
}
408
471
409
472
zonefs_update_stats (inode , isize );
@@ -584,7 +647,7 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
584
647
mutex_lock (& zi -> i_truncate_mutex );
585
648
if (i_size_read (inode ) < iocb -> ki_pos + size ) {
586
649
zonefs_update_stats (inode , iocb -> ki_pos + size );
587
- i_size_write (inode , iocb -> ki_pos + size );
650
+ zonefs_i_size_write (inode , iocb -> ki_pos + size );
588
651
}
589
652
mutex_unlock (& zi -> i_truncate_mutex );
590
653
}
@@ -865,8 +928,128 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
865
928
return ret ;
866
929
}
867
930
931
+ static inline bool zonefs_file_use_exp_open (struct inode * inode , struct file * file )
932
+ {
933
+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
934
+ struct zonefs_sb_info * sbi = ZONEFS_SB (inode -> i_sb );
935
+
936
+ if (!(sbi -> s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN ))
937
+ return false;
938
+
939
+ if (zi -> i_ztype != ZONEFS_ZTYPE_SEQ )
940
+ return false;
941
+
942
+ if (!(file -> f_mode & FMODE_WRITE ))
943
+ return false;
944
+
945
+ return true;
946
+ }
947
+
948
+ static int zonefs_open_zone (struct inode * inode )
949
+ {
950
+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
951
+ struct zonefs_sb_info * sbi = ZONEFS_SB (inode -> i_sb );
952
+ int ret = 0 ;
953
+
954
+ mutex_lock (& zi -> i_truncate_mutex );
955
+
956
+ zi -> i_wr_refcnt ++ ;
957
+ if (zi -> i_wr_refcnt == 1 ) {
958
+
959
+ if (atomic_inc_return (& sbi -> s_open_zones ) > sbi -> s_max_open_zones ) {
960
+ atomic_dec (& sbi -> s_open_zones );
961
+ ret = - EBUSY ;
962
+ goto unlock ;
963
+ }
964
+
965
+ if (i_size_read (inode ) < zi -> i_max_size ) {
966
+ ret = zonefs_zone_mgmt (inode , REQ_OP_ZONE_OPEN );
967
+ if (ret ) {
968
+ zi -> i_wr_refcnt -- ;
969
+ atomic_dec (& sbi -> s_open_zones );
970
+ goto unlock ;
971
+ }
972
+ zi -> i_flags |= ZONEFS_ZONE_OPEN ;
973
+ }
974
+ }
975
+
976
+ unlock :
977
+ mutex_unlock (& zi -> i_truncate_mutex );
978
+
979
+ return ret ;
980
+ }
981
+
982
/*
 * open() file operation: perform the generic open checks, then explicitly
 * open the file zone when zonefs_file_use_exp_open() says it is required.
 *
 * Return: 0 on success, or the error returned by generic_file_open() or
 * zonefs_open_zone().
 */
static int zonefs_file_open(struct inode *inode, struct file *file)
{
	int ret = generic_file_open(inode, file);

	/* Generic open failed, or no explicit zone open is needed. */
	if (ret || !zonefs_file_use_exp_open(inode, file))
		return ret;

	return zonefs_open_zone(inode);
}
995
+
996
+ static void zonefs_close_zone (struct inode * inode )
997
+ {
998
+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
999
+ int ret = 0 ;
1000
+
1001
+ mutex_lock (& zi -> i_truncate_mutex );
1002
+ zi -> i_wr_refcnt -- ;
1003
+ if (!zi -> i_wr_refcnt ) {
1004
+ struct zonefs_sb_info * sbi = ZONEFS_SB (inode -> i_sb );
1005
+ struct super_block * sb = inode -> i_sb ;
1006
+
1007
+ /*
1008
+ * If the file zone is full, it is not open anymore and we only
1009
+ * need to decrement the open count.
1010
+ */
1011
+ if (!(zi -> i_flags & ZONEFS_ZONE_OPEN ))
1012
+ goto dec ;
1013
+
1014
+ ret = zonefs_zone_mgmt (inode , REQ_OP_ZONE_CLOSE );
1015
+ if (ret ) {
1016
+ __zonefs_io_error (inode , false);
1017
+ /*
1018
+ * Leaving zones explicitly open may lead to a state
1019
+ * where most zones cannot be written (zone resources
1020
+ * exhausted). So take preventive action by remounting
1021
+ * read-only.
1022
+ */
1023
+ if (zi -> i_flags & ZONEFS_ZONE_OPEN &&
1024
+ !(sb -> s_flags & SB_RDONLY )) {
1025
+ zonefs_warn (sb , "closing zone failed, remounting filesystem read-only\n" );
1026
+ sb -> s_flags |= SB_RDONLY ;
1027
+ }
1028
+ }
1029
+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
1030
+ dec :
1031
+ atomic_dec (& sbi -> s_open_zones );
1032
+ }
1033
+ mutex_unlock (& zi -> i_truncate_mutex );
1034
+ }
1035
+
1036
/*
 * release() file operation.
 *
 * A zone that was explicitly opened must be closed again, but the zone
 * management operation can fail (either due to an IO error or because the
 * zone has gone offline or read-only). Such a failure must not make
 * close(2) fail for user space, so this always returns 0.
 */
static int zonefs_file_release(struct inode *inode, struct file *file)
{
	if (!zonefs_file_use_exp_open(inode, file))
		return 0;

	zonefs_close_zone(inode);

	return 0;
}
1049
+
868
1050
static const struct file_operations zonefs_file_operations = {
869
- .open = generic_file_open ,
1051
+ .open = zonefs_file_open ,
1052
+ .release = zonefs_file_release ,
870
1053
.fsync = zonefs_file_fsync ,
871
1054
.mmap = zonefs_file_mmap ,
872
1055
.llseek = zonefs_file_llseek ,
@@ -890,6 +1073,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
890
1073
inode_init_once (& zi -> i_vnode );
891
1074
mutex_init (& zi -> i_truncate_mutex );
892
1075
init_rwsem (& zi -> i_mmap_sem );
1076
+ zi -> i_wr_refcnt = 0 ;
893
1077
894
1078
return & zi -> i_vnode ;
895
1079
}
@@ -940,14 +1124,15 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
940
1124
941
1125
enum {
942
1126
Opt_errors_ro , Opt_errors_zro , Opt_errors_zol , Opt_errors_repair ,
943
- Opt_err ,
1127
+ Opt_explicit_open , Opt_err ,
944
1128
};
945
1129
946
1130
static const match_table_t tokens = {
947
1131
{ Opt_errors_ro , "errors=remount-ro" },
948
1132
{ Opt_errors_zro , "errors=zone-ro" },
949
1133
{ Opt_errors_zol , "errors=zone-offline" },
950
1134
{ Opt_errors_repair , "errors=repair" },
1135
+ { Opt_explicit_open , "explicit-open" },
951
1136
{ Opt_err , NULL }
952
1137
};
953
1138
@@ -984,6 +1169,9 @@ static int zonefs_parse_options(struct super_block *sb, char *options)
984
1169
sbi -> s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK ;
985
1170
sbi -> s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR ;
986
1171
break ;
1172
+ case Opt_explicit_open :
1173
+ sbi -> s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN ;
1174
+ break ;
987
1175
default :
988
1176
return - EINVAL ;
989
1177
}
@@ -1403,6 +1591,13 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
1403
1591
sbi -> s_gid = GLOBAL_ROOT_GID ;
1404
1592
sbi -> s_perm = 0640 ;
1405
1593
sbi -> s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO ;
1594
+ sbi -> s_max_open_zones = bdev_max_open_zones (sb -> s_bdev );
1595
+ atomic_set (& sbi -> s_open_zones , 0 );
1596
+ if (!sbi -> s_max_open_zones &&
1597
+ sbi -> s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN ) {
1598
+ zonefs_info (sb , "No open zones limit. Ignoring explicit_open mount option\n" );
1599
+ sbi -> s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN ;
1600
+ }
1406
1601
1407
1602
ret = zonefs_read_super (sb );
1408
1603
if (ret )
0 commit comments