Skip to content

Commit de08cb7

Browse files
davidchenntnx authored and behlendorf committed
Fix inode eviction and sync writeback deadlock on Linux
If inode eviction and sync writeback happen on the same inode at the same time, inode eviction will set I_FREEING and wait for sync writeback, and sync writeback may eventually call zfs_get_data and loop in zfs_zget forever because igrab cannot succeed with I_FREEING, thus causing a deadlock. To fix this, in zfs_get_data we call a variant of zfs_zget where we bail out of the retry loop if the I_SYNC flag is set, and force the caller to wait for txg sync. Signed-off-by: Chunwei Chen <david.chen@nutanix.com> Fixes openzfs#7964 Fixes openzfs#9430
1 parent 455c361 commit de08cb7

File tree

4 files changed

+40
-3
lines changed

4 files changed

+40
-3
lines changed

include/sys/zfs_znode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ extern int zfs_znode_hold_compare(const void *, const void *);
276276
extern znode_hold_t *zfs_znode_hold_enter(zfsvfs_t *, uint64_t);
277277
extern void zfs_znode_hold_exit(zfsvfs_t *, znode_hold_t *);
278278
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
279+
extern int zfs_zget_impl(zfsvfs_t *, uint64_t, znode_t **, boolean_t);
279280
extern int zfs_rezget(znode_t *);
280281
extern void zfs_zinactive(znode_t *);
281282
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);

module/os/freebsd/zfs/zfs_znode_os.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,6 +1081,13 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
10811081
return (err);
10821082
}
10831083

1084+
/*
 * FreeBSD variant of zfs_zget_impl(): the check_sync argument is not
 * used here; the call is forwarded unchanged to zfs_zget() and its
 * return value is passed back to the caller.
 */
int
1085+
zfs_zget_impl(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp,
1086+
boolean_t check_sync)
1087+
{
1088+
return (zfs_zget(zfsvfs, obj_num, zpp));
1089+
}
1090+
10841091
int
10851092
zfs_rezget(znode_t *zp)
10861093
{

module/os/linux/zfs/zfs_znode_os.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,13 +1050,21 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
10501050

10511051
/*
 * Thin wrapper around zfs_zget_impl() that passes check_sync = B_FALSE.
 * With check_sync disabled, zfs_zget_impl() never sets its noloop flag,
 * so an EAGAIN from a failed igrab() is always retried.
 */
int
10521052
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1053+
{
1054+
return (zfs_zget_impl(zfsvfs, obj_num, zpp, B_FALSE));
1055+
}
1056+
1057+
int
1058+
zfs_zget_impl(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp,
1059+
boolean_t check_sync)
10531060
{
10541061
dmu_object_info_t doi;
10551062
dmu_buf_t *db;
10561063
znode_t *zp;
10571064
znode_hold_t *zh;
10581065
int err;
10591066
sa_handle_t *hdl;
1067+
boolean_t noloop = B_FALSE;
10601068

10611069
*zpp = NULL;
10621070

@@ -1115,8 +1123,18 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
11151123
if (igrab(ZTOI(zp)) == NULL) {
11161124
if (zp->z_unlinked)
11171125
err = SET_ERROR(ENOENT);
1118-
else
1126+
else {
11191127
err = SET_ERROR(EAGAIN);
1128+
/*
1129+
* In writeback path, I_SYNC flag will be set
1130+
* and block inode eviction. So we must not
1131+
* loop doing igrab in possible writeback
1132+
* path, i.e. zfs_get_data, if inode is being
1133+
* evicted and I_SYNC is also set.
1134+
*/
1135+
if (check_sync && (ZTOI(zp)->i_state & I_SYNC))
1136+
noloop = B_TRUE;
1137+
}
11201138
} else {
11211139
*zpp = zp;
11221140
err = 0;
@@ -1126,7 +1144,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
11261144
sa_buf_rele(db, NULL);
11271145
zfs_znode_hold_exit(zfsvfs, zh);
11281146

1129-
if (err == EAGAIN) {
1147+
if (err == EAGAIN && !noloop) {
11301148
/* inode might need this to finish evict */
11311149
cond_resched();
11321150
goto again;

module/zfs/zfs_vnops.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1342,10 +1342,21 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
13421342
ASSERT3P(lwb, !=, NULL);
13431343
ASSERT3U(size, !=, 0);
13441344

1345+
error = zfs_zget_impl(zfsvfs, object, &zp, B_TRUE);
1346+
#if defined(__linux__)
1347+
/*
1348+
* Under Linux, EAGAIN indicates the inode is being evicted and I_SYNC
1349+
* is also set possibly blocking eviction, so we can't loop in
1350+
* zfs_zget to avoid deadlock. Return EIO to force txg sync under such
1351+
* scenario.
1352+
*/
1353+
if (error == EAGAIN)
1354+
return (SET_ERROR(EIO));
1355+
#endif
13451356
/*
13461357
* Nothing to do if the file has been removed
13471358
*/
1348-
if (zfs_zget(zfsvfs, object, &zp) != 0)
1359+
if (error)
13491360
return (SET_ERROR(ENOENT));
13501361
if (zp->z_unlinked) {
13511362
/*

0 commit comments

Comments
 (0)