Skip to content

Commit e8584c7

Browse files
committed
Fix snapshot automount race causing AVL tree panic
Multiple threads racing to automount the same snapshot can both pass the initial mount check and spawn separate mount helper processes. Both helpers successfully create or find a superblock via sget(), and both return success to their parent threads. Since zpl_mount_impl() doesn't distinguish between creating a new mount versus reusing an existing one, both parent threads proceed to register the mount in the AVL tree, causing a VERIFY() panic in avl_add(). Additionally, this creates duplicate VFS mounts that persist in the kernel. The root cause is that automount uses the newer fsconfig/fsmount API which lacks the lock_mount() serialization present in the traditional mount() syscall path. Manual mounts are protected by lock_mount() which serializes mount point attachment, but automount parent threads execute in parallel without such protection. The fix introduces per-snapshot mount locks using an AVL tree to track locks by snapshot name. Each unique snapshot gets its own lock, allowing different snapshots to mount in parallel while serializing attempts to mount the same snapshot. Locks are dynamically allocated on first use and freed when the last thread releases them. Reproducible with parallel access to a fresh snapshot: ls /mnt/test/.zfs/snapshot/snap1/ & ls /mnt/test/.zfs/snapshot/snap1/ & Signed-off-by: Ameer Hamza <[email protected]>
1 parent 06c73cf commit e8584c7

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed

module/os/linux/zfs/zfs_ctldir.c

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,21 @@ static avl_tree_t zfs_snapshots_by_name;
107107
static avl_tree_t zfs_snapshots_by_objsetid;
108108
static krwlock_t zfs_snapshot_lock;
109109

110+
/*
111+
* Per-snapshot mount locks to prevent duplicate concurrent automounts.
112+
* Each unique snapshot name gets its own lock, allowing different snapshots
113+
* to mount in parallel while serializing attempts to mount the same snapshot.
114+
*/
115+
typedef struct snapshot_mount_lock {
116+
char *sml_name; /* snapshot name */
117+
kmutex_t sml_lock; /* per-snapshot lock */
118+
uint32_t sml_refcount; /* number of threads using this lock */
119+
avl_node_t sml_node; /* AVL tree linkage */
120+
} snapshot_mount_lock_t;
121+
122+
static avl_tree_t snapshot_mount_locks;
123+
static kmutex_t snapshot_mount_locks_mutex;
124+
110125
/*
111126
* Control Directory Tunables (.zfs)
112127
*/
@@ -301,6 +316,67 @@ zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid)
301316
return (se);
302317
}
303318

319+
/*
320+
* AVL comparator for snapshot_mount_lock_t, sorted by snapshot name.
321+
*/
322+
static int
323+
snapshot_mount_lock_compare(const void *l, const void *r)
324+
{
325+
const snapshot_mount_lock_t *sml_l = l;
326+
const snapshot_mount_lock_t *sml_r = r;
327+
return (strcmp(sml_l->sml_name, sml_r->sml_name));
328+
}
329+
330+
/*
331+
* Acquire a per-snapshot mount lock. If this is the first request for this
332+
* snapshot name, allocate and initialize a new lock. Returns with the
333+
* per-snapshot lock held.
334+
*/
335+
static snapshot_mount_lock_t *
336+
snapshot_lock_acquire(const char *snapname)
337+
{
338+
snapshot_mount_lock_t *sml, search;
339+
search.sml_name = (char *)snapname;
340+
mutex_enter(&snapshot_mount_locks_mutex);
341+
sml = avl_find(&snapshot_mount_locks, &search, NULL);
342+
343+
if (sml == NULL) {
344+
/* First thread for this snapshot - create new lock */
345+
sml = kmem_zalloc(sizeof (snapshot_mount_lock_t), KM_SLEEP);
346+
sml->sml_name = kmem_strdup(snapname);
347+
mutex_init(&sml->sml_lock, NULL, MUTEX_DEFAULT, NULL);
348+
sml->sml_refcount = 0;
349+
avl_add(&snapshot_mount_locks, sml);
350+
}
351+
352+
sml->sml_refcount++;
353+
mutex_exit(&snapshot_mount_locks_mutex);
354+
mutex_enter(&sml->sml_lock);
355+
356+
return (sml);
357+
}
358+
359+
/*
360+
* Release a per-snapshot mount lock. If this is the last thread using this
361+
* lock, remove it from the AVL tree and free it.
362+
*/
363+
static void
364+
snapshot_lock_release(snapshot_mount_lock_t *sml)
365+
{
366+
mutex_exit(&sml->sml_lock);
367+
mutex_enter(&snapshot_mount_locks_mutex);
368+
sml->sml_refcount--;
369+
370+
if (sml->sml_refcount == 0) {
371+
avl_remove(&snapshot_mount_locks, sml);
372+
mutex_destroy(&sml->sml_lock);
373+
kmem_strfree(sml->sml_name);
374+
kmem_free(sml, sizeof (snapshot_mount_lock_t));
375+
}
376+
377+
mutex_exit(&snapshot_mount_locks_mutex);
378+
}
379+
304380
/*
305381
* Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is
306382
* removed, renamed, and added back to the new correct location in the tree.
@@ -1162,6 +1238,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
11621238
zfsvfs_t *zfsvfs;
11631239
zfsvfs_t *snap_zfsvfs;
11641240
zfs_snapentry_t *se;
1241+
snapshot_mount_lock_t *sml = NULL;
11651242
char *full_name, *full_path, *options;
11661243
char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n",
11671244
"-o", NULL, NULL, NULL, NULL };
@@ -1233,11 +1310,20 @@ zfsctl_snapshot_mount(struct path *path, int flags)
12331310
snprintf(options, 7, "%s",
12341311
zfs_snapshot_no_setuid ? "nosuid" : "suid");
12351312

1313+
/*
1314+
* Acquire per-snapshot lock to serialize automount operations
1315+
* for this specific snapshot. Different snapshots can mount in
1316+
* parallel. This prevents the race where multiple threads try to
1317+
* automount the same snapshot simultaneously.
1318+
*/
1319+
sml = snapshot_lock_acquire(full_name);
1320+
12361321
/*
12371322
* Multiple concurrent automounts of a snapshot are never allowed.
12381323
* The snapshot may be manually mounted as many times as desired.
12391324
*/
12401325
if (zfsctl_snapshot_ismounted(full_name)) {
1326+
snapshot_lock_release(sml);
12411327
error = 0;
12421328
goto error;
12431329
}
@@ -1275,6 +1361,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
12751361
*/
12761362
error = 0;
12771363
}
1364+
snapshot_lock_release(sml);
12781365
goto error;
12791366
}
12801367

@@ -1291,6 +1378,12 @@ zfsctl_snapshot_mount(struct path *path, int flags)
12911378
spath.mnt->mnt_flags |= MNT_SHRINKABLE;
12921379

12931380
rw_enter(&zfs_snapshot_lock, RW_WRITER);
1381+
/*
1382+
* With per-snapshot locking, no other thread can reach this
1383+
* point for the same snapshot.
1384+
*/
1385+
se = zfsctl_snapshot_find_by_name(full_name);
1386+
ASSERT3P(se, ==, NULL);
12941387
se = zfsctl_snapshot_alloc(full_name, full_path,
12951388
snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
12961389
dentry);
@@ -1299,6 +1392,10 @@ zfsctl_snapshot_mount(struct path *path, int flags)
12991392
rw_exit(&zfs_snapshot_lock);
13001393
}
13011394
path_put(&spath);
1395+
1396+
if (sml != NULL)
1397+
snapshot_lock_release(sml);
1398+
13021399
error:
13031400
kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
13041401
kmem_free(full_path, MAXPATHLEN);
@@ -1399,6 +1496,10 @@ zfsctl_init(void)
13991496
sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
14001497
se_node_objsetid));
14011498
rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
1499+
avl_create(&snapshot_mount_locks, snapshot_mount_lock_compare,
1500+
sizeof (snapshot_mount_lock_t), offsetof(snapshot_mount_lock_t,
1501+
sml_node));
1502+
mutex_init(&snapshot_mount_locks_mutex, NULL, MUTEX_DEFAULT, NULL);
14021503
}
14031504

14041505
/*
@@ -1408,6 +1509,9 @@ zfsctl_init(void)
14081509
void
14091510
zfsctl_fini(void)
14101511
{
1512+
ASSERT(avl_is_empty(&snapshot_mount_locks));
1513+
avl_destroy(&snapshot_mount_locks);
1514+
mutex_destroy(&snapshot_mount_locks_mutex);
14111515
avl_destroy(&zfs_snapshots_by_name);
14121516
avl_destroy(&zfs_snapshots_by_objsetid);
14131517
rw_destroy(&zfs_snapshot_lock);

0 commit comments

Comments
 (0)