@@ -117,14 +117,17 @@ static int zfs_snapshot_no_setuid = 0;
117117typedef struct {
118118 char * se_name ; /* full snapshot name */
119119 char * se_path ; /* full mount path */
120- spa_t * se_spa ; /* pool spa */
120+ spa_t * se_spa ; /* pool spa (NULL if pending) */
121121 uint64_t se_objsetid ; /* snapshot objset id */
122122 struct dentry * se_root_dentry ; /* snapshot root dentry */
123123 krwlock_t se_taskqid_lock ; /* scheduled unmount taskqid lock */
124124 taskqid_t se_taskqid ; /* scheduled unmount taskqid */
125125 avl_node_t se_node_name ; /* zfs_snapshots_by_name link */
126126 avl_node_t se_node_objsetid ; /* zfs_snapshots_by_objsetid link */
127127 zfs_refcount_t se_refcount ; /* reference count */
128+ kmutex_t se_mtx ; /* serialize auto-mount/unmount */
129+ kcondvar_t se_cv ; /* signal mount completion */
130+ boolean_t se_mounting ; /* mount operation in progress */
128131} zfs_snapentry_t ;
129132
130133static void zfsctl_snapshot_unmount_delay_impl (zfs_snapentry_t * se , int delay );
@@ -148,6 +151,9 @@ zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
148151 se -> se_root_dentry = root_dentry ;
149152 se -> se_taskqid = TASKQID_INVALID ;
150153 rw_init (& se -> se_taskqid_lock , NULL , RW_DEFAULT , NULL );
154+ mutex_init (& se -> se_mtx , NULL , MUTEX_DEFAULT , NULL );
155+ cv_init (& se -> se_cv , NULL , CV_DEFAULT , NULL );
156+ se -> se_mounting = B_FALSE ;
151157
152158 zfs_refcount_create (& se -> se_refcount );
153159
@@ -165,6 +171,8 @@ zfsctl_snapshot_free(zfs_snapentry_t *se)
165171 kmem_strfree (se -> se_name );
166172 kmem_strfree (se -> se_path );
167173 rw_destroy (& se -> se_taskqid_lock );
174+ mutex_destroy (& se -> se_mtx );
175+ cv_destroy (& se -> se_cv );
168176
169177 kmem_free (se , sizeof (zfs_snapentry_t ));
170178}
@@ -190,34 +198,52 @@ zfsctl_snapshot_rele(zfs_snapentry_t *se)
190198}
191199
192200/*
193- * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
194- * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part
195- * of the trees a reference is held.
201+ * Add a zfs_snapentry_t to the zfs_snapshots_by_name tree. If the entry
202+ * is not pending (se_spa != NULL), also add to zfs_snapshots_by_objsetid.
203+ * While the zfs_snapentry_t is part of the trees a reference is held.
196204 */
197205static void
198206zfsctl_snapshot_add (zfs_snapentry_t * se )
199207{
200208 ASSERT (RW_WRITE_HELD (& zfs_snapshot_lock ));
201209 zfsctl_snapshot_hold (se );
202210 avl_add (& zfs_snapshots_by_name , se );
203- avl_add (& zfs_snapshots_by_objsetid , se );
211+ if (se -> se_spa != NULL )
212+ avl_add (& zfs_snapshots_by_objsetid , se );
204213}
205214
206215/*
207- * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
208- * zfs_snapshots_by_objsetid trees . Upon removal a reference is dropped,
209- * this can result in the structure being freed if that was the last
210- * remaining reference.
216+ * Remove a zfs_snapentry_t from the zfs_snapshots_by_name tree and
217+ * zfs_snapshots_by_objsetid tree (if not pending) . Upon removal a
218+ * reference is dropped, this can result in the structure being freed
219+ * if that was the last remaining reference.
211220 */
212221static void
213222zfsctl_snapshot_remove (zfs_snapentry_t * se )
214223{
215224 ASSERT (RW_WRITE_HELD (& zfs_snapshot_lock ));
216225 avl_remove (& zfs_snapshots_by_name , se );
217- avl_remove (& zfs_snapshots_by_objsetid , se );
226+ if (se -> se_spa != NULL )
227+ avl_remove (& zfs_snapshots_by_objsetid , se );
218228 zfsctl_snapshot_rele (se );
219229}
220230
231+ /*
232+ * Fill a pending zfs_snapentry_t after mount succeeds. Fills in the
233+ * remaining fields and adds the entry to the zfs_snapshots_by_objsetid tree.
234+ */
235+ static void
236+ zfsctl_snapshot_fill (zfs_snapentry_t * se , spa_t * spa , uint64_t objsetid ,
237+ struct dentry * root_dentry )
238+ {
239+ ASSERT (RW_WRITE_HELD (& zfs_snapshot_lock ));
240+ ASSERT3P (se -> se_spa , = = , NULL );
241+ se -> se_spa = spa ;
242+ se -> se_objsetid = objsetid ;
243+ se -> se_root_dentry = root_dentry ;
244+ avl_add (& zfs_snapshots_by_objsetid , se );
245+ }
246+
221247/*
222248 * Snapshot name comparison function for the zfs_snapshots_by_name.
223249 */
@@ -315,6 +341,11 @@ zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)
315341 se = zfsctl_snapshot_find_by_name (old_snapname );
316342 if (se == NULL )
317343 return (SET_ERROR (ENOENT ));
344+ if (se -> se_spa == NULL ) {
345+ /* Snapshot mount is in progress */
346+ zfsctl_snapshot_rele (se );
347+ return (SET_ERROR (EBUSY ));
348+ }
318349
319350 zfsctl_snapshot_remove (se );
320351 kmem_strfree (se -> se_name );
@@ -437,26 +468,6 @@ zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
437468 return (error );
438469}
439470
440- /*
441- * Check if snapname is currently mounted. Returned non-zero when mounted
442- * and zero when unmounted.
443- */
444- static boolean_t
445- zfsctl_snapshot_ismounted (const char * snapname )
446- {
447- zfs_snapentry_t * se ;
448- boolean_t ismounted = B_FALSE ;
449-
450- rw_enter (& zfs_snapshot_lock , RW_READER );
451- if ((se = zfsctl_snapshot_find_by_name (snapname )) != NULL ) {
452- zfsctl_snapshot_rele (se );
453- ismounted = B_TRUE ;
454- }
455- rw_exit (& zfs_snapshot_lock );
456-
457- return (ismounted );
458- }
459-
460471/*
461472 * Check if the given inode is a part of the virtual .zfs directory.
462473 */
@@ -1133,13 +1144,22 @@ zfsctl_snapshot_unmount(const char *snapname, int flags)
11331144 }
11341145 rw_exit (& zfs_snapshot_lock );
11351146
1147+ /*
1148+ * Wait for any pending auto-mount to complete before unmounting.
1149+ */
1150+ mutex_enter (& se -> se_mtx );
1151+ while (se -> se_mounting )
1152+ cv_wait (& se -> se_cv , & se -> se_mtx );
1153+
11361154 exportfs_flush ();
11371155
11381156 if (flags & MNT_FORCE )
11391157 argv [4 ] = "-fn" ;
11401158 argv [5 ] = se -> se_path ;
11411159 dprintf ("unmount; path=%s\n" , se -> se_path );
11421160 error = call_usermodehelper (argv [0 ], argv , envp , UMH_WAIT_PROC );
1161+
1162+ mutex_exit (& se -> se_mtx );
11431163 zfsctl_snapshot_rele (se );
11441164
11451165
@@ -1234,14 +1254,40 @@ zfsctl_snapshot_mount(struct path *path, int flags)
12341254 zfs_snapshot_no_setuid ? "nosuid" : "suid" );
12351255
12361256 /*
1237- * Multiple concurrent automounts of a snapshot are never allowed.
1238- * The snapshot may be manually mounted as many times as desired.
1257+ * Create a pending AVL entry for the snapshot to prevent concurrent
1258+ * automount attempts. If an entry already exists, the snapshot is
1259+ * either mounted or being mounted by another thread, so return
1260+ * success without starting a duplicate mount operation. returning
1261+ * EBUSY would mean that VFS would retry, which we do not want.
12391262 */
1240- if (zfsctl_snapshot_ismounted (full_name )) {
1263+ rw_enter (& zfs_snapshot_lock , RW_WRITER );
1264+ if ((se = zfsctl_snapshot_find_by_name (full_name )) != NULL ) {
1265+ rw_exit (& zfs_snapshot_lock );
1266+
1267+ /*
1268+ * Entry exists - wait for pending auto-mount to complete.
1269+ */
1270+ mutex_enter (& se -> se_mtx );
1271+ while (se -> se_mounting )
1272+ cv_wait (& se -> se_cv , & se -> se_mtx );
1273+ mutex_exit (& se -> se_mtx );
1274+ zfsctl_snapshot_rele (se );
12411275 error = 0 ;
12421276 goto error ;
12431277 }
12441278
1279+ /*
1280+ * Create pending entry (se_spa=NULL), add to AVL tree, and acquire
1281+ * se_mtx to serialize with concurrent unmount operations. Release
1282+ * global lock before long-running mount helper.
1283+ */
1284+ se = zfsctl_snapshot_alloc (full_name , full_path , NULL , 0 , NULL );
1285+ se -> se_mounting = B_TRUE ;
1286+ zfsctl_snapshot_add (se );
1287+ zfsctl_snapshot_hold (se );
1288+ mutex_enter (& se -> se_mtx );
1289+ rw_exit (& zfs_snapshot_lock );
1290+
12451291 /*
12461292 * Attempt to mount the snapshot from user space. Normally this
12471293 * would be done using the vfs_kern_mount() function, however that
@@ -1260,6 +1306,18 @@ zfsctl_snapshot_mount(struct path *path, int flags)
12601306 argv [9 ] = full_path ;
12611307 error = call_usermodehelper (argv [0 ], argv , envp , UMH_WAIT_PROC );
12621308 if (error ) {
1309+ /*
1310+ * Mount failed - remove pending entry from AVL, signal waiting
1311+ * threads, release mutex, and drop mount operation reference.
1312+ */
1313+ rw_enter (& zfs_snapshot_lock , RW_WRITER );
1314+ zfsctl_snapshot_remove (se );
1315+ rw_exit (& zfs_snapshot_lock );
1316+ se -> se_mounting = B_FALSE ;
1317+ cv_broadcast (& se -> se_cv );
1318+ mutex_exit (& se -> se_mtx );
1319+ zfsctl_snapshot_rele (se );
1320+
12631321 if (!(error & MOUNT_BUSY << 8 )) {
12641322 zfs_dbgmsg ("Unable to automount %s error=%d" ,
12651323 full_path , error );
@@ -1291,14 +1349,25 @@ zfsctl_snapshot_mount(struct path *path, int flags)
12911349 spath .mnt -> mnt_flags |= MNT_SHRINKABLE ;
12921350
12931351 rw_enter (& zfs_snapshot_lock , RW_WRITER );
1294- se = zfsctl_snapshot_alloc (full_name , full_path ,
1295- snap_zfsvfs -> z_os -> os_spa , dmu_objset_id (snap_zfsvfs -> z_os ),
1296- dentry );
1297- zfsctl_snapshot_add (se );
1352+ zfsctl_snapshot_fill (se , snap_zfsvfs -> z_os -> os_spa ,
1353+ dmu_objset_id (snap_zfsvfs -> z_os ), dentry );
12981354 zfsctl_snapshot_unmount_delay_impl (se , zfs_expire_snapshot );
12991355 rw_exit (& zfs_snapshot_lock );
1356+ } else {
1357+ rw_enter (& zfs_snapshot_lock , RW_WRITER );
1358+ zfsctl_snapshot_remove (se );
1359+ rw_exit (& zfs_snapshot_lock );
13001360 }
13011361 path_put (& spath );
1362+
1363+ /*
1364+ * Signal waiting threads that mount operation has completed.
1365+ * Release mutex and drop mount operation reference.
1366+ */
1367+ se -> se_mounting = B_FALSE ;
1368+ cv_broadcast (& se -> se_cv );
1369+ mutex_exit (& se -> se_mtx );
1370+ zfsctl_snapshot_rele (se );
13021371error :
13031372 kmem_free (full_name , ZFS_MAX_DATASET_NAME_LEN );
13041373 kmem_free (full_path , MAXPATHLEN );
0 commit comments