@@ -1579,51 +1579,6 @@ zvol_create_minors_impl(zvol_task_t *task)
1579
1579
zvol_task_update_status (task , total , done , last_error );
1580
1580
}
1581
1581
1582
- /*
1583
- * Remove minors for specified dataset including children and snapshots.
1584
- */
1585
-
1586
- /*
1587
- * Remove the minor for a given zvol. This will do it all:
1588
- * - flag the zvol for removal, so new requests are rejected
1589
- * - wait until outstanding requests are completed
1590
- * - remove it from lists
1591
- * - free it
1592
- * It's also usable as a taskq task, and smells nice too.
1593
- */
1594
- static void
1595
- zvol_remove_minor_task (void * arg )
1596
- {
1597
- zvol_state_t * zv = (zvol_state_t * )arg ;
1598
-
1599
- ASSERT (!RW_LOCK_HELD (& zvol_state_lock ));
1600
- ASSERT (!MUTEX_HELD (& zv -> zv_state_lock ));
1601
-
1602
- mutex_enter (& zv -> zv_state_lock );
1603
- while (zv -> zv_open_count > 0 || atomic_read (& zv -> zv_suspend_ref )) {
1604
- zv -> zv_flags |= ZVOL_REMOVING ;
1605
- cv_wait (& zv -> zv_removing_cv , & zv -> zv_state_lock );
1606
- }
1607
- mutex_exit (& zv -> zv_state_lock );
1608
-
1609
- rw_enter (& zvol_state_lock , RW_WRITER );
1610
- mutex_enter (& zv -> zv_state_lock );
1611
-
1612
- zvol_os_remove_minor (zv );
1613
- zvol_remove (zv );
1614
-
1615
- mutex_exit (& zv -> zv_state_lock );
1616
- rw_exit (& zvol_state_lock );
1617
-
1618
- zvol_os_free (zv );
1619
- }
1620
-
1621
- static void
1622
- zvol_free_task (void * arg )
1623
- {
1624
- zvol_os_free (arg );
1625
- }
1626
-
1627
1582
/*
1628
1583
* Remove minors for specified dataset and, optionally, its children and
1629
1584
* snapshots.
@@ -1636,25 +1591,33 @@ zvol_remove_minors_impl(zvol_task_t *task)
1636
1591
int namelen = ((name ) ? strlen (name ) : 0 );
1637
1592
boolean_t children = task ? !!task -> zt_value : B_TRUE ;
1638
1593
1639
- taskqid_t t ;
1640
- list_t delay_list , free_list ;
1641
-
1642
1594
if (zvol_inhibit_dev )
1643
1595
return ;
1644
1596
1645
- list_create (& delay_list , sizeof (zvol_state_t ),
1646
- offsetof(zvol_state_t , zv_next ));
1647
- list_create (& free_list , sizeof (zvol_state_t ),
1648
- offsetof(zvol_state_t , zv_next ));
1649
-
1650
- int error = ENOENT ;
1597
+ /*
1598
+ * We collect up zvols that we want to remove on a separate list, so
1599
+ * that we don't have to hold zvol_state_lock for the whole time.
1600
+ *
1601
+ * We can't remove them from the global lists until we're completely
1602
+ * done with them, because that would make them appear to ZFS-side ops
1603
+ * that they don't exist, and the name might be reused, which can't be
1604
+ * good.
1605
+ */
1606
+ list_t remove_list ;
1607
+ list_create (& remove_list , sizeof (zvol_state_t ),
1608
+ offsetof(zvol_state_t , zv_remove_node ));
1651
1609
1652
- rw_enter (& zvol_state_lock , RW_WRITER );
1610
+ rw_enter (& zvol_state_lock , RW_READER );
1653
1611
1654
1612
for (zv = list_head (& zvol_state_list ); zv != NULL ; zv = zv_next ) {
1655
1613
zv_next = list_next (& zvol_state_list , zv );
1656
1614
1657
1615
mutex_enter (& zv -> zv_state_lock );
1616
+ if (zv -> zv_flags & ZVOL_REMOVING ) {
1617
+ /* Another thread is handling shutdown, skip it. */
1618
+ mutex_exit (& zv -> zv_state_lock );
1619
+ continue ;
1620
+ }
1658
1621
1659
1622
/*
1660
1623
* This zvol should be removed if:
@@ -1669,61 +1632,87 @@ zvol_remove_minors_impl(zvol_task_t *task)
1669
1632
(children && strncmp (zv -> zv_name , name , namelen ) == 0 &&
1670
1633
(zv -> zv_name [namelen ] == '/' ||
1671
1634
zv -> zv_name [namelen ] == '@' ))) {
1672
- /*
1673
- * By holding zv_state_lock here, we guarantee that no
1674
- * one is currently using this zv
1675
- */
1676
- error = 0 ;
1677
1635
1678
1636
/*
1679
- * If in use, try to throw everyone off and try again
1680
- * later.
1637
+ * Matched, so mark it removal. We want to take the
1638
+ * write half of the suspend lock to make sure that
1639
+ * the zvol is not suspended, and give any data ops
1640
+ * chance to finish.
1681
1641
*/
1682
- zv -> zv_flags |= ZVOL_REMOVING ;
1683
- if (zv -> zv_open_count > 0 ||
1684
- atomic_read (& zv -> zv_suspend_ref )) {
1685
- t = taskq_dispatch (
1686
- zv -> zv_objset -> os_spa -> spa_zvol_taskq ,
1687
- zvol_remove_minor_task , zv , TQ_SLEEP );
1688
- if (t == TASKQID_INVALID ) {
1689
- /*
1690
- * Couldn't create the task, so we'll
1691
- * do it in place once the loop is
1692
- * finished.
1693
- */
1694
- list_insert_head (& delay_list , zv );
1695
- }
1642
+ mutex_exit (& zv -> zv_state_lock );
1643
+ rw_enter (& zv -> zv_suspend_lock , RW_WRITER );
1644
+ mutex_enter (& zv -> zv_state_lock );
1645
+
1646
+ if (zv -> zv_flags & ZVOL_REMOVING ) {
1647
+ /* Another thread has taken it, let them. */
1696
1648
mutex_exit (& zv -> zv_state_lock );
1649
+ rw_exit (& zv -> zv_suspend_lock );
1697
1650
continue ;
1698
1651
}
1699
1652
1700
- zvol_os_remove_minor (zv );
1701
- zvol_remove (zv );
1702
-
1703
- /* Drop zv_state_lock before zvol_free() */
1653
+ /*
1654
+ * Mark it and unlock. New entries will see the flag
1655
+ * and return ENXIO.
1656
+ */
1657
+ zv -> zv_flags |= ZVOL_REMOVING ;
1704
1658
mutex_exit (& zv -> zv_state_lock );
1659
+ rw_exit (& zv -> zv_suspend_lock );
1705
1660
1706
- /* Try parallel zv_free, if failed do it in place */
1707
- t = taskq_dispatch (system_taskq , zvol_free_task , zv ,
1708
- TQ_SLEEP );
1709
- if (t == TASKQID_INVALID )
1710
- list_insert_head (& free_list , zv );
1711
- } else {
1661
+ /* Put it on the list for the next stage. */
1662
+ list_insert_head (& remove_list , zv );
1663
+ } else
1712
1664
mutex_exit (& zv -> zv_state_lock );
1713
- }
1714
1665
}
1666
+
1715
1667
rw_exit (& zvol_state_lock );
1716
1668
1717
- /* Wait for zvols that we couldn't create a remove task for */
1718
- while ((zv = list_remove_head (& delay_list )) != NULL )
1719
- zvol_remove_minor_task (zv );
1669
+ /* Didn't match any, nothing to do! */
1670
+ if (list_is_empty (& remove_list )) {
1671
+ if (task )
1672
+ task -> zt_error = SET_ERROR (ENOENT );
1673
+ return ;
1674
+ }
1675
+
1676
+ /* Actually shut them all down. */
1677
+ for (zv = list_head (& remove_list ); zv != NULL ; zv = zv_next ) {
1678
+ zv_next = list_next (& remove_list , zv );
1720
1679
1721
- /* Free any that we couldn't free in parallel earlier */
1722
- while ((zv = list_remove_head (& free_list )) != NULL )
1680
+ mutex_enter (& zv -> zv_state_lock );
1681
+
1682
+ /*
1683
+ * Still open or suspended, just wait. This can happen if, for
1684
+ * example, we managed to acquire zv_state_lock in the moments
1685
+ * where zvol_open() or zvol_release() are trading locks to
1686
+ * call zvol_first_open() or zvol_last_close().
1687
+ */
1688
+ while (zv -> zv_open_count > 0 ||
1689
+ atomic_read (& zv -> zv_suspend_ref ))
1690
+ cv_wait (& zv -> zv_removing_cv , & zv -> zv_state_lock );
1691
+
1692
+ /*
1693
+ * No users, shut down the OS side. This may not remove the
1694
+ * minor from view immediately, depending on the kernel
1695
+ * specifics, but it will ensure that it is unusable and that
1696
+ * this zvol_state_t can never again be reached from an OS-side
1697
+ * operation.
1698
+ */
1699
+ zvol_os_remove_minor (zv );
1700
+ mutex_exit (& zv -> zv_state_lock );
1701
+
1702
+ /* Remove it from the name lookup lists */
1703
+ rw_enter (& zvol_state_lock , RW_WRITER );
1704
+ zvol_remove (zv );
1705
+ rw_exit (& zvol_state_lock );
1706
+ }
1707
+
1708
+ /*
1709
+ * Our own references on remove_list is the last one, free them and
1710
+ * we're done.
1711
+ */
1712
+ while ((zv = list_remove_head (& remove_list )) != NULL )
1723
1713
zvol_os_free (zv );
1724
1714
1725
- if (error && task )
1726
- task -> zt_error = SET_ERROR (error );
1715
+ list_destroy (& remove_list );
1727
1716
}
1728
1717
1729
1718
/* Remove minor for this specific volume only */
@@ -2182,14 +2171,6 @@ zvol_fini_impl(void)
2182
2171
2183
2172
zvol_remove_minors_impl (NULL );
2184
2173
2185
- /*
2186
- * The call to "zvol_remove_minors_impl" may dispatch entries to
2187
- * the system_taskq, but it doesn't wait for those entries to
2188
- * complete before it returns. Thus, we must wait for all of the
2189
- * removals to finish, before we can continue.
2190
- */
2191
- taskq_wait_outstanding (system_taskq , 0 );
2192
-
2193
2174
kmem_free (zvol_htable , ZVOL_HT_SIZE * sizeof (struct hlist_head ));
2194
2175
list_destroy (& zvol_state_list );
2195
2176
rw_destroy (& zvol_state_lock );
0 commit comments