@@ -82,6 +82,7 @@ static DEFINE_RWLOCK(mnt_ns_tree_lock);
82
82
static seqcount_rwlock_t mnt_ns_tree_seqcount = SEQCNT_RWLOCK_ZERO (mnt_ns_tree_seqcount , & mnt_ns_tree_lock );
83
83
84
84
static struct rb_root mnt_ns_tree = RB_ROOT ; /* protected by mnt_ns_tree_lock */
85
+ static LIST_HEAD (mnt_ns_list ); /* protected by mnt_ns_tree_lock */
85
86
86
87
struct mount_kattr {
87
88
unsigned int attr_set ;
@@ -142,10 +143,19 @@ static inline void mnt_ns_tree_write_unlock(void)
142
143
143
144
static void mnt_ns_tree_add (struct mnt_namespace * ns )
144
145
{
145
- struct rb_node * node ;
146
+ struct rb_node * node , * prev ;
146
147
147
148
mnt_ns_tree_write_lock ();
148
149
node = rb_find_add_rcu (& ns -> mnt_ns_tree_node , & mnt_ns_tree , mnt_ns_cmp );
150
+ /*
151
+ * If there's no previous entry simply add it after the
152
+ * head and if there is add it after the previous entry.
153
+ */
154
+ prev = rb_prev (& ns -> mnt_ns_tree_node );
155
+ if (!prev )
156
+ list_add_rcu (& ns -> mnt_ns_list , & mnt_ns_list );
157
+ else
158
+ list_add_rcu (& ns -> mnt_ns_list , & node_to_mnt_ns (prev )-> mnt_ns_list );
149
159
mnt_ns_tree_write_unlock ();
150
160
151
161
WARN_ON_ONCE (node );
@@ -174,6 +184,7 @@ static void mnt_ns_tree_remove(struct mnt_namespace *ns)
174
184
if (!is_anon_ns (ns )) {
175
185
mnt_ns_tree_write_lock ();
176
186
rb_erase (& ns -> mnt_ns_tree_node , & mnt_ns_tree );
187
+ list_bidir_del_rcu (& ns -> mnt_ns_list );
177
188
mnt_ns_tree_write_unlock ();
178
189
}
179
190
@@ -2086,30 +2097,34 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
2086
2097
return & mnt -> ns ;
2087
2098
}
2088
2099
2089
- struct mnt_namespace * __lookup_next_mnt_ns (struct mnt_namespace * mntns , bool previous )
2100
+ struct mnt_namespace * get_sequential_mnt_ns (struct mnt_namespace * mntns , bool previous )
2090
2101
{
2091
- guard (read_lock )(& mnt_ns_tree_lock );
2102
+ guard (rcu )();
2103
+
2092
2104
for (;;) {
2093
- struct rb_node * node ;
2105
+ struct list_head * list ;
2094
2106
2095
2107
if (previous )
2096
- node = rb_prev ( & mntns -> mnt_ns_tree_node );
2108
+ list = rcu_dereference ( list_bidir_prev_rcu ( & mntns -> mnt_ns_list ) );
2097
2109
else
2098
- node = rb_next ( & mntns -> mnt_ns_tree_node );
2099
- if (! node )
2110
+ list = rcu_dereference ( list_next_rcu ( & mntns -> mnt_ns_list ) );
2111
+ if (list_is_head ( list , & mnt_ns_list ) )
2100
2112
return ERR_PTR (- ENOENT );
2101
2113
2102
- mntns = node_to_mnt_ns (node );
2103
- node = & mntns -> mnt_ns_tree_node ;
2114
+ mntns = list_entry_rcu (list , struct mnt_namespace , mnt_ns_list );
2104
2115
2116
+ /*
2117
+ * The last passive reference count is put with RCU
2118
+ * delay so accessing the mount namespace is not just
2119
+ * safe but all relevant members are still valid.
2120
+ */
2105
2121
if (!ns_capable_noaudit (mntns -> user_ns , CAP_SYS_ADMIN ))
2106
2122
continue ;
2107
2123
2108
2124
/*
2109
- * Holding mnt_ns_tree_lock prevents the mount namespace from
2110
- * being freed but it may well be on it's deathbed. We want an
2111
- * active reference, not just a passive one here as we're
2112
- * persisting the mount namespace.
2125
+ * We need an active reference count as we're persisting
2126
+ * the mount namespace and it might already be on its
2127
+ * deathbed.
2113
2128
*/
2114
2129
if (!refcount_inc_not_zero (& mntns -> ns .count ))
2115
2130
continue ;
@@ -3926,6 +3941,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
3926
3941
refcount_set (& new_ns -> ns .count , 1 );
3927
3942
refcount_set (& new_ns -> passive , 1 );
3928
3943
new_ns -> mounts = RB_ROOT ;
3944
+ INIT_LIST_HEAD (& new_ns -> mnt_ns_list );
3929
3945
RB_CLEAR_NODE (& new_ns -> mnt_ns_tree_node );
3930
3946
init_waitqueue_head (& new_ns -> poll );
3931
3947
new_ns -> user_ns = get_user_ns (user_ns );
0 commit comments