Skip to content

Commit 1b96a41

Browse files
committed
Merge branch 'for-5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"There are several notable changes here:

 - Single thread migrating itself has been optimized so that it doesn't need threadgroup rwsem anymore.

 - Freezer optimization to avoid unnecessary frozen state changes.

 - cgroup ID unification so that cgroup fs ino is the only unique ID used for the cgroup and can be used to directly look up live cgroups through filehandle interface on 64bit ino archs. On 32bit archs, cgroup fs ino is still the only ID in use but it is only unique when combined with gen.

 - selftest and other changes"

* 'for-5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (24 commits)
  writeback: fix -Wformat compilation warnings
  docs: cgroup: mm: Fix spelling of "list"
  cgroup: fix incorrect WARN_ON_ONCE() in cgroup_setup_root()
  cgroup: use cgrp->kn->id as the cgroup ID
  kernfs: use 64bit inos if ino_t is 64bit
  kernfs: implement custom exportfs ops and fid type
  kernfs: combine ino/id lookup functions into kernfs_find_and_get_node_by_id()
  kernfs: convert kernfs_node->id from union kernfs_node_id to u64
  kernfs: kernfs_find_and_get_node_by_ino() should only look up activated nodes
  kernfs: use dumber locking for kernfs_find_and_get_node_by_ino()
  netprio: use css ID instead of cgroup ID
  writeback: use ino_t for inodes in tracepoints
  kernfs: fix ino wrap-around detection
  kselftests: cgroup: Avoid the reuse of fd after it is deallocated
  cgroup: freezer: don't change task and cgroups status unnecessarily
  cgroup: use cgroup->last_bstat instead of cgroup->bstat_pending for consistency
  cgroup: remove cgroup_enable_task_cg_lists() optimization
  cgroup: pids: use atomic64_t for pids->limit
  selftests: cgroup: Run test_core under interfering stress
  selftests: cgroup: Add task migration tests
  ...
2 parents 9391ede + 40363cf commit 1b96a41

32 files changed

+746
-534
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1334,7 +1334,7 @@ PAGE_SIZE multiple when read back.
13341334

13351335
pgdeactivate
13361336

1337-
Amount of pages moved to the inactive LRU lis
1337+
Amount of pages moved to the inactive LRU list
13381338

13391339
pglazyfree
13401340

fs/kernfs/dir.c

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -508,10 +508,6 @@ void kernfs_put(struct kernfs_node *kn)
508508
struct kernfs_node *parent;
509509
struct kernfs_root *root;
510510

511-
/*
512-
* kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino
513-
* depends on this to filter reused stale node
514-
*/
515511
if (!kn || !atomic_dec_and_test(&kn->count))
516512
return;
517513
root = kernfs_root(kn);
@@ -536,7 +532,7 @@ void kernfs_put(struct kernfs_node *kn)
536532
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
537533
}
538534
spin_lock(&kernfs_idr_lock);
539-
idr_remove(&root->ino_idr, kn->id.ino);
535+
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
540536
spin_unlock(&kernfs_idr_lock);
541537
kmem_cache_free(kernfs_node_cache, kn);
542538

@@ -621,8 +617,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
621617
unsigned flags)
622618
{
623619
struct kernfs_node *kn;
624-
u32 gen;
625-
int cursor;
620+
u32 id_highbits;
626621
int ret;
627622

628623
name = kstrdup_const(name, GFP_KERNEL);
@@ -635,23 +630,19 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
635630

636631
idr_preload(GFP_KERNEL);
637632
spin_lock(&kernfs_idr_lock);
638-
cursor = idr_get_cursor(&root->ino_idr);
639633
ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
640-
if (ret >= 0 && ret < cursor)
641-
root->next_generation++;
642-
gen = root->next_generation;
634+
if (ret >= 0 && ret < root->last_id_lowbits)
635+
root->id_highbits++;
636+
id_highbits = root->id_highbits;
637+
root->last_id_lowbits = ret;
643638
spin_unlock(&kernfs_idr_lock);
644639
idr_preload_end();
645640
if (ret < 0)
646641
goto err_out2;
647-
kn->id.ino = ret;
648-
kn->id.generation = gen;
649642

650-
/*
651-
* set ino first. This RELEASE is paired with atomic_inc_not_zero in
652-
* kernfs_find_and_get_node_by_ino
653-
*/
654-
atomic_set_release(&kn->count, 1);
643+
kn->id = (u64)id_highbits << 32 | ret;
644+
645+
atomic_set(&kn->count, 1);
655646
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
656647
RB_CLEAR_NODE(&kn->rb);
657648

@@ -680,7 +671,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
680671
return kn;
681672

682673
err_out3:
683-
idr_remove(&root->ino_idr, kn->id.ino);
674+
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
684675
err_out2:
685676
kmem_cache_free(kernfs_node_cache, kn);
686677
err_out1:
@@ -705,50 +696,52 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
705696
}
706697

707698
/*
708-
* kernfs_find_and_get_node_by_ino - get kernfs_node from inode number
699+
* kernfs_find_and_get_node_by_id - get kernfs_node from node id
709700
* @root: the kernfs root
710-
* @ino: inode number
701+
* @id: the target node id
702+
*
703+
* @id's lower 32bits encode ino and upper 32bits encode gen. If the gen portion is
704+
* zero, all generations are matched.
711705
*
712706
* RETURNS:
713707
* NULL on failure. Return a kernfs node with reference counter incremented
714708
*/
715-
struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
716-
unsigned int ino)
709+
struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
710+
u64 id)
717711
{
718712
struct kernfs_node *kn;
713+
ino_t ino = kernfs_id_ino(id);
714+
u32 gen = kernfs_id_gen(id);
719715

720-
rcu_read_lock();
721-
kn = idr_find(&root->ino_idr, ino);
716+
spin_lock(&kernfs_idr_lock);
717+
718+
kn = idr_find(&root->ino_idr, (u32)ino);
722719
if (!kn)
723-
goto out;
720+
goto err_unlock;
724721

725-
/*
726-
* Since kernfs_node is freed in RCU, it's possible an old node for ino
727-
* is freed, but reused before RCU grace period. But a freed node (see
728-
kernfs_put) or an incompletely initialized node (see
729-
* __kernfs_new_node) should have 'count' 0. We can use this fact to
730-
* filter out such node.
731-
*/
732-
if (!atomic_inc_not_zero(&kn->count)) {
733-
kn = NULL;
734-
goto out;
722+
if (sizeof(ino_t) >= sizeof(u64)) {
723+
/* we looked up with the low 32bits, compare the whole */
724+
if (kernfs_ino(kn) != ino)
725+
goto err_unlock;
726+
} else {
727+
/* 0 matches all generations */
728+
if (unlikely(gen && kernfs_gen(kn) != gen))
729+
goto err_unlock;
735730
}
736731

737732
/*
738-
* The node could be a new node or a reused node. If it's a new node,
739-
* we are ok. If it's reused because of RCU (because of
740-
* SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino'
741-
* before 'count'. So if 'count' is uptodate, 'ino' should be uptodate,
742-
* hence we can use 'ino' to filter stale node.
733+
* ACTIVATED is protected with kernfs_mutex but it was clear when
734+
* @kn was added to idr and we just wanna see it set. No need to
735+
* grab kernfs_mutex.
743736
*/
744-
if (kn->id.ino != ino)
745-
goto out;
746-
rcu_read_unlock();
737+
if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
738+
!atomic_inc_not_zero(&kn->count)))
739+
goto err_unlock;
747740

741+
spin_unlock(&kernfs_idr_lock);
748742
return kn;
749-
out:
750-
rcu_read_unlock();
751-
kernfs_put(kn);
743+
err_unlock:
744+
spin_unlock(&kernfs_idr_lock);
752745
return NULL;
753746
}
754747

@@ -962,7 +955,17 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
962955

963956
idr_init(&root->ino_idr);
964957
INIT_LIST_HEAD(&root->supers);
965-
root->next_generation = 1;
958+
959+
/*
960+
* On 64bit ino setups, id is ino. On 32bit, low 32bits are ino.
961+
* High bits are generation. The starting value for both ino and
962+
* generation is 1. Initialize upper 32bit allocation
963+
* accordingly.
964+
*/
965+
if (sizeof(ino_t) >= sizeof(u64))
966+
root->id_highbits = 0;
967+
else
968+
root->id_highbits = 1;
966969

967970
kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
968971
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
@@ -1678,7 +1681,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
16781681
const char *name = pos->name;
16791682
unsigned int type = dt_type(pos);
16801683
int len = strlen(name);
1681-
ino_t ino = pos->id.ino;
1684+
ino_t ino = kernfs_ino(pos);
16821685

16831686
ctx->pos = pos->hash;
16841687
file->private_data = pos;

fs/kernfs/file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
892892
* have the matching @file available. Look up the inodes
893893
* and generate the events manually.
894894
*/
895-
inode = ilookup(info->sb, kn->id.ino);
895+
inode = ilookup(info->sb, kernfs_ino(kn));
896896
if (!inode)
897897
continue;
898898

@@ -901,7 +901,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
901901
if (parent) {
902902
struct inode *p_inode;
903903

904-
p_inode = ilookup(info->sb, parent->id.ino);
904+
p_inode = ilookup(info->sb, kernfs_ino(parent));
905905
if (p_inode) {
906906
fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
907907
inode, FSNOTIFY_EVENT_INODE, &name, 0);

fs/kernfs/inode.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
201201
inode->i_private = kn;
202202
inode->i_mapping->a_ops = &kernfs_aops;
203203
inode->i_op = &kernfs_iops;
204-
inode->i_generation = kn->id.generation;
204+
inode->i_generation = kernfs_gen(kn);
205205

206206
set_default_inode_attr(inode, kn->mode);
207207
kernfs_refresh_inode(kn, inode);
@@ -247,7 +247,7 @@ struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
247247
{
248248
struct inode *inode;
249249

250-
inode = iget_locked(sb, kn->id.ino);
250+
inode = iget_locked(sb, kernfs_ino(kn));
251251
if (inode && (inode->i_state & I_NEW))
252252
kernfs_init_inode(kn, inode);
253253

fs/kernfs/kernfs-internal.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,6 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
109109
const char *name, umode_t mode,
110110
kuid_t uid, kgid_t gid,
111111
unsigned flags);
112-
struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
113-
unsigned int ino);
114112

115113
/*
116114
* file.c

fs/kernfs/mount.c

Lines changed: 58 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -53,63 +53,85 @@ const struct super_operations kernfs_sops = {
5353
.show_path = kernfs_sop_show_path,
5454
};
5555

56-
/*
57-
* Similar to kernfs_fh_get_inode, this one gets kernfs node from inode
58-
* number and generation
59-
*/
60-
struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root,
61-
const union kernfs_node_id *id)
56+
static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
57+
struct inode *parent)
6258
{
63-
struct kernfs_node *kn;
59+
struct kernfs_node *kn = inode->i_private;
6460

65-
kn = kernfs_find_and_get_node_by_ino(root, id->ino);
66-
if (!kn)
67-
return NULL;
68-
if (kn->id.generation != id->generation) {
69-
kernfs_put(kn);
70-
return NULL;
61+
if (*max_len < 2) {
62+
*max_len = 2;
63+
return FILEID_INVALID;
7164
}
72-
return kn;
65+
66+
*max_len = 2;
67+
*(u64 *)fh = kn->id;
68+
return FILEID_KERNFS;
7369
}
7470

75-
static struct inode *kernfs_fh_get_inode(struct super_block *sb,
76-
u64 ino, u32 generation)
71+
static struct dentry *__kernfs_fh_to_dentry(struct super_block *sb,
72+
struct fid *fid, int fh_len,
73+
int fh_type, bool get_parent)
7774
{
7875
struct kernfs_super_info *info = kernfs_info(sb);
79-
struct inode *inode;
8076
struct kernfs_node *kn;
77+
struct inode *inode;
78+
u64 id;
8179

82-
if (ino == 0)
83-
return ERR_PTR(-ESTALE);
80+
if (fh_len < 2)
81+
return NULL;
82+
83+
switch (fh_type) {
84+
case FILEID_KERNFS:
85+
id = *(u64 *)fid;
86+
break;
87+
case FILEID_INO32_GEN:
88+
case FILEID_INO32_GEN_PARENT:
89+
/*
90+
* blk_log_action() exposes "LOW32,HIGH32" pair without
91+
* type and userland can call us with generic fid
92+
* constructed from them. Combine it back to ID. See
93+
* blk_log_action().
94+
*/
95+
id = ((u64)fid->i32.gen << 32) | fid->i32.ino;
96+
break;
97+
default:
98+
return NULL;
99+
}
84100

85-
kn = kernfs_find_and_get_node_by_ino(info->root, ino);
101+
kn = kernfs_find_and_get_node_by_id(info->root, id);
86102
if (!kn)
87103
return ERR_PTR(-ESTALE);
104+
105+
if (get_parent) {
106+
struct kernfs_node *parent;
107+
108+
parent = kernfs_get_parent(kn);
109+
kernfs_put(kn);
110+
kn = parent;
111+
if (!kn)
112+
return ERR_PTR(-ESTALE);
113+
}
114+
88115
inode = kernfs_get_inode(sb, kn);
89116
kernfs_put(kn);
90117
if (!inode)
91118
return ERR_PTR(-ESTALE);
92119

93-
if (generation && inode->i_generation != generation) {
94-
/* we didn't find the right inode.. */
95-
iput(inode);
96-
return ERR_PTR(-ESTALE);
97-
}
98-
return inode;
120+
return d_obtain_alias(inode);
99121
}
100122

101-
static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
102-
int fh_len, int fh_type)
123+
static struct dentry *kernfs_fh_to_dentry(struct super_block *sb,
124+
struct fid *fid, int fh_len,
125+
int fh_type)
103126
{
104-
return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
105-
kernfs_fh_get_inode);
127+
return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, false);
106128
}
107129

108-
static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid,
109-
int fh_len, int fh_type)
130+
static struct dentry *kernfs_fh_to_parent(struct super_block *sb,
131+
struct fid *fid, int fh_len,
132+
int fh_type)
110133
{
111-
return generic_fh_to_parent(sb, fid, fh_len, fh_type,
112-
kernfs_fh_get_inode);
134+
return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, true);
113135
}
114136

115137
static struct dentry *kernfs_get_parent_dentry(struct dentry *child)
@@ -120,6 +142,7 @@ static struct dentry *kernfs_get_parent_dentry(struct dentry *child)
120142
}
121143

122144
static const struct export_operations kernfs_export_ops = {
145+
.encode_fh = kernfs_encode_fh,
123146
.fh_to_dentry = kernfs_fh_to_dentry,
124147
.fh_to_parent = kernfs_fh_to_parent,
125148
.get_parent = kernfs_get_parent_dentry,
@@ -363,18 +386,9 @@ void kernfs_kill_sb(struct super_block *sb)
363386

364387
void __init kernfs_init(void)
365388
{
366-
367-
/*
368-
* the slab is freed in RCU context, so kernfs_find_and_get_node_by_ino
369-
* can access the slab lock free. This could introduce stale nodes,
370-
* please see how kernfs_find_and_get_node_by_ino filters out stale
371-
* nodes.
372-
*/
373389
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
374390
sizeof(struct kernfs_node),
375-
0,
376-
SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
377-
NULL);
391+
0, SLAB_PANIC, NULL);
378392

379393
/* Creates slab cache for kernfs inode attributes */
380394
kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache",

0 commit comments

Comments
 (0)