Skip to content

Commit 7bc3e6e

Browse files
committed
proc: Use a list of inodes to flush from proc
Rework the flushing of proc to use a list of directory inodes that need to be flushed. The list is kept on struct pid, not on struct task_struct, as there is a fixed connection between proc inodes and pids, but at least in the case of de_thread the pid of a task_struct changes. This removes the dependency on proc_mnt, which allows different mounts of proc to have different mount options even in the same pid namespace, and this allows for the removal of proc_mnt, which will trivially allow the first mount of proc to honor its mount options. This flushing remains an optimization. The functions pid_delete_dentry and pid_revalidate ensure that ordinary dcache management will not attempt to use dentries past the point their respective task has died. When unused, the shrinker will eventually be able to remove these dentries. There is a case in de_thread where proc_flush_pid can be called early for a given pid, which winds up being safe (if suboptimal) as this is just an optimization. Only pid directories are put on the list, as the other per-pid files are children of those directories and d_invalidate on the directory will get them as well. So that the pid can be used during flushing, its reference count is taken in release_task and dropped in proc_flush_pid. Further, the call of proc_flush_pid is moved after the tasklist_lock is released in release_task so that it is certain that the pid has already been unhashed when flushing is taking place. This removes a small race where a dentry could be recreated. As struct pid is supposed to be small and I need a per-pid lock, I reuse the only lock that currently exists in struct pid, the wait_pidfd.lock. The net result is that this adds all of this functionality with just a little extra list management overhead and a single extra pointer in struct pid. v2: Initialize pid->inodes. I somehow failed to get that initialization into the initial version of the patch. 
A boot failure was reported by "kernel test robot <[email protected]>", and failure to initialize that pid->inodes matches all of the reported symptoms. Signed-off-by: Eric W. Biederman <[email protected]>
1 parent 7144801 commit 7bc3e6e

File tree

7 files changed

+45
-79
lines changed

7 files changed

+45
-79
lines changed

fs/proc/base.c

Lines changed: 36 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,11 +1834,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
18341834
*rgid = gid;
18351835
}
18361836

1837+
void proc_pid_evict_inode(struct proc_inode *ei)
1838+
{
1839+
struct pid *pid = ei->pid;
1840+
1841+
if (S_ISDIR(ei->vfs_inode.i_mode)) {
1842+
spin_lock(&pid->wait_pidfd.lock);
1843+
hlist_del_init_rcu(&ei->sibling_inodes);
1844+
spin_unlock(&pid->wait_pidfd.lock);
1845+
}
1846+
1847+
put_pid(pid);
1848+
}
1849+
18371850
struct inode *proc_pid_make_inode(struct super_block * sb,
18381851
struct task_struct *task, umode_t mode)
18391852
{
18401853
struct inode * inode;
18411854
struct proc_inode *ei;
1855+
struct pid *pid;
18421856

18431857
/* We need a new inode */
18441858

@@ -1856,10 +1870,18 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
18561870
/*
18571871
* grab the reference to task.
18581872
*/
1859-
ei->pid = get_task_pid(task, PIDTYPE_PID);
1860-
if (!ei->pid)
1873+
pid = get_task_pid(task, PIDTYPE_PID);
1874+
if (!pid)
18611875
goto out_unlock;
18621876

1877+
/* Let the pid remember us for quick removal */
1878+
ei->pid = pid;
1879+
if (S_ISDIR(mode)) {
1880+
spin_lock(&pid->wait_pidfd.lock);
1881+
hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
1882+
spin_unlock(&pid->wait_pidfd.lock);
1883+
}
1884+
18631885
task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
18641886
security_task_to_inode(task, inode);
18651887

@@ -3230,90 +3252,29 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
32303252
.permission = proc_pid_permission,
32313253
};
32323254

3233-
static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
3234-
{
3235-
struct dentry *dentry, *leader, *dir;
3236-
char buf[10 + 1];
3237-
struct qstr name;
3238-
3239-
name.name = buf;
3240-
name.len = snprintf(buf, sizeof(buf), "%u", pid);
3241-
/* no ->d_hash() rejects on procfs */
3242-
dentry = d_hash_and_lookup(mnt->mnt_root, &name);
3243-
if (dentry) {
3244-
d_invalidate(dentry);
3245-
dput(dentry);
3246-
}
3247-
3248-
if (pid == tgid)
3249-
return;
3250-
3251-
name.name = buf;
3252-
name.len = snprintf(buf, sizeof(buf), "%u", tgid);
3253-
leader = d_hash_and_lookup(mnt->mnt_root, &name);
3254-
if (!leader)
3255-
goto out;
3256-
3257-
name.name = "task";
3258-
name.len = strlen(name.name);
3259-
dir = d_hash_and_lookup(leader, &name);
3260-
if (!dir)
3261-
goto out_put_leader;
3262-
3263-
name.name = buf;
3264-
name.len = snprintf(buf, sizeof(buf), "%u", pid);
3265-
dentry = d_hash_and_lookup(dir, &name);
3266-
if (dentry) {
3267-
d_invalidate(dentry);
3268-
dput(dentry);
3269-
}
3270-
3271-
dput(dir);
3272-
out_put_leader:
3273-
dput(leader);
3274-
out:
3275-
return;
3276-
}
3277-
32783255
/**
3279-
* proc_flush_task - Remove dcache entries for @task from the /proc dcache.
3280-
* @task: task that should be flushed.
3256+
* proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
3257+
* @pid: pid that should be flushed.
32813258
*
3282-
* When flushing dentries from proc, one needs to flush them from global
3283-
* proc (proc_mnt) and from all the namespaces' procs this task was seen
3284-
* in. This call is supposed to do all of this job.
3285-
*
3286-
* Looks in the dcache for
3287-
* /proc/@pid
3288-
* /proc/@tgid/task/@pid
3289-
* if either directory is present flushes it and all of it'ts children
3290-
* from the dcache.
3259+
* This function walks a list of inodes (that belong to any proc
3260+
* filesystem) that are attached to the pid and flushes them from
3261+
* the dentry cache.
32913262
*
32923263
* It is safe and reasonable to cache /proc entries for a task until
32933264
* that task exits. After that they just clog up the dcache with
32943265
* useless entries, possibly causing useful dcache entries to be
3295-
* flushed instead. This routine is proved to flush those useless
3296-
* dcache entries at process exit time.
3266+
* flushed instead. This routine is provided to flush those useless
3267+
* dcache entries when a process is reaped.
32973268
*
32983269
* NOTE: This routine is just an optimization so it does not guarantee
3299-
* that no dcache entries will exist at process exit time it
3300-
* just makes it very unlikely that any will persist.
3270+
* that no dcache entries will exist after a process is reaped
3271+
* it just makes it very unlikely that any will persist.
33013272
*/
33023273

3303-
void proc_flush_task(struct task_struct *task)
3274+
void proc_flush_pid(struct pid *pid)
33043275
{
3305-
int i;
3306-
struct pid *pid, *tgid;
3307-
struct upid *upid;
3308-
3309-
pid = task_pid(task);
3310-
tgid = task_tgid(task);
3311-
3312-
for (i = 0; i <= pid->level; i++) {
3313-
upid = &pid->numbers[i];
3314-
proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
3315-
tgid->numbers[i].nr);
3316-
}
3276+
proc_invalidate_siblings_dcache(&pid->inodes, &pid->wait_pidfd.lock);
3277+
put_pid(pid);
33173278
}
33183279

33193280
static struct dentry *proc_pid_instantiate(struct dentry * dentry,

fs/proc/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static void proc_evict_inode(struct inode *inode)
4040

4141
/* Stop tracking associated processes */
4242
if (ei->pid) {
43-
put_pid(ei->pid);
43+
proc_pid_evict_inode(ei);
4444
ei->pid = NULL;
4545
}
4646

fs/proc/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
158158
extern const struct dentry_operations pid_dentry_operations;
159159
extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
160160
extern int proc_setattr(struct dentry *, struct iattr *);
161+
extern void proc_pid_evict_inode(struct proc_inode *);
161162
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
162163
extern void pid_update_inode(struct task_struct *, struct inode *);
163164
extern int pid_delete_dentry(const struct dentry *);

include/linux/pid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ struct pid
6262
unsigned int level;
6363
/* lists of tasks that use this pid */
6464
struct hlist_head tasks[PIDTYPE_MAX];
65+
struct hlist_head inodes;
6566
/* wait queue for pidfd notifications */
6667
wait_queue_head_t wait_pidfd;
6768
struct rcu_head rcu;

include/linux/proc_fs.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ struct proc_ops {
3232
typedef int (*proc_write_t)(struct file *, char *, size_t);
3333

3434
extern void proc_root_init(void);
35-
extern void proc_flush_task(struct task_struct *);
35+
extern void proc_flush_pid(struct pid *);
3636

3737
extern struct proc_dir_entry *proc_symlink(const char *,
3838
struct proc_dir_entry *, const char *);
@@ -105,7 +105,7 @@ static inline void proc_root_init(void)
105105
{
106106
}
107107

108-
static inline void proc_flush_task(struct task_struct *task)
108+
static inline void proc_flush_pid(struct pid *pid)
109109
{
110110
}
111111

kernel/exit.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ void put_task_struct_rcu_user(struct task_struct *task)
191191
void release_task(struct task_struct *p)
192192
{
193193
struct task_struct *leader;
194+
struct pid *thread_pid;
194195
int zap_leader;
195196
repeat:
196197
/* don't need to get the RCU readlock here - the process is dead and
@@ -199,11 +200,11 @@ void release_task(struct task_struct *p)
199200
atomic_dec(&__task_cred(p)->user->processes);
200201
rcu_read_unlock();
201202

202-
proc_flush_task(p);
203203
cgroup_release(p);
204204

205205
write_lock_irq(&tasklist_lock);
206206
ptrace_release_task(p);
207+
thread_pid = get_pid(p->thread_pid);
207208
__exit_signal(p);
208209

209210
/*
@@ -226,6 +227,7 @@ void release_task(struct task_struct *p)
226227
}
227228

228229
write_unlock_irq(&tasklist_lock);
230+
proc_flush_pid(thread_pid);
229231
release_thread(p);
230232
put_task_struct_rcu_user(p);
231233

kernel/pid.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
258258
INIT_HLIST_HEAD(&pid->tasks[type]);
259259

260260
init_waitqueue_head(&pid->wait_pidfd);
261+
INIT_HLIST_HEAD(&pid->inodes);
261262

262263
upid = pid->numbers + ns->level;
263264
spin_lock_irq(&pidmap_lock);

0 commit comments

Comments
 (0)