Skip to content

Commit 03fa86e

Browse files
author
Al Viro
committed
namei: stash the sampled ->d_seq into nameidata
New field: nd->next_seq. Set to 0 outside of RCU mode, holds the sampled value for the next dentry to be considered. Used instead of an arseload of local variables, arguments, etc. step_into() has lost seq argument; nd->next_seq is used, so dentry passed to it must be the one ->next_seq is about. There are two requirements for RCU pathwalk: 1) it should not give a hard failure (other than -ECHILD) unless non-RCU pathwalk might fail that way given suitable timings; 2) it should not succeed unless non-RCU pathwalk might succeed with the same end location given suitable timings. The use of seq numbers is the way we achieve that. Invariant we want to maintain is: if RCU pathwalk can reach the state with given nd->path, nd->inode and nd->seq after having traversed some part of pathname, it must be possible for non-RCU pathwalk to reach the same nd->path and nd->inode after having traversed the same part of pathname, and observe the nd->path.dentry->d_seq equal to what RCU pathwalk has in nd->seq. For transition from parent to child, we sample child's ->d_seq and verify that parent's ->d_seq remains unchanged. Anything that disrupts parent-child relationship would've bumped ->d_seq on both. For transitions from child to parent we sample parent's ->d_seq and verify that child's ->d_seq has not changed. Same reasoning as for the previous case applies. For transition from mountpoint to root of mounted we sample the ->d_seq of root and verify that nobody has touched mount_lock since the beginning of pathwalk. That guarantees that mount we'd found had been there all along, with these mountpoint and root of the mounted. It would be possible for a non-RCU pathwalk to reach the previous state, find the same mount and observe its root at the moment we'd sampled ->d_seq of that root. For transitions from root of mounted to mountpoint we sample ->d_seq of mountpoint and verify that mount_lock had not been touched since the beginning of pathwalk. 
The same reasoning as in the previous case applies. Signed-off-by: Al Viro <[email protected]>
1 parent 6e18032 commit 03fa86e

File tree

1 file changed

+48
-50
lines changed

1 file changed

+48
-50
lines changed

fs/namei.c

Lines changed: 48 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ struct nameidata {
567567
struct path root;
568568
struct inode *inode; /* path.dentry.d_inode */
569569
unsigned int flags, state;
570-
unsigned seq, m_seq, r_seq;
570+
unsigned seq, next_seq, m_seq, r_seq;
571571
int last_type;
572572
unsigned depth;
573573
int total_link_count;
@@ -668,6 +668,7 @@ static void drop_links(struct nameidata *nd)
668668
static void leave_rcu(struct nameidata *nd)
669669
{
670670
nd->flags &= ~LOOKUP_RCU;
671+
nd->seq = nd->next_seq = 0;
671672
rcu_read_unlock();
672673
}
673674

@@ -792,7 +793,6 @@ static bool try_to_unlazy(struct nameidata *nd)
792793
* try_to_unlazy_next - try to switch to ref-walk mode.
793794
* @nd: nameidata pathwalk data
794795
* @dentry: next dentry to step into
795-
* @seq: seq number to check @dentry against
796796
* Returns: true on success, false on failure
797797
*
798798
* Similar to try_to_unlazy(), but here we have the next dentry already
@@ -801,7 +801,7 @@ static bool try_to_unlazy(struct nameidata *nd)
801801
* Nothing should touch nameidata between try_to_unlazy_next() failure and
802802
* terminate_walk().
803803
*/
804-
static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
804+
static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
805805
{
806806
int res;
807807
BUG_ON(!(nd->flags & LOOKUP_RCU));
@@ -826,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
826826
*/
827827
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
828828
goto out;
829-
if (read_seqcount_retry(&dentry->d_seq, seq))
829+
if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
830830
goto out_dput;
831831
/*
832832
* Sequence counts matched. Now make sure that the root is
@@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(follow_down);
14751475
* we meet a managed dentry that would need blocking.
14761476
*/
14771477
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1478-
struct inode **inode, unsigned *seqp)
1478+
struct inode **inode)
14791479
{
14801480
struct dentry *dentry = path->dentry;
14811481
unsigned int flags = dentry->d_flags;
@@ -1504,7 +1504,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
15041504
path->mnt = &mounted->mnt;
15051505
dentry = path->dentry = mounted->mnt.mnt_root;
15061506
nd->state |= ND_JUMPED;
1507-
*seqp = read_seqcount_begin(&dentry->d_seq);
1507+
nd->next_seq = read_seqcount_begin(&dentry->d_seq);
15081508
*inode = dentry->d_inode;
15091509
/*
15101510
* We don't need to re-check ->d_seq after this
@@ -1513,6 +1513,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
15131513
* becoming unpinned.
15141514
*/
15151515
flags = dentry->d_flags;
1516+
// makes sure that non-RCU pathwalk could reach
1517+
// this state.
15161518
if (read_seqretry(&mount_lock, nd->m_seq))
15171519
return false;
15181520
continue;
@@ -1525,25 +1527,25 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
15251527
}
15261528

15271529
static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
1528-
struct path *path, struct inode **inode,
1529-
unsigned int *seqp)
1530+
struct path *path, struct inode **inode)
15301531
{
15311532
bool jumped;
15321533
int ret;
15331534

15341535
path->mnt = nd->path.mnt;
15351536
path->dentry = dentry;
15361537
if (nd->flags & LOOKUP_RCU) {
1537-
unsigned int seq = *seqp;
1538+
unsigned int seq = nd->next_seq;
15381539
if (unlikely(!*inode))
15391540
return -ENOENT;
1540-
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
1541+
if (likely(__follow_mount_rcu(nd, path, inode)))
15411542
return 0;
1542-
if (!try_to_unlazy_next(nd, dentry, seq))
1543-
return -ECHILD;
1544-
// *path might've been clobbered by __follow_mount_rcu()
1543+
// *path and nd->next_seq might've been clobbered
15451544
path->mnt = nd->path.mnt;
15461545
path->dentry = dentry;
1546+
nd->next_seq = seq;
1547+
if (!try_to_unlazy_next(nd, dentry))
1548+
return -ECHILD;
15471549
}
15481550
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
15491551
if (jumped) {
@@ -1558,7 +1560,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
15581560
mntput(path->mnt);
15591561
} else {
15601562
*inode = d_backing_inode(path->dentry);
1561-
*seqp = 0; /* out of RCU mode, so the value doesn't matter */
15621563
}
15631564
return ret;
15641565
}
@@ -1618,8 +1619,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
16181619
}
16191620

16201621
static struct dentry *lookup_fast(struct nameidata *nd,
1621-
struct inode **inode,
1622-
unsigned *seqp)
1622+
struct inode **inode)
16231623
{
16241624
struct dentry *dentry, *parent = nd->path.dentry;
16251625
int status = 1;
@@ -1630,8 +1630,7 @@ static struct dentry *lookup_fast(struct nameidata *nd,
16301630
* going to fall back to non-racy lookup.
16311631
*/
16321632
if (nd->flags & LOOKUP_RCU) {
1633-
unsigned seq;
1634-
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
1633+
dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
16351634
if (unlikely(!dentry)) {
16361635
if (!try_to_unlazy(nd))
16371636
return ERR_PTR(-ECHILD);
@@ -1643,7 +1642,7 @@ static struct dentry *lookup_fast(struct nameidata *nd,
16431642
* the dentry name information from lookup.
16441643
*/
16451644
*inode = d_backing_inode(dentry);
1646-
if (read_seqcount_retry(&dentry->d_seq, seq))
1645+
if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
16471646
return ERR_PTR(-ECHILD);
16481647

16491648
/*
@@ -1656,11 +1655,10 @@ static struct dentry *lookup_fast(struct nameidata *nd,
16561655
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
16571656
return ERR_PTR(-ECHILD);
16581657

1659-
*seqp = seq;
16601658
status = d_revalidate(dentry, nd->flags);
16611659
if (likely(status > 0))
16621660
return dentry;
1663-
if (!try_to_unlazy_next(nd, dentry, seq))
1661+
if (!try_to_unlazy_next(nd, dentry))
16641662
return ERR_PTR(-ECHILD);
16651663
if (status == -ECHILD)
16661664
/* we'd been told to redo it in non-rcu mode */
@@ -1741,7 +1739,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns,
17411739
return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
17421740
}
17431741

1744-
static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
1742+
static int reserve_stack(struct nameidata *nd, struct path *link)
17451743
{
17461744
if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
17471745
return -ELOOP;
@@ -1756,7 +1754,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
17561754
if (nd->flags & LOOKUP_RCU) {
17571755
// we need to grab link before we do unlazy. And we can't skip
17581756
// unlazy even if we fail to grab the link - cleanup needs it
1759-
bool grabbed_link = legitimize_path(nd, link, seq);
1757+
bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
17601758

17611759
if (!try_to_unlazy(nd) || !grabbed_link)
17621760
return -ECHILD;
@@ -1770,11 +1768,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
17701768
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
17711769

17721770
static const char *pick_link(struct nameidata *nd, struct path *link,
1773-
struct inode *inode, unsigned seq, int flags)
1771+
struct inode *inode, int flags)
17741772
{
17751773
struct saved *last;
17761774
const char *res;
1777-
int error = reserve_stack(nd, link, seq);
1775+
int error = reserve_stack(nd, link);
17781776

17791777
if (unlikely(error)) {
17801778
if (!(nd->flags & LOOKUP_RCU))
@@ -1784,7 +1782,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
17841782
last = nd->stack + nd->depth++;
17851783
last->link = *link;
17861784
clear_delayed_call(&last->done);
1787-
last->seq = seq;
1785+
last->seq = nd->next_seq;
17881786

17891787
if (flags & WALK_TRAILING) {
17901788
error = may_follow_link(nd, inode);
@@ -1846,12 +1844,14 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
18461844
* to do this check without having to look at inode->i_op,
18471845
* so we keep a cache of "no, this doesn't need follow_link"
18481846
* for the common case.
1847+
*
1848+
* NOTE: dentry must be what nd->next_seq had been sampled from.
18491849
*/
18501850
static const char *step_into(struct nameidata *nd, int flags,
1851-
struct dentry *dentry, struct inode *inode, unsigned seq)
1851+
struct dentry *dentry, struct inode *inode)
18521852
{
18531853
struct path path;
1854-
int err = handle_mounts(nd, dentry, &path, &inode, &seq);
1854+
int err = handle_mounts(nd, dentry, &path, &inode);
18551855

18561856
if (err < 0)
18571857
return ERR_PTR(err);
@@ -1866,23 +1866,22 @@ static const char *step_into(struct nameidata *nd, int flags,
18661866
}
18671867
nd->path = path;
18681868
nd->inode = inode;
1869-
nd->seq = seq;
1869+
nd->seq = nd->next_seq;
18701870
return NULL;
18711871
}
18721872
if (nd->flags & LOOKUP_RCU) {
18731873
/* make sure that d_is_symlink above matches inode */
1874-
if (read_seqcount_retry(&path.dentry->d_seq, seq))
1874+
if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
18751875
return ERR_PTR(-ECHILD);
18761876
} else {
18771877
if (path.mnt == nd->path.mnt)
18781878
mntget(path.mnt);
18791879
}
1880-
return pick_link(nd, &path, inode, seq, flags);
1880+
return pick_link(nd, &path, inode, flags);
18811881
}
18821882

18831883
static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
1884-
struct inode **inodep,
1885-
unsigned *seqp)
1884+
struct inode **inodep)
18861885
{
18871886
struct dentry *parent, *old;
18881887

@@ -1899,14 +1898,16 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
18991898
nd->path = path;
19001899
nd->inode = path.dentry->d_inode;
19011900
nd->seq = seq;
1901+
// makes sure that non-RCU pathwalk could reach this state
19021902
if (read_seqretry(&mount_lock, nd->m_seq))
19031903
return ERR_PTR(-ECHILD);
19041904
/* we know that mountpoint was pinned */
19051905
}
19061906
old = nd->path.dentry;
19071907
parent = old->d_parent;
19081908
*inodep = parent->d_inode;
1909-
*seqp = read_seqcount_begin(&parent->d_seq);
1909+
nd->next_seq = read_seqcount_begin(&parent->d_seq);
1910+
// makes sure that non-RCU pathwalk could reach this state
19101911
if (read_seqcount_retry(&old->d_seq, nd->seq))
19111912
return ERR_PTR(-ECHILD);
19121913
if (unlikely(!path_connected(nd->path.mnt, parent)))
@@ -1917,14 +1918,13 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
19171918
return ERR_PTR(-ECHILD);
19181919
if (unlikely(nd->flags & LOOKUP_BENEATH))
19191920
return ERR_PTR(-ECHILD);
1920-
*seqp = nd->seq;
1921+
nd->next_seq = nd->seq;
19211922
*inodep = nd->path.dentry->d_inode;
19221923
return nd->path.dentry;
19231924
}
19241925

19251926
static struct dentry *follow_dotdot(struct nameidata *nd,
1926-
struct inode **inodep,
1927-
unsigned *seqp)
1927+
struct inode **inodep)
19281928
{
19291929
struct dentry *parent;
19301930

@@ -1948,14 +1948,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
19481948
dput(parent);
19491949
return ERR_PTR(-ENOENT);
19501950
}
1951-
*seqp = 0;
19521951
*inodep = parent->d_inode;
19531952
return parent;
19541953

19551954
in_root:
19561955
if (unlikely(nd->flags & LOOKUP_BENEATH))
19571956
return ERR_PTR(-EXDEV);
1958-
*seqp = 0;
19591957
*inodep = nd->path.dentry->d_inode;
19601958
return dget(nd->path.dentry);
19611959
}
@@ -1966,20 +1964,19 @@ static const char *handle_dots(struct nameidata *nd, int type)
19661964
const char *error = NULL;
19671965
struct dentry *parent;
19681966
struct inode *inode;
1969-
unsigned seq;
19701967

19711968
if (!nd->root.mnt) {
19721969
error = ERR_PTR(set_root(nd));
19731970
if (error)
19741971
return error;
19751972
}
19761973
if (nd->flags & LOOKUP_RCU)
1977-
parent = follow_dotdot_rcu(nd, &inode, &seq);
1974+
parent = follow_dotdot_rcu(nd, &inode);
19781975
else
1979-
parent = follow_dotdot(nd, &inode, &seq);
1976+
parent = follow_dotdot(nd, &inode);
19801977
if (IS_ERR(parent))
19811978
return ERR_CAST(parent);
1982-
error = step_into(nd, WALK_NOFOLLOW, parent, inode, seq);
1979+
error = step_into(nd, WALK_NOFOLLOW, parent, inode);
19831980
if (unlikely(error))
19841981
return error;
19851982

@@ -2004,7 +2001,6 @@ static const char *walk_component(struct nameidata *nd, int flags)
20042001
{
20052002
struct dentry *dentry;
20062003
struct inode *inode;
2007-
unsigned seq;
20082004
/*
20092005
* "." and ".." are special - ".." especially so because it has
20102006
* to be able to know about the current root directory and
@@ -2015,7 +2011,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
20152011
put_link(nd);
20162012
return handle_dots(nd, nd->last_type);
20172013
}
2018-
dentry = lookup_fast(nd, &inode, &seq);
2014+
dentry = lookup_fast(nd, &inode);
20192015
if (IS_ERR(dentry))
20202016
return ERR_CAST(dentry);
20212017
if (unlikely(!dentry)) {
@@ -2025,7 +2021,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
20252021
}
20262022
if (!(flags & WALK_MORE) && nd->depth)
20272023
put_link(nd);
2028-
return step_into(nd, flags, dentry, inode, seq);
2024+
return step_into(nd, flags, dentry, inode);
20292025
}
20302026

20312027
/*
@@ -2380,6 +2376,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
23802376
flags &= ~LOOKUP_RCU;
23812377
if (flags & LOOKUP_RCU)
23822378
rcu_read_lock();
2379+
else
2380+
nd->seq = nd->next_seq = 0;
23832381

23842382
nd->flags = flags;
23852383
nd->state |= ND_JUMPED;
@@ -2481,8 +2479,9 @@ static int handle_lookup_down(struct nameidata *nd)
24812479
{
24822480
if (!(nd->flags & LOOKUP_RCU))
24832481
dget(nd->path.dentry);
2482+
nd->next_seq = nd->seq;
24842483
return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
2485-
nd->path.dentry, nd->inode, nd->seq));
2484+
nd->path.dentry, nd->inode));
24862485
}
24872486

24882487
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
@@ -3401,7 +3400,6 @@ static const char *open_last_lookups(struct nameidata *nd,
34013400
struct dentry *dir = nd->path.dentry;
34023401
int open_flag = op->open_flag;
34033402
bool got_write = false;
3404-
unsigned seq;
34053403
struct inode *inode;
34063404
struct dentry *dentry;
34073405
const char *res;
@@ -3418,7 +3416,7 @@ static const char *open_last_lookups(struct nameidata *nd,
34183416
if (nd->last.name[nd->last.len])
34193417
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
34203418
/* we _can_ be in RCU mode here */
3421-
dentry = lookup_fast(nd, &inode, &seq);
3419+
dentry = lookup_fast(nd, &inode);
34223420
if (IS_ERR(dentry))
34233421
return ERR_CAST(dentry);
34243422
if (likely(dentry))
@@ -3472,7 +3470,7 @@ static const char *open_last_lookups(struct nameidata *nd,
34723470
finish_lookup:
34733471
if (nd->depth)
34743472
put_link(nd);
3475-
res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
3473+
res = step_into(nd, WALK_TRAILING, dentry, inode);
34763474
if (unlikely(res))
34773475
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
34783476
return res;

0 commit comments

Comments
 (0)