Skip to content

Commit dfe51d4

Browse files
amir73il authored and Miklos Szeredi committed
ovl: avoid possible inode number collisions with xino=on
When xino feature is enabled and a real directory inode number overflows the lower xino bits, we cannot map this directory inode number to a unique and persistent inode number and we fall back to the real inode st_ino and overlay st_dev. The real inode st_ino with high bits may collide with a lower inode number on overlay st_dev that was mapped using xino. To avoid possible collision with legitimate xino values, map a non persistent inode number to a dedicated range in the xino address space. The dedicated range is created by adding one more bit to the number of reserved high xino bits. We could have added just one more fsid, but that would have had the undesired effect of changing persistent overlay inode numbers on kernel upgrade, or would have required more complex xino mapping code. Signed-off-by: Amir Goldstein <[email protected]> Signed-off-by: Miklos Szeredi <[email protected]>
1 parent 4d314f7 commit dfe51d4

File tree

3 files changed

+45
-17
lines changed

3 files changed

+45
-17
lines changed

fs/overlayfs/inode.c

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
7979
{
8080
bool samefs = ovl_same_fs(dentry->d_sb);
8181
unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
82+
unsigned int xinoshift = 64 - xinobits;
8283

8384
if (samefs) {
8485
/*
@@ -89,20 +90,20 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
8990
stat->dev = dentry->d_sb->s_dev;
9091
return 0;
9192
} else if (xinobits) {
92-
unsigned int shift = 64 - xinobits;
9393
/*
9494
* All inode numbers of underlying fs should not be using the
9595
* high xinobits, so we use high xinobits to partition the
9696
* overlay st_ino address space. The high bits holds the fsid
97-
* (upper fsid is 0). This way overlay inode numbers are unique
98-
* and all inodes use overlay st_dev. Inode numbers are also
99-
* persistent for a given layer configuration.
97+
* (upper fsid is 0). The lowest xinobit is reserved for mapping
98+
* the non-persistent inode numbers range in case of overflow.
99+
* This way all overlay inode numbers are unique and use the
100+
* overlay st_dev.
100101
*/
101-
if (stat->ino >> shift) {
102+
if (unlikely(stat->ino >> xinoshift)) {
102103
pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
103104
dentry, stat->ino, xinobits);
104105
} else {
105-
stat->ino |= ((u64)fsid) << shift;
106+
stat->ino |= ((u64)fsid) << (xinoshift + 1);
106107
stat->dev = dentry->d_sb->s_dev;
107108
return 0;
108109
}
@@ -573,6 +574,7 @@ static void ovl_next_ino(struct inode *inode)
573574
static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
574575
{
575576
int xinobits = ovl_xino_bits(inode->i_sb);
577+
unsigned int xinoshift = 64 - xinobits;
576578

577579
/*
578580
* When d_ino is consistent with st_ino (samefs or i_ino has enough
@@ -582,11 +584,28 @@ static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
582584
* with d_ino also causes nfsd readdirplus to fail.
583585
*/
584586
inode->i_ino = ino;
585-
if (ovl_same_dev(inode->i_sb)) {
586-
if (xinobits && fsid && !(ino >> (64 - xinobits)))
587-
inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
588-
} else if (S_ISDIR(inode->i_mode)) {
587+
if (ovl_same_fs(inode->i_sb)) {
588+
return;
589+
} else if (xinobits && likely(!(ino >> xinoshift))) {
590+
inode->i_ino |= (unsigned long)fsid << (xinoshift + 1);
591+
return;
592+
}
593+
594+
/*
595+
* For directory inodes on non-samefs with xino disabled or xino
596+
* overflow, we allocate a non-persistent inode number, to be used for
597+
* resolving st_ino collisions in ovl_map_dev_ino().
598+
*
599+
* To avoid ino collision with legitimate xino values from upper
600+
* layer (fsid 0), use the lowest xinobit to map the non
601+
* persistent inode numbers to the unified st_ino address space.
602+
*/
603+
if (S_ISDIR(inode->i_mode)) {
589604
ovl_next_ino(inode);
605+
if (xinobits) {
606+
inode->i_ino &= ~0UL >> xinobits;
607+
inode->i_ino |= 1UL << xinoshift;
608+
}
590609
}
591610
}
592611

fs/overlayfs/readdir.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,13 +440,19 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
440440
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
441441
const char *name, int namelen)
442442
{
443-
if (ino >> (64 - xinobits)) {
443+
unsigned int xinoshift = 64 - xinobits;
444+
445+
if (unlikely(ino >> xinoshift)) {
444446
pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
445447
namelen, name, ino, xinobits);
446448
return ino;
447449
}
448450

449-
return ino | ((u64)fsid) << (64 - xinobits);
451+
/*
452+
* The lowest xinobit is reserved for mapping the non-persistent inode
453+
* numbers range, but this range is only exposed via st_ino, not here.
454+
*/
455+
return ino | ((u64)fsid) << (xinoshift + 1);
450456
}
451457

452458
/*

fs/overlayfs/super.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,7 +1483,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
14831483
* free high bits in underlying fs to hold the unique fsid.
14841484
* If overlayfs does encounter underlying inodes using the high xino
14851485
* bits reserved for fsid, it emits a warning and uses the original
1486-
* inode number.
1486+
* inode number or a non persistent inode number allocated from a
1487+
* dedicated range.
14871488
*/
14881489
if (ofs->numfs - !ofs->upper_mnt == 1) {
14891490
if (ofs->config.xino == OVL_XINO_ON)
@@ -1494,11 +1495,13 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
14941495
} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
14951496
/*
14961497
* This is a roundup of number of bits needed for encoding
1497-
* fsid, where fsid 0 is reserved for upper fs even with
1498-
* lower only overlay.
1498+
* fsid, where fsid 0 is reserved for upper fs (even with
1499+
* lower only overlay), plus 1 extra bit reserved for the non
1500+
* persistent inode number range that is used for resolving
1501+
* xino lower bits overflow.
14991502
*/
1500-
BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
1501-
ofs->xino_mode = ilog2(ofs->numfs - 1) + 1;
1503+
BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
1504+
ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
15021505
}
15031506

15041507
if (ofs->xino_mode > 0) {

0 commit comments

Comments
 (0)