Skip to content

Commit 4f11918

Browse files
committed
Merge branch 'readdir' (readdir speedup and sanity checking)
This makes getdents() and getdents64() do sanity checking on the pathname that it gives to user space. And to mitigate the performance impact of that, it first cleans up the way it does the user copying, so that the code avoids doing the SMAP/PAN updates between each part of the dirent structure write. I really wanted to do this during the merge window, but didn't have time. The conversion of filldir to unsafe_put_user() is something I've had around for years now in a private branch, but the extra pathname checking finally made me clean it up to the point where it is mergable. It's worth noting that the filename validity checking really should be a bit smarter: it would be much better to delay the error reporting until the end of the readdir, so that non-corrupted filenames are still returned. But that involves bigger changes, so let's see if anybody actually hits the corrupt directory entry case before worrying about it further. * branch 'readdir': Make filldir[64]() verify the directory entry filename is valid Convert filldir[64]() from __put_user() to unsafe_put_user()
2 parents 9819a30 + 8a23eb8 commit 4f11918

File tree

1 file changed

+133
-35
lines changed

1 file changed

+133
-35
lines changed

fs/readdir.c

Lines changed: 133 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,63 @@
2020
#include <linux/syscalls.h>
2121
#include <linux/unistd.h>
2222
#include <linux/compat.h>
23-
2423
#include <linux/uaccess.h>
2524

25+
#include <asm/unaligned.h>
26+
27+
/*
28+
* Note the "unsafe_put_user() semantics: we goto a
29+
* label for errors.
30+
*
31+
* Also note how we use a "while()" loop here, even though
32+
* only the biggest size needs to loop. The compiler (well,
33+
* at least gcc) is smart enough to turn the smaller sizes
34+
* into just if-statements, and this way we don't need to
35+
* care whether 'u64' or 'u32' is the biggest size.
36+
*/
37+
#define unsafe_copy_loop(dst, src, len, type, label) \
38+
while (len >= sizeof(type)) { \
39+
unsafe_put_user(get_unaligned((type *)src), \
40+
(type __user *)dst, label); \
41+
dst += sizeof(type); \
42+
src += sizeof(type); \
43+
len -= sizeof(type); \
44+
}
45+
46+
/*
47+
* We avoid doing 64-bit copies on 32-bit architectures. They
48+
* might be better, but the component names are mostly small,
49+
* and the 64-bit cases can end up being much more complex and
50+
* put much more register pressure on the code, so it's likely
51+
* not worth the pain of unaligned accesses etc.
52+
*
53+
* So limit the copies to "unsigned long" size. I did verify
54+
* that at least the x86-32 case is ok without this limiting,
55+
* but I worry about random other legacy 32-bit cases that
56+
* might not do as well.
57+
*/
58+
#define unsafe_copy_type(dst, src, len, type, label) do { \
59+
if (sizeof(type) <= sizeof(unsigned long)) \
60+
unsafe_copy_loop(dst, src, len, type, label); \
61+
} while (0)
62+
63+
/*
64+
* Copy the dirent name to user space, and NUL-terminate
65+
* it. This should not be a function call, since we're doing
66+
* the copy inside a "user_access_begin/end()" section.
67+
*/
68+
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
69+
char __user *dst = (_dst); \
70+
const char *src = (_src); \
71+
size_t len = (_len); \
72+
unsafe_copy_type(dst, src, len, u64, label); \
73+
unsafe_copy_type(dst, src, len, u32, label); \
74+
unsafe_copy_type(dst, src, len, u16, label); \
75+
unsafe_copy_type(dst, src, len, u8, label); \
76+
unsafe_put_user(0, dst, label); \
77+
} while (0)
78+
79+
2680
int iterate_dir(struct file *file, struct dir_context *ctx)
2781
{
2882
struct inode *inode = file_inode(file);
@@ -64,6 +118,40 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
64118
}
65119
EXPORT_SYMBOL(iterate_dir);
66120

121+
/*
122+
* POSIX says that a dirent name cannot contain NULL or a '/'.
123+
*
124+
* It's not 100% clear what we should really do in this case.
125+
* The filesystem is clearly corrupted, but returning a hard
126+
* error means that you now don't see any of the other names
127+
* either, so that isn't a perfect alternative.
128+
*
129+
* And if you return an error, what error do you use? Several
130+
* filesystems seem to have decided on EUCLEAN being the error
131+
* code for EFSCORRUPTED, and that may be the error to use. Or
132+
* just EIO, which is perhaps more obvious to users.
133+
*
134+
* In order to see the other file names in the directory, the
135+
* caller might want to make this a "soft" error: skip the
136+
* entry, and return the error at the end instead.
137+
*
138+
* Note that this should likely do a "memchr(name, 0, len)"
139+
* check too, since that would be filesystem corruption as
140+
* well. However, that case can't actually confuse user space,
141+
* which has to do a strlen() on the name anyway to find the
142+
* filename length, and the above "soft error" worry means
143+
* that it's probably better left alone until we have that
144+
* issue clarified.
145+
*/
146+
static int verify_dirent_name(const char *name, int len)
147+
{
148+
if (WARN_ON_ONCE(!len))
149+
return -EIO;
150+
if (WARN_ON_ONCE(memchr(name, '/', len)))
151+
return -EIO;
152+
return 0;
153+
}
154+
67155
/*
68156
* Traditional linux readdir() handling..
69157
*
@@ -173,6 +261,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
173261
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
174262
sizeof(long));
175263

264+
buf->error = verify_dirent_name(name, namlen);
265+
if (unlikely(buf->error))
266+
return buf->error;
176267
buf->error = -EINVAL; /* only used if we fail.. */
177268
if (reclen > buf->count)
178269
return -EINVAL;
@@ -182,28 +273,31 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
182273
return -EOVERFLOW;
183274
}
184275
dirent = buf->previous;
185-
if (dirent) {
186-
if (signal_pending(current))
187-
return -EINTR;
188-
if (__put_user(offset, &dirent->d_off))
189-
goto efault;
190-
}
191-
dirent = buf->current_dir;
192-
if (__put_user(d_ino, &dirent->d_ino))
193-
goto efault;
194-
if (__put_user(reclen, &dirent->d_reclen))
195-
goto efault;
196-
if (copy_to_user(dirent->d_name, name, namlen))
197-
goto efault;
198-
if (__put_user(0, dirent->d_name + namlen))
199-
goto efault;
200-
if (__put_user(d_type, (char __user *) dirent + reclen - 1))
276+
if (dirent && signal_pending(current))
277+
return -EINTR;
278+
279+
/*
280+
* Note! This range-checks 'previous' (which may be NULL).
281+
* The real range was checked in getdents
282+
*/
283+
if (!user_access_begin(dirent, sizeof(*dirent)))
201284
goto efault;
285+
if (dirent)
286+
unsafe_put_user(offset, &dirent->d_off, efault_end);
287+
dirent = buf->current_dir;
288+
unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
289+
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
290+
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
291+
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
292+
user_access_end();
293+
202294
buf->previous = dirent;
203295
dirent = (void __user *)dirent + reclen;
204296
buf->current_dir = dirent;
205297
buf->count -= reclen;
206298
return 0;
299+
efault_end:
300+
user_access_end();
207301
efault:
208302
buf->error = -EFAULT;
209303
return -EFAULT;
@@ -259,34 +353,38 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
259353
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
260354
sizeof(u64));
261355

356+
buf->error = verify_dirent_name(name, namlen);
357+
if (unlikely(buf->error))
358+
return buf->error;
262359
buf->error = -EINVAL; /* only used if we fail.. */
263360
if (reclen > buf->count)
264361
return -EINVAL;
265362
dirent = buf->previous;
266-
if (dirent) {
267-
if (signal_pending(current))
268-
return -EINTR;
269-
if (__put_user(offset, &dirent->d_off))
270-
goto efault;
271-
}
272-
dirent = buf->current_dir;
273-
if (__put_user(ino, &dirent->d_ino))
274-
goto efault;
275-
if (__put_user(0, &dirent->d_off))
276-
goto efault;
277-
if (__put_user(reclen, &dirent->d_reclen))
278-
goto efault;
279-
if (__put_user(d_type, &dirent->d_type))
280-
goto efault;
281-
if (copy_to_user(dirent->d_name, name, namlen))
282-
goto efault;
283-
if (__put_user(0, dirent->d_name + namlen))
363+
if (dirent && signal_pending(current))
364+
return -EINTR;
365+
366+
/*
367+
* Note! This range-checks 'previous' (which may be NULL).
368+
* The real range was checked in getdents
369+
*/
370+
if (!user_access_begin(dirent, sizeof(*dirent)))
284371
goto efault;
372+
if (dirent)
373+
unsafe_put_user(offset, &dirent->d_off, efault_end);
374+
dirent = buf->current_dir;
375+
unsafe_put_user(ino, &dirent->d_ino, efault_end);
376+
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
377+
unsafe_put_user(d_type, &dirent->d_type, efault_end);
378+
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
379+
user_access_end();
380+
285381
buf->previous = dirent;
286382
dirent = (void __user *)dirent + reclen;
287383
buf->current_dir = dirent;
288384
buf->count -= reclen;
289385
return 0;
386+
efault_end:
387+
user_access_end();
290388
efault:
291389
buf->error = -EFAULT;
292390
return -EFAULT;

0 commit comments

Comments
 (0)