Skip to content

Commit 2acda75

Browse files
committed
Merge tag 'fsnotify_for_v5.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify updates from Jan Kara: "Support for reporting filesystem errors through fanotify so that system health monitoring daemons can watch for these and act instead of scraping system logs" * tag 'fsnotify_for_v5.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (34 commits) samples: remove duplicate include in fs-monitor.c samples: Fix warning in fsnotify sample docs: Fix formatting of literal sections in fanotify docs samples: Make fs-monitor depend on libc and headers docs: Document the FAN_FS_ERROR event samples: Add fs error monitoring example ext4: Send notifications on error fanotify: Allow users to request FAN_FS_ERROR events fanotify: Emit generic error info for error event fanotify: Report fid info for file related file system errors fanotify: WARN_ON against too large file handles fanotify: Add helpers to decide whether to report FID/DFID fanotify: Wrap object_fh inline space in a creator macro fanotify: Support merging of error events fanotify: Support enqueueing of error events fanotify: Pre-allocate pool of error events fanotify: Reserve UAPI bits for FAN_FS_ERROR fsnotify: Support FS_ERROR event type fanotify: Require fid_mode for any non-fd event fanotify: Encode empty file handle when no inode is provided ...
2 parents d8b4e5b + 15c7266 commit 2acda75

File tree

22 files changed

+690
-99
lines changed

22 files changed

+690
-99
lines changed
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
====================================
4+
File system Monitoring with fanotify
5+
====================================
6+
7+
File system Error Reporting
8+
===========================
9+
10+
Fanotify supports the FAN_FS_ERROR event type for file system-wide error
11+
reporting. It is meant to be used by file system health monitoring
12+
daemons, which listen for these events and take actions (notify
13+
sysadmin, start recovery) when a file system problem is detected.
14+
15+
By design, a FAN_FS_ERROR notification exposes sufficient information
16+
for a monitoring tool to know a problem in the file system has happened.
17+
It doesn't necessarily provide a user space application with semantics
18+
to verify an IO operation was successfully executed. That is out of
19+
scope for this feature. Instead, it is only meant as a framework for
20+
early file system problem detection and reporting to recovery tools.
21+
22+
When a file system operation fails, it is common for dozens of kernel
23+
errors to cascade after the initial failure, hiding the original failure
24+
log, which is usually the most useful debug data to troubleshoot the
25+
problem. For this reason, FAN_FS_ERROR tries to report only the first
26+
error that occurred for a file system since the last notification, and
27+
it simply counts additional errors. This ensures that the most
28+
important pieces of information are never lost.
29+
30+
FAN_FS_ERROR requires the fanotify group to be set up with the
31+
FAN_REPORT_FID flag.
32+
33+
At the time of this writing, the only file system that emits FAN_FS_ERROR
34+
notifications is Ext4.
35+
36+
A FAN_FS_ERROR notification has the following format:
37+
38+
::
39+
40+
[ Notification Metadata (Mandatory) ]
41+
[ Generic Error Record (Mandatory) ]
42+
[ FID record (Mandatory) ]
43+
44+
The order of records is not guaranteed, and new records might be added
45+
in the future. Therefore, applications must not rely on the order and
46+
must be prepared to skip over unknown records. Please refer to
47+
``samples/fanotify/fs-monitor.c`` for an example parser.
48+
49+
Generic error record
50+
--------------------
51+
52+
The generic error record provides enough information for a file system
53+
agnostic tool to learn about a problem in the file system, without
54+
providing any additional details about the problem. This record is
55+
identified by ``struct fanotify_event_info_header.info_type`` being set
56+
to FAN_EVENT_INFO_TYPE_ERROR.
57+
58+
::
59+
60+
struct fanotify_event_info_error {
61+
struct fanotify_event_info_header hdr;
62+
__s32 error;
63+
__u32 error_count;
64+
};
65+
66+
The ``error`` field identifies the type of error using errno values.
67+
``error_count`` tracks the number of errors that occurred and were
68+
suppressed to preserve the original error information, since the last
69+
notification.
70+
71+
FID record
72+
----------
73+
74+
The FID record can be used to uniquely identify the inode that triggered
75+
the error through the combination of fsid and file handle. A file system
76+
specific application can use that information to attempt a recovery
77+
procedure. Errors that are not related to an inode are reported with an
78+
empty file handle of type FILEID_INVALID.

Documentation/admin-guide/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ configure specific aspects of kernel behavior to your liking.
8282
edid
8383
efi-stub
8484
ext4
85+
filesystem-monitoring
8586
nfs/index
8687
gpio/index
8788
highuid

fs/ext4/super.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include <linux/part_stat.h>
4747
#include <linux/kthread.h>
4848
#include <linux/freezer.h>
49+
#include <linux/fsnotify.h>
4950

5051
#include "ext4.h"
5152
#include "ext4_extents.h" /* Needed for trace points definition */
@@ -759,6 +760,8 @@ void __ext4_error(struct super_block *sb, const char *function,
759760
sb->s_id, function, line, current->comm, &vaf);
760761
va_end(args);
761762
}
763+
fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
764+
762765
ext4_handle_error(sb, force_ro, error, 0, block, function, line);
763766
}
764767

@@ -789,6 +792,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
789792
current->comm, &vaf);
790793
va_end(args);
791794
}
795+
fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
796+
792797
ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
793798
function, line);
794799
}
@@ -827,6 +832,8 @@ void __ext4_error_file(struct file *file, const char *function,
827832
current->comm, path, &vaf);
828833
va_end(args);
829834
}
835+
fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
836+
830837
ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
831838
function, line);
832839
}
@@ -894,6 +901,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
894901
printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
895902
sb->s_id, function, line, errstr);
896903
}
904+
fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
897905

898906
ext4_handle_error(sb, false, -errno, 0, 0, function, line);
899907
}

fs/nfsd/filecache.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,9 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
602602
struct inode *inode, struct inode *dir,
603603
const struct qstr *name, u32 cookie)
604604
{
605+
if (WARN_ON_ONCE(!inode))
606+
return 0;
607+
605608
trace_nfsd_file_fsnotify_handle_event(inode, mask);
606609

607610
/* Should be no marks on non-regular files */

fs/notify/fanotify/fanotify.c

Lines changed: 107 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,16 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
111111
return fanotify_info_equal(info1, info2);
112112
}
113113

/*
 * Merge predicate for error events: returns true when the two events carry
 * the same fsid (as judged by fanotify_fsid_equal()), false otherwise.
 * Only the fsid is compared here; the error code and the file handle stored
 * in the events play no part in the decision.
 */
114+
static bool fanotify_error_event_equal(struct fanotify_error_event *fee1,
115+
struct fanotify_error_event *fee2)
116+
{
117+
/* Error events against the same file system are always merged. */
118+
if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid))
119+
return false;
120+
121+
return true;
122+
}
123+
114124
static bool fanotify_should_merge(struct fanotify_event *old,
115125
struct fanotify_event *new)
116126
{
@@ -141,6 +151,9 @@ static bool fanotify_should_merge(struct fanotify_event *old,
141151
case FANOTIFY_EVENT_TYPE_FID_NAME:
142152
return fanotify_name_event_equal(FANOTIFY_NE(old),
143153
FANOTIFY_NE(new));
154+
case FANOTIFY_EVENT_TYPE_FS_ERROR:
155+
return fanotify_error_event_equal(FANOTIFY_EE(old),
156+
FANOTIFY_EE(new));
144157
default:
145158
WARN_ON_ONCE(1);
146159
}
@@ -176,6 +189,10 @@ static int fanotify_merge(struct fsnotify_group *group,
176189
break;
177190
if (fanotify_should_merge(old, new)) {
178191
old->mask |= new->mask;
192+
193+
if (fanotify_is_error_event(old->mask))
194+
FANOTIFY_EE(old)->err_count++;
195+
179196
return 1;
180197
}
181198
}
@@ -343,13 +360,23 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
/*
 * Compute the length of the file handle that would encode @inode.
 *
 * Returns 0 when @inode is NULL, and also 0 (after a one-time warning) when
 * the computed length exceeds MAX_HANDLE_SZ; otherwise returns the length
 * derived from the dword count filled in by exportfs_encode_inode_fh().
 *
 * NOTE(review): this is a rendered diff hunk; the gutter line ending in "-"
 * marks the old "return dwords << 2;" that the change replaces with
 * "return fh_len;".
 */
343360
static int fanotify_encode_fh_len(struct inode *inode)
344361
{
345362
int dwords = 0;
363+
int fh_len;
346364

347365
if (!inode)
348366
return 0;
349367

350368
/* NULL buffer: presumably only asks for the needed size in dwords - TODO confirm */
exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
369+
/* dwords * 4; presumably converts 4-byte words to bytes - TODO confirm */
fh_len = dwords << 2;
370+
371+
/*
372+
* struct fanotify_error_event might be preallocated and is
373+
* limited to MAX_HANDLE_SZ. This should never happen, but
374+
* safeguard by forcing an invalid file handle.
375+
*/
376+
if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ))
377+
return 0;
351378

352-
return dwords << 2;
379+
return fh_len;
353380
}
354381

355382
/*
@@ -370,8 +397,14 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
370397
fh->type = FILEID_ROOT;
371398
fh->len = 0;
372399
fh->flags = 0;
400+
401+
/*
402+
* Invalid FHs are used by FAN_FS_ERROR for errors not
403+
* linked to any inode. The f_handle won't be reported
404+
* back to userspace.
405+
*/
373406
if (!inode)
374-
return 0;
407+
goto out;
375408

376409
/*
377410
* !gpf means preallocated variable size fh, but fh_len could
@@ -403,8 +436,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
403436
fh->type = type;
404437
fh->len = fh_len;
405438

406-
/* Mix fh into event merge key */
407-
*hash ^= fanotify_hash_fh(fh);
439+
out:
440+
/*
441+
* Mix fh into event merge key. Hash might be NULL in case of
442+
* unhashed FID events (i.e. FAN_FS_ERROR).
443+
*/
444+
if (hash)
445+
*hash ^= fanotify_hash_fh(fh);
408446

409447
return FANOTIFY_FH_HDR_LEN + fh_len;
410448

@@ -452,7 +490,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data,
452490
if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
453491
return dir;
454492

455-
if (S_ISDIR(inode->i_mode))
493+
if (inode && S_ISDIR(inode->i_mode))
456494
return inode;
457495

458496
return dir;
@@ -563,6 +601,44 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
563601
return &fne->fae;
564602
}
565603

/*
 * Allocate and fill a fanotify error event from the group's
 * error_events_pool mempool.
 *
 * @group:     group whose fanotify_data.error_events_pool is allocated from
 * @fsid:      file system id copied into the event and mixed into *@hash
 * @data:      opaque event data, decoded with fsnotify_data_error_report()
 * @data_type: type tag passed along with @data to that decoder
 * @hash:      in/out merge key; XORed with fanotify_hash_fsid(@fsid)
 *
 * Returns a pointer to the embedded struct fanotify_event, or NULL when no
 * error report can be extracted from @data (with a one-time warning) or when
 * the mempool allocation fails.
 */
604+
static struct fanotify_event *fanotify_alloc_error_event(
605+
struct fsnotify_group *group,
606+
__kernel_fsid_t *fsid,
607+
const void *data, int data_type,
608+
unsigned int *hash)
609+
{
610+
struct fs_error_report *report =
611+
fsnotify_data_error_report(data, data_type);
612+
struct inode *inode;
613+
struct fanotify_error_event *fee;
614+
int fh_len;
615+
616+
if (WARN_ON_ONCE(!report))
617+
return NULL;
618+
619+
fee = mempool_alloc(&group->fanotify_data.error_events_pool, GFP_NOFS);
620+
if (!fee)
621+
return NULL;
622+
623+
fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
624+
fee->error = report->error;
625+
/* Starts at one for this error; fanotify_merge() increments it on each merge. */
fee->err_count = 1;
626+
fee->fsid = *fsid;
627+
628+
inode = report->inode;
629+
fh_len = fanotify_encode_fh_len(inode);
630+
631+
/* Bad fh_len. Fallback to using an invalid fh. Should never happen. */
632+
if (!fh_len && inode)
633+
inode = NULL;
634+
635+
/* NULL hash pointer: the file handle is not mixed into the merge key. */
fanotify_encode_fh(&fee->object_fh, inode, fh_len, NULL, 0);
636+
637+
/* Only the fsid contributes to the merge key for error events. */
*hash ^= fanotify_hash_fsid(fsid);
638+
639+
return &fee->fae;
640+
}
641+
566642
static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
567643
u32 mask, const void *data,
568644
int data_type, struct inode *dir,
@@ -630,6 +706,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
630706

631707
if (fanotify_is_perm_event(mask)) {
632708
event = fanotify_alloc_perm_event(path, gfp);
709+
} else if (fanotify_is_error_event(mask)) {
710+
event = fanotify_alloc_error_event(group, fsid, data,
711+
data_type, &hash);
633712
} else if (name_event && (file_name || child)) {
634713
event = fanotify_alloc_name_event(id, fsid, file_name, child,
635714
&hash, gfp);
@@ -702,6 +781,9 @@ static void fanotify_insert_event(struct fsnotify_group *group,
702781

703782
assert_spin_locked(&group->notification_lock);
704783

784+
if (!fanotify_is_hashed_event(event->mask))
785+
return;
786+
705787
pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
706788
group, event, bucket);
707789

@@ -738,8 +820,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
738820
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
739821
BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
740822
BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
823+
BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
741824

742-
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
825+
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
743826

744827
mask = fanotify_group_event_mask(group, iter_info, mask, data,
745828
data_type, dir);
@@ -778,9 +861,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
778861
}
779862

780863
fsn_event = &event->fse;
781-
ret = fsnotify_add_event(group, fsn_event, fanotify_merge,
782-
fanotify_is_hashed_event(mask) ?
783-
fanotify_insert_event : NULL);
864+
ret = fsnotify_insert_event(group, fsn_event, fanotify_merge,
865+
fanotify_insert_event);
784866
if (ret) {
785867
/* Permission events shouldn't be merged */
786868
BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
@@ -805,6 +887,9 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
805887
if (group->fanotify_data.ucounts)
806888
dec_ucount(group->fanotify_data.ucounts,
807889
UCOUNT_FANOTIFY_GROUPS);
890+
891+
if (mempool_initialized(&group->fanotify_data.error_events_pool))
892+
mempool_exit(&group->fanotify_data.error_events_pool);
808893
}
809894

810895
static void fanotify_free_path_event(struct fanotify_event *event)
@@ -833,7 +918,16 @@ static void fanotify_free_name_event(struct fanotify_event *event)
833918
kfree(FANOTIFY_NE(event));
834919
}
835920

836-
static void fanotify_free_event(struct fsnotify_event *fsn_event)
/*
 * Release an error event by handing it back to @group's
 * fanotify_data.error_events_pool mempool, the same pool that
 * fanotify_alloc_error_event() allocates from.
 * @event is converted to its containing fanotify_error_event via
 * FANOTIFY_EE() before being freed.
 */
921+
static void fanotify_free_error_event(struct fsnotify_group *group,
922+
struct fanotify_event *event)
923+
{
924+
struct fanotify_error_event *fee = FANOTIFY_EE(event);
925+
926+
mempool_free(fee, &group->fanotify_data.error_events_pool);
927+
}
928+
929+
static void fanotify_free_event(struct fsnotify_group *group,
930+
struct fsnotify_event *fsn_event)
837931
{
838932
struct fanotify_event *event;
839933

@@ -855,6 +949,9 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
855949
case FANOTIFY_EVENT_TYPE_OVERFLOW:
856950
kfree(event);
857951
break;
952+
case FANOTIFY_EVENT_TYPE_FS_ERROR:
953+
fanotify_free_error_event(group, event);
954+
break;
858955
default:
859956
WARN_ON_ONCE(1);
860957
}

0 commit comments

Comments
 (0)