Skip to content

Commit 94e00d2

Browse files
amir73il and jankara
authored and committed
fsnotify: use hash table for faster events merge
In order to improve event merge performance, hash events in a 128-bucket hash table by the event merge key. The fanotify_event size grows by two pointers, but we just reduced its size by removing the objectid member, so overall its size is increased by one pointer. Permission events and the overflow event are not merged, so they are also not hashed. Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Amir Goldstein <[email protected]> Signed-off-by: Jan Kara <[email protected]>
1 parent 7e3e5c6 commit 94e00d2

File tree

6 files changed

+123
-20
lines changed

6 files changed

+123
-20
lines changed

fs/notify/fanotify/fanotify.c

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,15 @@ static bool fanotify_should_merge(struct fanotify_event *old,
149149
}
150150

151151
/* and the list better be locked by something too! */
152-
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
152+
static int fanotify_merge(struct fsnotify_group *group,
153+
struct fsnotify_event *event)
153154
{
154-
struct fsnotify_event *test_event;
155155
struct fanotify_event *old, *new = FANOTIFY_E(event);
156+
unsigned int bucket = fanotify_event_hash_bucket(group, new);
157+
struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
156158

157-
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
159+
pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
160+
group, event, bucket);
158161

159162
/*
160163
* Don't merge a permission event with any other event so that we know
@@ -164,8 +167,7 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
164167
if (fanotify_is_perm_event(new->mask))
165168
return 0;
166169

167-
list_for_each_entry_reverse(test_event, list, list) {
168-
old = FANOTIFY_E(test_event);
170+
hlist_for_each_entry(old, hlist, merge_list) {
169171
if (fanotify_should_merge(old, new)) {
170172
old->mask |= new->mask;
171173
return 1;
@@ -203,8 +205,11 @@ static int fanotify_get_response(struct fsnotify_group *group,
203205
return ret;
204206
}
205207
/* Event not yet reported? Just remove it. */
206-
if (event->state == FAN_EVENT_INIT)
208+
if (event->state == FAN_EVENT_INIT) {
207209
fsnotify_remove_queued_event(group, &event->fae.fse);
210+
/* Permission events are not supposed to be hashed */
211+
WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list));
212+
}
208213
/*
209214
* Event may be also answered in case signal delivery raced
210215
* with wakeup. In that case we have nothing to do besides
@@ -679,6 +684,24 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
679684
return fsid;
680685
}
681686

687+
/*
688+
* Add an event to hash table for faster merge.
689+
*/
690+
static void fanotify_insert_event(struct fsnotify_group *group,
691+
struct fsnotify_event *fsn_event)
692+
{
693+
struct fanotify_event *event = FANOTIFY_E(fsn_event);
694+
unsigned int bucket = fanotify_event_hash_bucket(group, event);
695+
struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
696+
697+
assert_spin_locked(&group->notification_lock);
698+
699+
pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
700+
group, event, bucket);
701+
702+
hlist_add_head(&event->merge_list, hlist);
703+
}
704+
682705
static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
683706
const void *data, int data_type,
684707
struct inode *dir,
@@ -749,7 +772,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
749772
}
750773

751774
fsn_event = &event->fse;
752-
ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
775+
ret = fsnotify_add_event(group, fsn_event, fanotify_merge,
776+
fanotify_is_hashed_event(mask) ?
777+
fanotify_insert_event : NULL);
753778
if (ret) {
754779
/* Permission events shouldn't be merged */
755780
BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
@@ -772,6 +797,7 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
772797
{
773798
struct user_struct *user;
774799

800+
kfree(group->fanotify_data.merge_hash);
775801
user = group->fanotify_data.user;
776802
atomic_dec(&user->fanotify_listeners);
777803
free_uid(user);

fs/notify/fanotify/fanotify.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <linux/path.h>
44
#include <linux/slab.h>
55
#include <linux/exportfs.h>
6+
#include <linux/hashtable.h>
67

78
extern struct kmem_cache *fanotify_mark_cache;
89
extern struct kmem_cache *fanotify_fid_event_cachep;
@@ -150,6 +151,7 @@ enum fanotify_event_type {
150151

151152
struct fanotify_event {
152153
struct fsnotify_event fse;
154+
struct hlist_node merge_list; /* List for hashed merge */
153155
u32 mask;
154156
struct {
155157
unsigned int type : FANOTIFY_EVENT_TYPE_BITS;
@@ -162,6 +164,7 @@ static inline void fanotify_init_event(struct fanotify_event *event,
162164
unsigned int hash, u32 mask)
163165
{
164166
fsnotify_init_event(&event->fse);
167+
INIT_HLIST_NODE(&event->merge_list);
165168
event->hash = hash;
166169
event->mask = mask;
167170
event->pid = NULL;
@@ -299,3 +302,25 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
299302
else
300303
return NULL;
301304
}
305+
306+
/*
307+
* Use a 128-bucket hash table to speed up event merging.
308+
*/
309+
#define FANOTIFY_HTABLE_BITS (7)
310+
#define FANOTIFY_HTABLE_SIZE (1 << FANOTIFY_HTABLE_BITS)
311+
#define FANOTIFY_HTABLE_MASK (FANOTIFY_HTABLE_SIZE - 1)
312+
313+
/*
314+
* Permission events and overflow event do not get merged - don't hash them.
315+
*/
316+
static inline bool fanotify_is_hashed_event(u32 mask)
317+
{
318+
return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
319+
}
320+
321+
static inline unsigned int fanotify_event_hash_bucket(
322+
struct fsnotify_group *group,
323+
struct fanotify_event *event)
324+
{
325+
return event->hash & FANOTIFY_HTABLE_MASK;
326+
}

fs/notify/fanotify/fanotify_user.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,23 @@ static int fanotify_event_info_len(unsigned int fid_mode,
8989
return info_len;
9090
}
9191

92+
/*
93+
* Remove a hashed event from the merge hash table.
94+
*/
95+
static void fanotify_unhash_event(struct fsnotify_group *group,
96+
struct fanotify_event *event)
97+
{
98+
assert_spin_locked(&group->notification_lock);
99+
100+
pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
101+
group, event, fanotify_event_hash_bucket(group, event));
102+
103+
if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list)))
104+
return;
105+
106+
hlist_del_init(&event->merge_list);
107+
}
108+
92109
/*
93110
* Get an fanotify notification event if one exists and is small
94111
* enough to fit in "count". Return an error pointer if the count
@@ -126,6 +143,8 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
126143
fsnotify_remove_first_event(group);
127144
if (fanotify_is_perm_event(event->mask))
128145
FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
146+
if (fanotify_is_hashed_event(event->mask))
147+
fanotify_unhash_event(group, event);
129148
out:
130149
spin_unlock(&group->notification_lock);
131150
return event;
@@ -925,6 +944,20 @@ static struct fsnotify_event *fanotify_alloc_overflow_event(void)
925944
return &oevent->fse;
926945
}
927946

947+
static struct hlist_head *fanotify_alloc_merge_hash(void)
948+
{
949+
struct hlist_head *hash;
950+
951+
hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS,
952+
GFP_KERNEL_ACCOUNT);
953+
if (!hash)
954+
return NULL;
955+
956+
__hash_init(hash, FANOTIFY_HTABLE_SIZE);
957+
958+
return hash;
959+
}
960+
928961
/* fanotify syscalls */
929962
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
930963
{
@@ -993,6 +1026,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
9931026
atomic_inc(&user->fanotify_listeners);
9941027
group->memcg = get_mem_cgroup_from_mm(current->mm);
9951028

1029+
group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
1030+
if (!group->fanotify_data.merge_hash) {
1031+
fd = -ENOMEM;
1032+
goto out_destroy_group;
1033+
}
1034+
9961035
group->overflow_event = fanotify_alloc_overflow_event();
9971036
if (unlikely(!group->overflow_event)) {
9981037
fd = -ENOMEM;

fs/notify/inotify/inotify_fsnotify.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,10 @@ static bool event_compare(struct fsnotify_event *old_fsn,
4646
return false;
4747
}
4848

49-
static int inotify_merge(struct list_head *list,
50-
struct fsnotify_event *event)
49+
static int inotify_merge(struct fsnotify_group *group,
50+
struct fsnotify_event *event)
5151
{
52+
struct list_head *list = &group->notification_list;
5253
struct fsnotify_event *last_event;
5354

5455
last_event = list_entry(list->prev, struct fsnotify_event, list);
@@ -115,7 +116,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
115116
if (len)
116117
strcpy(event->name, name->name);
117118

118-
ret = fsnotify_add_event(group, fsn_event, inotify_merge);
119+
ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL);
119120
if (ret) {
120121
/* Our event wasn't used in the end. Free it. */
121122
fsnotify_destroy_event(group, fsn_event);

fs/notify/notification.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,22 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
6868
}
6969

7070
/*
71-
* Add an event to the group notification queue. The group can later pull this
72-
* event off the queue to deal with. The function returns 0 if the event was
73-
* added to the queue, 1 if the event was merged with some other queued event,
71+
* Try to add an event to the notification queue.
72+
* The group can later pull this event off the queue to deal with.
73+
* The group can use the @merge hook to merge the event with a queued event.
74+
* The group can use the @insert hook to insert the event into hash table.
75+
* The function returns:
76+
* 0 if the event was added to a queue
77+
* 1 if the event was merged with some other queued event
7478
* 2 if the event was not queued - either the queue of events has overflowed
75-
* or the group is shutting down.
79+
* or the group is shutting down.
7680
*/
7781
int fsnotify_add_event(struct fsnotify_group *group,
7882
struct fsnotify_event *event,
79-
int (*merge)(struct list_head *,
80-
struct fsnotify_event *))
83+
int (*merge)(struct fsnotify_group *,
84+
struct fsnotify_event *),
85+
void (*insert)(struct fsnotify_group *,
86+
struct fsnotify_event *))
8187
{
8288
int ret = 0;
8389
struct list_head *list = &group->notification_list;
@@ -104,7 +110,7 @@ int fsnotify_add_event(struct fsnotify_group *group,
104110
}
105111

106112
if (!list_empty(list) && merge) {
107-
ret = merge(list, event);
113+
ret = merge(group, event);
108114
if (ret) {
109115
spin_unlock(&group->notification_lock);
110116
return ret;
@@ -114,6 +120,8 @@ int fsnotify_add_event(struct fsnotify_group *group,
114120
queue:
115121
group->q_len++;
116122
list_add_tail(&event->list, list);
123+
if (insert)
124+
insert(group, event);
117125
spin_unlock(&group->notification_lock);
118126

119127
wake_up(&group->notification_waitq);

include/linux/fsnotify_backend.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ struct fsnotify_group {
233233
#endif
234234
#ifdef CONFIG_FANOTIFY
235235
struct fanotify_group_private_data {
236+
/* Hash table of events for merge */
237+
struct hlist_head *merge_hash;
236238
/* allows a group to block waiting for a userspace response */
237239
struct list_head access_list;
238240
wait_queue_head_t access_waitq;
@@ -486,12 +488,14 @@ extern void fsnotify_destroy_event(struct fsnotify_group *group,
486488
/* attach the event to the group notification queue */
487489
extern int fsnotify_add_event(struct fsnotify_group *group,
488490
struct fsnotify_event *event,
489-
int (*merge)(struct list_head *,
490-
struct fsnotify_event *));
491+
int (*merge)(struct fsnotify_group *,
492+
struct fsnotify_event *),
493+
void (*insert)(struct fsnotify_group *,
494+
struct fsnotify_event *));
491495
/* Queue overflow event to a notification group */
492496
static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
493497
{
494-
fsnotify_add_event(group, group->overflow_event, NULL);
498+
fsnotify_add_event(group, group->overflow_event, NULL, NULL);
495499
}
496500

497501
static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)

0 commit comments

Comments
 (0)