Skip to content

Commit a65442e

Browse files
beaubelgrave authored and rostedt committed
tracing/user_events: Add auto cleanup and future persist flag
Currently user events need to be manually deleted via the delete IOCTL call or via the dynamic_events file. Most operators and processes wish to have these events auto cleanup when they are no longer used by anything to prevent them piling without manual maintenance. However, some operators may not want this, such as pre-registering events via the dynamic_events tracefs file. Update user_event_put() to attempt an auto delete of the event if it's the last reference. The auto delete must run in a work queue to ensure proper behavior of class->reg() invocations that don't expect the call to go away from underneath them during the unregister. Add work_struct to user_event struct to ensure we can do this reliably. Add a persist flag, that is not yet exposed, to ensure we can toggle between auto-cleanup and leaving the events existing in the future. When a non-zero flag is seen during register, return -EINVAL to ensure ABI is clear for the user processes while we work out the best approach for persistent events. Link: https://lkml.kernel.org/r/[email protected] Link: https://lore.kernel.org/linux-trace-kernel/[email protected]/ Suggested-by: Steven Rostedt <[email protected]> Signed-off-by: Beau Belgrave <[email protected]> Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent f0dbf6f commit a65442e

File tree

1 file changed

+126
-13
lines changed

1 file changed

+126
-13
lines changed

kernel/trace/trace_events_user.c

Lines changed: 126 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,18 @@
4949
#define EVENT_STATUS_PERF BIT(1)
5050
#define EVENT_STATUS_OTHER BIT(7)
5151

52+
/*
53+
 * User register flags are not allowed yet, keep them here until we are
54+
 * ready to expose them out to the user ABI. Until then, user_event_parse()
 * rejects any non-zero reg_flags with -EINVAL, and user_reg_get() masks
 * incoming flags against USER_EVENT_REG_MAX.
55+
 */
56+
enum user_reg_flag {
57+
	/* Event will not delete upon last reference closing */
58+
	USER_EVENT_REG_PERSIST		= 1U << 0,
59+
60+
	/* This value or above is currently non-ABI */
	/* (sentinel only — never set on an event; used to validate input) */
61+
	USER_EVENT_REG_MAX		= 1U << 1,
62+
};
63+
5264
/*
5365
* Stores the system name, tables, and locks for a group of events. This
5466
* allows isolation for events by various means.
@@ -85,6 +97,7 @@ struct user_event {
8597
struct hlist_node node;
8698
struct list_head fields;
8799
struct list_head validators;
100+
struct work_struct put_work;
88101
refcount_t refcnt;
89102
int min_size;
90103
int reg_flags;
@@ -171,6 +184,7 @@ static int user_event_parse(struct user_event_group *group, char *name,
171184
static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
172185
static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
173186
static void user_event_mm_put(struct user_event_mm *mm);
187+
static int destroy_user_event(struct user_event *user);
174188

175189
static u32 user_event_key(char *name)
176190
{
@@ -184,19 +198,103 @@ static struct user_event *user_event_get(struct user_event *user)
184198
return user;
185199
}
186200

201+
/*
 * Work-queue callback scheduled by user_event_put(): drops the single
 * reference that was re-set there and, if it is still the last one,
 * destroys the event. Runs under event_mutex so no new references can
 * be taken (via find_user_event) while we check and destroy.
 */
static void delayed_destroy_user_event(struct work_struct *work)
202+
{
203+
struct user_event *user = container_of(
204+
work, struct user_event, put_work);
205+
206+
mutex_lock(&event_mutex);
207+
208+
/* A register may have raced in since the work was queued */
if (!refcount_dec_and_test(&user->refcnt))
209+
goto out;
210+
211+
if (destroy_user_event(user)) {
212+
/*
213+
* The only reason this would fail here is if we cannot
214+
* update the visibility of the event. In this case the
215+
* event stays in the hashtable, waiting for someone to
216+
* attempt to delete it later.
217+
*/
218+
pr_warn("user_events: Unable to delete event\n");
219+
refcount_set(&user->refcnt, 1);
220+
}
221+
out:
222+
mutex_unlock(&event_mutex);
223+
}
224+
187225
/*
 * Drop a reference to @user. @locked says whether the caller already
 * holds event_mutex (asserted via lockdep either way). On the final
 * put of a non-persistent event the actual destroy is deferred to a
 * work queue (delayed_destroy_user_event); see comments below for why.
 */
static void user_event_put(struct user_event *user, bool locked)
188226
{
189-
#ifdef CONFIG_LOCKDEP
190-
if (locked)
191-
lockdep_assert_held(&event_mutex);
192-
else
193-
lockdep_assert_not_held(&event_mutex);
194-
#endif
227+
bool delete;
195228

196229
if (unlikely(!user))
197230
return;
198231

199-
refcount_dec(&user->refcnt);
232+
/*
233+
* When the event is not enabled for auto-delete there will always
234+
* be at least 1 reference to the event. During the event creation
235+
* we initially set the refcnt to 2 to achieve this. In those cases
236+
* the caller must acquire event_mutex and after decrement check if
237+
* the refcnt is 1, meaning this is the last reference. When auto
238+
* delete is enabled, there will only be 1 ref, IE: refcnt will be
239+
* only set to 1 during creation to allow the below checks to go
240+
* through upon the last put. The last put must always be done with
241+
* the event mutex held.
242+
*/
243+
if (!locked) {
244+
lockdep_assert_not_held(&event_mutex);
245+
delete = refcount_dec_and_mutex_lock(&user->refcnt, &event_mutex);
246+
} else {
247+
lockdep_assert_held(&event_mutex);
248+
delete = refcount_dec_and_test(&user->refcnt);
249+
}
250+
251+
if (!delete)
252+
return;
253+
254+
/*
255+
* We now have the event_mutex in all cases, which ensures that
256+
* no new references will be taken until event_mutex is released.
257+
* New references come through find_user_event(), which requires
258+
* the event_mutex to be held.
259+
*/
260+
261+
if (user->reg_flags & USER_EVENT_REG_PERSIST) {
262+
/* We should not get here when persist flag is set */
263+
pr_alert("BUG: Auto-delete engaged on persistent event\n");
264+
goto out;
265+
}
266+
267+
/*
268+
* Unfortunately we have to attempt the actual destroy in a work
269+
* queue. This is because not all cases handle a trace_event_call
270+
* being removed within the class->reg() operation for unregister.
271+
*/
272+
INIT_WORK(&user->put_work, delayed_destroy_user_event);
273+
274+
/*
275+
* Since the event is still in the hashtable, we have to re-inc
276+
* the ref count to 1. This count will be decremented and checked
277+
* in the work queue to ensure it's still the last ref. This is
278+
* needed because a user-process could register the same event in
279+
* between the time of event_mutex release and the work queue
280+
* running the delayed destroy. If we removed the item now from
281+
* the hashtable, this would result in a timing window where a
282+
* user process would fail a register because the trace_event_call
283+
* register would fail in the tracing layers.
284+
*/
285+
refcount_set(&user->refcnt, 1);
286+
287+
if (WARN_ON_ONCE(!schedule_work(&user->put_work))) {
288+
/*
289+
* If we fail we must wait for an admin to attempt delete or
290+
* another register/close of the event, whichever is first.
291+
*/
292+
pr_warn("user_events: Unable to queue delayed destroy\n");
293+
}
294+
out:
295+
/* Ensure if we didn't have event_mutex before we unlock it */
296+
if (!locked)
297+
mutex_unlock(&event_mutex);
200298
}
201299

202300
static void user_event_group_destroy(struct user_event_group *group)
@@ -793,7 +891,12 @@ static struct user_event_enabler
793891
/*
 * True when only the "self" references remain: persistent events keep a
 * self ref, so their last-caller-ref state is a count of 1; non-persistent
 * events track only caller refs (created with refcnt 1), so their count
 * drops to 0 on the last put.
 */
static __always_inline __must_check
794892
bool user_event_last_ref(struct user_event *user)
795893
{
796-
return refcount_read(&user->refcnt) == 1;
894+
int last = 0;
895+
896+
if (user->reg_flags & USER_EVENT_REG_PERSIST)
897+
last = 1;
898+
899+
return refcount_read(&user->refcnt) == last;
797900
}
798901

799902
static __always_inline __must_check
@@ -1609,7 +1712,8 @@ static int user_event_create(const char *raw_command)
16091712

16101713
mutex_lock(&group->reg_mutex);
16111714

1612-
ret = user_event_parse_cmd(group, name, &user, 0);
1715+
/* Dyn events persist, otherwise they would cleanup immediately */
1716+
ret = user_event_parse_cmd(group, name, &user, USER_EVENT_REG_PERSIST);
16131717

16141718
if (!ret)
16151719
user_event_put(user, false);
@@ -1780,6 +1884,10 @@ static int user_event_parse(struct user_event_group *group, char *name,
17801884
int argc = 0;
17811885
char **argv;
17821886

1887+
/* User register flags are not ready yet */
1888+
if (reg_flags != 0 || flags != NULL)
1889+
return -EINVAL;
1890+
17831891
/* Prevent dyn_event from racing */
17841892
mutex_lock(&event_mutex);
17851893
user = find_user_event(group, name, &key);
@@ -1869,8 +1977,13 @@ static int user_event_parse(struct user_event_group *group, char *name,
18691977

18701978
user->reg_flags = reg_flags;
18711979

1872-
/* Ensure we track self ref and caller ref (2) */
1873-
refcount_set(&user->refcnt, 2);
1980+
if (user->reg_flags & USER_EVENT_REG_PERSIST) {
1981+
/* Ensure we track self ref and caller ref (2) */
1982+
refcount_set(&user->refcnt, 2);
1983+
} else {
1984+
/* Ensure we track only caller ref (1) */
1985+
refcount_set(&user->refcnt, 1);
1986+
}
18741987

18751988
dyn_event_init(&user->devent, &user_event_dops);
18761989
dyn_event_add(&user->devent, &user->call);
@@ -2092,8 +2205,8 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
20922205
if (ret)
20932206
return ret;
20942207

2095-
/* Ensure no flags, since we don't support any yet */
2096-
if (kreg->flags != 0)
2208+
/* Ensure only valid flags */
2209+
if (kreg->flags & ~(USER_EVENT_REG_MAX-1))
20972210
return -EINVAL;
20982211

20992212
/* Ensure supported size */

0 commit comments

Comments
 (0)