|
55 | 55 | #include <linux/pgtable.h>
|
56 | 56 | #include <linux/buildid.h>
|
57 | 57 | #include <linux/task_work.h>
|
| 58 | +#include <linux/percpu-rwsem.h> |
58 | 59 |
|
59 | 60 | #include "internal.h"
|
60 | 61 |
|
@@ -5217,6 +5218,225 @@ static void unaccount_freq_event(void)
|
5217 | 5218 | atomic_dec(&nr_freq_events);
|
5218 | 5219 | }
|
5219 | 5220 |
|
| 5221 | + |
| 5222 | +static struct perf_ctx_data * |
| 5223 | +alloc_perf_ctx_data(struct kmem_cache *ctx_cache, bool global) |
| 5224 | +{ |
| 5225 | + struct perf_ctx_data *cd; |
| 5226 | + |
| 5227 | + cd = kzalloc(sizeof(*cd), GFP_KERNEL); |
| 5228 | + if (!cd) |
| 5229 | + return NULL; |
| 5230 | + |
| 5231 | + cd->data = kmem_cache_zalloc(ctx_cache, GFP_KERNEL); |
| 5232 | + if (!cd->data) { |
| 5233 | + kfree(cd); |
| 5234 | + return NULL; |
| 5235 | + } |
| 5236 | + |
| 5237 | + cd->global = global; |
| 5238 | + cd->ctx_cache = ctx_cache; |
| 5239 | + refcount_set(&cd->refcount, 1); |
| 5240 | + |
| 5241 | + return cd; |
| 5242 | +} |
| 5243 | + |
| 5244 | +static void free_perf_ctx_data(struct perf_ctx_data *cd) |
| 5245 | +{ |
| 5246 | + kmem_cache_free(cd->ctx_cache, cd->data); |
| 5247 | + kfree(cd); |
| 5248 | +} |
| 5249 | + |
| 5250 | +static void __free_perf_ctx_data_rcu(struct rcu_head *rcu_head) |
| 5251 | +{ |
| 5252 | + struct perf_ctx_data *cd; |
| 5253 | + |
| 5254 | + cd = container_of(rcu_head, struct perf_ctx_data, rcu_head); |
| 5255 | + free_perf_ctx_data(cd); |
| 5256 | +} |
| 5257 | + |
| 5258 | +static inline void perf_free_ctx_data_rcu(struct perf_ctx_data *cd) |
| 5259 | +{ |
| 5260 | + call_rcu(&cd->rcu_head, __free_perf_ctx_data_rcu); |
| 5261 | +} |
| 5262 | + |
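Aside: free_perf_ctx_data() may only run once no CPU can still be dereferencing the pointer, which is why the detach paths go through call_rcu() above. A minimal sketch of this deferred-free idiom, with illustrative names (struct foo and its helpers are not part of this patch):

```c
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int value;
	struct rcu_head rcu_head;	/* embedded so call_rcu() can find us */
};

static void foo_free_rcu(struct rcu_head *head)
{
	/* Recover the containing object from the embedded rcu_head. */
	kfree(container_of(head, struct foo, rcu_head));
}

static void foo_release(struct foo *f)
{
	/* Defer the kfree() until all pre-existing RCU readers are done. */
	call_rcu(&f->rcu_head, foo_free_rcu);
}
```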
| 5263 | +static int |
| 5264 | +attach_task_ctx_data(struct task_struct *task, struct kmem_cache *ctx_cache, |
| 5265 | + bool global) |
| 5266 | +{ |
| 5267 | + struct perf_ctx_data *cd, *old = NULL; |
| 5268 | + |
| 5269 | + cd = alloc_perf_ctx_data(ctx_cache, global); |
| 5270 | + if (!cd) |
| 5271 | + return -ENOMEM; |
| 5272 | + |
| 5273 | + for (;;) { |
| 5274 | + if (try_cmpxchg((struct perf_ctx_data **)&task->perf_ctx_data, &old, cd)) { |
| 5275 | + if (old) |
| 5276 | + perf_free_ctx_data_rcu(old); |
| 5277 | + return 0; |
| 5278 | + } |
| 5279 | + |
| 5280 | + if (!old) { |
| 5281 | + /* |
| 5282 | + * After seeing a dead @old, we raced with |
| 5283 | + * removal and lost, try again to install @cd. |
| 5284 | + */ |
| 5285 | + continue; |
| 5286 | + } |
| 5287 | + |
| 5288 | + if (refcount_inc_not_zero(&old->refcount)) { |
| 5289 | + free_perf_ctx_data(cd); /* unused */ |
| 5290 | + return 0; |
| 5291 | + } |
| 5292 | + |
| 5293 | + /* |
| 5294 | + * @old is a dead object, refcount==0 is stable, try and |
| 5295 | + * replace it with @cd. |
| 5296 | + */ |
| 5297 | + } |
| 5298 | + return 0; |
| 5299 | +} |
| 5300 | + |
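The retry loop above resolves to one of three cases: try_cmpxchg() publishes @cd (any dead object it displaces is freed via RCU), a live object is already installed and simply gains a reference, or the observed object is dead (refcount stable at zero) and the loop retries to replace it. A userspace sketch of the same publish-or-reuse pattern, using C11 atomics in place of try_cmpxchg()/refcount_inc_not_zero(); all names are illustrative, and the immediate free() stands in for the kernel's RCU-deferred free:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;		/* 0 == dead, awaiting unpublish */
};

static _Atomic(struct obj *) slot;

/* Take a reference unless the object is already dead (refcount == 0). */
static bool get_ref_not_zero(struct obj *o)
{
	int r = atomic_load(&o->refcount);

	while (r != 0) {
		if (atomic_compare_exchange_weak(&o->refcount, &r, r + 1))
			return true;
	}
	return false;
}

static int attach(void)
{
	struct obj *cd = calloc(1, sizeof(*cd));
	struct obj *old = NULL;

	if (!cd)
		return -1;
	atomic_init(&cd->refcount, 1);

	for (;;) {
		/* On failure, 'old' is updated to the current slot value. */
		if (atomic_compare_exchange_strong(&slot, &old, cd)) {
			if (old)
				free(old);	/* displaced a dead object;
						 * the kernel defers this
						 * via RCU instead */
			return 0;
		}
		if (!old)
			continue;	/* raced with a remover, retry */
		if (get_ref_not_zero(old)) {
			free(cd);	/* live object already published */
			return 0;
		}
		/* 'old' is dead and stable: loop and try to replace it. */
	}
}
```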
| 5301 | +static void __detach_global_ctx_data(void); |
| 5302 | +DEFINE_STATIC_PERCPU_RWSEM(global_ctx_data_rwsem); |
| 5303 | +static refcount_t global_ctx_data_ref; |
| 5304 | + |
| 5305 | +static int |
| 5306 | +attach_global_ctx_data(struct kmem_cache *ctx_cache) |
| 5307 | +{ |
| 5308 | + struct task_struct *g, *p; |
| 5309 | + struct perf_ctx_data *cd; |
| 5310 | + int ret; |
| 5311 | + |
| 5312 | + if (refcount_inc_not_zero(&global_ctx_data_ref)) |
| 5313 | + return 0; |
| 5314 | + |
| 5315 | + guard(percpu_write)(&global_ctx_data_rwsem); |
| 5316 | + if (refcount_inc_not_zero(&global_ctx_data_ref)) |
| 5317 | + return 0; |
| 5318 | +again: |
| 5319 | + /* Allocate everything */ |
| 5320 | + scoped_guard (rcu) { |
| 5321 | + for_each_process_thread(g, p) { |
| 5322 | + cd = rcu_dereference(p->perf_ctx_data); |
| 5323 | + if (cd && !cd->global) { |
| 5324 | + cd->global = 1; |
| 5325 | + if (!refcount_inc_not_zero(&cd->refcount)) |
| 5326 | + cd = NULL; |
| 5327 | + } |
| 5328 | + if (!cd) { |
| 5329 | + get_task_struct(p); |
| 5330 | + goto alloc; |
| 5331 | + } |
| 5332 | + } |
| 5333 | + } |
| 5334 | + |
| 5335 | + refcount_set(&global_ctx_data_ref, 1); |
| 5336 | + |
| 5337 | + return 0; |
| 5338 | +alloc: |
| 5339 | + ret = attach_task_ctx_data(p, ctx_cache, true); |
| 5340 | + put_task_struct(p); |
| 5341 | + if (ret) { |
| 5342 | + __detach_global_ctx_data(); |
| 5343 | + return ret; |
| 5344 | + } |
| 5345 | + goto again; |
| 5346 | +} |
| 5347 | + |
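attach_global_ctx_data() opens with a double-checked fast path: refcount_inc_not_zero() succeeds locklessly whenever system-wide data is already attached, and the check is repeated under the write lock so two racing first-time attachers cannot both walk the tasklist. The skeleton of that pattern, reduced to a sketch (a plain mutex stands in for the write side of the percpu rwsem; my_ref and do_expensive_init() are illustrative):

```c
#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/refcount.h>

static refcount_t my_ref;		/* 0 == not initialized */
static DEFINE_MUTEX(my_lock);

static int do_expensive_init(void)	/* illustrative one-time setup */
{
	return 0;
}

static int get_or_init(void)
{
	/* Fast path: only succeeds if setup already happened (ref > 0). */
	if (refcount_inc_not_zero(&my_ref))
		return 0;

	guard(mutex)(&my_lock);
	/* Re-check: a racing caller may have initialized meanwhile. */
	if (refcount_inc_not_zero(&my_ref))
		return 0;

	if (do_expensive_init())
		return -ENOMEM;

	refcount_set(&my_ref, 1);
	return 0;
}
```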
| 5348 | +static int |
| 5349 | +attach_perf_ctx_data(struct perf_event *event) |
| 5350 | +{ |
| 5351 | + struct task_struct *task = event->hw.target; |
| 5352 | + struct kmem_cache *ctx_cache = event->pmu->task_ctx_cache; |
| 5353 | + int ret; |
| 5354 | + |
| 5355 | + if (!ctx_cache) |
| 5356 | + return -ENOMEM; |
| 5357 | + |
| 5358 | + if (task) |
| 5359 | + return attach_task_ctx_data(task, ctx_cache, false); |
| 5360 | + |
| 5361 | + ret = attach_global_ctx_data(ctx_cache); |
| 5362 | + if (ret) |
| 5363 | + return ret; |
| 5364 | + |
| 5365 | + event->attach_state |= PERF_ATTACH_GLOBAL_DATA; |
| 5366 | + return 0; |
| 5367 | +} |
| 5368 | + |
| 5369 | +static void |
| 5370 | +detach_task_ctx_data(struct task_struct *p) |
| 5371 | +{ |
| 5372 | + struct perf_ctx_data *cd; |
| 5373 | + |
| 5374 | + scoped_guard (rcu) { |
| 5375 | + cd = rcu_dereference(p->perf_ctx_data); |
| 5376 | + if (!cd || !refcount_dec_and_test(&cd->refcount)) |
| 5377 | + return; |
| 5378 | + } |
| 5379 | + |
| 5380 | + /* |
| 5381 | + * If the cmpxchg fails, a racing attach_task_ctx_data() has |
| 5382 | + * already replaced and freed the dead ctx_data; nothing to do. |
| 5383 | + * See attach_task_ctx_data(). |
| 5384 | + */ |
| 5385 | + if (try_cmpxchg((struct perf_ctx_data **)&p->perf_ctx_data, &cd, NULL)) |
| 5386 | + perf_free_ctx_data_rcu(cd); |
| 5387 | +} |
| 5388 | + |
| 5389 | +static void __detach_global_ctx_data(void) |
| 5390 | +{ |
| 5391 | + struct task_struct *g, *p; |
| 5392 | + struct perf_ctx_data *cd; |
| 5393 | + |
| 5394 | +again: |
| 5395 | + scoped_guard (rcu) { |
| 5396 | + for_each_process_thread(g, p) { |
| 5397 | + cd = rcu_dereference(p->perf_ctx_data); |
| 5398 | + if (!cd || !cd->global) |
| 5399 | + continue; |
| 5400 | + cd->global = 0; |
| 5401 | + get_task_struct(p); |
| 5402 | + goto detach; |
| 5403 | + } |
| 5404 | + } |
| 5405 | + return; |
| 5406 | +detach: |
| 5407 | + detach_task_ctx_data(p); |
| 5408 | + put_task_struct(p); |
| 5409 | + goto again; |
| 5410 | +} |
| 5411 | + |
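This function relies on the restart-scan idiom: the tasklist may only be walked under RCU, but detaching may sleep, so the walk pins one task, leaves the RCU section, does the sleeping work, and rescans from the top. The walk terminates because each pass clears cd->global before bailing out, so already-handled tasks are skipped on the next pass. A generic sketch of the idiom (needs_work() and do_sleeping_work() are illustrative stand-ins):

```c
#include <linux/cleanup.h>
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>	/* for_each_process_thread() */
#include <linux/sched/task.h>	/* get/put_task_struct() */

static bool needs_work(struct task_struct *p);		/* stand-in predicate */
static void do_sleeping_work(struct task_struct *p);	/* may sleep */

static void fixup_all_tasks(void)
{
	struct task_struct *g, *p;

again:
	scoped_guard (rcu) {
		for_each_process_thread(g, p) {
			if (!needs_work(p))
				continue;
			get_task_struct(p);	/* pin p across rcu unlock */
			goto work;
		}
	}
	return;
work:
	do_sleeping_work(p);	/* sleeping is legal here, RCU is not held */
	put_task_struct(p);
	goto again;		/* rescan; handled tasks fail needs_work() */
}
```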
| 5412 | +static void detach_global_ctx_data(void) |
| 5413 | +{ |
| 5414 | + if (refcount_dec_not_one(&global_ctx_data_ref)) |
| 5415 | + return; |
| 5416 | + |
| 5417 | + guard(percpu_write)(&global_ctx_data_rwsem); |
| 5418 | + if (!refcount_dec_and_test(&global_ctx_data_ref)) |
| 5419 | + return; |
| 5420 | + |
| 5421 | + /* remove everything */ |
| 5422 | + __detach_global_ctx_data(); |
| 5423 | +} |
| 5424 | + |
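This is the teardown mirror of the double-checked attach: refcount_dec_not_one() drops any reference that provably is not the last one without taking the lock, and only a potentially final reference pays for the write lock and the full tasklist teardown. Continuing the get_or_init() sketch above (do_expensive_teardown() is illustrative):

```c
static void do_expensive_teardown(void)	/* illustrative */
{
}

static void put_and_maybe_teardown(void)
{
	/* Lockless: succeeds whenever this is provably not the last ref. */
	if (refcount_dec_not_one(&my_ref))
		return;

	guard(mutex)(&my_lock);
	/* Under the lock, the final 1 -> 0 transition is race-free. */
	if (!refcount_dec_and_test(&my_ref))
		return;

	do_expensive_teardown();
}
```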
| 5425 | +static void detach_perf_ctx_data(struct perf_event *event) |
| 5426 | +{ |
| 5427 | + struct task_struct *task = event->hw.target; |
| 5428 | + |
| 5429 | + event->attach_state &= ~PERF_ATTACH_TASK_DATA; |
| 5430 | + |
| 5431 | + if (task) |
| 5432 | + return detach_task_ctx_data(task); |
| 5433 | + |
| 5434 | + if (event->attach_state & PERF_ATTACH_GLOBAL_DATA) { |
| 5435 | + detach_global_ctx_data(); |
| 5436 | + event->attach_state &= ~PERF_ATTACH_GLOBAL_DATA; |
| 5437 | + } |
| 5438 | +} |
| 5439 | + |
5220 | 5440 | static void unaccount_event(struct perf_event *event)
|
5221 | 5441 | {
|
5222 | 5442 | bool dec = false;
|
@@ -5398,6 +5618,9 @@ static void __free_event(struct perf_event *event)
|
5398 | 5618 | if (is_cgroup_event(event))
|
5399 | 5619 | perf_detach_cgroup(event);
|
5400 | 5620 |
|
| 5621 | + if (event->attach_state & PERF_ATTACH_TASK_DATA) |
| 5622 | + detach_perf_ctx_data(event); |
| 5623 | + |
5401 | 5624 | if (event->destroy)
|
5402 | 5625 | event->destroy(event);
|
5403 | 5626 |
|
@@ -8607,10 +8830,58 @@ static void perf_event_task(struct task_struct *task,
|
8607 | 8830 | task_ctx);
|
8608 | 8831 | }
|
8609 | 8832 |
|
| 8833 | +/* |
| 8834 | + * Allocate data for a new task when profiling system-wide |
| 8835 | + * events which require PMU-specific data. |
| 8836 | + */ |
| 8837 | +static void |
| 8838 | +perf_event_alloc_task_data(struct task_struct *child, |
| 8839 | + struct task_struct *parent) |
| 8840 | +{ |
| 8841 | + struct kmem_cache *ctx_cache = NULL; |
| 8842 | + struct perf_ctx_data *cd; |
| 8843 | + |
| 8844 | + if (!refcount_read(&global_ctx_data_ref)) |
| 8845 | + return; |
| 8846 | + |
| 8847 | + scoped_guard (rcu) { |
| 8848 | + cd = rcu_dereference(parent->perf_ctx_data); |
| 8849 | + if (cd) |
| 8850 | + ctx_cache = cd->ctx_cache; |
| 8851 | + } |
| 8852 | + |
| 8853 | + if (!ctx_cache) |
| 8854 | + return; |
| 8855 | + |
| 8856 | + guard(percpu_read)(&global_ctx_data_rwsem); |
| 8857 | + scoped_guard (rcu) { |
| 8858 | + cd = rcu_dereference(child->perf_ctx_data); |
| 8859 | + if (!cd) { |
| 8860 | + /* |
| 8861 | + * The last system-wide event may have been unaccounted |
| 8862 | + * while we were waiting to attach the perf_ctx_data. |
| 8863 | + */ |
| 8864 | + if (!refcount_read(&global_ctx_data_ref)) |
| 8865 | + return; |
| 8866 | + goto attach; |
| 8867 | + } |
| 8868 | + |
| 8869 | + if (!cd->global) { |
| 8870 | + cd->global = 1; |
| 8871 | + refcount_inc(&cd->refcount); |
| 8872 | + } |
| 8873 | + } |
| 8874 | + |
| 8875 | + return; |
| 8876 | +attach: |
| 8877 | + attach_task_ctx_data(child, ctx_cache, true); |
| 8878 | +} |
| 8879 | + |
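Note the lock choice: fork (above) and task exit take only the read side of global_ctx_data_rwsem, while the rare first-attach/last-detach of system-wide data takes the write side. A percpu rwsem makes the frequent read-side sections nearly free and concentrates the cost in the writer. A sketch of the two roles, assuming <linux/percpu-rwsem.h> (names illustrative):

```c
#include <linux/cleanup.h>
#include <linux/percpu-rwsem.h>

DEFINE_STATIC_PERCPU_RWSEM(example_rwsem);

static void fork_or_exit_path(void)		/* frequent, cheap */
{
	guard(percpu_read)(&example_rwsem);
	/* Task-local attach/detach; cannot overlap a global writer. */
}

static void global_attach_or_detach(void)	/* rare, expensive */
{
	guard(percpu_write)(&example_rwsem);
	/* Tasklist-wide setup/teardown; all readers are drained. */
}
```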
8610 | 8880 | void perf_event_fork(struct task_struct *task)
|
8611 | 8881 | {
|
8612 | 8882 | perf_event_task(task, NULL, 1);
|
8613 | 8883 | perf_event_namespaces(task);
|
| 8884 | + perf_event_alloc_task_data(task, current); |
8614 | 8885 | }
|
8615 | 8886 |
|
8616 | 8887 | /*
|
@@ -12490,6 +12761,18 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
|
12490 | 12761 | if (IS_ERR(pmu))
|
12491 | 12762 | return (void*)pmu;
|
12492 | 12763 |
|
| 12764 | + /* |
| 12765 | + * PERF_ATTACH_TASK_DATA is set in event_init()->hw_config(). |
| 12766 | + * The attach must happen right after perf_init_event(); |
| 12767 | + * otherwise, an error in between would make __free_event() |
| 12768 | + * try to detach a perf_ctx_data that was never attached. |
| 12769 | + */ |
| 12770 | + if (event->attach_state & PERF_ATTACH_TASK_DATA) { |
| 12771 | + err = attach_perf_ctx_data(event); |
| 12772 | + if (err) |
| 12773 | + return ERR_PTR(err); |
| 12774 | + } |
| 12775 | + |
12493 | 12776 | /*
|
12494 | 12777 | * Disallow uncore-task events. Similarly, disallow uncore-cgroup
|
12495 | 12778 | * events (they don't make sense as the cgroup will be different
|
@@ -13637,6 +13920,12 @@ void perf_event_exit_task(struct task_struct *child)
|
13637 | 13920 | * At this point we need to send EXIT events to cpu contexts.
|
13638 | 13921 | */
|
13639 | 13922 | perf_event_task(child, NULL, 0);
|
| 13923 | + |
| 13924 | + /* |
| 13925 | + * Detach the perf_ctx_data attached on behalf of system-wide events. |
| 13926 | + */ |
| 13927 | + guard(percpu_read)(&global_ctx_data_rwsem); |
| 13928 | + detach_task_ctx_data(child); |
13640 | 13929 | }
|
13641 | 13930 |
|
13642 | 13931 | static void perf_free_event(struct perf_event *event,
|