Skip to content

Commit 530bfff

Browse files
Kan Liang authored and Peter Zijlstra committed
perf/x86/intel/lbr: Factor out a new struct for generic optimization
To reduce the overhead of a context switch with LBR enabled, some generic optimizations were introduced, e.g. avoiding restoring the LBRs if no one else touched them. The generic optimizations can also be used by Architecture LBR later.

Currently, the fields for the generic optimizations are part of the structure x86_perf_task_context, which will be deprecated by Architecture LBR. A new structure should be introduced for the common fields of the generic optimizations, which can be shared between Architecture LBR and model-specific LBR.

Both 'valid_lbrs' and 'tos' are also used by the generic optimizations, but they are not moved into the new structure, because Architecture LBR is stack-like: the 'valid_lbrs' field, which records the index of the valid LBR, is not required anymore, and the TOS MSR will be removed.

LBR registers may be cleared in a deep C-state. If so, the generic optimizations should not be applied, and perf has to unconditionally restore the LBR registers. A generic function is required to detect a reset due to a deep C-state; lbr_is_reset_in_cstate() is introduced. Currently, for the model-specific LBR, the TOS MSR is used to detect the reset. Another method will be introduced for Architecture LBR later.

Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 799571b commit 530bfff

File tree

2 files changed

+28
-20
lines changed

2 files changed

+28
-20
lines changed

arch/x86/events/intel/lbr.c

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -355,33 +355,37 @@ void intel_pmu_lbr_restore(void *ctx)
355355
wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
356356
}
357357

358+
static __always_inline bool
359+
lbr_is_reset_in_cstate(struct x86_perf_task_context *task_ctx)
360+
{
361+
return !rdlbr_from(task_ctx->tos);
362+
}
363+
358364
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
359365
{
360366
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
361-
u64 tos;
362367

363-
if (task_ctx->lbr_callstack_users == 0 ||
364-
task_ctx->lbr_stack_state == LBR_NONE) {
368+
if (task_ctx->opt.lbr_callstack_users == 0 ||
369+
task_ctx->opt.lbr_stack_state == LBR_NONE) {
365370
intel_pmu_lbr_reset();
366371
return;
367372
}
368373

369-
tos = task_ctx->tos;
370374
/*
371375
* Does not restore the LBR registers, if
372376
* - No one else touched them, and
373-
* - Did not enter C6
377+
* - Was not cleared in Cstate
374378
*/
375379
if ((task_ctx == cpuc->last_task_ctx) &&
376-
(task_ctx->log_id == cpuc->last_log_id) &&
377-
rdlbr_from(tos)) {
378-
task_ctx->lbr_stack_state = LBR_NONE;
380+
(task_ctx->opt.log_id == cpuc->last_log_id) &&
381+
!lbr_is_reset_in_cstate(task_ctx)) {
382+
task_ctx->opt.lbr_stack_state = LBR_NONE;
379383
return;
380384
}
381385

382386
x86_pmu.lbr_restore(task_ctx);
383387

384-
task_ctx->lbr_stack_state = LBR_NONE;
388+
task_ctx->opt.lbr_stack_state = LBR_NONE;
385389
}
386390

387391
void intel_pmu_lbr_save(void *ctx)
@@ -415,17 +419,17 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
415419
{
416420
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
417421

418-
if (task_ctx->lbr_callstack_users == 0) {
419-
task_ctx->lbr_stack_state = LBR_NONE;
422+
if (task_ctx->opt.lbr_callstack_users == 0) {
423+
task_ctx->opt.lbr_stack_state = LBR_NONE;
420424
return;
421425
}
422426

423427
x86_pmu.lbr_save(task_ctx);
424428

425-
task_ctx->lbr_stack_state = LBR_VALID;
429+
task_ctx->opt.lbr_stack_state = LBR_VALID;
426430

427431
cpuc->last_task_ctx = task_ctx;
428-
cpuc->last_log_id = ++task_ctx->log_id;
432+
cpuc->last_log_id = ++task_ctx->opt.log_id;
429433
}
430434

431435
void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
@@ -447,8 +451,8 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
447451
if (!prev_ctx_data || !next_ctx_data)
448452
return;
449453

450-
swap(prev_ctx_data->lbr_callstack_users,
451-
next_ctx_data->lbr_callstack_users);
454+
swap(prev_ctx_data->opt.lbr_callstack_users,
455+
next_ctx_data->opt.lbr_callstack_users);
452456
}
453457

454458
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
@@ -503,7 +507,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
503507

504508
if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
505509
task_ctx = event->ctx->task_ctx_data;
506-
task_ctx->lbr_callstack_users++;
510+
task_ctx->opt.lbr_callstack_users++;
507511
}
508512

509513
/*
@@ -543,7 +547,7 @@ void intel_pmu_lbr_del(struct perf_event *event)
543547
if (branch_user_callstack(cpuc->br_sel) &&
544548
event->ctx->task_ctx_data) {
545549
task_ctx = event->ctx->task_ctx_data;
546-
task_ctx->lbr_callstack_users--;
550+
task_ctx->opt.lbr_callstack_users--;
547551
}
548552

549553
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)

arch/x86/events/perf_event.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -736,16 +736,20 @@ struct x86_pmu {
736736
int (*aux_output_match) (struct perf_event *event);
737737
};
738738

739+
struct x86_perf_task_context_opt {
740+
int lbr_callstack_users;
741+
int lbr_stack_state;
742+
int log_id;
743+
};
744+
739745
struct x86_perf_task_context {
740746
u64 lbr_from[MAX_LBR_ENTRIES];
741747
u64 lbr_to[MAX_LBR_ENTRIES];
742748
u64 lbr_info[MAX_LBR_ENTRIES];
743749
u64 lbr_sel;
744750
int tos;
745751
int valid_lbrs;
746-
int lbr_callstack_users;
747-
int lbr_stack_state;
748-
int log_id;
752+
struct x86_perf_task_context_opt opt;
749753
};
750754

751755
#define x86_add_quirk(func_) \

0 commit comments

Comments (0)