Skip to content

Commit 7d20dd3

Browse files
committed
Merge tag 'x86-apic-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/apic update from Thomas Gleixner:

"A single commit which significantly reduces cache misses in __x2apic_send_IPI_mask() by converting x86_cpu_to_logical_apicid from per-CPU storage to a plain array. This reduces the cost of a full broadcast on a dual-socket system with 256 CPUs from 33 to 11 microseconds."

* tag 'x86-apic-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic: Reduce cache line misses in __x2apic_send_IPI_mask()
2 parents 9a7e0a9 + cc95a07 commit 7d20dd3

File tree

1 file changed

+21
-6
lines changed

1 file changed

+21
-6
lines changed

arch/x86/kernel/apic/x2apic_cluster.c

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,15 @@ struct cluster_mask {
1515
struct cpumask mask;
1616
};
1717

18-
static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
18+
/*
19+
* __x2apic_send_IPI_mask() possibly needs to read
20+
* x86_cpu_to_logical_apicid for all online cpus in a sequential way.
21+
* Using a per-CPU variable would cost one cache line per CPU.
22+
*/
23+
static u32 *x86_cpu_to_logical_apicid __read_mostly;
24+
1925
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
20-
static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
26+
static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
2127
static struct cluster_mask *cluster_hotplug_mask;
2228

2329
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -27,7 +33,7 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
2733

2834
static void x2apic_send_IPI(int cpu, int vector)
2935
{
30-
u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
36+
u32 dest = x86_cpu_to_logical_apicid[cpu];
3137

3238
/* x2apic MSRs are special and need a special fence: */
3339
weak_wrmsr_fence();
@@ -58,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
5864

5965
dest = 0;
6066
for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
61-
dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
67+
dest |= x86_cpu_to_logical_apicid[clustercpu];
6268

6369
if (!dest)
6470
continue;
@@ -94,7 +100,7 @@ static void x2apic_send_IPI_all(int vector)
94100

95101
static u32 x2apic_calc_apicid(unsigned int cpu)
96102
{
97-
return per_cpu(x86_cpu_to_logical_apicid, cpu);
103+
return x86_cpu_to_logical_apicid[cpu];
98104
}
99105

100106
static void init_x2apic_ldr(void)
@@ -103,7 +109,7 @@ static void init_x2apic_ldr(void)
103109
u32 cluster, apicid = apic_read(APIC_LDR);
104110
unsigned int cpu;
105111

106-
this_cpu_write(x86_cpu_to_logical_apicid, apicid);
112+
x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
107113

108114
if (cmsk)
109115
goto update;
@@ -166,12 +172,21 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
166172

167173
static int x2apic_cluster_probe(void)
168174
{
175+
u32 slots;
176+
169177
if (!x2apic_mode)
170178
return 0;
171179

180+
slots = max_t(u32, L1_CACHE_BYTES/sizeof(u32), nr_cpu_ids);
181+
x86_cpu_to_logical_apicid = kcalloc(slots, sizeof(u32), GFP_KERNEL);
182+
if (!x86_cpu_to_logical_apicid)
183+
return 0;
184+
172185
if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
173186
x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
174187
pr_err("Failed to register X2APIC_PREPARE\n");
188+
kfree(x86_cpu_to_logical_apicid);
189+
x86_cpu_to_logical_apicid = NULL;
175190
return 0;
176191
}
177192
init_x2apic_ldr();

0 commit comments

Comments
 (0)