Skip to content

Commit 55e0bf4

Browse files
evangreen authored and palmer-dabbelt committed
RISC-V: Probe misaligned access speed in parallel
Probing for misaligned access speed takes about 0.06 seconds. On a system with 64 cores, doing this in smp_callin() means it's done serially, extending boot time by 3.8 seconds. That's a lot of boot time.

Instead of measuring each CPU serially, let's do the measurements on all CPUs in parallel. If we disable preemption on all CPUs, the jiffies stop ticking, so we can do this in stages of 1) everybody except core 0, then 2) core 0. The allocations are all done outside of on_each_cpu() to avoid calling alloc_pages() with interrupts disabled.

For hotplugged CPUs that come in after the boot time measurement, register CPU hotplug callbacks, and do the measurement there. Interrupts are enabled in those callbacks, so they're fine to do alloc_pages() in.

Reported-by: Jisheng Zhang <[email protected]>
Closes: https://lore.kernel.org/all/mhng-9359993d-6872-4134-83ce-c97debe1cf9a@palmer-ri-x1c9/T/#mae9b8f40016f9df428829d33360144dc5026bcbf
Fixes: 584ea65 ("RISC-V: Probe for unaligned access speed")
Signed-off-by: Evan Green <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 6eb7a64 commit 55e0bf4

File tree

3 files changed

+77
-21
lines changed

3 files changed

+77
-21
lines changed

arch/riscv/include/asm/cpufeature.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed);
3030
/* Per-cpu ISA extensions. */
3131
extern struct riscv_isainfo hart_isa[NR_CPUS];
3232

33-
void check_unaligned_access(int cpu);
3433
void riscv_user_isa_enable(void);
3534

3635
#ifdef CONFIG_RISCV_MISALIGNED

arch/riscv/kernel/cpufeature.c

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <linux/acpi.h>
1010
#include <linux/bitmap.h>
11+
#include <linux/cpuhotplug.h>
1112
#include <linux/ctype.h>
1213
#include <linux/log2.h>
1314
#include <linux/memory.h>
@@ -29,6 +30,7 @@
2930

3031
#define MISALIGNED_ACCESS_JIFFIES_LG2 1
3132
#define MISALIGNED_BUFFER_SIZE 0x4000
33+
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
3234
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
3335

3436
unsigned long elf_hwcap __read_mostly;
@@ -557,30 +559,21 @@ unsigned long riscv_get_elf_hwcap(void)
557559
return hwcap;
558560
}
559561

560-
void check_unaligned_access(int cpu)
562+
static int check_unaligned_access(void *param)
561563
{
564+
int cpu = smp_processor_id();
562565
u64 start_cycles, end_cycles;
563566
u64 word_cycles;
564567
u64 byte_cycles;
565568
int ratio;
566569
unsigned long start_jiffies, now;
567-
struct page *page;
570+
struct page *page = param;
568571
void *dst;
569572
void *src;
570573
long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
571574

572575
if (check_unaligned_access_emulated(cpu))
573-
return;
574-
575-
/* We are already set since the last check */
576-
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
577-
return;
578-
579-
page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
580-
if (!page) {
581-
pr_warn("Can't alloc pages to measure memcpy performance");
582-
return;
583-
}
576+
return 0;
584577

585578
/* Make an unaligned destination buffer. */
586579
dst = (void *)((unsigned long)page_address(page) | 0x1);
@@ -634,7 +627,7 @@ void check_unaligned_access(int cpu)
634627
pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
635628
cpu);
636629

637-
goto out;
630+
return 0;
638631
}
639632

640633
if (word_cycles < byte_cycles)
@@ -648,19 +641,84 @@ void check_unaligned_access(int cpu)
648641
(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
649642

650643
per_cpu(misaligned_access_speed, cpu) = speed;
644+
return 0;
645+
}
651646

652-
out:
653-
__free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
647+
static void check_unaligned_access_nonboot_cpu(void *param)
648+
{
649+
unsigned int cpu = smp_processor_id();
650+
struct page **pages = param;
651+
652+
if (smp_processor_id() != 0)
653+
check_unaligned_access(pages[cpu]);
654+
}
655+
656+
static int riscv_online_cpu(unsigned int cpu)
657+
{
658+
static struct page *buf;
659+
660+
/* We are already set since the last check */
661+
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
662+
return 0;
663+
664+
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
665+
if (!buf) {
666+
pr_warn("Allocation failure, not measuring misaligned performance\n");
667+
return -ENOMEM;
668+
}
669+
670+
check_unaligned_access(buf);
671+
__free_pages(buf, MISALIGNED_BUFFER_ORDER);
672+
return 0;
654673
}
655674

656-
static int __init check_unaligned_access_boot_cpu(void)
675+
/* Measure unaligned access on all CPUs present at boot in parallel. */
676+
static int check_unaligned_access_all_cpus(void)
657677
{
658-
check_unaligned_access(0);
678+
unsigned int cpu;
679+
unsigned int cpu_count = num_possible_cpus();
680+
struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
681+
GFP_KERNEL);
682+
683+
if (!bufs) {
684+
pr_warn("Allocation failure, not measuring misaligned performance\n");
685+
return 0;
686+
}
687+
688+
/*
689+
* Allocate separate buffers for each CPU so there's no fighting over
690+
* cache lines.
691+
*/
692+
for_each_cpu(cpu, cpu_online_mask) {
693+
bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
694+
if (!bufs[cpu]) {
695+
pr_warn("Allocation failure, not measuring misaligned performance\n");
696+
goto out;
697+
}
698+
}
699+
700+
/* Check everybody except 0, who stays behind to tend jiffies. */
701+
on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
702+
703+
/* Check core 0. */
704+
smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
705+
706+
/* Setup hotplug callback for any new CPUs that come online. */
707+
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
708+
riscv_online_cpu, NULL);
709+
710+
out:
659711
unaligned_emulation_finish();
712+
for_each_cpu(cpu, cpu_online_mask) {
713+
if (bufs[cpu])
714+
__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
715+
}
716+
717+
kfree(bufs);
660718
return 0;
661719
}
662720

663-
arch_initcall(check_unaligned_access_boot_cpu);
721+
arch_initcall(check_unaligned_access_all_cpus);
664722

665723
void riscv_user_isa_enable(void)
666724
{

arch/riscv/kernel/smpboot.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,6 @@ asmlinkage __visible void smp_callin(void)
247247
riscv_ipi_enable();
248248

249249
numa_add_cpu(curr_cpuid);
250-
check_unaligned_access(curr_cpuid);
251250
set_cpu_online(curr_cpuid, 1);
252251

253252
if (has_vector()) {

0 commit comments

Comments (0)