Skip to content

Commit d4b6f15

Browse files
committed
LoongArch: Add Non-Uniform Memory Access (NUMA) support
Add Non-Uniform Memory Access (NUMA) support for LoongArch. LoongArch has a 48-bit physical address space, but the HyperTransport I/O bus only supports 40-bit addresses, so we need custom phys_to_dma() and dma_to_phys() implementations that extract the 4-bit node id (bits 44~47) from Loongson-3's 48-bit physical address and embed it into the 40-bit DMA address. The offset of the node id within the 40-bit DMA address can be read from the LS7A_DMA_CFG register. Reviewed-by: WANG Xuerui <[email protected]> Reviewed-by: Jiaxun Yang <[email protected]> Signed-off-by: Huacai Chen <[email protected]>
1 parent 46859ac commit d4b6f15

File tree

17 files changed

+844
-29
lines changed

17 files changed

+844
-29
lines changed

arch/loongarch/Kconfig

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ config LOONGARCH
77
select ARCH_ENABLE_MEMORY_HOTPLUG
88
select ARCH_ENABLE_MEMORY_HOTREMOVE
99
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
10+
select ARCH_HAS_PHYS_TO_DMA
1011
select ARCH_HAS_PTE_SPECIAL
1112
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
1213
select ARCH_INLINE_READ_LOCK if !PREEMPTION
@@ -41,6 +42,7 @@ config LOONGARCH
4142
select ARCH_SUPPORTS_ACPI
4243
select ARCH_SUPPORTS_ATOMIC_RMW
4344
select ARCH_SUPPORTS_HUGETLBFS
45+
select ARCH_SUPPORTS_NUMA_BALANCING
4446
select ARCH_USE_BUILTIN_BSWAP
4547
select ARCH_USE_CMPXCHG_LOCKREF
4648
select ARCH_USE_QUEUED_RWLOCKS
@@ -91,12 +93,15 @@ config LOONGARCH
9193
select HAVE_PERF_EVENTS
9294
select HAVE_REGS_AND_STACK_ACCESS_API
9395
select HAVE_RSEQ
96+
select HAVE_SETUP_PER_CPU_AREA if NUMA
9497
select HAVE_SYSCALL_TRACEPOINTS
9598
select HAVE_TIF_NOHZ
9699
select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP
97100
select IRQ_FORCED_THREADING
98101
select IRQ_LOONGARCH_CPU
99102
select MODULES_USE_ELF_RELA if MODULES
103+
select NEED_PER_CPU_EMBED_FIRST_CHUNK
104+
select NEED_PER_CPU_PAGE_FIRST_CHUNK
100105
select OF
101106
select OF_EARLY_FLATTREE
102107
select PERF_USE_VMALLOC
@@ -105,6 +110,7 @@ config LOONGARCH
105110
select SYSCTL_EXCEPTION_TRACE
106111
select SWIOTLB
107112
select TRACE_IRQFLAGS_SUPPORT
113+
select USE_PERCPU_NUMA_NODE_ID
108114
select ZONE_DMA32
109115

110116
config 32BIT
@@ -335,6 +341,20 @@ config NR_CPUS
335341
This allows you to specify the maximum number of CPUs which this
336342
kernel will support.
337343

344+
config NUMA
345+
bool "NUMA Support"
346+
select ACPI_NUMA if ACPI
347+
help
348+
Say Y to compile the kernel with NUMA (Non-Uniform Memory Access)
349+
support. This option improves performance on systems with more
350+
than one NUMA node; on single node systems it is generally better
351+
to leave it disabled.
352+
353+
config NODES_SHIFT
354+
int
355+
default "6"
356+
depends on NUMA
357+
338358
config FORCE_MAX_ZONEORDER
339359
int "Maximum zone order"
340360
range 14 64 if PAGE_SIZE_64KB
@@ -381,6 +401,7 @@ config ARCH_SELECT_MEMORY_MODEL
381401

382402
config ARCH_FLATMEM_ENABLE
383403
def_bool y
404+
depends on !NUMA
384405

385406
config ARCH_SPARSEMEM_ENABLE
386407
def_bool y

arch/loongarch/include/asm/bootinfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ const char *get_system_type(void);
1313
extern void init_environ(void);
1414
extern void memblock_init(void);
1515
extern void platform_init(void);
16+
extern void plat_swiotlb_setup(void);
17+
extern int __init init_numa_memory(void);
1618

1719
struct loongson_board_info {
1820
int bios_size;
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4+
*/
5+
#ifndef _LOONGARCH_DMA_DIRECT_H
6+
#define _LOONGARCH_DMA_DIRECT_H
7+
8+
dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
9+
phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
10+
11+
#endif /* _LOONGARCH_DMA_DIRECT_H */

arch/loongarch/include/asm/mmzone.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Author: Huacai Chen ([email protected])
4+
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
5+
*/
6+
#ifndef _ASM_MMZONE_H_
7+
#define _ASM_MMZONE_H_
8+
9+
#include <asm/page.h>
10+
#include <asm/numa.h>
11+
12+
extern struct pglist_data *node_data[];
13+
14+
#define NODE_DATA(nid) (node_data[(nid)])
15+
16+
extern void setup_zero_pages(void);
17+
18+
#endif /* _ASM_MMZONE_H_ */

arch/loongarch/include/asm/numa.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Author: Jianmin Lv <[email protected]>
4+
* Huacai Chen <[email protected]>
5+
*
6+
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
7+
*/
8+
9+
#ifndef _ASM_LOONGARCH_NUMA_H
10+
#define _ASM_LOONGARCH_NUMA_H
11+
12+
#include <linux/nodemask.h>
13+
14+
#define NODE_ADDRSPACE_SHIFT 44
15+
16+
#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT)
17+
#define nid_to_addrbase(nid) (_ULCAST_(nid) << NODE_ADDRSPACE_SHIFT)
18+
19+
#ifdef CONFIG_NUMA
20+
21+
extern int numa_off;
22+
extern s16 __cpuid_to_node[CONFIG_NR_CPUS];
23+
extern nodemask_t numa_nodes_parsed __initdata;
24+
25+
struct numa_memblk {
26+
u64 start;
27+
u64 end;
28+
int nid;
29+
};
30+
31+
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
32+
struct numa_meminfo {
33+
int nr_blks;
34+
struct numa_memblk blk[NR_NODE_MEMBLKS];
35+
};
36+
37+
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
38+
39+
extern void __init early_numa_add_cpu(int cpuid, s16 node);
40+
extern void numa_add_cpu(unsigned int cpu);
41+
extern void numa_remove_cpu(unsigned int cpu);
42+
43+
/* Currently a no-op: @cpu is accepted but ignored. */
static inline void numa_clear_node(int cpu)
{
	/* nothing to do */
}
46+
47+
/*
 * Store @node in the __cpuid_to_node[] slot indexed by @cpuid.
 *
 * No range check is performed here; @cpuid must be a valid index into
 * __cpuid_to_node[CONFIG_NR_CPUS].
 */
static inline void set_cpuid_to_node(int cpuid, s16 node)
{
	s16 *slot = &__cpuid_to_node[cpuid];

	*slot = node;
}
51+
52+
extern int early_cpu_to_node(int cpu);
53+
54+
#else
55+
56+
/* Stub: does nothing; both arguments are ignored. */
static inline void early_numa_add_cpu(int cpuid, s16 node)
{
}

/* Stub: does nothing; @cpu is ignored. */
static inline void numa_add_cpu(unsigned int cpu)
{
}

/* Stub: does nothing; @cpu is ignored. */
static inline void numa_remove_cpu(unsigned int cpu)
{
}
59+
60+
/* Stub: always reports node 0, whatever @cpu is. */
static inline int early_cpu_to_node(int cpu) { return 0; }
64+
65+
#endif /* CONFIG_NUMA */
66+
67+
#endif /* _ASM_LOONGARCH_NUMA_H */

arch/loongarch/include/asm/pgtable.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,18 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
541541

542542
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
543543

544+
#ifdef CONFIG_NUMA_BALANCING
545+
/*
 * Return the _PAGE_PROTNONE bit(s) of @pte's raw value, i.e. zero when the
 * flag is clear.
 */
static inline long pte_protnone(pte_t pte)
{
	long protnone;

	protnone = pte_val(pte) & _PAGE_PROTNONE;

	return protnone;
}
549+
550+
/*
 * Return the _PAGE_PROTNONE bit(s) of @pmd's raw value, i.e. zero when the
 * flag is clear.
 */
static inline long pmd_protnone(pmd_t pmd)
{
	long protnone;

	protnone = pmd_val(pmd) & _PAGE_PROTNONE;

	return protnone;
}
554+
#endif /* CONFIG_NUMA_BALANCING */
555+
544556
/*
545557
* We provide our own get_unmapped area to cope with the virtual aliasing
546558
* constraints placed on us by the cache architecture.

arch/loongarch/include/asm/topology.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,27 @@
77

88
#include <linux/smp.h>
99

10+
#ifdef CONFIG_NUMA
11+
12+
extern cpumask_t cpus_on_node[];
13+
14+
#define cpumask_of_node(node) (&cpus_on_node[node])
15+
16+
struct pci_bus;
17+
extern int pcibus_to_node(struct pci_bus *);
18+
19+
#define cpumask_of_pcibus(bus) (cpu_online_mask)
20+
21+
extern unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];
22+
23+
void numa_set_distance(int from, int to, int distance);
24+
25+
#define node_distance(from, to) (node_distances[(from)][(to)])
26+
27+
#else
28+
#define pcibus_to_node(bus) 0
29+
#endif
30+
1031
#ifdef CONFIG_SMP
1132
#define topology_physical_package_id(cpu) (cpu_data[cpu].package)
1233
#define topology_core_id(cpu) (cpu_data[cpu].core)

arch/loongarch/kernel/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,6 @@ obj-$(CONFIG_PROC_FS) += proc.o
2020

2121
obj-$(CONFIG_SMP) += smp.o
2222

23+
obj-$(CONFIG_NUMA) += numa.o
24+
2325
CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS)

arch/loongarch/kernel/acpi.c

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <linux/memblock.h>
1515
#include <linux/serial_core.h>
1616
#include <asm/io.h>
17+
#include <asm/numa.h>
1718
#include <asm/loongson.h>
1819

1920
int acpi_disabled;
@@ -199,6 +200,79 @@ int __init acpi_boot_init(void)
199200
return 0;
200201
}
201202

203+
#ifdef CONFIG_ACPI_NUMA
204+
205+
/*
 * Thin wrapper: forwards @pxm to acpi_map_pxm_to_node() and returns its
 * result unchanged.
 */
static __init int setup_node(int pxm)
{
	int node = acpi_map_pxm_to_node(pxm);

	return node;
}
209+
210+
/*
211+
* Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
212+
* I/O localities since SRAT does not list them. I/O localities are
213+
* not supported at this point.
214+
*/
215+
unsigned int numa_distance_cnt;
216+
217+
static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit)
218+
{
219+
return slit->locality_count;
220+
}
221+
222+
/*
 * Record the distance from node @from to node @to in node_distances[][].
 *
 * @from:     row index into node_distances; must be in [0, MAX_NUMNODES).
 * @to:       column index into node_distances; must be in [0, MAX_NUMNODES).
 * @distance: value to store; must fit in a u8 and must be LOCAL_DISTANCE
 *            when @from == @to.
 *
 * Arguments that violate any of the above are rejected with a one-time
 * warning and the table is left unchanged.
 */
void __init numa_set_distance(int from, int to, int distance)
{
	/* node_distances is [MAX_NUMNODES][MAX_NUMNODES]; never index outside it. */
	bool bad_node = from < 0 || from >= MAX_NUMNODES ||
			to < 0 || to >= MAX_NUMNODES;
	bool bad_dist = (u8)distance != distance ||
			(from == to && distance != LOCAL_DISTANCE);

	if (bad_node || bad_dist) {
		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
				from, to, distance);
		return;
	}

	node_distances[from][to] = distance;
}
232+
233+
/* Callback for Proximity Domain -> CPUID mapping */
234+
void __init
235+
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
236+
{
237+
int pxm, node;
238+
239+
if (srat_disabled())
240+
return;
241+
if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
242+
bad_srat();
243+
return;
244+
}
245+
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
246+
return;
247+
pxm = pa->proximity_domain_lo;
248+
if (acpi_srat_revision >= 2) {
249+
pxm |= (pa->proximity_domain_hi[0] << 8);
250+
pxm |= (pa->proximity_domain_hi[1] << 16);
251+
pxm |= (pa->proximity_domain_hi[2] << 24);
252+
}
253+
node = setup_node(pxm);
254+
if (node < 0) {
255+
pr_err("SRAT: Too many proximity domains %x\n", pxm);
256+
bad_srat();
257+
return;
258+
}
259+
260+
if (pa->apic_id >= CONFIG_NR_CPUS) {
261+
pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u skipped apicid that is too big\n",
262+
pxm, pa->apic_id, node);
263+
return;
264+
}
265+
266+
early_numa_add_cpu(pa->apic_id, node);
267+
268+
set_cpuid_to_node(pa->apic_id, node);
269+
node_set(node, numa_nodes_parsed);
270+
pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", pxm, pa->apic_id, node);
271+
}
272+
273+
void __init acpi_numa_arch_fixup(void) {}
274+
#endif
275+
202276
void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
203277
{
204278
memblock_reserve(addr, size);
@@ -208,6 +282,22 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
208282

209283
#include <acpi/processor.h>
210284

285+
/*
 * Associate logical CPU @cpu / physical id @physid with the node that
 * acpi_get_node() reports for @handle.
 *
 * Nothing is recorded when acpi_get_node() returns NUMA_NO_NODE, or when
 * CONFIG_ACPI_NUMA is not set.  Always returns 0.
 *
 * NOTE(review): numa_nodes_parsed is declared __initdata in asm/numa.h and
 * this function is marked __ref; confirm it is never reached after init
 * memory has been released.
 */
static int __ref acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
	int nid = acpi_get_node(handle);

	if (nid == NUMA_NO_NODE)
		return 0;

	set_cpuid_to_node(physid, nid);
	node_set(nid, numa_nodes_parsed);
	set_cpu_numa_node(cpu, nid);
	cpumask_set_cpu(cpu, cpumask_of_node(nid));
#endif
	return 0;
}
300+
211301
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu)
212302
{
213303
int cpu;
@@ -218,6 +308,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu
218308
return cpu;
219309
}
220310

311+
acpi_map_cpu2node(handle, cpu, physid);
312+
221313
*pcpu = cpu;
222314

223315
return 0;
@@ -226,6 +318,9 @@ EXPORT_SYMBOL(acpi_map_cpu);
226318

227319
int acpi_unmap_cpu(int cpu)
228320
{
321+
#ifdef CONFIG_ACPI_NUMA
322+
set_cpuid_to_node(cpu_logical_map(cpu), NUMA_NO_NODE);
323+
#endif
229324
set_cpu_present(cpu, false);
230325
num_processors--;
231326

0 commit comments

Comments
 (0)