Skip to content

Commit d3b8865

Browse files
committed
Merge branch 'for-5.7/numa' into libnvdimm-for-next
- Promote numa_map_to_online_node() to a cross-kernel generic facility.
- Save x86 NUMA information to allow node-id lookups for reserved memory ranges, and deploy that capability for the e820-pmem driver.
- Introduce phys_to_target_node() to facilitate drivers that want to know the resulting NUMA node if a given reserved address range were onlined.
2 parents 91bf79b + 7b27a86 commit d3b8865

File tree

9 files changed

+140
-92
lines changed

9 files changed

+140
-92
lines changed

arch/powerpc/platforms/pseries/papr_scm.c

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -285,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
285285
return 0;
286286
}
287287

288-
static inline int papr_scm_node(int node)
289-
{
290-
int min_dist = INT_MAX, dist;
291-
int nid, min_node;
292-
293-
if ((node == NUMA_NO_NODE) || node_online(node))
294-
return node;
295-
296-
min_node = first_online_node;
297-
for_each_online_node(nid) {
298-
dist = node_distance(node, nid);
299-
if (dist < min_dist) {
300-
min_dist = dist;
301-
min_node = nid;
302-
}
303-
}
304-
return min_node;
305-
}
306-
307288
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
308289
{
309290
struct device *dev = &p->pdev->dev;
@@ -349,7 +330,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
349330

350331
memset(&ndr_desc, 0, sizeof(ndr_desc));
351332
target_nid = dev_to_node(&p->pdev->dev);
352-
online_nid = papr_scm_node(target_nid);
333+
online_nid = numa_map_to_online_node(target_nid);
353334
ndr_desc.numa_node = online_nid;
354335
ndr_desc.target_node = target_nid;
355336
ndr_desc.res = &p->res;

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,6 +1664,7 @@ config X86_PMEM_LEGACY
16641664
depends on PHYS_ADDR_T_64BIT
16651665
depends on BLK_DEV
16661666
select X86_PMEM_LEGACY_DEVICE
1667+
select NUMA_KEEP_MEMINFO if NUMA
16671668
select LIBNVDIMM
16681669
help
16691670
Treat memory marked using the non-standard e820 type of 12 as used

arch/x86/mm/numa.c

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
2525
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
2626
EXPORT_SYMBOL(node_data);
2727

28-
static struct numa_meminfo numa_meminfo
29-
#ifndef CONFIG_MEMORY_HOTPLUG
30-
__initdata
31-
#endif
32-
;
28+
static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
29+
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
3330

3431
static int numa_distance_cnt;
3532
static u8 *numa_distance;
@@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
168165
(mi->nr_blks - idx) * sizeof(mi->blk[0]));
169166
}
170167

168+
/**
169+
* numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
170+
* @dst: numa_meminfo to append block to
171+
* @idx: Index of memblk to remove
172+
* @src: numa_meminfo to remove memblk from
173+
*/
174+
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
175+
struct numa_meminfo *src)
176+
{
177+
dst->blk[dst->nr_blks++] = src->blk[idx];
178+
numa_remove_memblk_from(idx, src);
179+
}
180+
171181
/**
172182
* numa_add_memblk - Add one numa_memblk to numa_meminfo
173183
* @nid: NUMA node ID of the new memblk
@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
237247
for (i = 0; i < mi->nr_blks; i++) {
238248
struct numa_memblk *bi = &mi->blk[i];
239249

240-
/* make sure all blocks are inside the limits */
250+
/* move / save reserved memory ranges */
251+
if (!memblock_overlaps_region(&memblock.memory,
252+
bi->start, bi->end - bi->start)) {
253+
numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
254+
continue;
255+
}
256+
257+
/* make sure all non-reserved blocks are inside the limits */
241258
bi->start = max(bi->start, low);
242259
bi->end = min(bi->end, high);
243260

244-
/* and there's no empty or non-exist block */
245-
if (bi->start >= bi->end ||
246-
!memblock_overlaps_region(&memblock.memory,
247-
bi->start, bi->end - bi->start))
261+
/* and there's no empty block */
262+
if (bi->start >= bi->end)
248263
numa_remove_memblk_from(i--, mi);
249264
}
250265

@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);
881896

882897
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
883898

884-
#ifdef CONFIG_MEMORY_HOTPLUG
885-
int memory_add_physaddr_to_nid(u64 start)
899+
#ifdef CONFIG_NUMA_KEEP_MEMINFO
900+
static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
886901
{
887-
struct numa_meminfo *mi = &numa_meminfo;
888-
int nid = mi->blk[0].nid;
889902
int i;
890903

891904
for (i = 0; i < mi->nr_blks; i++)
892905
if (mi->blk[i].start <= start && mi->blk[i].end > start)
893-
nid = mi->blk[i].nid;
906+
return mi->blk[i].nid;
907+
return NUMA_NO_NODE;
908+
}
909+
910+
int phys_to_target_node(phys_addr_t start)
911+
{
912+
int nid = meminfo_to_nid(&numa_meminfo, start);
913+
914+
/*
915+
* Prefer online nodes, but if reserved memory might be
916+
* hot-added continue the search with reserved ranges.
917+
*/
918+
if (nid != NUMA_NO_NODE)
919+
return nid;
920+
921+
return meminfo_to_nid(&numa_reserved_meminfo, start);
922+
}
923+
EXPORT_SYMBOL_GPL(phys_to_target_node);
924+
925+
int memory_add_physaddr_to_nid(u64 start)
926+
{
927+
int nid = meminfo_to_nid(&numa_meminfo, start);
928+
929+
if (nid == NUMA_NO_NODE)
930+
nid = numa_meminfo.blk[0].nid;
894931
return nid;
895932
}
896933
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);

drivers/acpi/numa/srat.c

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm)
7272
}
7373
EXPORT_SYMBOL(acpi_map_pxm_to_node);
7474

75-
/**
76-
* acpi_map_pxm_to_online_node - Map proximity ID to online node
77-
* @pxm: ACPI proximity ID
78-
*
79-
* This is similar to acpi_map_pxm_to_node(), but always returns an online
80-
* node. When the mapped node from a given proximity ID is offline, it
81-
* looks up the node distance table and returns the nearest online node.
82-
*
83-
* ACPI device drivers, which are called after the NUMA initialization has
84-
* completed in the kernel, can call this interface to obtain their device
85-
* NUMA topology from ACPI tables. Such drivers do not have to deal with
86-
* offline nodes. A node may be offline when a device proximity ID is
87-
* unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
88-
* "numa=off" on x86.
89-
*/
90-
int acpi_map_pxm_to_online_node(int pxm)
91-
{
92-
int node, min_node;
93-
94-
node = acpi_map_pxm_to_node(pxm);
95-
96-
if (node == NUMA_NO_NODE)
97-
node = 0;
98-
99-
min_node = node;
100-
if (!node_online(node)) {
101-
int min_dist = INT_MAX, dist, n;
102-
103-
for_each_online_node(n) {
104-
dist = node_distance(node, n);
105-
if (dist < min_dist) {
106-
min_dist = dist;
107-
min_node = n;
108-
}
109-
}
110-
}
111-
112-
return min_node;
113-
}
114-
EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
115-
11675
static void __init
11776
acpi_table_print_srat_entry(struct acpi_subtable_header *header)
11877
{

drivers/nvdimm/e820.c

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/memory_hotplug.h>
88
#include <linux/libnvdimm.h>
99
#include <linux/module.h>
10+
#include <linux/numa.h>
1011

1112
static int e820_pmem_remove(struct platform_device *pdev)
1213
{
@@ -16,27 +17,16 @@ static int e820_pmem_remove(struct platform_device *pdev)
1617
return 0;
1718
}
1819

19-
#ifdef CONFIG_MEMORY_HOTPLUG
20-
static int e820_range_to_nid(resource_size_t addr)
21-
{
22-
return memory_add_physaddr_to_nid(addr);
23-
}
24-
#else
25-
static int e820_range_to_nid(resource_size_t addr)
26-
{
27-
return NUMA_NO_NODE;
28-
}
29-
#endif
30-
3120
static int e820_register_one(struct resource *res, void *data)
3221
{
3322
struct nd_region_desc ndr_desc;
3423
struct nvdimm_bus *nvdimm_bus = data;
24+
int nid = phys_to_target_node(res->start);
3525

3626
memset(&ndr_desc, 0, sizeof(ndr_desc));
3727
ndr_desc.res = res;
38-
ndr_desc.numa_node = e820_range_to_nid(res->start);
39-
ndr_desc.target_node = ndr_desc.numa_node;
28+
ndr_desc.numa_node = numa_map_to_online_node(nid);
29+
ndr_desc.target_node = nid;
4030
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
4131
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
4232
return -ENXIO;

include/linux/acpi.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,9 +416,30 @@ extern void acpi_osi_setup(char *str);
416416
extern bool acpi_osi_is_win8(void);
417417

418418
#ifdef CONFIG_ACPI_NUMA
419-
int acpi_map_pxm_to_online_node(int pxm);
420419
int acpi_map_pxm_to_node(int pxm);
421420
int acpi_get_node(acpi_handle handle);
421+
422+
/**
423+
* acpi_map_pxm_to_online_node - Map proximity ID to online node
424+
* @pxm: ACPI proximity ID
425+
*
426+
* This is similar to acpi_map_pxm_to_node(), but always returns an online
427+
* node. When the mapped node from a given proximity ID is offline, it
428+
* looks up the node distance table and returns the nearest online node.
429+
*
430+
* ACPI device drivers, which are called after the NUMA initialization has
431+
* completed in the kernel, can call this interface to obtain their device
432+
* NUMA topology from ACPI tables. Such drivers do not have to deal with
433+
* offline nodes. A node may be offline when a device proximity ID is
434+
* unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
435+
* "numa=off" on x86.
436+
*/
437+
static inline int acpi_map_pxm_to_online_node(int pxm)
438+
{
439+
int node = acpi_map_pxm_to_node(pxm);
440+
441+
return numa_map_to_online_node(node);
442+
}
422443
#else
423444
static inline int acpi_map_pxm_to_online_node(int pxm)
424445
{

include/linux/numa.h

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* SPDX-License-Identifier: GPL-2.0 */
22
#ifndef _LINUX_NUMA_H
33
#define _LINUX_NUMA_H
4-
4+
#include <linux/types.h>
55

66
#ifdef CONFIG_NODES_SHIFT
77
#define NODES_SHIFT CONFIG_NODES_SHIFT
@@ -13,4 +13,32 @@
1313

1414
#define NUMA_NO_NODE (-1)
1515

16+
/* optionally keep NUMA memory info available post init */
17+
#ifdef CONFIG_NUMA_KEEP_MEMINFO
18+
#define __initdata_or_meminfo
19+
#else
20+
#define __initdata_or_meminfo __initdata
21+
#endif
22+
23+
#ifdef CONFIG_NUMA
24+
/* Generic implementation available */
25+
int numa_map_to_online_node(int node);
26+
27+
/*
28+
* Optional architecture specific implementation, users need a "depends
29+
* on $ARCH"
30+
*/
31+
int phys_to_target_node(phys_addr_t addr);
32+
#else
33+
static inline int numa_map_to_online_node(int node)
34+
{
35+
return NUMA_NO_NODE;
36+
}
37+
38+
static inline int phys_to_target_node(phys_addr_t addr)
39+
{
40+
return NUMA_NO_NODE;
41+
}
42+
#endif
43+
1644
#endif /* _LINUX_NUMA_H */

mm/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ config HAVE_FAST_GUP
139139
config ARCH_KEEP_MEMBLOCK
140140
bool
141141

142+
# Keep arch NUMA mapping infrastructure post-init.
143+
config NUMA_KEEP_MEMINFO
144+
bool
145+
142146
config MEMORY_ISOLATION
143147
bool
144148

@@ -154,6 +158,7 @@ config MEMORY_HOTPLUG
154158
bool "Allow for memory hot-add"
155159
depends on SPARSEMEM || X86_64_ACPI_NUMA
156160
depends on ARCH_ENABLE_MEMORY_HOTPLUG
161+
select NUMA_KEEP_MEMINFO if NUMA
157162

158163
config MEMORY_HOTPLUG_SPARSE
159164
def_bool y

mm/mempolicy.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,32 @@ static struct mempolicy default_policy = {
127127

128128
static struct mempolicy preferred_node_policy[MAX_NUMNODES];
129129

130+
/**
131+
* numa_map_to_online_node - Find closest online node
132+
* @node: Node id to start the search
133+
*
134+
* Look up the next closest node by distance if @node is not online.
135+
*/
136+
int numa_map_to_online_node(int node)
137+
{
138+
int min_dist = INT_MAX, dist, n, min_node;
139+
140+
if (node == NUMA_NO_NODE || node_online(node))
141+
return node;
142+
143+
min_node = node;
144+
for_each_online_node(n) {
145+
dist = node_distance(node, n);
146+
if (dist < min_dist) {
147+
min_dist = dist;
148+
min_node = n;
149+
}
150+
}
151+
152+
return min_node;
153+
}
154+
EXPORT_SYMBOL_GPL(numa_map_to_online_node);
155+
130156
struct mempolicy *get_task_policy(struct task_struct *p)
131157
{
132158
struct mempolicy *pol = p->mempolicy;

0 commit comments

Comments
 (0)