Skip to content

Commit 9b06860

Browse files
committed
Merge tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm and dax updates from Dan Williams: "There were multiple touches outside of drivers/nvdimm/ this round to add cross arch compatibility to the devm_memremap_pages() interface, enhance numa information for persistent memory ranges, and add a zero_page_range() dax operation. This cycle I switched from the patchwork api to Konstantin's b4 script for collecting tags (from x86, PowerPC, filesystem, and device-mapper folks), and everything looks to have gone ok there. This has all appeared in -next with no reported issues. Summary: - Add support for region alignment configuration and enforcement to fix compatibility across architectures and PowerPC page size configurations. - Introduce 'zero_page_range' as a dax operation. This facilitates filesystem-dax operation without a block-device. - Introduce phys_to_target_node() to facilitate drivers that want to know the resulting numa node if a given reserved address range was onlined. - Advertise a persistence-domain for of_pmem and papr_scm. The persistence domain indicates where cpu-store cycles need to reach in the platform-memory subsystem before the platform will consider them power-fail protected. - Promote numa_map_to_online_node() to a cross-kernel generic facility. - Save x86 numa information to allow for node-id lookups for reserved memory ranges, and deploy that capability for the e820-pmem driver. - Pick up some miscellaneous minor fixes that missed v5.6-final, including some smatch reports in the ioctl path and some unit test compilation fixups. 
- Fixup some flexible-array declarations" * tag 'libnvdimm-for-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (29 commits) dax: Move mandatory ->zero_page_range() check in alloc_dax() dax,iomap: Add helper dax_iomap_zero() to zero a range dax: Use new dax zero page method for zeroing a page dm,dax: Add dax zero_page_range operation s390,dcssblk,dax: Add dax zero_page_range operation to dcssblk driver dax, pmem: Add a dax operation zero_page_range pmem: Add functions for reading/writing page to/from pmem libnvdimm: Update persistence domain value for of_pmem and papr_scm device tools/test/nvdimm: Fix out of tree build libnvdimm/region: Fix build error libnvdimm/region: Replace zero-length array with flexible-array member libnvdimm/label: Replace zero-length array with flexible-array member ACPI: NFIT: Replace zero-length array with flexible-array member libnvdimm/region: Introduce an 'align' attribute libnvdimm/region: Introduce NDD_LABELING libnvdimm/namespace: Enforce memremap_compat_align() libnvdimm/pfn: Prevent raw mode fallback if pfn-infoblock valid libnvdimm: Out of bounds read in __nd_ioctl() acpi/nfit: improve bounds checking for 'func' mm/memremap_pages: Introduce memremap_compat_align() ...
2 parents 0906d8b + f6d2b80 commit 9b06860

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+737
-261
lines changed

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9670,6 +9670,7 @@ F: drivers/acpi/nfit/*
96709670
F: include/linux/nd.h
96719671
F: include/linux/libnvdimm.h
96729672
F: include/uapi/linux/ndctl.h
9673+
F: tools/testing/nvdimm/
96739674

96749675
LICENSES and SPDX stuff
96759676
M: Thomas Gleixner <tglx@linutronix.de>

arch/powerpc/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ config PPC
122122
select ARCH_HAS_GCOV_PROFILE_ALL
123123
select ARCH_HAS_KCOV
124124
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
125+
select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
125126
select ARCH_HAS_MMIOWB if PPC64
126127
select ARCH_HAS_PHYS_TO_DMA
127128
select ARCH_HAS_PMEM_API

arch/powerpc/mm/ioremap.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <linux/io.h>
44
#include <linux/slab.h>
5+
#include <linux/mmzone.h>
56
#include <linux/vmalloc.h>
67
#include <asm/io-workarounds.h>
78

@@ -97,3 +98,23 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
9798

9899
return NULL;
99100
}
101+
102+
#ifdef CONFIG_ZONE_DEVICE
103+
/*
104+
* Override the generic version in mm/memremap.c.
105+
*
106+
* With hash translation, the direct-map range is mapped with just one
107+
* page size selected by htab_init_page_sizes(). Consult
108+
* mmu_psize_defs[] to determine the minimum page size alignment.
109+
*/
110+
unsigned long memremap_compat_align(void)
111+
{
112+
unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
113+
114+
if (radix_enabled())
115+
return SUBSECTION_SIZE;
116+
return max(SUBSECTION_SIZE, 1UL << shift);
117+
118+
}
119+
EXPORT_SYMBOL_GPL(memremap_compat_align);
120+
#endif

arch/powerpc/platforms/pseries/papr_scm.c

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -286,25 +286,6 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
286286
return 0;
287287
}
288288

289-
static inline int papr_scm_node(int node)
290-
{
291-
int min_dist = INT_MAX, dist;
292-
int nid, min_node;
293-
294-
if ((node == NUMA_NO_NODE) || node_online(node))
295-
return node;
296-
297-
min_node = first_online_node;
298-
for_each_online_node(nid) {
299-
dist = node_distance(node, nid);
300-
if (dist < min_dist) {
301-
min_dist = dist;
302-
min_node = nid;
303-
}
304-
}
305-
return min_node;
306-
}
307-
308289
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
309290
{
310291
struct device *dev = &p->pdev->dev;
@@ -329,7 +310,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
329310
}
330311

331312
dimm_flags = 0;
332-
set_bit(NDD_ALIASING, &dimm_flags);
313+
set_bit(NDD_LABELING, &dimm_flags);
333314

334315
p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
335316
PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
@@ -350,7 +331,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
350331

351332
memset(&ndr_desc, 0, sizeof(ndr_desc));
352333
target_nid = dev_to_node(&p->pdev->dev);
353-
online_nid = papr_scm_node(target_nid);
334+
online_nid = numa_map_to_online_node(target_nid);
354335
ndr_desc.numa_node = online_nid;
355336
ndr_desc.target_node = target_nid;
356337
ndr_desc.res = &p->res;
@@ -362,8 +343,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
362343

363344
if (p->is_volatile)
364345
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
365-
else
346+
else {
347+
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
366348
p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
349+
}
367350
if (!p->region) {
368351
dev_err(dev, "Error registering region %pR from %pOF\n",
369352
ndr_desc.res, p->dn);

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1661,6 +1661,7 @@ config X86_PMEM_LEGACY
16611661
depends on PHYS_ADDR_T_64BIT
16621662
depends on BLK_DEV
16631663
select X86_PMEM_LEGACY_DEVICE
1664+
select NUMA_KEEP_MEMINFO if NUMA
16641665
select LIBNVDIMM
16651666
help
16661667
Treat memory marked using the non-standard e820 type of 12 as used

arch/x86/mm/numa.c

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
2525
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
2626
EXPORT_SYMBOL(node_data);
2727

28-
static struct numa_meminfo numa_meminfo
29-
#ifndef CONFIG_MEMORY_HOTPLUG
30-
__initdata
31-
#endif
32-
;
28+
static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
29+
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
3330

3431
static int numa_distance_cnt;
3532
static u8 *numa_distance;
@@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
168165
(mi->nr_blks - idx) * sizeof(mi->blk[0]));
169166
}
170167

168+
/**
169+
* numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
170+
* @dst: numa_meminfo to append block to
171+
* @idx: Index of memblk to remove
172+
* @src: numa_meminfo to remove memblk from
173+
*/
174+
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
175+
struct numa_meminfo *src)
176+
{
177+
dst->blk[dst->nr_blks++] = src->blk[idx];
178+
numa_remove_memblk_from(idx, src);
179+
}
180+
171181
/**
172182
* numa_add_memblk - Add one numa_memblk to numa_meminfo
173183
* @nid: NUMA node ID of the new memblk
@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
237247
for (i = 0; i < mi->nr_blks; i++) {
238248
struct numa_memblk *bi = &mi->blk[i];
239249

240-
/* make sure all blocks are inside the limits */
250+
/* move / save reserved memory ranges */
251+
if (!memblock_overlaps_region(&memblock.memory,
252+
bi->start, bi->end - bi->start)) {
253+
numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
254+
continue;
255+
}
256+
257+
/* make sure all non-reserved blocks are inside the limits */
241258
bi->start = max(bi->start, low);
242259
bi->end = min(bi->end, high);
243260

244-
/* and there's no empty or non-exist block */
245-
if (bi->start >= bi->end ||
246-
!memblock_overlaps_region(&memblock.memory,
247-
bi->start, bi->end - bi->start))
261+
/* and there's no empty block */
262+
if (bi->start >= bi->end)
248263
numa_remove_memblk_from(i--, mi);
249264
}
250265

@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);
881896

882897
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
883898

884-
#ifdef CONFIG_MEMORY_HOTPLUG
885-
int memory_add_physaddr_to_nid(u64 start)
899+
#ifdef CONFIG_NUMA_KEEP_MEMINFO
900+
static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
886901
{
887-
struct numa_meminfo *mi = &numa_meminfo;
888-
int nid = mi->blk[0].nid;
889902
int i;
890903

891904
for (i = 0; i < mi->nr_blks; i++)
892905
if (mi->blk[i].start <= start && mi->blk[i].end > start)
893-
nid = mi->blk[i].nid;
906+
return mi->blk[i].nid;
907+
return NUMA_NO_NODE;
908+
}
909+
910+
int phys_to_target_node(phys_addr_t start)
911+
{
912+
int nid = meminfo_to_nid(&numa_meminfo, start);
913+
914+
/*
915+
* Prefer online nodes, but if reserved memory might be
916+
* hot-added continue the search with reserved ranges.
917+
*/
918+
if (nid != NUMA_NO_NODE)
919+
return nid;
920+
921+
return meminfo_to_nid(&numa_reserved_meminfo, start);
922+
}
923+
EXPORT_SYMBOL_GPL(phys_to_target_node);
924+
925+
int memory_add_physaddr_to_nid(u64 start)
926+
{
927+
int nid = meminfo_to_nid(&numa_meminfo, start);
928+
929+
if (nid == NUMA_NO_NODE)
930+
nid = numa_meminfo.blk[0].nid;
894931
return nid;
895932
}
896933
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);

drivers/acpi/nfit/core.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ static union acpi_object *acpi_label_info(acpi_handle handle)
360360

361361
static u8 nfit_dsm_revid(unsigned family, unsigned func)
362362
{
363-
static const u8 revid_table[NVDIMM_FAMILY_MAX+1][32] = {
363+
static const u8 revid_table[NVDIMM_FAMILY_MAX+1][NVDIMM_CMD_MAX+1] = {
364364
[NVDIMM_FAMILY_INTEL] = {
365365
[NVDIMM_INTEL_GET_MODES] = 2,
366366
[NVDIMM_INTEL_GET_FWINFO] = 2,
@@ -386,7 +386,7 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func)
386386

387387
if (family > NVDIMM_FAMILY_MAX)
388388
return 0;
389-
if (func > 31)
389+
if (func > NVDIMM_CMD_MAX)
390390
return 0;
391391
id = revid_table[family][func];
392392
if (id == 0)
@@ -492,7 +492,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
492492
* Check for a valid command. For ND_CMD_CALL, we also have to
493493
* make sure that the DSM function is supported.
494494
*/
495-
if (cmd == ND_CMD_CALL && !test_bit(func, &dsm_mask))
495+
if (cmd == ND_CMD_CALL &&
496+
(func > NVDIMM_CMD_MAX || !test_bit(func, &dsm_mask)))
496497
return -ENOTTY;
497498
else if (!test_bit(cmd, &cmd_mask))
498499
return -ENOTTY;
@@ -2026,8 +2027,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
20262027
continue;
20272028
}
20282029

2029-
if (nfit_mem->bdw && nfit_mem->memdev_pmem)
2030+
if (nfit_mem->bdw && nfit_mem->memdev_pmem) {
20302031
set_bit(NDD_ALIASING, &flags);
2032+
set_bit(NDD_LABELING, &flags);
2033+
}
20312034

20322035
/* collate flags across all memdevs for this dimm */
20332036
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
@@ -3492,7 +3495,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
34923495
if (nvdimm && cmd == ND_CMD_CALL &&
34933496
call_pkg->nd_family == NVDIMM_FAMILY_INTEL) {
34943497
func = call_pkg->nd_command;
3495-
if ((1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK)
3498+
if (func > NVDIMM_CMD_MAX ||
3499+
(1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK)
34963500
return -EOPNOTSUPP;
34973501
}
34983502

drivers/acpi/nfit/nfit.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
3535

3636
#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV
37+
#define NVDIMM_CMD_MAX 31
3738

3839
#define NVDIMM_STANDARD_CMDMASK \
3940
(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
@@ -144,32 +145,32 @@ struct nfit_spa {
144145
unsigned long ars_state;
145146
u32 clear_err_unit;
146147
u32 max_ars;
147-
struct acpi_nfit_system_address spa[0];
148+
struct acpi_nfit_system_address spa[];
148149
};
149150

150151
struct nfit_dcr {
151152
struct list_head list;
152-
struct acpi_nfit_control_region dcr[0];
153+
struct acpi_nfit_control_region dcr[];
153154
};
154155

155156
struct nfit_bdw {
156157
struct list_head list;
157-
struct acpi_nfit_data_region bdw[0];
158+
struct acpi_nfit_data_region bdw[];
158159
};
159160

160161
struct nfit_idt {
161162
struct list_head list;
162-
struct acpi_nfit_interleave idt[0];
163+
struct acpi_nfit_interleave idt[];
163164
};
164165

165166
struct nfit_flush {
166167
struct list_head list;
167-
struct acpi_nfit_flush_address flush[0];
168+
struct acpi_nfit_flush_address flush[];
168169
};
169170

170171
struct nfit_memdev {
171172
struct list_head list;
172-
struct acpi_nfit_memory_map memdev[0];
173+
struct acpi_nfit_memory_map memdev[];
173174
};
174175

175176
enum nfit_mem_flags {

drivers/acpi/numa/srat.c

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm)
7272
}
7373
EXPORT_SYMBOL(acpi_map_pxm_to_node);
7474

75-
/**
76-
* acpi_map_pxm_to_online_node - Map proximity ID to online node
77-
* @pxm: ACPI proximity ID
78-
*
79-
* This is similar to acpi_map_pxm_to_node(), but always returns an online
80-
* node. When the mapped node from a given proximity ID is offline, it
81-
* looks up the node distance table and returns the nearest online node.
82-
*
83-
* ACPI device drivers, which are called after the NUMA initialization has
84-
* completed in the kernel, can call this interface to obtain their device
85-
* NUMA topology from ACPI tables. Such drivers do not have to deal with
86-
* offline nodes. A node may be offline when a device proximity ID is
87-
* unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
88-
* "numa=off" on x86.
89-
*/
90-
int acpi_map_pxm_to_online_node(int pxm)
91-
{
92-
int node, min_node;
93-
94-
node = acpi_map_pxm_to_node(pxm);
95-
96-
if (node == NUMA_NO_NODE)
97-
node = 0;
98-
99-
min_node = node;
100-
if (!node_online(node)) {
101-
int min_dist = INT_MAX, dist, n;
102-
103-
for_each_online_node(n) {
104-
dist = node_distance(node, n);
105-
if (dist < min_dist) {
106-
min_dist = dist;
107-
min_node = n;
108-
}
109-
}
110-
}
111-
112-
return min_node;
113-
}
114-
EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
115-
11675
static void __init
11776
acpi_table_print_srat_entry(struct acpi_subtable_header *header)
11877
{

drivers/dax/bus.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,10 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
421421
* device outside of mmap of the resulting character device.
422422
*/
423423
dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
424-
if (!dax_dev)
424+
if (IS_ERR(dax_dev)) {
425+
rc = PTR_ERR(dax_dev);
425426
goto err;
427+
}
426428

427429
/* a device_dax instance is dead while the driver is not attached */
428430
kill_dax(dax_dev);

0 commit comments

Comments
 (0)