Skip to content

Commit c91d713

Browse files
committed
nvdimm: Support sizeof(struct page) > MAX_STRUCT_PAGE_SIZE
Commit 6e9f05d ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE") updated MAX_STRUCT_PAGE_SIZE to account for sizeof(struct page) potentially doubling in the case of CONFIG_KMSAN=y. Unfortunately this doubles the amount of capacity stolen from user-addressable capacity for everyone, regardless of whether they are using the debug option. Revert that change, mandate that MAX_STRUCT_PAGE_SIZE never exceed 64, but allow debug scenarios to proceed with creating debug-sized page maps via a compile option. Note that this only applies to cases where the page map is permanent, i.e. stored in a reservation of the pmem itself ("--map=dev" in "ndctl create-namespace" terms). For the "--map=mem" case, since the allocation is ephemeral for the lifespan of the namespace, there is no explicit restriction. However, the implicit restriction of having enough available "System RAM" to store the page map for the typically large pmem still applies. Fixes: 6e9f05d ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE") Cc: <[email protected]> Cc: Alexander Potapenko <[email protected]> Cc: Marco Elver <[email protected]> Reported-by: Jeff Moyer <[email protected]> Acked-by: Yu Zhao <[email protected]> Link: https://lore.kernel.org/r/167467815773.463042.7022545814443036382.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <[email protected]>
1 parent fb6df43 commit c91d713

File tree

3 files changed

+47
-16
lines changed

3 files changed

+47
-16
lines changed

drivers/nvdimm/Kconfig

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,25 @@ config NVDIMM_KEYS
102102
depends on ENCRYPTED_KEYS
103103
depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m
104104

105+
config NVDIMM_KMSAN
106+
bool
107+
depends on KMSAN
108+
help
109+
KMSAN, and other memory debug facilities, increase the size of
110+
'struct page' to contain extra metadata. This collides with
111+
the NVDIMM capability to store a potentially
112+
larger-than-"System RAM" size 'struct page' array in a
113+
reservation of persistent memory rather than limited /
114+
precious DRAM. However, that reservation needs to persist for
115+
the life of the given NVDIMM namespace. If you are using KMSAN
116+
to debug an issue unrelated to NVDIMMs or DAX then say N to this
117+
option. Otherwise, say Y but understand that any namespaces
118+
(with the page array stored in pmem) created with this build of
119+
the kernel will permanently reserve and strand excess
120+
capacity compared to the CONFIG_KMSAN=n case.
121+
122+
Select N if unsure.
123+
105124
config NVDIMM_TEST_BUILD
106125
tristate "Build the unit test core"
107126
depends on m

drivers/nvdimm/nd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ void devm_namespace_disable(struct device *dev,
652652
struct nd_namespace_common *ndns);
653653
#if IS_ENABLED(CONFIG_ND_CLAIM)
654654
/* max struct page size independent of kernel config */
655-
#define MAX_STRUCT_PAGE_SIZE 128
655+
#define MAX_STRUCT_PAGE_SIZE 64
656656
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
657657
#else
658658
static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,

drivers/nvdimm/pfn_devs.c

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include "pfn.h"
1414
#include "nd.h"
1515

16+
static const bool page_struct_override = IS_ENABLED(CONFIG_NVDIMM_KMSAN);
17+
1618
static void nd_pfn_release(struct device *dev)
1719
{
1820
struct nd_region *nd_region = to_nd_region(dev->parent);
@@ -758,12 +760,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
758760
return -ENXIO;
759761
}
760762

761-
/*
762-
* Note, we use 64 here for the standard size of struct page,
763-
* debugging options may cause it to be larger in which case the
764-
* implementation will limit the pfns advertised through
765-
* ->direct_access() to those that are included in the memmap.
766-
*/
767763
start = nsio->res.start;
768764
size = resource_size(&nsio->res);
769765
npfns = PHYS_PFN(size - SZ_8K);
@@ -782,20 +778,33 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
782778
}
783779
end_trunc = start + size - ALIGN_DOWN(start + size, align);
784780
if (nd_pfn->mode == PFN_MODE_PMEM) {
781+
unsigned long page_map_size = MAX_STRUCT_PAGE_SIZE * npfns;
782+
785783
/*
786784
* The altmap should be padded out to the block size used
787785
* when populating the vmemmap. This *should* be equal to
788786
* PMD_SIZE for most architectures.
789787
*
790-
* Also make sure size of struct page is less than 128. We
791-
* want to make sure we use large enough size here so that
792-
* we don't have a dynamic reserve space depending on
793-
* struct page size. But we also want to make sure we notice
794-
* when we end up adding new elements to struct page.
788+
* Also make sure size of struct page is no larger than
789+
* MAX_STRUCT_PAGE_SIZE. The goal here is compatibility in the
790+
* face of production kernel configurations that reduce the
791+
* 'struct page' size below MAX_STRUCT_PAGE_SIZE. For debug
792+
* kernel configurations that increase the 'struct page' size
793+
* above MAX_STRUCT_PAGE_SIZE, the page_struct_override allows
794+
* for continuing with the capacity that will be wasted when
795+
* reverting to a production kernel configuration. Otherwise,
796+
* those configurations are blocked by default.
795797
*/
796-
BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE);
797-
offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align)
798-
- start;
798+
if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE) {
799+
if (page_struct_override)
800+
page_map_size = sizeof(struct page) * npfns;
801+
else {
802+
dev_err(&nd_pfn->dev,
803+
"Memory debug options prevent using pmem for the page map\n");
804+
return -EINVAL;
805+
}
806+
}
807+
offset = ALIGN(start + SZ_8K + page_map_size, align) - start;
799808
} else if (nd_pfn->mode == PFN_MODE_RAM)
800809
offset = ALIGN(start + SZ_8K, align) - start;
801810
else
@@ -818,7 +827,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
818827
pfn_sb->version_minor = cpu_to_le16(4);
819828
pfn_sb->end_trunc = cpu_to_le32(end_trunc);
820829
pfn_sb->align = cpu_to_le32(nd_pfn->align);
821-
pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
830+
if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE && page_struct_override)
831+
pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page));
832+
else
833+
pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
822834
pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
823835
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
824836
pfn_sb->checksum = cpu_to_le64(checksum);

0 commit comments

Comments
 (0)