Skip to content

Commit f537669

Browse files
kvaneesh authored and djbw committed
libnvdimm/dax: Pick the right alignment default when creating dax devices
Allow arch to provide the supported alignments and use hugepage alignment only if we support hugepage. Right now we depend on compile time configs, whereas this patch switches this to runtime discovery.

Architectures like ppc64 can have THP enabled in code, but then can have hugepage size disabled by the hypervisor. This allows us to create dax devices with PAGE_SIZE alignment in this case.

Existing dax namespace with alignment larger than PAGE_SIZE will fail to initialize in this specific case. We still allow fsdax namespace initialization.

With respect to identifying whether to enable hugepage fault for a dax device, if THP is enabled during compile, we default to taking hugepage fault, and in the dax fault handler, if we find the fault size > alignment, we retry with PAGE_SIZE fault size.

This also addresses the below failure scenario on ppc64:

ndctl create-namespace --mode=devdax | grep align
 "align":16777216,
 "align":16777216

cat /sys/devices/ndbus0/region0/dax0.0/supported_alignments
 65536 16777216

daxio.static-debug -z -o /dev/dax0.0
  Bus error (core dumped)

  $ dmesg | tail
   lpar: Failed hash pte insert with error -4
   hash-mmu: mm: Hashing failure ! EA=0x7fff17000000 access=0x8000000000000006 current=daxio
   hash-mmu: trap=0x300 vsid=0x22cb7a3 ssize=1 base psize=2 psize 10 pte=0xc000000501002b86
   daxio[3860]: bus error (7) at 7fff17000000 nip 7fff973c007c lr 7fff973bff34 code 2 in libpmem.so.1.0.0[7fff973b0000+20000]
   daxio[3860]: code: 792945e4 7d494b78 e95f0098 7d494b78 f93f00a0 4800012c e93f0088 f93f0120
   daxio[3860]: code: e93f00a0 f93f0128 e93f0120 e95f0128 <f9490000> e93f0088 39290008 f93f0110

The failure was due to guest kernel using wrong page size.

The namespaces created with 16M alignment will appear as below on a config with 16M page size disabled.
$ ndctl list -Ni
[
  {
    "dev":"namespace0.1",
    "mode":"fsdax",
    "map":"dev",
    "size":5351931904,
    "uuid":"fc6e9667-461a-4718-82b4-69b24570bddb",
    "align":16777216,
    "blockdev":"pmem0.1",
    "supported_alignments":[
      65536
    ]
  },
  {
    "dev":"namespace0.0",
    "mode":"fsdax",    <==== devdax 16M alignment marked disabled.
    "map":"mem",
    "size":5368709120,
    "uuid":"a4bdf81a-f2ee-4bc6-91db-7b87eddd0484",
    "state":"disabled"
  }
]

Cc: [email protected]
Cc: "Kirill A. Shutemov" <[email protected]>
Signed-off-by: Aneesh Kumar K.V <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Dan Williams <[email protected]>
1 parent a6f197f commit f537669

File tree

3 files changed

+61
-27
lines changed

3 files changed

+61
-27
lines changed

drivers/nvdimm/nd.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,11 +289,7 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
289289
struct nd_pfn *to_nd_pfn(struct device *dev);
290290
#if IS_ENABLED(CONFIG_NVDIMM_PFN)
291291

292-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
293-
#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
294-
#else
295-
#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
296-
#endif
292+
#define MAX_NVDIMM_ALIGN 4
297293

298294
int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
299295
bool is_nd_pfn(struct device *dev);

drivers/nvdimm/pfn_devs.c

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -103,39 +103,42 @@ static ssize_t align_show(struct device *dev,
103103
return sprintf(buf, "%ld\n", nd_pfn->align);
104104
}
105105

106-
static const unsigned long *nd_pfn_supported_alignments(void)
106+
static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments)
107107
{
108-
/*
109-
* This needs to be a non-static variable because the *_SIZE
110-
* macros aren't always constants.
111-
*/
112-
const unsigned long supported_alignments[] = {
113-
PAGE_SIZE,
114-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
115-
HPAGE_PMD_SIZE,
116-
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
117-
HPAGE_PUD_SIZE,
118-
#endif
119-
#endif
120-
0,
121-
};
122-
static unsigned long data[ARRAY_SIZE(supported_alignments)];
123108

124-
memcpy(data, supported_alignments, sizeof(data));
109+
alignments[0] = PAGE_SIZE;
110+
111+
if (has_transparent_hugepage()) {
112+
alignments[1] = HPAGE_PMD_SIZE;
113+
if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
114+
alignments[2] = HPAGE_PUD_SIZE;
115+
}
116+
117+
return alignments;
118+
}
119+
120+
/*
121+
* Use pmd mapping if supported as default alignment
122+
*/
123+
static unsigned long nd_pfn_default_alignment(void)
124+
{
125125

126-
return data;
126+
if (has_transparent_hugepage())
127+
return HPAGE_PMD_SIZE;
128+
return PAGE_SIZE;
127129
}
128130

129131
static ssize_t align_store(struct device *dev,
130132
struct device_attribute *attr, const char *buf, size_t len)
131133
{
132134
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
135+
unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
133136
ssize_t rc;
134137

135138
nd_device_lock(dev);
136139
nvdimm_bus_lock(dev);
137140
rc = nd_size_select_store(dev, buf, &nd_pfn->align,
138-
nd_pfn_supported_alignments());
141+
nd_pfn_supported_alignments(aligns));
139142
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
140143
buf[len - 1] == '\n' ? "" : "\n");
141144
nvdimm_bus_unlock(dev);
@@ -259,7 +262,10 @@ static DEVICE_ATTR_RO(size);
259262
static ssize_t supported_alignments_show(struct device *dev,
260263
struct device_attribute *attr, char *buf)
261264
{
262-
return nd_size_select_show(0, nd_pfn_supported_alignments(), buf);
265+
unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
266+
267+
return nd_size_select_show(0,
268+
nd_pfn_supported_alignments(aligns), buf);
263269
}
264270
static DEVICE_ATTR_RO(supported_alignments);
265271

@@ -302,7 +308,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
302308
return NULL;
303309

304310
nd_pfn->mode = PFN_MODE_NONE;
305-
nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
311+
nd_pfn->align = nd_pfn_default_alignment();
306312
dev = &nd_pfn->dev;
307313
device_initialize(&nd_pfn->dev);
308314
if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
@@ -412,6 +418,21 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
412418
return 0;
413419
}
414420

421+
static bool nd_supported_alignment(unsigned long align)
422+
{
423+
int i;
424+
unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, };
425+
426+
if (align == 0)
427+
return false;
428+
429+
nd_pfn_supported_alignments(supported);
430+
for (i = 0; supported[i]; i++)
431+
if (align == supported[i])
432+
return true;
433+
return false;
434+
}
435+
415436
/**
416437
* nd_pfn_validate - read and validate info-block
417438
* @nd_pfn: fsdax namespace runtime state / properties
@@ -496,6 +517,18 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
496517
return -EOPNOTSUPP;
497518
}
498519

520+
/*
521+
* Check whether we support the alignment. For Dax if the
522+
* superblock alignment is not matching, we won't initialize
523+
* the device.
524+
*/
525+
if (!nd_supported_alignment(align) &&
526+
!memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) {
527+
dev_err(&nd_pfn->dev, "init failed, alignment mismatch: "
528+
"%ld:%ld\n", nd_pfn->align, align);
529+
return -EOPNOTSUPP;
530+
}
531+
499532
if (!nd_pfn->uuid) {
500533
/*
501534
* When probing a namespace via nd_pfn_probe() the uuid

include/linux/huge_mm.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
108108

109109
if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
110110
return true;
111-
111+
/*
112+
* For dax vmas, try to always use hugepage mappings. If the kernel does
113+
* not support hugepages, fsdax mappings will fallback to PAGE_SIZE
114+
* mappings, and device-dax namespaces, that try to guarantee a given
115+
* mapping size, will fail to enable
116+
*/
112117
if (vma_is_dax(vma))
113118
return true;
114119

0 commit comments

Comments
 (0)