Skip to content

Commit 35cdd86

Browse files
committed
Merge tag 'libnvdimm-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm and DAX updates from Dan Williams:

"New support for clearing memory errors when a file is in DAX mode, along with some other fixes and cleanups.

Previously it was only possible to clear these errors using a truncate or hole-punch operation to trigger the filesystem to reallocate the block; now any page-aligned write can opportunistically clear errors as well.

This change spans x86/mm, nvdimm, and fs/dax, and has received the appropriate sign-offs. Thanks to Jane for her work on this.

Summary:
- Add support for clearing memory error via pwrite(2) on DAX
- Fix 'security overwrite' support in the presence of media errors
- Miscellaneous cleanups and fixes for nfit_test (nvdimm unit tests)"

* tag 'libnvdimm-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  pmem: implement pmem_recovery_write()
  pmem: refactor pmem_clear_poison()
  dax: add .recovery_write dax_operation
  dax: introduce DAX_RECOVERY_WRITE dax access mode
  mce: fix set_mce_nospec to always unmap the whole page
  x86/mce: relocate set{clear}_mce_nospec() functions
  acpi/nfit: rely on mce->misc to determine poison granularity
  testing: nvdimm: asm/mce.h is not needed in nfit.c
  testing: nvdimm: iomap: make __nfit_test_ioremap a macro
  nvdimm: Allow overwrite in the presence of disabled dimms
  tools/testing/nvdimm: remove unneeded flush_workqueue
2 parents ea6c3bc + f42e8e5 commit 35cdd86

File tree

24 files changed

+359
-171
lines changed

24 files changed

+359
-171
lines changed

arch/x86/include/asm/set_memory.h

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -86,56 +86,4 @@ bool kernel_page_present(struct page *page);
8686

8787
extern int kernel_set_to_readonly;
8888

89-
#ifdef CONFIG_X86_64
90-
/*
91-
* Prevent speculative access to the page by either unmapping
92-
* it (if we do not require access to any part of the page) or
93-
* marking it uncacheable (if we want to try to retrieve data
94-
* from non-poisoned lines in the page).
95-
*/
96-
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
97-
{
98-
unsigned long decoy_addr;
99-
int rc;
100-
101-
/* SGX pages are not in the 1:1 map */
102-
if (arch_is_platform_page(pfn << PAGE_SHIFT))
103-
return 0;
104-
/*
105-
* We would like to just call:
106-
* set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
107-
* but doing that would radically increase the odds of a
108-
* speculative access to the poison page because we'd have
109-
* the virtual address of the kernel 1:1 mapping sitting
110-
* around in registers.
111-
* Instead we get tricky. We create a non-canonical address
112-
* that looks just like the one we want, but has bit 63 flipped.
113-
* This relies on set_memory_XX() properly sanitizing any __pa()
114-
* results with __PHYSICAL_MASK or PTE_PFN_MASK.
115-
*/
116-
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
117-
118-
if (unmap)
119-
rc = set_memory_np(decoy_addr, 1);
120-
else
121-
rc = set_memory_uc(decoy_addr, 1);
122-
if (rc)
123-
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
124-
return rc;
125-
}
126-
#define set_mce_nospec set_mce_nospec
127-
128-
/* Restore full speculative operation to the pfn. */
129-
static inline int clear_mce_nospec(unsigned long pfn)
130-
{
131-
return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
132-
}
133-
#define clear_mce_nospec clear_mce_nospec
134-
#else
135-
/*
136-
* Few people would run a 32-bit kernel on a machine that supports
137-
* recoverable errors because they have too much memory to boot 32-bit.
138-
*/
139-
#endif
140-
14189
#endif /* _ASM_X86_SET_MEMORY_H */

arch/x86/kernel/cpu/mce/core.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
581581

582582
pfn = mce->addr >> PAGE_SHIFT;
583583
if (!memory_failure(pfn, 0)) {
584-
set_mce_nospec(pfn, whole_page(mce));
584+
set_mce_nospec(pfn);
585585
mce->kflags |= MCE_HANDLED_UC;
586586
}
587587

@@ -1318,7 +1318,7 @@ static void kill_me_maybe(struct callback_head *cb)
13181318

13191319
ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
13201320
if (!ret) {
1321-
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
1321+
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
13221322
sync_core();
13231323
return;
13241324
}
@@ -1344,7 +1344,7 @@ static void kill_me_never(struct callback_head *cb)
13441344
p->mce_count = 0;
13451345
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
13461346
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
1347-
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
1347+
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
13481348
}
13491349

13501350
static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))

arch/x86/mm/pat/set_memory.c

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <linux/vmstat.h>
2020
#include <linux/kernel.h>
2121
#include <linux/cc_platform.h>
22+
#include <linux/set_memory.h>
2223

2324
#include <asm/e820/api.h>
2425
#include <asm/processor.h>
@@ -29,7 +30,6 @@
2930
#include <asm/pgalloc.h>
3031
#include <asm/proto.h>
3132
#include <asm/memtype.h>
32-
#include <asm/set_memory.h>
3333
#include <asm/hyperv-tlfs.h>
3434
#include <asm/mshyperv.h>
3535

@@ -1805,7 +1805,7 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages,
18051805
}
18061806

18071807
/*
1808-
* _set_memory_prot is an internal helper for callers that have been passed
1808+
* __set_memory_prot is an internal helper for callers that have been passed
18091809
* a pgprot_t value from upper layers and a reservation has already been taken.
18101810
* If you want to set the pgprot to a specific page protocol, use the
18111811
* set_memory_xx() functions.
@@ -1914,6 +1914,51 @@ int set_memory_wb(unsigned long addr, int numpages)
19141914
}
19151915
EXPORT_SYMBOL(set_memory_wb);
19161916

1917+
/* Prevent speculative access to a page by marking it not-present */
1918+
#ifdef CONFIG_X86_64
1919+
int set_mce_nospec(unsigned long pfn)
1920+
{
1921+
unsigned long decoy_addr;
1922+
int rc;
1923+
1924+
/* SGX pages are not in the 1:1 map */
1925+
if (arch_is_platform_page(pfn << PAGE_SHIFT))
1926+
return 0;
1927+
/*
1928+
* We would like to just call:
1929+
* set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
1930+
* but doing that would radically increase the odds of a
1931+
* speculative access to the poison page because we'd have
1932+
* the virtual address of the kernel 1:1 mapping sitting
1933+
* around in registers.
1934+
* Instead we get tricky. We create a non-canonical address
1935+
* that looks just like the one we want, but has bit 63 flipped.
1936+
* This relies on set_memory_XX() properly sanitizing any __pa()
1937+
* results with __PHYSICAL_MASK or PTE_PFN_MASK.
1938+
*/
1939+
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
1940+
1941+
rc = set_memory_np(decoy_addr, 1);
1942+
if (rc)
1943+
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
1944+
return rc;
1945+
}
1946+
1947+
static int set_memory_present(unsigned long *addr, int numpages)
1948+
{
1949+
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
1950+
}
1951+
1952+
/* Restore full speculative operation to the pfn. */
1953+
int clear_mce_nospec(unsigned long pfn)
1954+
{
1955+
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
1956+
1957+
return set_memory_present(&addr, 1);
1958+
}
1959+
EXPORT_SYMBOL_GPL(clear_mce_nospec);
1960+
#endif /* CONFIG_X86_64 */
1961+
19171962
int set_memory_x(unsigned long addr, int numpages)
19181963
{
19191964
if (!(__supported_pte_mask & _PAGE_NX))

drivers/acpi/nfit/mce.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
3232
*/
3333
mutex_lock(&acpi_desc_lock);
3434
list_for_each_entry(acpi_desc, &acpi_descs, list) {
35+
unsigned int align = 1UL << MCI_MISC_ADDR_LSB(mce->misc);
3536
struct device *dev = acpi_desc->dev;
3637
int found_match = 0;
3738

@@ -63,8 +64,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
6364

6465
/* If this fails due to an -ENOMEM, there is little we can do */
6566
nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
66-
ALIGN(mce->addr, L1_CACHE_BYTES),
67-
L1_CACHE_BYTES);
67+
ALIGN_DOWN(mce->addr, align), align);
6868
nvdimm_region_notify(nfit_spa->nd_region,
6969
NVDIMM_REVALIDATE_POISON);
7070

drivers/dax/super.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,15 @@ enum dax_device_flags {
117117
* @dax_dev: a dax_device instance representing the logical memory range
118118
* @pgoff: offset in pages from the start of the device to translate
119119
* @nr_pages: number of consecutive pages caller can handle relative to @pfn
120+
* @mode: indicator on normal access or recovery write
120121
* @kaddr: output parameter that returns a virtual address mapping of pfn
121122
* @pfn: output parameter that returns an absolute pfn translation of @pgoff
122123
*
123124
* Return: negative errno if an error occurs, otherwise the number of
124125
* pages accessible at the device relative @pgoff.
125126
*/
126127
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
127-
void **kaddr, pfn_t *pfn)
128+
enum dax_access_mode mode, void **kaddr, pfn_t *pfn)
128129
{
129130
long avail;
130131

@@ -138,7 +139,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
138139
return -EINVAL;
139140

140141
avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
141-
kaddr, pfn);
142+
mode, kaddr, pfn);
142143
if (!avail)
143144
return -ERANGE;
144145
return min(avail, nr_pages);
@@ -194,6 +195,15 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
194195
}
195196
EXPORT_SYMBOL_GPL(dax_zero_page_range);
196197

198+
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
199+
void *addr, size_t bytes, struct iov_iter *iter)
200+
{
201+
if (!dax_dev->ops->recovery_write)
202+
return 0;
203+
return dax_dev->ops->recovery_write(dax_dev, pgoff, addr, bytes, iter);
204+
}
205+
EXPORT_SYMBOL_GPL(dax_recovery_write);
206+
197207
#ifdef CONFIG_ARCH_HAS_PMEM_API
198208
void arch_wb_cache_pmem(void *addr, size_t size);
199209
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)

drivers/md/dm-linear.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,12 @@ static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
165165
}
166166

167167
static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
168-
long nr_pages, void **kaddr, pfn_t *pfn)
168+
long nr_pages, enum dax_access_mode mode, void **kaddr,
169+
pfn_t *pfn)
169170
{
170171
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
171172

172-
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
173+
return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
173174
}
174175

175176
static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
@@ -180,9 +181,18 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
180181
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
181182
}
182183

184+
static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
185+
void *addr, size_t bytes, struct iov_iter *i)
186+
{
187+
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
188+
189+
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
190+
}
191+
183192
#else
184193
#define linear_dax_direct_access NULL
185194
#define linear_dax_zero_page_range NULL
195+
#define linear_dax_recovery_write NULL
186196
#endif
187197

188198
static struct target_type linear_target = {
@@ -200,6 +210,7 @@ static struct target_type linear_target = {
200210
.iterate_devices = linear_iterate_devices,
201211
.direct_access = linear_dax_direct_access,
202212
.dax_zero_page_range = linear_dax_zero_page_range,
213+
.dax_recovery_write = linear_dax_recovery_write,
203214
};
204215

205216
int __init dm_linear_init(void)

drivers/md/dm-log-writes.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -888,11 +888,12 @@ static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
888888
}
889889

890890
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
891-
long nr_pages, void **kaddr, pfn_t *pfn)
891+
long nr_pages, enum dax_access_mode mode, void **kaddr,
892+
pfn_t *pfn)
892893
{
893894
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
894895

895-
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
896+
return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
896897
}
897898

898899
static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
@@ -903,9 +904,18 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
903904
return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
904905
}
905906

907+
static size_t log_writes_dax_recovery_write(struct dm_target *ti,
908+
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
909+
{
910+
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
911+
912+
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
913+
}
914+
906915
#else
907916
#define log_writes_dax_direct_access NULL
908917
#define log_writes_dax_zero_page_range NULL
918+
#define log_writes_dax_recovery_write NULL
909919
#endif
910920

911921
static struct target_type log_writes_target = {
@@ -923,6 +933,7 @@ static struct target_type log_writes_target = {
923933
.io_hints = log_writes_io_hints,
924934
.direct_access = log_writes_dax_direct_access,
925935
.dax_zero_page_range = log_writes_dax_zero_page_range,
936+
.dax_recovery_write = log_writes_dax_recovery_write,
926937
};
927938

928939
static int __init dm_log_writes_init(void)

drivers/md/dm-stripe.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,12 @@ static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
315315
}
316316

317317
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
318-
long nr_pages, void **kaddr, pfn_t *pfn)
318+
long nr_pages, enum dax_access_mode mode, void **kaddr,
319+
pfn_t *pfn)
319320
{
320321
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
321322

322-
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
323+
return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
323324
}
324325

325326
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
@@ -330,9 +331,18 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
330331
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
331332
}
332333

334+
static size_t stripe_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
335+
void *addr, size_t bytes, struct iov_iter *i)
336+
{
337+
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
338+
339+
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
340+
}
341+
333342
#else
334343
#define stripe_dax_direct_access NULL
335344
#define stripe_dax_zero_page_range NULL
345+
#define stripe_dax_recovery_write NULL
336346
#endif
337347

338348
/*
@@ -469,6 +479,7 @@ static struct target_type stripe_target = {
469479
.io_hints = stripe_io_hints,
470480
.direct_access = stripe_dax_direct_access,
471481
.dax_zero_page_range = stripe_dax_zero_page_range,
482+
.dax_recovery_write = stripe_dax_recovery_write,
472483
};
473484

474485
int __init dm_stripe_init(void)

drivers/md/dm-target.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <linux/init.h>
1111
#include <linux/kmod.h>
1212
#include <linux/bio.h>
13+
#include <linux/dax.h>
1314

1415
#define DM_MSG_PREFIX "target"
1516

@@ -142,7 +143,8 @@ static void io_err_release_clone_rq(struct request *clone,
142143
}
143144

144145
static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
145-
long nr_pages, void **kaddr, pfn_t *pfn)
146+
long nr_pages, enum dax_access_mode mode, void **kaddr,
147+
pfn_t *pfn)
146148
{
147149
return -EIO;
148150
}

drivers/md/dm-writecache.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
286286

287287
id = dax_read_lock();
288288

289-
da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn);
289+
da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, DAX_ACCESS,
290+
&wc->memory_map, &pfn);
290291
if (da < 0) {
291292
wc->memory_map = NULL;
292293
r = da;
@@ -308,8 +309,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
308309
i = 0;
309310
do {
310311
long daa;
311-
daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, p - i,
312-
NULL, &pfn);
312+
daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i,
313+
p - i, DAX_ACCESS, NULL, &pfn);
313314
if (daa <= 0) {
314315
r = daa ? daa : -EINVAL;
315316
goto err3;

0 commit comments

Comments
 (0)