Skip to content

Commit 047218e

Browse files
jchu314atgithub authored and djbw committed
dax: add .recovery_write dax_operation
Introduce the dax_recovery_write() operation. The function is used to recover a dax range that contains poison. A typical use case is when a user process receives a SIGBUS with si_code BUS_MCEERR_AR indicating poison(s) in a dax range; in response, the user process issues a pwrite() to the page-aligned dax range, thereby clearing the poison and putting valid data in the range. Reviewed-by: Christoph Hellwig <[email protected]> Signed-off-by: Jane Chu <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Dan Williams <[email protected]>
1 parent e511c4a commit 047218e

File tree

9 files changed

+100
-1
lines changed

9 files changed

+100
-1
lines changed

drivers/dax/super.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
195195
}
196196
EXPORT_SYMBOL_GPL(dax_zero_page_range);
197197

198+
/*
 * dax_recovery_write - hand a recovery write to the dax device's driver.
 *
 * Returns 0 when the device's operations table provides no
 * recovery_write callback; otherwise forwards all arguments to that
 * callback and returns its result unchanged.
 */
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
199+
void *addr, size_t bytes, struct iov_iter *iter)
200+
{
201+
if (!dax_dev->ops->recovery_write)
202+
return 0;
203+
return dax_dev->ops->recovery_write(dax_dev, pgoff, addr, bytes, iter);
204+
}
205+
EXPORT_SYMBOL_GPL(dax_recovery_write);
206+
198207
#ifdef CONFIG_ARCH_HAS_PMEM_API
199208
void arch_wb_cache_pmem(void *addr, size_t size);
200209
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)

drivers/md/dm-linear.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,18 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
188188
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
189189
}
190190

191+
/*
 * dm-linear recovery write: resolve the dax device for this target with
 * linear_dax_pgoff() and forward the request to dax_recovery_write().
 *
 * linear_dax_pgoff() receives pgoff by address, so the page offset passed
 * on may differ from the caller's value (its definition is not shown here).
 * Returns whatever dax_recovery_write() returns.
 */
static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
192+
void *addr, size_t bytes, struct iov_iter *i)
193+
{
194+
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
195+
196+
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
197+
}
198+
191199
#else
192200
#define linear_dax_direct_access NULL
193201
#define linear_dax_zero_page_range NULL
202+
#define linear_dax_recovery_write NULL
194203
#endif
195204

196205
static struct target_type linear_target = {
@@ -208,6 +217,7 @@ static struct target_type linear_target = {
208217
.iterate_devices = linear_iterate_devices,
209218
.direct_access = linear_dax_direct_access,
210219
.dax_zero_page_range = linear_dax_zero_page_range,
220+
.dax_recovery_write = linear_dax_recovery_write,
211221
};
212222

213223
int __init dm_linear_init(void)

drivers/md/dm-log-writes.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,9 +905,18 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
905905
return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
906906
}
907907

908+
/*
 * dm-log-writes recovery write: resolve the dax device for this target
 * with log_writes_dax_pgoff() and forward the request to
 * dax_recovery_write().
 *
 * log_writes_dax_pgoff() receives pgoff by address, so the page offset
 * passed on may differ from the caller's value (its definition is not
 * shown here). Returns whatever dax_recovery_write() returns.
 */
static size_t log_writes_dax_recovery_write(struct dm_target *ti,
909+
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
910+
{
911+
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
912+
913+
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
914+
}
915+
908916
#else
909917
#define log_writes_dax_direct_access NULL
910918
#define log_writes_dax_zero_page_range NULL
919+
#define log_writes_dax_recovery_write NULL
911920
#endif
912921

913922
static struct target_type log_writes_target = {
@@ -925,6 +934,7 @@ static struct target_type log_writes_target = {
925934
.io_hints = log_writes_io_hints,
926935
.direct_access = log_writes_dax_direct_access,
927936
.dax_zero_page_range = log_writes_dax_zero_page_range,
937+
.dax_recovery_write = log_writes_dax_recovery_write,
928938
};
929939

930940
static int __init dm_log_writes_init(void)

drivers/md/dm-stripe.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,18 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
331331
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
332332
}
333333

334+
/*
 * dm-stripe recovery write: resolve the dax device for this target with
 * stripe_dax_pgoff() and forward the request to dax_recovery_write().
 *
 * stripe_dax_pgoff() receives pgoff by address, so the page offset passed
 * on may differ from the caller's value (its definition is not shown here).
 * Returns whatever dax_recovery_write() returns.
 */
static size_t stripe_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
335+
void *addr, size_t bytes, struct iov_iter *i)
336+
{
337+
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
338+
339+
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
340+
}
341+
334342
#else
335343
#define stripe_dax_direct_access NULL
336344
#define stripe_dax_zero_page_range NULL
345+
#define stripe_dax_recovery_write NULL
337346
#endif
338347

339348
/*
@@ -470,6 +479,7 @@ static struct target_type stripe_target = {
470479
.io_hints = stripe_io_hints,
471480
.direct_access = stripe_dax_direct_access,
472481
.dax_zero_page_range = stripe_dax_zero_page_range,
482+
.dax_recovery_write = stripe_dax_recovery_write,
473483
};
474484

475485
int __init dm_stripe_init(void)

drivers/md/dm.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,25 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
11471147
return ret;
11481148
}
11491149

1150+
/*
 * dm_dax_recovery_write - route a dax recovery write to the live dm target.
 *
 * Converts pgoff to a sector (pgoff * PAGE_SECTORS), looks up the live
 * target for that sector and calls the target type's dax_recovery_write
 * hook. Returns 0 if no target is found or the target type has no such
 * hook; otherwise returns the hook's result.
 *
 * dm_put_live_table() is called on every path, including the early-exit
 * ones, to pair with the dm_dax_get_live_target() lookup.
 *
 * NOTE(review): ret is declared long while the function returns size_t,
 * so the hook's size_t result passes through a signed type - confirm this
 * is intended.
 */
static size_t dm_dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
1151+
void *addr, size_t bytes, struct iov_iter *i)
1152+
{
1153+
struct mapped_device *md = dax_get_private(dax_dev);
1154+
sector_t sector = pgoff * PAGE_SECTORS;
1155+
struct dm_target *ti;
1156+
int srcu_idx;
1157+
long ret = 0;
1158+
1159+
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
1160+
if (!ti || !ti->type->dax_recovery_write)
1161+
goto out;
1162+
1163+
ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i);
1164+
out:
1165+
dm_put_live_table(md, srcu_idx);
1166+
return ret;
1167+
}
1168+
11501169
/*
11511170
* A target may call dm_accept_partial_bio only from the map routine. It is
11521171
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
@@ -3147,6 +3166,7 @@ static const struct block_device_operations dm_rq_blk_dops = {
31473166
static const struct dax_operations dm_dax_ops = {
31483167
.direct_access = dm_dax_direct_access,
31493168
.zero_page_range = dm_dax_zero_page_range,
3169+
.recovery_write = dm_dax_recovery_write,
31503170
};
31513171

31523172
/*

drivers/nvdimm/pmem.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,16 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
287287
return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
288288
}
289289

290+
/*
 * pmem recovery_write callback. In this commit it is a placeholder: it
 * ignores all of its arguments, performs no write and always returns 0.
 */
static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
291+
void *addr, size_t bytes, struct iov_iter *i)
292+
{
293+
return 0;
294+
}
295+
290296
static const struct dax_operations pmem_dax_ops = {
291297
.direct_access = pmem_dax_direct_access,
292298
.zero_page_range = pmem_dax_zero_page_range,
299+
.recovery_write = pmem_recovery_write,
293300
};
294301

295302
static ssize_t write_cache_show(struct device *dev,

fs/dax.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
12401240
const size_t size = ALIGN(length + offset, PAGE_SIZE);
12411241
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
12421242
ssize_t map_len;
1243+
bool recovery = false;
12431244
void *kaddr;
12441245

12451246
if (fatal_signal_pending(current)) {
@@ -1249,6 +1250,13 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
12491250

12501251
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
12511252
DAX_ACCESS, &kaddr, NULL);
1253+
if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
1254+
map_len = dax_direct_access(dax_dev, pgoff,
1255+
PHYS_PFN(size), DAX_RECOVERY_WRITE,
1256+
&kaddr, NULL);
1257+
if (map_len > 0)
1258+
recovery = true;
1259+
}
12521260
if (map_len < 0) {
12531261
ret = map_len;
12541262
break;
@@ -1260,7 +1268,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
12601268
if (map_len > end - pos)
12611269
map_len = end - pos;
12621270

1263-
if (iov_iter_rw(iter) == WRITE)
1271+
if (recovery)
1272+
xfer = dax_recovery_write(dax_dev, pgoff, kaddr,
1273+
map_len, iter);
1274+
else if (iov_iter_rw(iter) == WRITE)
12641275
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
12651276
map_len, iter);
12661277
else

include/linux/dax.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ struct dax_operations {
3535
sector_t, sector_t);
3636
/* zero_page_range: required operation. Zero page range */
3737
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
38+
/*
39+
* recovery_write: recover a poisoned range; implemented by DAX device
40+
* drivers capable of clearing poison.
41+
*/
42+
size_t (*recovery_write)(struct dax_device *dax_dev, pgoff_t pgoff,
43+
void *addr, size_t bytes, struct iov_iter *iter);
3844
};
3945

4046
#if IS_ENABLED(CONFIG_DAX)
@@ -45,6 +51,8 @@ void dax_write_cache(struct dax_device *dax_dev, bool wc);
4551
bool dax_write_cache_enabled(struct dax_device *dax_dev);
4652
bool dax_synchronous(struct dax_device *dax_dev);
4753
void set_dax_synchronous(struct dax_device *dax_dev);
54+
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
55+
void *addr, size_t bytes, struct iov_iter *i);
4856
/*
4957
* Check if given mapping is supported by the file / underlying device.
5058
*/
@@ -92,6 +100,11 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
92100
{
93101
return !(vma->vm_flags & VM_SYNC);
94102
}
103+
/*
 * Stub dax_recovery_write(): performs no write and always returns 0.
 * NOTE(review): presumably the variant used when CONFIG_DAX is disabled -
 * the enclosing #if/#else is not visible in this hunk; confirm.
 */
static inline size_t dax_recovery_write(struct dax_device *dax_dev,
104+
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
105+
{
106+
return 0;
107+
}
95108
#endif
96109

97110
void set_dax_nocache(struct dax_device *dax_dev);

include/linux/device-mapper.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,14 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
152152
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
153153
size_t nr_pages);
154154

155+
/*
156+
* Returns:
157+
* != 0 : number of bytes transferred
158+
* 0 : recovery write failed
159+
*/
160+
typedef size_t (*dm_dax_recovery_write_fn)(struct dm_target *ti, pgoff_t pgoff,
161+
void *addr, size_t bytes, struct iov_iter *i);
162+
155163
void dm_error(const char *message);
156164

157165
struct dm_dev {
@@ -201,6 +209,7 @@ struct target_type {
201209
dm_io_hints_fn io_hints;
202210
dm_dax_direct_access_fn direct_access;
203211
dm_dax_zero_page_range_fn dax_zero_page_range;
212+
dm_dax_recovery_write_fn dax_recovery_write;
204213

205214
/* For internal device-mapper use. */
206215
struct list_head list;

0 commit comments

Comments
 (0)