Skip to content

Commit 5e6bdd3

Browse files
Stefan Haberland, axboe
authored and committed
s390/dasd: fix data corruption for thin provisioned devices
Devices are formatted in multiples of tracks. For an Extent Space Efficient (ESE) volume we get errors when accessing unformatted tracks. In this case the driver either formats the track on the fly for write requests or returns zero data for read requests. In case a request spans multiple tracks, the indication of an unformatted track presented for the first track is incorrectly applied to all tracks covered by the request. As a result, tracks containing data will be handled as empty, resulting in zero data being returned on read, or in existing data being overwritten with zeros on write. Fix by determining the track that gets the NRF error. For write requests, only format the track that is surely not formatted. For read requests, all tracks before it have returned valid data and should not be touched. All tracks after the unformatted track might be formatted or not. Those are returned to the block layer to build a new request. When using alias devices there is a chance that multiple write requests trigger a format of the same track, which might lead to data loss. Ensure that a track is formatted only once by maintaining a list of currently processed tracks. Fixes: 5e2b17e ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: [email protected] # 5.3+ Signed-off-by: Stefan Haberland <[email protected]> Reviewed-by: Jan Hoeppner <[email protected]> Reviewed-by: Peter Oberparleiter <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent dcd6589 commit 5e6bdd3

File tree

3 files changed

+193
-12
lines changed

3 files changed

+193
-12
lines changed

drivers/s390/block/dasd.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ struct dasd_block *dasd_alloc_block(void)
178178
(unsigned long) block);
179179
INIT_LIST_HEAD(&block->ccw_queue);
180180
spin_lock_init(&block->queue_lock);
181+
INIT_LIST_HEAD(&block->format_list);
182+
spin_lock_init(&block->format_lock);
181183
timer_setup(&block->timer, dasd_block_timeout, 0);
182184
spin_lock_init(&block->profile.lock);
183185

@@ -1779,20 +1781,26 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
17791781

17801782
if (dasd_ese_needs_format(cqr->block, irb)) {
17811783
if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
1782-
device->discipline->ese_read(cqr);
1784+
device->discipline->ese_read(cqr, irb);
17831785
cqr->status = DASD_CQR_SUCCESS;
17841786
cqr->stopclk = now;
17851787
dasd_device_clear_timer(device);
17861788
dasd_schedule_device_bh(device);
17871789
return;
17881790
}
1789-
fcqr = device->discipline->ese_format(device, cqr);
1791+
fcqr = device->discipline->ese_format(device, cqr, irb);
17901792
if (IS_ERR(fcqr)) {
1793+
if (PTR_ERR(fcqr) == -EINVAL) {
1794+
cqr->status = DASD_CQR_ERROR;
1795+
return;
1796+
}
17911797
/*
17921798
* If we can't format now, let the request go
17931799
* one extra round. Maybe we can format later.
17941800
*/
17951801
cqr->status = DASD_CQR_QUEUED;
1802+
dasd_schedule_device_bh(device);
1803+
return;
17961804
} else {
17971805
fcqr->status = DASD_CQR_QUEUED;
17981806
cqr->status = DASD_CQR_QUEUED;
@@ -2748,11 +2756,13 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
27482756
{
27492757
struct request *req;
27502758
blk_status_t error = BLK_STS_OK;
2759+
unsigned int proc_bytes;
27512760
int status;
27522761

27532762
req = (struct request *) cqr->callback_data;
27542763
dasd_profile_end(cqr->block, cqr, req);
27552764

2765+
proc_bytes = cqr->proc_bytes;
27562766
status = cqr->block->base->discipline->free_cp(cqr, req);
27572767
if (status < 0)
27582768
error = errno_to_blk_status(status);
@@ -2783,7 +2793,18 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
27832793
blk_mq_end_request(req, error);
27842794
blk_mq_run_hw_queues(req->q, true);
27852795
} else {
2786-
blk_mq_complete_request(req);
2796+
/*
2797+
* Partial completed requests can happen with ESE devices.
2798+
* During read we might have gotten a NRF error and have to
2799+
* complete a request partially.
2800+
*/
2801+
if (proc_bytes) {
2802+
blk_update_request(req, BLK_STS_OK,
2803+
blk_rq_bytes(req) - proc_bytes);
2804+
blk_mq_requeue_request(req, true);
2805+
} else {
2806+
blk_mq_complete_request(req);
2807+
}
27872808
}
27882809
}
27892810

drivers/s390/block/dasd_eckd.c

Lines changed: 156 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,45 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head)
207207
geo->head |= head;
208208
}
209209

210+
/*
211+
* calculate failing track from sense data depending if
212+
* it is an EAV device or not
213+
*/
214+
static int dasd_eckd_track_from_irb(struct irb *irb, struct dasd_device *device,
215+
sector_t *track)
216+
{
217+
struct dasd_eckd_private *private = device->private;
218+
u8 *sense = NULL;
219+
u32 cyl;
220+
u8 head;
221+
222+
sense = dasd_get_sense(irb);
223+
if (!sense) {
224+
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
225+
"ESE error no sense data\n");
226+
return -EINVAL;
227+
}
228+
if (!(sense[27] & DASD_SENSE_BIT_2)) {
229+
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
230+
"ESE error no valid track data\n");
231+
return -EINVAL;
232+
}
233+
234+
if (sense[27] & DASD_SENSE_BIT_3) {
235+
/* enhanced addressing */
236+
cyl = sense[30] << 20;
237+
cyl |= (sense[31] & 0xF0) << 12;
238+
cyl |= sense[28] << 8;
239+
cyl |= sense[29];
240+
} else {
241+
cyl = sense[29] << 8;
242+
cyl |= sense[30];
243+
}
244+
head = sense[31] & 0x0F;
245+
*track = cyl * private->rdc_data.trk_per_cyl + head;
246+
return 0;
247+
}
248+
210249
static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data,
211250
struct dasd_device *device)
212251
{
@@ -2986,22 +3025,57 @@ static int dasd_eckd_format_device(struct dasd_device *base,
29863025
0, NULL);
29873026
}
29883027

3028+
static bool test_and_set_format_track(struct dasd_format_entry *to_format,
3029+
struct dasd_block *block)
3030+
{
3031+
struct dasd_format_entry *format;
3032+
unsigned long flags;
3033+
bool rc = false;
3034+
3035+
spin_lock_irqsave(&block->format_lock, flags);
3036+
list_for_each_entry(format, &block->format_list, list) {
3037+
if (format->track == to_format->track) {
3038+
rc = true;
3039+
goto out;
3040+
}
3041+
}
3042+
list_add_tail(&to_format->list, &block->format_list);
3043+
3044+
out:
3045+
spin_unlock_irqrestore(&block->format_lock, flags);
3046+
return rc;
3047+
}
3048+
3049+
static void clear_format_track(struct dasd_format_entry *format,
3050+
struct dasd_block *block)
3051+
{
3052+
unsigned long flags;
3053+
3054+
spin_lock_irqsave(&block->format_lock, flags);
3055+
list_del_init(&format->list);
3056+
spin_unlock_irqrestore(&block->format_lock, flags);
3057+
}
3058+
29893059
/*
29903060
* Callback function to free ESE format requests.
29913061
*/
29923062
static void dasd_eckd_ese_format_cb(struct dasd_ccw_req *cqr, void *data)
29933063
{
29943064
struct dasd_device *device = cqr->startdev;
29953065
struct dasd_eckd_private *private = device->private;
3066+
struct dasd_format_entry *format = data;
29963067

3068+
clear_format_track(format, cqr->basedev->block);
29973069
private->count--;
29983070
dasd_ffree_request(cqr, device);
29993071
}
30003072

30013073
static struct dasd_ccw_req *
3002-
dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
3074+
dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
3075+
struct irb *irb)
30033076
{
30043077
struct dasd_eckd_private *private;
3078+
struct dasd_format_entry *format;
30053079
struct format_data_t fdata;
30063080
unsigned int recs_per_trk;
30073081
struct dasd_ccw_req *fcqr;
@@ -3011,23 +3085,39 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
30113085
struct request *req;
30123086
sector_t first_trk;
30133087
sector_t last_trk;
3088+
sector_t curr_trk;
30143089
int rc;
30153090

30163091
req = cqr->callback_data;
3017-
base = cqr->block->base;
3092+
block = cqr->block;
3093+
base = block->base;
30183094
private = base->private;
3019-
block = base->block;
30203095
blksize = block->bp_block;
30213096
recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
3097+
format = &startdev->format_entry;
30223098

30233099
first_trk = blk_rq_pos(req) >> block->s2b_shift;
30243100
sector_div(first_trk, recs_per_trk);
30253101
last_trk =
30263102
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
30273103
sector_div(last_trk, recs_per_trk);
3104+
rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
3105+
if (rc)
3106+
return ERR_PTR(rc);
30283107

3029-
fdata.start_unit = first_trk;
3030-
fdata.stop_unit = last_trk;
3108+
if (curr_trk < first_trk || curr_trk > last_trk) {
3109+
DBF_DEV_EVENT(DBF_WARNING, startdev,
3110+
"ESE error track %llu not within range %llu - %llu\n",
3111+
curr_trk, first_trk, last_trk);
3112+
return ERR_PTR(-EINVAL);
3113+
}
3114+
format->track = curr_trk;
3115+
/* test if track is already in formatting by another thread */
3116+
if (test_and_set_format_track(format, block))
3117+
return ERR_PTR(-EEXIST);
3118+
3119+
fdata.start_unit = curr_trk;
3120+
fdata.stop_unit = curr_trk;
30313121
fdata.blksize = blksize;
30323122
fdata.intensity = private->uses_cdl ? DASD_FMT_INT_COMPAT : 0;
30333123

@@ -3044,36 +3134,95 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
30443134
return fcqr;
30453135

30463136
fcqr->callback = dasd_eckd_ese_format_cb;
3137+
fcqr->callback_data = (void *) format;
30473138

30483139
return fcqr;
30493140
}
30503141

30513142
/*
30523143
* When data is read from an unformatted area of an ESE volume, this function
30533144
* returns zeroed data and thereby mimics a read of zero data.
3145+
*
3146+
* The first unformatted track is the one that got the NRF error, the address is
3147+
* encoded in the sense data.
3148+
*
3149+
* All tracks before have returned valid data and should not be touched.
3150+
* All tracks after the unformatted track might be formatted or not. This is
3151+
* currently not known, remember the processed data and return the remainder of
3152+
* the request to the blocklayer in __dasd_cleanup_cqr().
30543153
*/
3055-
static void dasd_eckd_ese_read(struct dasd_ccw_req *cqr)
3154+
static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
30563155
{
3156+
struct dasd_eckd_private *private;
3157+
sector_t first_trk, last_trk;
3158+
sector_t first_blk, last_blk;
30573159
unsigned int blksize, off;
3160+
unsigned int recs_per_trk;
30583161
struct dasd_device *base;
30593162
struct req_iterator iter;
3163+
struct dasd_block *block;
3164+
unsigned int skip_block;
3165+
unsigned int blk_count;
30603166
struct request *req;
30613167
struct bio_vec bv;
3168+
sector_t curr_trk;
3169+
sector_t end_blk;
30623170
char *dst;
3171+
int rc;
30633172

30643173
req = (struct request *) cqr->callback_data;
30653174
base = cqr->block->base;
30663175
blksize = base->block->bp_block;
3176+
block = cqr->block;
3177+
private = base->private;
3178+
skip_block = 0;
3179+
blk_count = 0;
3180+
3181+
recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
3182+
first_trk = first_blk = blk_rq_pos(req) >> block->s2b_shift;
3183+
sector_div(first_trk, recs_per_trk);
3184+
last_trk = last_blk =
3185+
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
3186+
sector_div(last_trk, recs_per_trk);
3187+
rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
3188+
if (rc)
3189+
return rc;
3190+
3191+
/* sanity check if the current track from sense data is valid */
3192+
if (curr_trk < first_trk || curr_trk > last_trk) {
3193+
DBF_DEV_EVENT(DBF_WARNING, base,
3194+
"ESE error track %llu not within range %llu - %llu\n",
3195+
curr_trk, first_trk, last_trk);
3196+
return -EINVAL;
3197+
}
3198+
3199+
/*
3200+
* if not the first track got the NRF error we have to skip over valid
3201+
* blocks
3202+
*/
3203+
if (curr_trk != first_trk)
3204+
skip_block = curr_trk * recs_per_trk - first_blk;
3205+
3206+
/* we have no information beyond the current track */
3207+
end_blk = (curr_trk + 1) * recs_per_trk;
30673208

30683209
rq_for_each_segment(bv, req, iter) {
30693210
dst = page_address(bv.bv_page) + bv.bv_offset;
30703211
for (off = 0; off < bv.bv_len; off += blksize) {
3071-
if (dst && rq_data_dir(req) == READ) {
3212+
if (first_blk + blk_count >= end_blk) {
3213+
cqr->proc_bytes = blk_count * blksize;
3214+
return 0;
3215+
}
3216+
if (dst && !skip_block) {
30723217
dst += off;
30733218
memset(dst, 0, blksize);
3219+
} else {
3220+
skip_block--;
30743221
}
3222+
blk_count++;
30753223
}
30763224
}
3225+
return 0;
30773226
}
30783227

30793228
/*

drivers/s390/block/dasd_int.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ struct dasd_ccw_req {
187187

188188
void (*callback)(struct dasd_ccw_req *, void *data);
189189
void *callback_data;
190+
unsigned int proc_bytes; /* bytes for partial completion */
190191
};
191192

192193
/*
@@ -387,8 +388,9 @@ struct dasd_discipline {
387388
int (*ext_pool_warn_thrshld)(struct dasd_device *);
388389
int (*ext_pool_oos)(struct dasd_device *);
389390
int (*ext_pool_exhaust)(struct dasd_device *, struct dasd_ccw_req *);
390-
struct dasd_ccw_req *(*ese_format)(struct dasd_device *, struct dasd_ccw_req *);
391-
void (*ese_read)(struct dasd_ccw_req *);
391+
struct dasd_ccw_req *(*ese_format)(struct dasd_device *,
392+
struct dasd_ccw_req *, struct irb *);
393+
int (*ese_read)(struct dasd_ccw_req *, struct irb *);
392394
};
393395

394396
extern struct dasd_discipline *dasd_diag_discipline_pointer;
@@ -474,6 +476,11 @@ struct dasd_profile {
474476
spinlock_t lock;
475477
};
476478

479+
struct dasd_format_entry {
480+
struct list_head list;
481+
sector_t track;
482+
};
483+
477484
struct dasd_device {
478485
/* Block device stuff. */
479486
struct dasd_block *block;
@@ -539,6 +546,7 @@ struct dasd_device {
539546
struct dentry *debugfs_dentry;
540547
struct dentry *hosts_dentry;
541548
struct dasd_profile profile;
549+
struct dasd_format_entry format_entry;
542550
};
543551

544552
struct dasd_block {
@@ -564,6 +572,9 @@ struct dasd_block {
564572

565573
struct dentry *debugfs_dentry;
566574
struct dasd_profile profile;
575+
576+
struct list_head format_list;
577+
spinlock_t format_lock;
567578
};
568579

569580
struct dasd_attention_data {

0 commit comments

Comments
 (0)