Skip to content

Commit 1a51656

Browse files
author
Robert Breker
committed
Add support for discard to tapdisk
With this commit, tapdisk is able to understand discard requests and pass them through to tapdisk drivers that support them. Each discard message on the Xen blkif ring is handled as follows: 1. xenio_blkif_get_request() gets discard requests from the ring. It decodes each request depending on the blkif protocol type and converts it into a generic blkif_request_discard_t using blkif_get_req_discard. 2. tapdisk_xenblkif_make_vbd_request() increments the message counter blkback_stats.st_ds_req for discards. 3. tapdisk_xenblkif_parse_request_discard() converts the discard request into a td_vbd_request with a start sector (sec) and a length (discard_nr_sectors). 4. The td_vbd_request is encapsulated into a td_request_t and is sanity-checked in tapdisk_image_check_td_request, tapdisk_image_check_request and the new td_queue_discard method. 5. Ultimately the request is handled in td_queue_discard. If the tapdisk driver implements td_queue_discard, the request is passed through to it. If not, the request is failed with -EOPNOTSUPP. This commit has been dev-tested using: * v8 Windows PV drivers that include XenDisk and thereby implement discard * Linux xen-blkfront that implements discard Signed-off-by: Robert Breker <robert.breker@citrix.com>
1 parent 8ab7496 commit 1a51656

File tree

9 files changed

+162
-10
lines changed

9 files changed

+162
-10
lines changed

drivers/tapdisk-image.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,17 @@ tapdisk_image_check_td_request(td_image_t *image, td_request_t treq)
9494
info = &image->info;
9595
rdonly = td_flag_test(image->flags, TD_OPEN_RDONLY);
9696

97-
if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE)
97+
if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE &&
98+
treq.op != TD_OP_DISCARD)
9899
goto fail;
99100

100-
if (treq.op == TD_OP_WRITE && rdonly) {
101+
if ((treq.op == TD_OP_WRITE || treq.op == TD_OP_DISCARD) && rdonly) {
101102
err = -EPERM;
102103
goto fail;
103104
}
104105

105-
if (treq.secs <= 0 || treq.sec + treq.secs > info->size)
106+
if ((treq.secs <= 0 || treq.sec + treq.secs > info->size) &&
107+
treq.op != TD_OP_DISCARD)
106108
goto fail;
107109

108110
return 0;
@@ -140,6 +142,9 @@ tapdisk_image_check_request(td_image_t *image, td_vbd_request_t *vreq)
140142
secs += vreq->iov[i].secs;
141143

142144
switch (vreq->op) {
145+
case TD_OP_DISCARD:
146+
secs = vreq->discard_nr_sectors;
147+
/* fall through */
143148
case TD_OP_WRITE:
144149
if (rdonly) {
145150
err = -EPERM;

drivers/tapdisk-interface.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,42 @@ td_queue_read(td_image_t *image, td_request_t treq)
236236
td_complete_request(treq, err);
237237
}
238238

239+
240+
void
241+
td_queue_discard(td_image_t *image, td_request_t treq)
242+
{
243+
int err;
244+
td_driver_t *driver;
245+
246+
driver = image->driver;
247+
if (!driver) {
248+
err = -ENODEV;
249+
goto fail;
250+
}
251+
252+
if (!td_flag_test(driver->state, TD_DRIVER_OPEN)) {
253+
err = -EBADF;
254+
goto fail;
255+
}
256+
257+
if (!driver->ops->td_queue_discard) {
258+
err = -EOPNOTSUPP;
259+
goto fail;
260+
}
261+
262+
err = tapdisk_image_check_td_request(image, treq);
263+
if (err)
264+
goto fail;
265+
266+
driver->ops->td_queue_discard(driver, treq);
267+
268+
return;
269+
270+
fail:
271+
td_complete_request(treq, err);
272+
}
273+
274+
239275
void
240276
td_forward_request(td_request_t treq)
241277
{

drivers/tapdisk-interface.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ int td_set_quantum(td_image_t *, int);
3333

3434
void td_queue_write(td_image_t *, td_request_t);
3535
void td_queue_read(td_image_t *, td_request_t);
36+
void td_queue_discard(td_image_t *, td_request_t);
3637
void td_forward_request(td_request_t);
3738
void td_complete_request(td_request_t, int);
3839

drivers/tapdisk-vbd.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,10 @@ __tapdisk_vbd_reissue_td_request(td_vbd_t *vbd,
13631363
case TD_OP_READ:
13641364
td_queue_read(parent, treq);
13651365
break;
1366+
1367+
case TD_OP_DISCARD:
1368+
td_queue_discard(parent, treq);
1369+
break;
13661370
}
13671371

13681372
done:
@@ -1485,6 +1489,19 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
14851489
goto fail;
14861490
}
14871491

1492+
if(vreq->op==TD_OP_DISCARD) {
1493+
treq.sidx = 1;
1494+
treq.sec = sec;
1495+
treq.image = image;
1496+
treq.cb = tapdisk_vbd_complete_td_request;
1497+
treq.cb_data = NULL;
1498+
treq.vreq = vreq;
1499+
treq.op = TD_OP_DISCARD;
1500+
td_queue_discard(treq.image, treq);
1501+
err = 0;
1502+
goto out;
1503+
}
1504+
14881505
for (i = 0; i < vreq->iovcnt; i++) {
14891506
struct td_iovec *iov = &vreq->iov[i];
14901507

@@ -1529,6 +1546,11 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
15291546
vbd->vdi_stats.stats->read_reqs_submitted++;
15301547
td_queue_read(treq.image, treq);
15311548
break;
1549+
1550+
case TD_OP_DISCARD:
1551+
treq.op = TD_OP_DISCARD;
1552+
td_queue_discard(treq.image, treq);
1553+
break;
15321554
}
15331555

15341556
DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08"PRIx64" secs 0x%04x "

drivers/tapdisk.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949

5050
#include <time.h>
5151
#include <stdint.h>
52+
#include <stdbool.h>
5253

5354
#include "list.h"
5455
#include "compiler.h"
@@ -74,6 +75,7 @@ extern unsigned int PAGE_SHIFT;
7475

7576
#define TD_OP_READ 0
7677
#define TD_OP_WRITE 1
78+
#define TD_OP_DISCARD 2
7779

7880
#define TD_OPEN_QUIET 0x00001
7981
#define TD_OPEN_QUERY 0x00002
@@ -126,6 +128,7 @@ struct td_disk_info {
126128
td_sector_t size;
127129
long sector_size;
128130
uint32_t info;
131+
bool discard_supported;
129132
};
130133

131134
struct td_iovec {
@@ -155,6 +158,8 @@ struct td_vbd_request {
155158
td_vbd_t *vbd;
156159
struct list_head next;
157160
struct list_head *list_head;
161+
162+
uint64_t discard_nr_sectors;
158163
};
159164

160165
struct td_request {
@@ -188,6 +193,7 @@ struct tap_disk {
188193
int (*td_validate_parent) (td_driver_t *, td_driver_t *, td_flag_t);
189194
void (*td_queue_read) (td_driver_t *, td_request_t);
190195
void (*td_queue_write) (td_driver_t *, td_request_t);
196+
void (*td_queue_discard) (td_driver_t *, td_request_t);
191197
void (*td_debug) (td_driver_t *);
192198
void (*td_stats) (td_driver_t *, td_stats_t *);
193199

drivers/td-ctx.c

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,17 @@ xenio_pending_blkif(struct td_xenio_ctx * const ctx)
125125
dst->seg[i] = src->seg[i]; \
126126
}
127127

128+
/*
 * Copy a protocol-specific discard request into the generic request slot.
 *
 * @dst:         pointer to the destination request; it is viewed as a
 *               blkif_request_discard_t, which must not be larger than the
 *               object @dst really points to (blkif_request_t at the call
 *               sites).
 * @discard_src: pointer to the ring entry, already cast to the matching
 *               blkif_x86_32_request_discard_t or
 *               blkif_x86_64_request_discard_t.
 *
 * Every field, including the operation code, is read through @discard_src,
 * so the macro does not rely on any particular variable name existing in
 * the caller. @discard_src is evaluated once per copied field; pass a plain
 * pointer expression without side effects.
 *
 * Wrapped in do { } while (0) so that it expands to a single statement and
 * can be used safely in unbraced if/else bodies.
 */
#define blkif_get_req_discard(dst, discard_src)				\
	do {								\
		blkif_request_discard_t *discard_dst =			\
			(blkif_request_discard_t *)(dst);		\
		discard_dst->operation = (discard_src)->operation;	\
		discard_dst->flag = (discard_src)->flag;		\
		discard_dst->id = (discard_src)->id;			\
		discard_dst->sector_number =				\
			(discard_src)->sector_number;			\
		discard_dst->nr_sectors = (discard_src)->nr_sectors;	\
	} while (0)
138+
128139
/**
129140
* Utility function that retrieves a request using @idx as the ring index,
130141
* copying it to the @dst in a H/W independent way.
@@ -149,6 +160,7 @@ xenio_blkif_get_request(struct td_xenblkif * const blkif,
149160
{
150161
blkif_request_t *src;
151162
src = RING_GET_REQUEST(&rings->native, idx);
163+
// sizeof(blkif_request_t)>sizeof(blkif_request_discard_t)
152164
memcpy(dst, src, sizeof(blkif_request_t));
153165
break;
154166
}
@@ -157,15 +169,27 @@ xenio_blkif_get_request(struct td_xenblkif * const blkif,
157169
{
158170
blkif_x86_32_request_t *src;
159171
src = RING_GET_REQUEST(&rings->x86_32, idx);
160-
blkif_get_req(dst, src);
172+
if (src->operation==BLKIF_OP_DISCARD) {
173+
blkif_x86_32_request_discard_t * discard_src;
174+
discard_src = (blkif_x86_32_request_discard_t *) src;
175+
blkif_get_req_discard(dst, discard_src);
176+
} else {
177+
blkif_get_req(dst, src);
178+
}
161179
break;
162180
}
163181

164182
case BLKIF_PROTOCOL_X86_64:
165183
{
166184
blkif_x86_64_request_t *src;
167185
src = RING_GET_REQUEST(&rings->x86_64, idx);
168-
blkif_get_req(dst, src);
186+
if (src->operation==BLKIF_OP_DISCARD) {
187+
blkif_x86_64_request_discard_t * discard_src;
188+
discard_src = (blkif_x86_64_request_discard_t *) src;
189+
blkif_get_req_discard(dst, discard_src);
190+
} else {
191+
blkif_get_req(dst, src);
192+
}
169193
break;
170194
}
171195

drivers/td-req.c

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,37 @@ tapdisk_xenblkif_parse_request(struct td_xenblkif * const blkif,
670670
return err;
671671
}
672672

673+
static inline int
674+
tapdisk_xenblkif_parse_request_discard(struct td_xenblkif * const blkif,
675+
struct td_xenblkif_req * const req)
676+
{
677+
int err = 0;
678+
td_vbd_request_t *vreq;
679+
blkif_request_discard_t * request_discard_msg;
680+
681+
vreq = &req->vreq;
682+
ASSERT(vreq);
683+
684+
vreq->iov = 0;
685+
vreq->iovcnt = 0;
686+
vreq->sec = 0;
687+
688+
request_discard_msg = (blkif_request_discard_t*)&req->msg;
689+
vreq->discard_nr_sectors = request_discard_msg->nr_sectors;
690+
vreq->sec = request_discard_msg->sector_number;
691+
692+
/*
693+
* TODO Isn't this kind of expensive to do for each requests? Why does
694+
* the tapdisk need this in the first place?
695+
*/
696+
snprintf(req->name, sizeof(req->name), "xenvbd-%d-%d.%"SCNx64"",
697+
blkif->domid, blkif->devid, request_discard_msg->id);
698+
vreq->name = req->name;
699+
vreq->token = blkif;
700+
vreq->cb = __tapdisk_xenblkif_request_cb;
701+
702+
return err;
703+
}
673704

674705
/**
675706
* Initialises the standard tapdisk request (td_vbd_request_t) from the
@@ -710,6 +741,11 @@ tapdisk_xenblkif_make_vbd_request(struct td_xenblkif * const blkif,
710741
tapreq->prot = PROT_READ;
711742
vreq->op = TD_OP_WRITE;
712743
break;
744+
case BLKIF_OP_DISCARD:
745+
blkif->stats.xenvbd->st_ds_req++;
746+
tapreq->prot = PROT_WRITE;
747+
vreq->op = TD_OP_DISCARD;
748+
break;
713749
default:
714750
RING_ERR(blkif, "req %lu: invalid request type %d\n",
715751
tapreq->msg.id, tapreq->msg.operation);
@@ -723,15 +759,18 @@ tapdisk_xenblkif_make_vbd_request(struct td_xenblkif * const blkif,
723759
* Check that the number of segments is sane.
724760
*/
725761
if (unlikely((tapreq->msg.nr_segments == 0 &&
726-
tapreq->msg.operation != BLKIF_OP_WRITE_BARRIER) ||
762+
tapreq->msg.operation != BLKIF_OP_WRITE_BARRIER &&
763+
tapreq->msg.operation != BLKIF_OP_DISCARD) ||
727764
tapreq->msg.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
728765
RING_ERR(blkif, "req %lu: bad number of segments in request (%d)\n",
729766
tapreq->msg.id, tapreq->msg.nr_segments);
730767
err = EINVAL;
731768
goto out;
732769
}
733770

734-
if (likely(tapreq->msg.nr_segments))
771+
if (unlikely(tapreq->msg.operation == BLKIF_OP_DISCARD))
772+
err = tapdisk_xenblkif_parse_request_discard(blkif, tapreq);
773+
else if (likely(tapreq->msg.nr_segments))
735774
err = tapdisk_xenblkif_parse_request(blkif, tapreq);
736775
/*
737776
* If we only got one request from the ring and that was a barrier one,
@@ -781,7 +820,8 @@ tapdisk_xenblkif_queue_request(struct td_xenblkif * const blkif,
781820
return err;
782821
}
783822

784-
if (likely(tapreq->msg.nr_segments)) {
823+
if (likely(tapreq->msg.nr_segments ||
824+
tapreq->msg.operation == BLKIF_OP_DISCARD )) {
785825
err = tapdisk_vbd_queue_request(blkif->vbd, &tapreq->vreq);
786826
if (unlikely(err)) {
787827
/* TODO log error */

include/blktap3.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@
3939
*/
4040
struct blkback_stats {
4141
/**
42-
* BLKIF_OP_DISCARD, not currently supported in blktap3, should always
43-
* be zero
42+
* Received BLKIF_OP_DISCARD requests.
4443
*/
4544
unsigned long long st_ds_req;
4645

include/xen_blkif.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,21 @@ struct blkif_x86_32_request {
2626
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
2727
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
2828
};
29+
/*
 * Discard request as it appears on an x86_32 ring.
 * xenio_blkif_get_request() reinterprets a blkif_x86_32_request_t as this
 * type when its operation is BLKIF_OP_DISCARD, then copies operation, flag,
 * id, sector_number and nr_sectors with blkif_get_req_discard.
 * NOTE(review): field order and sizes define the wire layout; do not
 * reorder. Presumably must match the guest's 32-bit ABI - confirm against
 * the Xen public blkif header.
 */
struct blkif_x86_32_request_discard {
30+
uint8_t operation; /* BLKIF_OP_DISCARD */
31+
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
32+
blkif_vdev_t handle; /* slot that holds "handle" in read/write requests; not copied by blkif_get_req_discard (the x86_64 variant names it _pad1) */
33+
uint64_t id; /* private guest value, echoed in resp */
34+
blkif_sector_t sector_number;/* first sector of the range to discard (becomes vreq->sec) */
35+
uint64_t nr_sectors; /* length of the range in sectors (becomes vreq->discard_nr_sectors) */
36+
};
2937
struct blkif_x86_32_response {
3038
uint64_t id; /* copied from request */
3139
uint8_t operation; /* copied from request */
3240
int16_t status; /* BLKIF_RSP_??? */
3341
};
3442
typedef struct blkif_x86_32_request blkif_x86_32_request_t;
43+
typedef struct blkif_x86_32_request_discard blkif_x86_32_request_discard_t;
3544
typedef struct blkif_x86_32_response blkif_x86_32_response_t;
3645
#pragma pack(pop)
3746

@@ -44,12 +53,22 @@ struct blkif_x86_64_request {
4453
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
4554
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
4655
};
56+
/*
 * Discard request as it appears on an x86_64 ring.
 * xenio_blkif_get_request() reinterprets a blkif_x86_64_request_t as this
 * type when its operation is BLKIF_OP_DISCARD, then copies operation, flag,
 * id, sector_number and nr_sectors with blkif_get_req_discard.
 * NOTE(review): field order and sizes define the wire layout; do not
 * reorder.
 */
struct blkif_x86_64_request_discard {
57+
uint8_t operation; /* BLKIF_OP_DISCARD */
58+
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
59+
blkif_vdev_t _pad1; /* slot that holds "handle" in read/write requests; not copied by blkif_get_req_discard */
60+
uint32_t _pad2; /* padding, not copied; presumably places id on an 8-byte boundary - TODO confirm */
61+
uint64_t id; /* private guest value, echoed in resp */
62+
blkif_sector_t sector_number;/* first sector of the range to discard (becomes vreq->sec) */
63+
uint64_t nr_sectors; /* length of the range in sectors (becomes vreq->discard_nr_sectors) */
64+
};
4765
struct blkif_x86_64_response {
4866
uint64_t __attribute__((__aligned__(8))) id;
4967
uint8_t operation; /* copied from request */
5068
int16_t status; /* BLKIF_RSP_??? */
5169
};
5270
typedef struct blkif_x86_64_request blkif_x86_64_request_t;
71+
typedef struct blkif_x86_64_request_discard blkif_x86_64_request_discard_t;
5372
typedef struct blkif_x86_64_response blkif_x86_64_response_t;
5473

5574
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);

0 commit comments

Comments
 (0)