Skip to content

Commit f0fe1e5

Browse files
jtlayton authored and idryomov committed
ceph: plumb in decryption during reads
Force the use of sparse reads when the inode is encrypted, and add the appropriate code to decrypt the extent map after receiving.

Note that the crypto block may be smaller than a page, but the reverse cannot be true.

Signed-off-by: Jeff Layton <[email protected]>
Reviewed-by: Xiubo Li <[email protected]>
Reviewed-and-tested-by: Luís Henriques <[email protected]>
Reviewed-by: Milind Changire <[email protected]>
Signed-off-by: Ilya Dryomov <[email protected]>
1 parent d552077 commit f0fe1e5

File tree

2 files changed

+125
-34
lines changed

2 files changed

+125
-34
lines changed

fs/ceph/addr.c

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "mds_client.h"
1919
#include "cache.h"
2020
#include "metric.h"
21+
#include "crypto.h"
2122
#include <linux/ceph/osd_client.h>
2223
#include <linux/ceph/striper.h>
2324

@@ -242,7 +243,8 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
242243

243244
static void finish_netfs_read(struct ceph_osd_request *req)
244245
{
245-
struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
246+
struct inode *inode = req->r_inode;
247+
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
246248
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
247249
struct netfs_io_subrequest *subreq = req->r_priv;
248250
struct ceph_osd_req_op *op = &req->r_ops[0];
@@ -256,16 +258,31 @@ static void finish_netfs_read(struct ceph_osd_request *req)
256258
subreq->len, i_size_read(req->r_inode));
257259

258260
/* no object means success but no data */
259-
if (sparse && err >= 0)
260-
err = ceph_sparse_ext_map_end(op);
261-
else if (err == -ENOENT)
261+
if (err == -ENOENT)
262262
err = 0;
263263
else if (err == -EBLOCKLISTED)
264264
fsc->blocklisted = true;
265265

266-
if (err >= 0 && err < subreq->len)
267-
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
266+
if (err >= 0) {
267+
if (sparse && err > 0)
268+
err = ceph_sparse_ext_map_end(op);
269+
if (err < subreq->len)
270+
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
271+
if (IS_ENCRYPTED(inode) && err > 0) {
272+
err = ceph_fscrypt_decrypt_extents(inode,
273+
osd_data->pages, subreq->start,
274+
op->extent.sparse_ext,
275+
op->extent.sparse_ext_cnt);
276+
if (err > subreq->len)
277+
err = subreq->len;
278+
}
279+
}
268280

281+
if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
282+
ceph_put_page_vector(osd_data->pages,
283+
calc_pages_for(osd_data->alignment,
284+
osd_data->length), false);
285+
}
269286
netfs_subreq_terminated(subreq, err, false);
270287
iput(req->r_inode);
271288
}
@@ -336,7 +353,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
336353
struct iov_iter iter;
337354
int err = 0;
338355
u64 len = subreq->len;
339-
bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
356+
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
357+
u64 off = subreq->start;
340358

341359
if (ceph_inode_is_shutdown(inode)) {
342360
err = -EIO;
@@ -346,8 +364,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
346364
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
347365
return;
348366

349-
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
350-
0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
367+
ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
368+
369+
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
370+
off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
351371
CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
352372
NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
353373
if (IS_ERR(req)) {
@@ -363,8 +383,37 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
363383
}
364384

365385
dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
386+
366387
iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
367-
osd_req_op_extent_osd_iter(req, 0, &iter);
388+
389+
/*
390+
* FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
391+
* encrypted inodes. We'd need infrastructure that handles an iov_iter
392+
* instead of page arrays, and we don't have that as of yet. Once the
393+
* dust settles on the write helpers and encrypt/decrypt routines for
394+
* netfs, we should be able to rework this.
395+
*/
396+
if (IS_ENCRYPTED(inode)) {
397+
struct page **pages;
398+
size_t page_off;
399+
400+
err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
401+
if (err < 0) {
402+
dout("%s: iov_ter_get_pages_alloc returned %d\n",
403+
__func__, err);
404+
goto out;
405+
}
406+
407+
/* should always give us a page-aligned read */
408+
WARN_ON_ONCE(page_off);
409+
len = err;
410+
err = 0;
411+
412+
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
413+
false);
414+
} else {
415+
osd_req_op_extent_osd_iter(req, 0, &iter);
416+
}
368417
req->r_callback = finish_netfs_read;
369418
req->r_priv = subreq;
370419
req->r_inode = inode;

fs/ceph/file.c

Lines changed: 66 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
970970
u64 off = *ki_pos;
971971
u64 len = iov_iter_count(to);
972972
u64 i_size = i_size_read(inode);
973-
bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
973+
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
974974
u64 objver = 0;
975975

976976
dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len);
@@ -1001,10 +1001,19 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10011001
int idx;
10021002
size_t left;
10031003
struct ceph_osd_req_op *op;
1004+
u64 read_off = off;
1005+
u64 read_len = len;
1006+
1007+
/* determine new offset/length if encrypted */
1008+
ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
1009+
1010+
dout("sync_read orig %llu~%llu reading %llu~%llu",
1011+
off, len, read_off, read_len);
10041012

10051013
req = ceph_osdc_new_request(osdc, &ci->i_layout,
1006-
ci->i_vino, off, &len, 0, 1,
1007-
sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
1014+
ci->i_vino, read_off, &read_len, 0, 1,
1015+
sparse ? CEPH_OSD_OP_SPARSE_READ :
1016+
CEPH_OSD_OP_READ,
10081017
CEPH_OSD_FLAG_READ,
10091018
NULL, ci->i_truncate_seq,
10101019
ci->i_truncate_size, false);
@@ -1013,18 +1022,22 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10131022
break;
10141023
}
10151024

1025+
/* adjust len downward if the request truncated the len */
1026+
if (off + len > read_off + read_len)
1027+
len = read_off + read_len - off;
10161028
more = len < iov_iter_count(to);
10171029

1018-
num_pages = calc_pages_for(off, len);
1019-
page_off = off & ~PAGE_MASK;
1030+
num_pages = calc_pages_for(read_off, read_len);
1031+
page_off = offset_in_page(off);
10201032
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
10211033
if (IS_ERR(pages)) {
10221034
ceph_osdc_put_request(req);
10231035
ret = PTR_ERR(pages);
10241036
break;
10251037
}
10261038

1027-
osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
1039+
osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
1040+
offset_in_page(read_off),
10281041
false, false);
10291042

10301043
op = &req->r_ops[0];
@@ -1042,7 +1055,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10421055
ceph_update_read_metrics(&fsc->mdsc->metric,
10431056
req->r_start_latency,
10441057
req->r_end_latency,
1045-
len, ret);
1058+
read_len, ret);
10461059

10471060
if (ret > 0)
10481061
objver = req->r_version;
@@ -1057,8 +1070,35 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10571070
else if (ret == -ENOENT)
10581071
ret = 0;
10591072

1073+
if (ret > 0 && IS_ENCRYPTED(inode)) {
1074+
int fret;
1075+
1076+
fret = ceph_fscrypt_decrypt_extents(inode, pages,
1077+
read_off, op->extent.sparse_ext,
1078+
op->extent.sparse_ext_cnt);
1079+
if (fret < 0) {
1080+
ret = fret;
1081+
ceph_osdc_put_request(req);
1082+
break;
1083+
}
1084+
1085+
/* account for any partial block at the beginning */
1086+
fret -= (off - read_off);
1087+
1088+
/*
1089+
* Short read after big offset adjustment?
1090+
* Nothing is usable, just call it a zero
1091+
* len read.
1092+
*/
1093+
fret = max(fret, 0);
1094+
1095+
/* account for partial block at the end */
1096+
ret = min_t(ssize_t, fret, len);
1097+
}
1098+
10601099
ceph_osdc_put_request(req);
10611100

1101+
/* Short read but not EOF? Zero out the remainder. */
10621102
if (ret >= 0 && ret < len && (off + ret < i_size)) {
10631103
int zlen = min(len - ret, i_size - off - ret);
10641104
int zoff = page_off + ret;
@@ -1072,15 +1112,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10721112
idx = 0;
10731113
left = ret > 0 ? ret : 0;
10741114
while (left > 0) {
1075-
size_t len, copied;
1076-
page_off = off & ~PAGE_MASK;
1077-
len = min_t(size_t, left, PAGE_SIZE - page_off);
1115+
size_t plen, copied;
1116+
1117+
plen = min_t(size_t, left, PAGE_SIZE - page_off);
10781118
SetPageUptodate(pages[idx]);
10791119
copied = copy_page_to_iter(pages[idx++],
1080-
page_off, len, to);
1120+
page_off, plen, to);
10811121
off += copied;
10821122
left -= copied;
1083-
if (copied < len) {
1123+
page_off = 0;
1124+
if (copied < plen) {
10841125
ret = -EFAULT;
10851126
break;
10861127
}
@@ -1097,20 +1138,21 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10971138
break;
10981139
}
10991140

1100-
if (off > *ki_pos) {
1101-
if (off >= i_size) {
1102-
*retry_op = CHECK_EOF;
1103-
ret = i_size - *ki_pos;
1104-
*ki_pos = i_size;
1105-
} else {
1106-
ret = off - *ki_pos;
1107-
*ki_pos = off;
1141+
if (ret > 0) {
1142+
if (off > *ki_pos) {
1143+
if (off >= i_size) {
1144+
*retry_op = CHECK_EOF;
1145+
ret = i_size - *ki_pos;
1146+
*ki_pos = i_size;
1147+
} else {
1148+
ret = off - *ki_pos;
1149+
*ki_pos = off;
1150+
}
11081151
}
1109-
}
1110-
1111-
if (last_objver && ret > 0)
1112-
*last_objver = objver;
11131152

1153+
if (last_objver)
1154+
*last_objver = objver;
1155+
}
11141156
dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
11151157
return ret;
11161158
}

0 commit comments

Comments
 (0)