Skip to content

Commit d011c52

Browse files
committed
cls/rgw: duplicate reshard checks in all cls_rgw write operations
with the addition of the reshard log, all write ops now have to read the omap header themselves. this read duplicates the one in cls_rgw_guard_bucket_resharding(), leading to a minor performance regression this commit prepares a long-term fix for this regression by duplicating the the reshard check done by cls_rgw_guard_bucket_resharding() in each write operation. this will allow the cls_rgw_guard_bucket_resharding() calls to be removed from rgw two releases later when we can guarantee that all OSDs are performing this check for all write operations Signed-off-by: Casey Bodley <[email protected]>
1 parent 658098b commit d011c52

File tree

4 files changed

+96
-27
lines changed

4 files changed

+96
-27
lines changed

src/cls/rgw/cls_rgw.cc

Lines changed: 84 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,23 @@ static int write_header_while_logrecord(cls_method_context_t hctx,
886886
return 0;
887887
}
888888

889+
static int guard_bucket_resharding(cls_method_context_t hctx,
890+
const rgw_bucket_dir_header& header,
891+
int error_code = -CLS_RGW_ERR_BUSY_RESHARDING)
892+
{
893+
const ConfigProxy& conf = cls_get_config(hctx);
894+
const uint32_t reshardlog_threshold = conf->rgw_reshardlog_threshold;
895+
896+
if (header.resharding_in_progress() ||
897+
(header.resharding_in_logrecord() && header.reshardlog_entries >= reshardlog_threshold)) {
898+
CLS_LOG(4, "ERROR: writes are blocked while bucket is "
899+
"resharding, returning %d", error_code);
900+
return error_code;
901+
}
902+
903+
return 0;
904+
}
905+
889906
int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
890907
{
891908
const ConfigProxy& conf = cls_get_config(hctx);
@@ -925,6 +942,11 @@ int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist
925942
return rc;
926943
}
927944

945+
rc = guard_bucket_resharding(hctx, header);
946+
if (rc < 0) {
947+
return rc;
948+
}
949+
928950
// get on-disk state
929951
std::string idx;
930952

@@ -1131,6 +1153,11 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
11311153
return -EINVAL;
11321154
}
11331155

1156+
rc = guard_bucket_resharding(hctx, header);
1157+
if (rc < 0) {
1158+
return rc;
1159+
}
1160+
11341161
rgw_bucket_dir_entry entry;
11351162
bool ondisk = true;
11361163

@@ -1757,6 +1784,11 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
17571784
return rc;
17581785
}
17591786

1787+
rc = guard_bucket_resharding(hctx, header);
1788+
if (rc < 0) {
1789+
return rc;
1790+
}
1791+
17601792
/* read instance entry */
17611793
BIVerObjEntry obj(hctx, op.key);
17621794
int ret = obj.init(op.delete_marker);
@@ -1998,6 +2030,11 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in,
19982030
return ret;
19992031
}
20002032

2033+
ret = guard_bucket_resharding(hctx, header);
2034+
if (ret < 0) {
2035+
return ret;
2036+
}
2037+
20012038
BIVerObjEntry obj(hctx, dest_key);
20022039
BIOLHEntry olh(hctx, dest_key);
20032040

@@ -2230,6 +2267,11 @@ static int rgw_bucket_trim_olh_log(cls_method_context_t hctx, bufferlist *in, bu
22302267
return rc;
22312268
}
22322269

2270+
rc = guard_bucket_resharding(hctx, header);
2271+
if (rc < 0) {
2272+
return rc;
2273+
}
2274+
22332275
/* write the olh data entry */
22342276
ret = write_entry(hctx, olh_data_entry, olh_data_key, header);
22352277
if (ret < 0) {
@@ -2265,6 +2307,11 @@ static int rgw_bucket_clear_olh(cls_method_context_t hctx, bufferlist *in, buffe
22652307
return rc;
22662308
}
22672309

2310+
rc = guard_bucket_resharding(hctx, header);
2311+
if (rc < 0) {
2312+
return rc;
2313+
}
2314+
22682315
/* read olh entry */
22692316
rgw_bucket_olh_entry olh_data_entry;
22702317
string olh_data_key, olh_sub_ver;
@@ -2336,6 +2383,11 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx,
23362383
return rc;
23372384
}
23382385

2386+
rc = guard_bucket_resharding(hctx, header);
2387+
if (rc < 0) {
2388+
return rc;
2389+
}
2390+
23392391
const uint64_t config_op_expiration =
23402392
conf->rgw_pending_bucket_index_op_expiration;
23412393

@@ -2840,6 +2892,11 @@ static int rgw_bi_put_entries(cls_method_context_t hctx, bufferlist *in, bufferl
28402892
return r;
28412893
}
28422894

2895+
r = guard_bucket_resharding(hctx, header);
2896+
if (r < 0) {
2897+
return r;
2898+
}
2899+
28432900
if (op.check_existing) {
28442901
// fetch any existing keys and decrement their stats before overwriting
28452902
std::set<std::string> keys;
@@ -3308,18 +3365,12 @@ static int reshard_log_list_entries(cls_method_context_t hctx, const string& mar
33083365
}
33093366

33103367
static int check_index(cls_method_context_t hctx,
3311-
rgw_bucket_dir_header *existing_header,
3368+
const rgw_bucket_dir_header& existing_header,
33123369
rgw_bucket_dir_header *calc_header)
33133370
{
3314-
int rc = read_bucket_header(hctx, existing_header);
3315-
if (rc < 0) {
3316-
CLS_LOG(1, "ERROR: check_index(): failed to read header\n");
3317-
return rc;
3318-
}
3319-
3320-
calc_header->tag_timeout = existing_header->tag_timeout;
3321-
calc_header->ver = existing_header->ver;
3322-
calc_header->syncstopped = existing_header->syncstopped;
3371+
calc_header->tag_timeout = existing_header.tag_timeout;
3372+
calc_header->ver = existing_header.ver;
3373+
calc_header->syncstopped = existing_header.syncstopped;
33233374

33243375
std::list<rgw_cls_bi_entry> entries;
33253376
string start_obj;
@@ -3329,7 +3380,7 @@ static int check_index(cls_method_context_t hctx,
33293380
bool more;
33303381

33313382
do {
3332-
rc = list_plain_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
3383+
int rc = list_plain_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
33333384
if (rc < 0) {
33343385
return rc;
33353386
}
@@ -3358,7 +3409,7 @@ static int check_index(cls_method_context_t hctx,
33583409

33593410
start_obj = "";
33603411
do {
3361-
rc = list_instance_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
3412+
int rc = list_instance_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
33623413
if (rc < 0) {
33633414
return rc;
33643415
}
@@ -3391,9 +3442,21 @@ static int check_index(cls_method_context_t hctx,
33913442
int rgw_bucket_rebuild_index(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
33923443
{
33933444
CLS_LOG(10, "entered %s", __func__);
3445+
33943446
rgw_bucket_dir_header existing_header;
3447+
int rc = read_bucket_header(hctx, &existing_header);
3448+
if (rc < 0) {
3449+
CLS_LOG(1, "ERROR: check_index(): failed to read header\n");
3450+
return rc;
3451+
}
3452+
3453+
rc = guard_bucket_resharding(hctx, existing_header);
3454+
if (rc < 0) {
3455+
return rc;
3456+
}
3457+
33953458
rgw_bucket_dir_header calc_header;
3396-
int rc = check_index(hctx, &existing_header, &calc_header);
3459+
rc = check_index(hctx, existing_header, &calc_header);
33973460
if (rc < 0)
33983461
return rc;
33993462

@@ -3405,8 +3468,13 @@ int rgw_bucket_check_index(cls_method_context_t hctx, bufferlist *in, bufferlist
34053468
{
34063469
CLS_LOG(10, "entered %s", __func__);
34073470
rgw_cls_check_index_ret ret;
3471+
int rc = read_bucket_header(hctx, &ret.existing_header);
3472+
if (rc < 0) {
3473+
CLS_LOG(1, "ERROR: check_index(): failed to read header\n");
3474+
return rc;
3475+
}
34083476

3409-
int rc = check_index(hctx, &ret.existing_header, &ret.calculated_header);
3477+
rc = check_index(hctx, ret.existing_header, &ret.calculated_header);
34103478
if (rc < 0)
34113479
return rc;
34123480

@@ -4934,13 +5002,9 @@ static int rgw_guard_bucket_resharding(cls_method_context_t hctx, bufferlist *in
49345002
{
49355003
CLS_LOG(10, "entered %s", __func__);
49365004

4937-
const ConfigProxy& conf = cls_get_config(hctx);
4938-
const uint32_t reshardlog_threshold = conf->rgw_reshardlog_threshold;
4939-
49405005
cls_rgw_guard_bucket_resharding_op op;
4941-
4942-
auto in_iter = in->cbegin();
49435006
try {
5007+
auto in_iter = in->cbegin();
49445008
decode(op, in_iter);
49455009
} catch (ceph::buffer::error& err) {
49465010
CLS_LOG(1, "ERROR: %s: failed to decode entry", __func__);
@@ -4954,12 +5018,7 @@ static int rgw_guard_bucket_resharding(cls_method_context_t hctx, bufferlist *in
49545018
return rc;
49555019
}
49565020

4957-
if (header.resharding_in_progress() ||
4958-
(header.resharding_in_logrecord() && header.reshardlog_entries >= reshardlog_threshold)) {
4959-
return op.ret_err;
4960-
}
4961-
4962-
return 0;
5021+
return guard_bucket_resharding(hctx, header, op.ret_err);
49635022
}
49645023

49655024
static int rgw_get_bucket_resharding(cls_method_context_t hctx,

src/cls/rgw/cls_rgw_client.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,8 +675,16 @@ int cls_rgw_reshard_list(librados::IoCtx& io_ctx, const std::string& oid, std::s
675675
int cls_rgw_reshard_get(librados::IoCtx& io_ctx, const std::string& oid, cls_rgw_reshard_entry& entry);
676676
#endif
677677

678-
/* resharding attribute on bucket index shard headers */
678+
// If writes to the bucket index should be blocked during resharding, fail with
679+
// the given error code. RGWRados::guard_reshard() calls this in a loop to retry
680+
// the write until the reshard completes.
681+
//
682+
// As of the T release, all index write ops in cls_rgw perform this check
683+
// themselves. RGW can stop issuing this call in the T+2 (V) release once it
684+
// knows that OSDs are running T at least. The call can be safely removed from
685+
// cls_rgw in the T+4 (X) release.
679686
void cls_rgw_guard_bucket_resharding(librados::ObjectOperation& op, int ret_err);
687+
680688
// these overloads which call io_ctx.operate() should not be called in the rgw.
681689
// rgw_rados_operate() should be called after the overloads w/o calls to io_ctx.operate()
682690
#ifndef CLS_CLIENT_HIDE_IOCTX

src/cls/rgw/cls_rgw_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#define CEPH_RGW_DIR_SUGGEST_LOG_OP 0x80
1919
#define CEPH_RGW_DIR_SUGGEST_OP_MASK 0x7f
2020

21+
#define CLS_RGW_ERR_BUSY_RESHARDING 2300 // also in rgw_common.h, don't change!
22+
2123
constexpr uint64_t CEPH_RGW_DEFAULT_TAG_TIMEOUT = 120; // in seconds
2224

2325
class JSONObj;

src/rgw/rgw_common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ inline constexpr const char* RGW_REST_STS_XMLNS =
331331
#define ERR_PRESIGNED_URL_DISABLED 2224
332332
#define ERR_AUTHORIZATION 2225 // SNS 403 AuthorizationError
333333

334-
#define ERR_BUSY_RESHARDING 2300
334+
#define ERR_BUSY_RESHARDING 2300 // also in cls_rgw_types.h, don't change!
335335
#define ERR_NO_SUCH_ENTITY 2301
336336
#define ERR_LIMIT_EXCEEDED 2302
337337

0 commit comments

Comments
 (0)