Skip to content

Commit bcf3b6f

Browse files
committed
DAOS-17444 rebuild: cache obj open handle for rebuild
Call dsc_obj_open() only once per object in the rebuild puller, instead of opening the object again for every dkey migration, to save the layout calculation overhead. Signed-off-by: Xuezhao Liu <xuezhao.liu@hpe.com>
1 parent ef82b28 commit bcf3b6f

File tree

1 file changed

+91
-82
lines changed

1 file changed

+91
-82
lines changed

src/object/srv_obj_migrate.c

Lines changed: 91 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#define MIGRATE_DEFAULT_MAX_ULT 4096
4141
#define ENV_MIGRATE_ULT_CNT "D_MIGRATE_ULT_CNT"
4242
struct migrate_one {
43+
struct iter_obj_arg *mo_obj_arg;
4344
daos_key_t mo_dkey;
4445
uint64_t mo_dkey_hash;
4546
uuid_t mo_pool_uuid;
@@ -116,6 +117,9 @@ struct iter_obj_arg {
116117
uuid_t pool_uuid;
117118
uuid_t cont_uuid;
118119
daos_unit_oid_t oid;
120+
daos_handle_t ioa_oh;
121+
int ioa_obj_ref;
122+
struct daos_oclass_attr ioa_oca;
119123
daos_epoch_t epoch;
120124
daos_epoch_t punched_epoch;
121125
unsigned int shard;
@@ -126,6 +130,28 @@ struct iter_obj_arg {
126130
uint32_t generation;
127131
};
128132

133+
/*
 * Take one reference on the per-object migrate argument.
 *
 * Each holder must balance this with one migrate_obj_put() call. In this
 * change the holders are migrate_obj_ult() itself and every per-dkey ULT
 * that migrate_start_ult() creates.
 *
 * \param[in,out] arg	per-object iteration state; ioa_obj_ref is bumped.
 *
 * NOTE(review): ioa_obj_ref is a plain int with no atomics or locking here;
 * presumably all holders run on the same xstream -- confirm.
 */
static void
134+
migrate_obj_get(struct iter_obj_arg *arg)
135+
{
136+
arg->ioa_obj_ref++;
137+
}
138+
139+
/*
 * Drop one reference on the per-object migrate argument.
 *
 * When the last reference goes away, the cached object handle (ioa_oh) is
 * closed if it was opened, and both the snapshot array and \a arg itself
 * are freed. The caller must not touch \a arg after this call unless it
 * still holds another reference.
 *
 * \param[in,out] arg	per-object iteration state; must hold a positive
 *			ioa_obj_ref (asserted).
 */
static void
140+
migrate_obj_put(struct iter_obj_arg *arg)
141+
{
142+
/* An unbalanced put would make the count go negative; fail loudly. */
D_ASSERTF(arg->ioa_obj_ref > 0, DF_CONT " obj " DF_UOID " bad ioa_obj_ref %d\n",
143+
DP_CONT(arg->pool_uuid, arg->cont_uuid), DP_UOID(arg->oid), arg->ioa_obj_ref);
144+
arg->ioa_obj_ref--;
145+
if (arg->ioa_obj_ref == 0) {
146+
/* ioa_oh is still DAOS_HDL_INVAL if the object was never opened. */
if (daos_handle_is_valid(arg->ioa_oh)) {
147+
dsc_obj_close(arg->ioa_oh);
148+
arg->ioa_oh = DAOS_HDL_INVAL;
149+
}
150+
/* Last holder: release the snapshot array, then the argument itself. */
D_FREE(arg->snaps);
151+
D_FREE(arg);
152+
}
153+
}
154+
129155
static int
130156
obj_tree_destory_cb(daos_handle_t ih, d_iov_t *key_iov,
131157
d_iov_t *val_iov, void *data)
@@ -1619,48 +1645,29 @@ static int
16191645
migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone,
16201646
daos_size_t data_size)
16211647
{
1622-
struct ds_cont_child *cont = NULL;
1623-
struct cont_props props;
1624-
daos_handle_t coh = DAOS_HDL_INVAL;
1648+
struct ds_cont_child *cont = NULL;
16251649
daos_handle_t oh = DAOS_HDL_INVAL;
16261650
int rc;
16271651

16281652
D_ASSERT(dss_get_module_info()->dmi_xs_id != 0);
16291653
rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont, true);
16301654
if (rc || cont == NULL)
1631-
D_GOTO(cont_put, rc);
1632-
1633-
rc = dsc_pool_open(tls->mpt_pool_uuid, tls->mpt_poh_uuid, 0,
1634-
NULL, tls->mpt_pool->spc_pool->sp_map,
1635-
&tls->mpt_svc_list, &tls->mpt_pool_hdl);
1636-
if (rc)
1637-
D_GOTO(cont_put, rc);
1638-
1639-
/* Open client dc handle used to read the remote object data */
1640-
rc = migrate_cont_open(tls, mrone->mo_cont_uuid, 0, &coh);
1641-
if (rc)
1642-
D_GOTO(cont_put, rc);
1655+
D_GOTO(out, rc);
16431656

1644-
/* Open the remote object */
1645-
rc = dsc_obj_open(coh, mrone->mo_oid.id_pub, DAOS_OO_RO, &oh);
1646-
if (rc)
1647-
D_GOTO(cont_put, rc);
1657+
D_ASSERTF(mrone->mo_obj_arg->ioa_obj_ref > 0,
1658+
DF_RB ": oid " DF_UOID ", bad ioa_obj_ref %d\n", DP_RB_MPT(tls),
1659+
DP_UOID(mrone->mo_oid), mrone->mo_obj_arg->ioa_obj_ref);
1660+
D_ASSERT(daos_handle_is_valid(mrone->mo_obj_arg->ioa_oh));
1661+
oh = mrone->mo_obj_arg->ioa_oh;
1662+
mrone->mo_oca = mrone->mo_obj_arg->ioa_oca;
16481663

16491664
if (DAOS_FAIL_CHECK(DAOS_REBUILD_TGT_NOSPACE))
1650-
D_GOTO(obj_close, rc = -DER_NOSPACE);
1665+
D_GOTO(out, rc = -DER_NOSPACE);
16511666

16521667
if (DAOS_FAIL_CHECK(DAOS_REBUILD_NO_REBUILD)) {
16531668
D_DEBUG(DB_REBUILD, DF_UUID" disable rebuild\n",
16541669
DP_UUID(tls->mpt_pool_uuid));
1655-
D_GOTO(obj_close, rc);
1656-
}
1657-
1658-
dsc_cont_get_props(coh, &props);
1659-
rc = dsc_obj_id2oc_attr(mrone->mo_oid.id_pub, &props, &mrone->mo_oca);
1660-
if (rc) {
1661-
D_ERROR("Unknown object class: %u\n",
1662-
daos_obj_id2class(mrone->mo_oid.id_pub));
1663-
D_GOTO(obj_close, rc);
1670+
D_GOTO(out, rc);
16641671
}
16651672

16661673
/* punch the object */
@@ -1672,21 +1679,21 @@ migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone,
16721679
if (rc) {
16731680
D_ERROR(DF_UOID" punch obj failed: "DF_RC"\n",
16741681
DP_UOID(mrone->mo_oid), DP_RC(rc));
1675-
D_GOTO(obj_close, rc);
1682+
D_GOTO(out, rc);
16761683
}
16771684
}
16781685

16791686
rc = migrate_punch(tls, mrone, cont);
16801687
if (rc)
1681-
D_GOTO(obj_close, rc);
1688+
D_GOTO(out, rc);
16821689

16831690
if (data_size == 0) {
16841691
D_DEBUG(DB_REBUILD, "empty mrone %p\n", mrone);
1685-
D_GOTO(obj_close, rc);
1692+
D_GOTO(out, rc);
16861693
}
16871694

16881695
if (DAOS_FAIL_CHECK(DAOS_REBUILD_UPDATE_FAIL))
1689-
D_GOTO(obj_close, rc = -DER_INVAL);
1696+
D_GOTO(out, rc = -DER_INVAL);
16901697

16911698
if (mrone->mo_iods[0].iod_type == DAOS_IOD_SINGLE)
16921699
rc = migrate_fetch_update_single(mrone, oh, cont);
@@ -1702,9 +1709,8 @@ migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone,
17021709

17031710
tls->mpt_rec_count += mrone->mo_rec_num;
17041711
tls->mpt_size += mrone->mo_size;
1705-
obj_close:
1706-
dsc_obj_close(oh);
1707-
cont_put:
1712+
1713+
out:
17081714
if (cont != NULL)
17091715
ds_cont_child_put(cont);
17101716
return rc;
@@ -1835,13 +1841,16 @@ static void
18351841
migrate_one_ult(void *arg)
18361842
{
18371843
struct migrate_one *mrone = arg;
1844+
struct iter_obj_arg *obj_arg;
18381845
struct migrate_pool_tls *tls;
18391846
daos_size_t data_size;
18401847
int rc = 0;
18411848

18421849
while (daos_fail_check(DAOS_REBUILD_TGT_REBUILD_HANG))
18431850
dss_sleep(0);
18441851

1852+
obj_arg = mrone->mo_obj_arg;
1853+
18451854
tls = migrate_pool_tls_lookup(mrone->mo_pool_uuid,
18461855
mrone->mo_pool_tls_version, mrone->mo_generation);
18471856
if (tls == NULL || tls->mpt_fini) {
@@ -1898,6 +1907,7 @@ migrate_one_ult(void *arg)
18981907
tls->mpt_fini = 1;
18991908
}
19001909
out:
1910+
migrate_obj_put(obj_arg);
19011911
migrate_one_destroy(mrone);
19021912
if (tls != NULL) {
19031913
migrate_tgt_exit(tls, DKEY_ULT);
@@ -2666,10 +2676,13 @@ migrate_start_ult(struct enum_unpack_arg *unpack_arg)
26662676
rc = migrate_tgt_enter(tls, DKEY_ULT, NULL);
26672677
if (rc)
26682678
break;
2679+
migrate_obj_get(arg);
2680+
mrone->mo_obj_arg = arg;
26692681
d_list_del_init(&mrone->mo_list);
26702682
rc = dss_ult_create(migrate_one_ult, mrone, DSS_XS_VOS,
26712683
arg->tgt_idx, MIGRATE_STACK_SIZE, NULL);
26722684
if (rc) {
2685+
migrate_obj_put(arg);
26732686
migrate_tgt_exit(tls, DKEY_ULT);
26742687
migrate_one_destroy(mrone);
26752688
break;
@@ -2700,13 +2713,10 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls,
27002713
daos_key_desc_t kds[KDS_NUM] = {0};
27012714
d_iov_t csum = {0};
27022715
d_iov_t *p_csum;
2703-
uint8_t stack_csum_buf[CSUM_BUF_SIZE] = {0};
2704-
struct cont_props props;
2716+
uint8_t stack_csum_buf[CSUM_BUF_SIZE] = {0};
27052717
struct enum_unpack_arg unpack_arg = { 0 };
27062718
d_iov_t iov = { 0 };
2707-
d_sg_list_t sgl = { 0 };
2708-
daos_handle_t coh = DAOS_HDL_INVAL;
2709-
daos_handle_t oh = DAOS_HDL_INVAL;
2719+
d_sg_list_t sgl = {0};
27102720
uint32_t minimum_nr;
27112721
uint32_t enum_flags;
27122722
uint32_t num;
@@ -2724,46 +2734,16 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls,
27242734
}
27252735

27262736
D_ASSERT(dss_get_module_info()->dmi_xs_id != 0);
2727-
2728-
rc = dsc_pool_open(tls->mpt_pool_uuid, tls->mpt_poh_uuid, 0,
2729-
NULL, tls->mpt_pool->spc_pool->sp_map,
2730-
&tls->mpt_svc_list, &tls->mpt_pool_hdl);
2731-
if (rc) {
2732-
D_ERROR("dsc_pool_open failed: "DF_RC"\n", DP_RC(rc));
2733-
D_GOTO(out, rc);
2734-
}
2735-
2736-
rc = migrate_cont_open(tls, arg->cont_uuid, 0, &coh);
2737-
if (rc) {
2738-
D_ERROR("migrate_cont_open failed: "DF_RC"\n", DP_RC(rc));
2739-
D_GOTO(out, rc);
2740-
}
2741-
2742-
/* Only open with RW flag, reintegrating flag will be set, which is needed
2743-
* during unpack_cb to check if parity shard alive.
2744-
*/
2745-
rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RO, &oh);
2746-
if (rc) {
2747-
D_ERROR("dsc_obj_open failed: "DF_RC"\n", DP_RC(rc));
2748-
D_GOTO(out, rc);
2749-
}
2750-
2737+
D_ASSERT(daos_handle_is_valid(arg->ioa_oh));
27512738
unpack_arg.arg = arg;
27522739
unpack_arg.epr = *epr;
2753-
unpack_arg.oh = oh;
2740+
unpack_arg.oh = arg->ioa_oh;
27542741
unpack_arg.version = tls->mpt_version;
27552742
D_INIT_LIST_HEAD(&unpack_arg.merge_list);
2743+
unpack_arg.oc_attr = arg->ioa_oca;
27562744
buf = stack_buf;
27572745
buf_len = ITER_BUF_SIZE;
27582746

2759-
dsc_cont_get_props(coh, &props);
2760-
rc = dsc_obj_id2oc_attr(arg->oid.id_pub, &props, &unpack_arg.oc_attr);
2761-
if (rc) {
2762-
D_ERROR("Unknown object class: %u\n",
2763-
daos_obj_id2class(arg->oid.id_pub));
2764-
D_GOTO(out_obj, rc);
2765-
}
2766-
27672747
memset(&anchor, 0, sizeof(anchor));
27682748
memset(&akey_anchor, 0, sizeof(akey_anchor));
27692749
memset(&dkey_anchor, 0, sizeof(dkey_anchor));
@@ -2814,9 +2794,8 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls,
28142794

28152795
daos_anchor_set_flags(&dkey_anchor, enum_flags);
28162796
num = KDS_NUM;
2817-
rc = dsc_obj_list_obj(oh, epr, NULL, NULL, NULL,
2818-
&num, kds, &sgl, &anchor,
2819-
&dkey_anchor, &akey_anchor, p_csum);
2797+
rc = dsc_obj_list_obj(arg->ioa_oh, epr, NULL, NULL, NULL, &num, kds, &sgl, &anchor,
2798+
&dkey_anchor, &akey_anchor, p_csum);
28202799

28212800
if (rc == -DER_KEY2BIG) {
28222801
D_DEBUG(DB_REBUILD, "migrate obj "DF_UOID" got "
@@ -2951,9 +2930,7 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls,
29512930

29522931
if (csum.iov_buf != NULL && csum.iov_buf != stack_csum_buf)
29532932
D_FREE(csum.iov_buf);
2954-
out_obj:
2955-
dsc_obj_close(oh);
2956-
out:
2933+
29572934
D_DEBUG(DB_REBUILD, "obj "DF_UOID" for shard %u eph "
29582935
DF_U64"-"DF_U64": "DF_RC"\n", DP_UOID(arg->oid), arg->shard,
29592936
epr->epr_lo, epr->epr_hi, DP_RC(rc));
@@ -3061,9 +3038,13 @@ migrate_obj_ult(void *data)
30613038
struct iter_obj_arg *arg = data;
30623039
struct migrate_pool_tls *tls = NULL;
30633040
daos_epoch_range_t epr;
3041+
daos_handle_t coh = DAOS_HDL_INVAL;
3042+
struct cont_props props;
30643043
int i;
30653044
int rc = 0;
30663045

3046+
migrate_obj_get(arg);
3047+
30673048
tls = migrate_pool_tls_lookup(arg->pool_uuid, arg->version, arg->generation);
30683049
if (tls == NULL || tls->mpt_fini) {
30693050
D_WARN("some one abort the rebuild "DF_UUID"\n",
@@ -3092,6 +3073,33 @@ migrate_obj_ult(void *data)
30923073
}
30933074
}
30943075

3076+
rc = dsc_pool_open(tls->mpt_pool_uuid, tls->mpt_poh_uuid, 0, NULL,
3077+
tls->mpt_pool->spc_pool->sp_map, &tls->mpt_svc_list, &tls->mpt_pool_hdl);
3078+
if (rc) {
3079+
DL_ERROR(rc, DF_RB ": dsc_pool_open failed", DP_RB_MPT(tls));
3080+
D_GOTO(out, rc);
3081+
}
3082+
3083+
rc = migrate_cont_open(tls, arg->cont_uuid, 0, &coh);
3084+
if (rc) {
3085+
DL_ERROR(rc, DF_RB ": migrate_cont_open failed", DP_RB_MPT(tls));
3086+
D_GOTO(out, rc);
3087+
}
3088+
3089+
rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RO, &arg->ioa_oh);
3090+
if (rc) {
3091+
DL_ERROR(rc, DF_RB ": dsc_obj_open failed", DP_RB_MPT(tls));
3092+
D_GOTO(out, rc);
3093+
}
3094+
3095+
dsc_cont_get_props(coh, &props);
3096+
rc = dsc_obj_id2oc_attr(arg->oid.id_pub, &props, &arg->ioa_oca);
3097+
if (rc) {
3098+
DL_ERROR(rc, DF_RB ": unknown object class: %u", DP_RB_MPT(tls),
3099+
daos_obj_id2class(arg->oid.id_pub));
3100+
D_GOTO(out, rc);
3101+
}
3102+
30953103
for (i = 0; i < arg->snap_cnt; i++) {
30963104
epr.epr_lo = i > 0 ? arg->snaps[i - 1] + 1 : 0;
30973105
epr.epr_hi = arg->snaps[i];
@@ -3153,8 +3161,7 @@ migrate_obj_ult(void *data)
31533161
if (tls != NULL)
31543162
migrate_tgt_exit(tls, OBJ_ULT);
31553163

3156-
D_FREE(arg->snaps);
3157-
D_FREE(arg);
3164+
migrate_obj_put(arg);
31583165
migrate_pool_tls_put(tls);
31593166
}
31603167

@@ -3185,6 +3192,8 @@ migrate_one_object(daos_unit_oid_t oid, daos_epoch_t eph, daos_epoch_t punched_e
31853192
return -DER_NOMEM;
31863193

31873194
obj_arg->oid = oid;
3195+
obj_arg->ioa_oh = DAOS_HDL_INVAL;
3196+
obj_arg->ioa_obj_ref = 0;
31883197
obj_arg->epoch = eph;
31893198
obj_arg->shard = shard;
31903199
obj_arg->punched_epoch = punched_eph;

0 commit comments

Comments
 (0)