diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 582c5f97352..578784d3d3c 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1734,6 +1734,7 @@ cont_track_eph_leader_alloc(struct cont_svc *cont_svc, uuid_t cont_uuid, eph_ldr->cte_server_ephs[i].re_rank = doms[i].do_comp.co_rank; eph_ldr->cte_server_ephs[i].re_ec_agg_eph = 0; eph_ldr->cte_server_ephs[i].re_stable_eph = 0; + eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts = daos_gettime_coarse(); } d_list_add(&eph_ldr->cte_list, &cont_svc->cs_cont_ephs_leader_list); *leader_p = eph_ldr; @@ -1790,8 +1791,11 @@ ds_cont_leader_update_track_eph(uuid_t pool_uuid, uuid_t cont_uuid, d_rank_t ran for (i = 0; i < eph_ldr->cte_servers_num; i++) { if (eph_ldr->cte_server_ephs[i].re_rank == rank) { - if (eph_ldr->cte_server_ephs[i].re_ec_agg_eph < ec_agg_eph) + if (eph_ldr->cte_server_ephs[i].re_ec_agg_eph < ec_agg_eph) { eph_ldr->cte_server_ephs[i].re_ec_agg_eph = ec_agg_eph; + eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts = + daos_gettime_coarse(); + } if (eph_ldr->cte_server_ephs[i].re_stable_eph < stable_eph) eph_ldr->cte_server_ephs[i].re_stable_eph = stable_eph; break; @@ -2056,6 +2060,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) uint64_t cur_eph, new_eph; daos_epoch_t min_ec_agg_eph; daos_epoch_t min_stable_eph; + uint64_t cur_ts; int i; int rc = 0; @@ -2090,6 +2095,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) min_ec_agg_eph = DAOS_EPOCH_MAX; min_stable_eph = DAOS_EPOCH_MAX; + cur_ts = daos_gettime_coarse(); for (i = 0; i < eph_ldr->cte_servers_num; i++) { d_rank_t rank = eph_ldr->cte_server_ephs[i].re_rank; @@ -2099,6 +2105,14 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) continue; } + if (pool->sp_reclaim != DAOS_RECLAIM_DISABLED && + cur_ts > eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts + 600) + D_WARN(DF_CONT ": Sluggish EC boundary report from rank %d, " DF_U64 + " Seconds.", 
+ DP_CONT(svc->cs_pool_uuid, eph_ldr->cte_cont_uuid), rank, + cur_ts - + eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts); + if (eph_ldr->cte_server_ephs[i].re_ec_agg_eph < min_ec_agg_eph) min_ec_agg_eph = eph_ldr->cte_server_ephs[i].re_ec_agg_eph; if (eph_ldr->cte_server_ephs[i].re_stable_eph < min_stable_eph) diff --git a/src/container/srv_internal.h b/src/container/srv_internal.h index 469a671ffb0..7e4a6c8a626 100644 --- a/src/container/srv_internal.h +++ b/src/container/srv_internal.h @@ -62,6 +62,7 @@ struct rank_eph { d_rank_t re_rank; daos_epoch_t re_ec_agg_eph; daos_epoch_t re_stable_eph; + uint64_t re_ec_agg_eph_update_ts; /* re_ec_agg_eph update timestamp */ }; /* container EC aggregation epoch and stable epoch control descriptor, which is only on leader */ diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index 0cc48dcea1f..d2c309a990d 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -4138,8 +4138,10 @@ anchor_update_check_eof(struct obj_auxi_args *obj_auxi, daos_anchor_t *anchor) obj_auxi_shards_iterate(obj_auxi, update_sub_anchor_cb, NULL); sub_anchors = (struct shard_anchors *)anchor->da_sub_anchors; - if (!d_list_empty(&sub_anchors->sa_merged_list)) + if (!d_list_empty(&sub_anchors->sa_merged_list)) { + D_ASSERT(obj_auxi->opc != DAOS_OBJ_RPC_ENUMERATE); return; + } if (sub_anchors_is_eof(sub_anchors)) { daos_obj_list_t *obj_args; @@ -4148,6 +4150,18 @@ anchor_update_check_eof(struct obj_auxi_args *obj_auxi, daos_anchor_t *anchor) obj_args = dc_task_get_args(obj_auxi->obj_task); sub_anchors_free(obj_args, obj_auxi->opc); + } else if (obj_auxi->opc == DAOS_OBJ_RPC_ENUMERATE) { + for (int i = 0; i < sub_anchors->sa_anchors_nr; i++) { + daos_anchor_t *sub_anchor; + + sub_anchor = &sub_anchors->sa_anchors[i].ssa_anchor; + if (!daos_anchor_is_eof(sub_anchor)) { + D_DEBUG(DB_REBUILD, "shard %d sub_anchor %d/%d non EOF", + sub_anchors->sa_anchors[i].ssa_shard, i, + sub_anchors->sa_anchors_nr); + break; + } + } } } diff --git 
a/src/object/obj_enum.c b/src/object/obj_enum.c index 84669771669..4175d7de907 100644 --- a/src/object/obj_enum.c +++ b/src/object/obj_enum.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -689,9 +690,8 @@ obj_enum_iterate(daos_key_desc_t *kdss, d_sg_list_t *sgl, int nr, ptr = sgl_indexed_byte(sgl, &sgl_idx); D_ASSERTF(ptr != NULL, "kds and sgl don't line up"); - D_DEBUG(DB_REBUILD, "process %d, type %d, ptr %p, len "DF_U64 - ", total %zd\n", i, kds->kd_val_type, ptr, - kds->kd_key_len, sgl->sg_iovs[0].iov_len); + D_DEBUG(DB_REBUILD, "process %d/%d, type %d, ptr %p, len " DF_U64 ", total %zd\n", + i, nr, kds->kd_val_type, ptr, kds->kd_key_len, sgl->sg_iovs[0].iov_len); if (kds->kd_val_type == 0 || (kds->kd_val_type != type && type != -1)) { sgl_move_forward(sgl, &sgl_idx, kds->kd_key_len); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index ef6c5a26830..6ea4bb63ab6 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -3246,6 +3246,27 @@ obj_enum_complete(crt_rpc_t *rpc, int status, int map_version, D_FREE(oeo->oeo_csum_iov.iov_buf); }
+/* Debug-only: log anchor type/shard/flags and the first 13 64-bit words of da_buf (DB_REBUILD). */
+static void
+dump_enum_anchor(daos_unit_oid_t uoid, const daos_anchor_t *anchor, const char *str)
+{
+	uint64_t     data[DAOS_ANCHOR_BUF_MAX / 8]; /* constant-sized array, not a VLA */
+	unsigned int i;
+
+	D_DEBUG(DB_REBUILD, DF_UOID "%s anchor -", DP_UOID(uoid), str);
+	D_DEBUG(DB_REBUILD, "type %d, shard %d, flags 0x%x\n", anchor->da_type, anchor->da_shard,
+		anchor->da_flags);
+	/* Copy byte-wise so da_buf is never dereferenced through a cast uint64_t pointer. */
+	for (i = 0; i < sizeof(data); i++)
+		((unsigned char *)data)[i] = ((const unsigned char *)anchor->da_buf)[i];
+	if (sizeof(data) / sizeof(data[0]) >= 13) /* the format below consumes exactly 13 words */
+		D_DEBUG(DB_REBUILD,
+			"da_buf " DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64
+			"," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64,
+			data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
+			data[8], data[9], data[10], data[11], data[12]);
+}
+
 static int
 obj_local_enum(struct
obj_io_context *ioc, crt_rpc_t *rpc, struct vos_iter_anchors *anchors, struct ds_obj_enum_arg *enum_arg, @@ -3314,6 +3335,8 @@ obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc, D_ASSERT(opc == DAOS_OBJ_RPC_ENUMERATE); type = VOS_ITER_DKEY; param.ip_flags |= VOS_IT_RECX_VISIBLE; + dump_enum_anchor(oei->oei_oid, &anchors->ia_dkey, "dkey"); + dump_enum_anchor(oei->oei_oid, &anchors->ia_akey, "akey"); if (daos_anchor_get_flags(&anchors->ia_dkey) & DIOF_WITH_SPEC_EPOCH) { /* For obj verification case. */ @@ -3331,7 +3354,12 @@ obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc, enum_arg->chk_key2big = 1; enum_arg->need_punch = 1; enum_arg->copy_data_cb = vos_iter_copy; - fill_oid(oei->oei_oid, enum_arg); + rc = fill_oid(oei->oei_oid, enum_arg); + if (rc != 0) { + rc = -DER_KEY2BIG; + DL_ERROR(rc, DF_UOID "fill oid failed", DP_UOID(oei->oei_oid)); + goto failed; + } } /* diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 73236fe6c43..3040191b4e4 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -2587,9 +2587,18 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) if (rc == 1 && (is_ec_data_shard_by_tgt_off(unpack_tgt_off, &arg->oc_attr) || (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) { - D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID " ignore shard " DF_KEY "/%u/%d/%u/%d.\n", - DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, - (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); + if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8) + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " ignore shard, int dkey " DF_U64 + "/%u/%d/%u/%d.\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), + *(uint64_t *)io->ui_dkey.iov_buf, shard, + (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); + else + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " ignore shard " DF_KEY "/%u/%d/%u/%d.\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), 
DP_KEY(&io->ui_dkey), shard, + (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); D_GOTO(put, rc = 0); } rc = 0; @@ -2605,11 +2614,19 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) continue; } - D_DEBUG(DB_REBUILD, - DF_RB ": " DF_UOID " unpack " DF_KEY " for shard " - "%u/%u/%u/" DF_X64 "/%u\n", - DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, - unpack_tgt_off, migrate_tgt_off, io->ui_dkey_hash, parity_shard); + if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8) + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " unpack int dkey " DF_U64 " for shard " + "%u/%u/%u/" DF_X64 "/%u\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), + *(uint64_t *)io->ui_dkey.iov_buf, shard, unpack_tgt_off, + migrate_tgt_off, io->ui_dkey_hash, parity_shard); + else + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " unpack " DF_KEY " for shard " + "%u/%u/%u/" DF_X64 "/%u\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, + unpack_tgt_off, migrate_tgt_off, io->ui_dkey_hash, parity_shard); /** * Since we do not need split the rebuild into parity rebuild @@ -2647,8 +2664,14 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) if (!create_migrate_one) { struct ds_cont_child *cont = NULL; - D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID "/" DF_KEY " does not need rebuild.\n", - DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey)); + if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8) + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID "/int dkey: " DF_U64 " does not need rebuild.", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), + *(uint64_t *)io->ui_dkey.iov_buf); + else + D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID "/" DF_KEY " does not need rebuild.", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey)); /* Create the vos container when no record need to be rebuilt for this shard, * for the case of reintegrate the container was discarded ahead.