Skip to content

Commit 0f05b2f

Browse files
liw, liuxuezhao, Liang Zhen, daltonbohning
authored
DAOS-16930 pool: Share map bulk resources (#15763)
Improve the efficiency of concurrent POOL_QUERY, POOL_CONNECT, and POOL_TGT_QUERY_MAP requests by giving them a chance to share the same pool map buffer and the bulk handle for that buffer. Introduce a pool space query cache on the service leader to avoid space query flooding. The pool space cache expiration time is 2 seconds by default and can be changed via the DAOS_POOL_SPACE_CACHE_INTVL environment variable; if the expiration time is set to zero, the space cache is disabled. Signed-off-by: Li Wei <liwei@hpe.com> Signed-off-by: Niu Yawei <yawei.niu@hpe.com> Co-authored-by: Xuezhao Liu <xuezhao.liu@hpe.com> Co-authored-by: Liang Zhen <liang.zhen@hpe.com> Co-authored-by: Dalton Bohning <dalton.bohning@hpe.com>
1 parent ad722a6 commit 0f05b2f

File tree

7 files changed

+212
-56
lines changed

7 files changed

+212
-56
lines changed

src/include/daos_srv/pool.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ struct ds_pool_svc;
3838
/* age of an entry in svc_ops KVS before it may be evicted */
3939
#define DEFAULT_SVC_OPS_ENTRY_AGE_SEC_MAX 300ULL
4040

41+
/* Pool map buffer cache */
42+
struct ds_pool_map_bc {
43+
struct pool_buf *pmc_buf; /* the cached pool map buffer */
44+
crt_bulk_t pmc_bulk; /* bulk handle; presumably created over pmc_buf -- TODO confirm */
45+
uint32_t pmc_ref; /* presumably a reference count (see ds_pool_put_map_bc) -- TODO confirm */
46+
};
47+
4148
/*
4249
* Pool object
4350
*
@@ -48,7 +55,8 @@ struct ds_pool {
4855
uuid_t sp_uuid; /* pool UUID */
4956
d_list_t sp_hdls;
5057
ABT_rwlock sp_lock;
51-
struct pool_map *sp_map;
58+
struct pool_map *sp_map;
59+
struct ds_pool_map_bc *sp_map_bc;
5260
uint32_t sp_map_version; /* temporary */
5361
uint32_t sp_ec_cell_sz;
5462
uint64_t sp_reclaim;

src/pool/srv.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* (C) Copyright 2016-2024 Intel Corporation.
3+
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -22,6 +23,7 @@
2223

2324
bool ec_agg_disabled;
2425
uint32_t pw_rf = -1; /* pool wise redundancy factor */
26+
uint32_t ps_cache_intvl = 2; /* pool space cache expiration time, in seconds */
2527
#define PW_RF_DEFAULT (2)
2628
#define PW_RF_MIN (0)
2729
#define PW_RF_MAX (4)
@@ -76,6 +78,14 @@ init(void)
7678
pw_rf = PW_RF_DEFAULT;
7779
D_INFO("pool redundancy factor %d\n", pw_rf);
7880

81+
d_getenv_uint32_t("DAOS_POOL_SPACE_CACHE_INTVL", &ps_cache_intvl);
82+
if (ps_cache_intvl > 20) {
83+
D_WARN("pool space cache expiration time %u is too large, use default value\n",
84+
ps_cache_intvl);
85+
ps_cache_intvl = 2;
86+
}
87+
D_INFO("pool space cache expiration time set to %u seconds\n", ps_cache_intvl);
88+
7989
ds_pool_rsvc_class_register();
8090

8191
bio_register_ract_ops(&nvme_reaction_ops);

src/pool/srv_internal.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* (C) Copyright 2016-2024 Intel Corporation.
3+
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -17,6 +18,7 @@
1718
#include <gurt/telemetry_common.h>
1819

1920
extern uint32_t pw_rf;
21+
extern uint32_t ps_cache_intvl;
2022

2123
/**
2224
* Global pool metrics
@@ -236,8 +238,10 @@ int ds_pool_tgt_prop_update(struct ds_pool *pool, struct pool_iv_prop *iv_prop);
236238
int ds_pool_tgt_connect(struct ds_pool *pool, struct pool_iv_conn *pic);
237239
void ds_pool_tgt_query_map_handler(crt_rpc_t *rpc);
238240
void ds_pool_tgt_discard_handler(crt_rpc_t *rpc);
239-
void
240-
ds_pool_tgt_warmup_handler(crt_rpc_t *rpc);
241+
void ds_pool_tgt_warmup_handler(crt_rpc_t *rpc);
242+
int ds_pool_lookup_map_bc(struct ds_pool *pool, crt_context_t ctx,
243+
struct ds_pool_map_bc **map_bc_out, uint32_t *map_version_out);
244+
void ds_pool_put_map_bc(struct ds_pool_map_bc *map_bc);
241245

242246
/*
243247
* srv_util.c
@@ -246,8 +250,7 @@ bool ds_pool_map_rank_up(struct pool_map *map, d_rank_t rank);
246250
int ds_pool_plan_svc_reconfs(int svc_rf, struct pool_map *map, d_rank_list_t *replicas,
247251
d_rank_t self, bool filter_only, d_rank_list_t **to_add_out,
248252
d_rank_list_t **to_remove_out);
249-
int ds_pool_transfer_map_buf(struct pool_buf *map_buf, uint32_t map_version,
250-
crt_rpc_t *rpc, crt_bulk_t remote_bulk,
253+
int ds_pool_transfer_map_buf(struct ds_pool_map_bc *map_bc, crt_rpc_t *rpc, crt_bulk_t remote_bulk,
251254
uint32_t *required_buf_size);
252255
extern struct bio_reaction_ops nvme_reaction_ops;
253256

src/pool/srv_pool.c

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,12 @@ sched_cancel_and_wait(struct pool_svc_sched *sched)
191191
sched_wait(sched);
192192
}
193193

194+
/* Cached pool space query result; embedded in struct pool_svc as ps_space_cache */
struct pool_space_cache {
195+
struct daos_pool_space psc_space; /* space figures copied from a query reply */
196+
uint64_t psc_timestamp; /* daos_gettime_coarse() value taken when psc_space was stored */
197+
ABT_mutex psc_lock; /* held while the cache is checked and while it is refreshed */
198+
};
199+
194200
/* Pool service */
195201
struct pool_svc {
196202
struct ds_rsvc ps_rsvc;
@@ -204,6 +210,7 @@ struct pool_svc {
204210
rdb_path_t ps_ops; /* metadata ops KVS */
205211
int ps_error; /* in DB data (see pool_svc_lookup_leader) */
206212
struct pool_svc_events ps_events;
213+
struct pool_space_cache ps_space_cache;
207214
uint32_t ps_global_version;
208215
int ps_svc_rf;
209216
bool ps_force_notify; /* MS of PS membership */
@@ -1235,9 +1242,16 @@ pool_svc_alloc_cb(d_iov_t *id, struct ds_rsvc **rsvc)
12351242
goto err_pool;
12361243
}
12371244

1245+
rc = ABT_mutex_create(&svc->ps_space_cache.psc_lock);
1246+
if (rc != ABT_SUCCESS) {
1247+
D_ERROR("failed to create psc_lock: %d\n", rc);
1248+
rc = dss_abterr2der(rc);
1249+
goto err_lock;
1250+
}
1251+
12381252
rc = rdb_path_init(&svc->ps_root);
12391253
if (rc != 0)
1240-
goto err_lock;
1254+
goto err_psc_lock;
12411255
rc = rdb_path_push(&svc->ps_root, &rdb_path_root_key);
12421256
if (rc != 0)
12431257
goto err_root;
@@ -1306,6 +1320,8 @@ pool_svc_alloc_cb(d_iov_t *id, struct ds_rsvc **rsvc)
13061320
rdb_path_fini(&svc->ps_handles);
13071321
err_root:
13081322
rdb_path_fini(&svc->ps_root);
1323+
err_psc_lock:
1324+
ABT_mutex_free(&svc->ps_space_cache.psc_lock);
13091325
err_lock:
13101326
ABT_rwlock_free(&svc->ps_lock);
13111327
err_pool:
@@ -3872,8 +3888,6 @@ ds_pool_connect_handler(crt_rpc_t *rpc, int handler_version)
38723888
struct pool_connect_in *in = crt_req_get(rpc);
38733889
struct pool_connect_out *out = crt_reply_get(rpc);
38743890
struct pool_svc *svc;
3875-
struct pool_buf *map_buf = NULL;
3876-
uint32_t map_version;
38773891
uint32_t connectable;
38783892
uint32_t global_ver;
38793893
uint32_t obj_layout_ver;
@@ -4095,12 +4109,6 @@ ds_pool_connect_handler(crt_rpc_t *rpc, int handler_version)
40954109
goto out_map_version;
40964110
}
40974111

4098-
rc = read_map_buf(&tx, &svc->ps_root, &map_buf, &map_version);
4099-
if (rc != 0) {
4100-
D_ERROR(DF_UUID": failed to read pool map: "DF_RC"\n",
4101-
DP_UUID(svc->ps_uuid), DP_RC(rc));
4102-
D_GOTO(out_map_version, rc);
4103-
}
41044112
transfer_map = true;
41054113
if (skip_update)
41064114
D_GOTO(out_map_version, rc = 0);
@@ -4208,13 +4216,20 @@ ds_pool_connect_handler(crt_rpc_t *rpc, int handler_version)
42084216
ABT_rwlock_unlock(svc->ps_lock);
42094217
rdb_tx_end(&tx);
42104218
if (rc == 0 && transfer_map) {
4211-
rc = ds_pool_transfer_map_buf(map_buf, map_version, rpc, bulk,
4212-
&out->pco_map_buf_size);
4219+
struct ds_pool_map_bc *map_bc;
4220+
uint32_t map_version;
4221+
4222+
rc = ds_pool_lookup_map_bc(svc->ps_pool, rpc->cr_ctx, &map_bc, &map_version);
4223+
if (rc == 0) {
4224+
rc = ds_pool_transfer_map_buf(map_bc, rpc, bulk, &out->pco_map_buf_size);
4225+
ds_pool_put_map_bc(map_bc);
4226+
/* Ensure the map version matches the map buffer. */
4227+
out->pco_op.po_map_version = map_version;
4228+
}
42134229
/** TODO: roll back tx if transfer fails? Perhaps rdb_tx_discard()? */
42144230
}
42154231
if (rc == 0)
42164232
rc = op_val.ov_rc;
4217-
D_FREE(map_buf);
42184233
D_FREE(hdl);
42194234
D_FREE(machine);
42204235
if (prop)
@@ -4487,8 +4502,23 @@ pool_space_query_bcast(crt_context_t ctx, struct pool_svc *svc, uuid_t pool_hdl,
44874502
struct pool_tgt_query_in *in;
44884503
struct pool_tgt_query_out *out;
44894504
crt_rpc_t *rpc;
4505+
struct pool_space_cache *cache = &svc->ps_space_cache;
4506+
uint64_t cur_time = 0;
4507+
bool unlock = false;
44904508
int rc;
44914509

4510+
if (ps_cache_intvl > 0) {
4511+
ABT_mutex_lock(cache->psc_lock);
4512+
4513+
cur_time = daos_gettime_coarse();
4514+
if (cur_time < cache->psc_timestamp + ps_cache_intvl) {
4515+
*ps = cache->psc_space;
4516+
ABT_mutex_unlock(cache->psc_lock);
4517+
return 0;
4518+
}
4519+
unlock = true;
4520+
}
4521+
44924522
D_DEBUG(DB_MD, DF_UUID": bcasting\n", DP_UUID(svc->ps_uuid));
44934523

44944524
rc = bcast_create(ctx, svc, POOL_TGT_QUERY, NULL, &rpc);
@@ -4516,11 +4546,18 @@ pool_space_query_bcast(crt_context_t ctx, struct pool_svc *svc, uuid_t pool_hdl,
45164546
} else {
45174547
D_ASSERT(ps != NULL);
45184548
*ps = out->tqo_space;
4549+
if (ps_cache_intvl > 0 && cur_time > cache->psc_timestamp) {
4550+
cache->psc_timestamp = cur_time;
4551+
cache->psc_space = *ps;
4552+
}
45194553
}
45204554

45214555
out_rpc:
45224556
crt_req_decref(rpc);
45234557
out:
4558+
if (unlock)
4559+
ABT_mutex_unlock(cache->psc_lock);
4560+
45244561
D_DEBUG(DB_MD, DF_UUID": bcasted: "DF_RC"\n", DP_UUID(svc->ps_uuid),
45254562
DP_RC(rc));
45264563
return rc;
@@ -4979,7 +5016,7 @@ ds_pool_query_handler(crt_rpc_t *rpc, int handler_version)
49795016
struct pool_query_in *in = crt_req_get(rpc);
49805017
struct pool_query_out *out = crt_reply_get(rpc);
49815018
daos_prop_t *prop = NULL;
4982-
struct pool_buf *map_buf;
5019+
struct ds_pool_map_bc *map_bc;
49835020
uint32_t map_version = 0;
49845021
struct pool_svc *svc;
49855022
struct pool_metrics *metrics;
@@ -5144,19 +5181,18 @@ ds_pool_query_handler(crt_rpc_t *rpc, int handler_version)
51445181
}
51455182
}
51465183

5147-
rc = read_map_buf(&tx, &svc->ps_root, &map_buf, &map_version);
5148-
if (rc != 0)
5149-
D_ERROR(DF_UUID": failed to read pool map: "DF_RC"\n",
5150-
DP_UUID(svc->ps_uuid), DP_RC(rc));
5151-
51525184
out_lock:
51535185
ABT_rwlock_unlock(svc->ps_lock);
51545186
rdb_tx_end(&tx);
51555187
if (rc != 0)
51565188
goto out_svc;
51575189

5158-
rc = ds_pool_transfer_map_buf(map_buf, map_version, rpc, bulk, &out->pqo_map_buf_size);
5159-
D_FREE(map_buf);
5190+
5191+
rc = ds_pool_lookup_map_bc(svc->ps_pool, rpc->cr_ctx, &map_bc, &map_version);
5192+
if (rc != 0)
5193+
goto out_svc;
5194+
rc = ds_pool_transfer_map_buf(map_bc, rpc, bulk, &out->pqo_map_buf_size);
5195+
ds_pool_put_map_bc(map_bc);
51605196
if (rc != 0)
51615197
goto out_svc;
51625198

0 commit comments

Comments
 (0)