Skip to content

Commit 165fd98

Browse files
committed
DAOS-18164 bio: bump default cluster_sz
Bump the default BS cluster size from 32MB to 128MB in md-on-ssd mode, and make the cluster size configurable through the DAOS_BS_CLUSTER_MB environment variable. Signed-off-by: Niu Yawei <[email protected]>
1 parent d9ae7df commit 165fd98

File tree

4 files changed

+47
-39
lines changed

4 files changed

+47
-39
lines changed

src/bio/bio_xstream.c

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/**
22
* (C) Copyright 2018-2024 Intel Corporation.
33
* (C) Copyright 2025 Google LLC
4-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
4+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
55
*
66
* SPDX-License-Identifier: BSD-2-Clause-Patent
77
*/
@@ -31,8 +31,8 @@
3131

3232
/* These Macros should be turned into DAOS configuration in the future */
3333
#define DAOS_MSG_RING_SZ 4096
34-
/* SPDK blob parameters */
35-
#define DAOS_BS_CLUSTER_SZ (1ULL << 25) /* 32MB */
34+
/* Default cluster size in MB */
35+
#define DAOS_DEFAULT_CLUSTER_MB 128
3636
/* DMA buffer parameters */
3737
#define DAOS_DMA_CHUNK_INIT_PCT 50 /* Default per-xstream init chunks, in percentage */
3838
#define DAOS_DMA_CHUNK_CNT_MAX 128 /* Default per-xstream max chunks, 1GB */
@@ -224,6 +224,7 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size,
224224
char *env;
225225
int rc, fd;
226226
unsigned int size_mb = BIO_DMA_CHUNK_MB, io_timeout_secs = 0;
227+
unsigned int cluster_mb = DAOS_DEFAULT_CLUSTER_MB;
227228

228229
if (tgt_nr <= 0) {
229230
D_ERROR("tgt_nr: %u should be > 0\n", tgt_nr);
@@ -323,8 +324,14 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size,
323324
D_INFO("Set per-xstream DMA buffer upper bound to %u %uMB chunks, prealloc %u chunks\n",
324325
bio_chk_cnt_max, size_mb, init_chk_cnt());
325326

327+
d_getenv_uint("DAOS_BS_CLUSTER_MB", &cluster_mb);
328+
if (cluster_mb < 32 || cluster_mb > 1024) {
329+
D_WARN("DAOS_BS_CLUSTER_MB %u is invalid, default %u is used\n", cluster_mb,
330+
DAOS_DEFAULT_CLUSTER_MB);
331+
cluster_mb = DAOS_DEFAULT_CLUSTER_MB;
332+
}
326333
spdk_bs_opts_init(&nvme_glb.bd_bs_opts, sizeof(nvme_glb.bd_bs_opts));
327-
nvme_glb.bd_bs_opts.cluster_sz = DAOS_BS_CLUSTER_SZ;
334+
nvme_glb.bd_bs_opts.cluster_sz = (cluster_mb << 20);
328335
nvme_glb.bd_bs_opts.max_channel_ops = BIO_BS_MAX_CHANNEL_OPS;
329336

330337
d_agetenv_str(&env, "VOS_BDEV_CLASS");
@@ -368,8 +375,9 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size,
368375
if (!bio_nvme_configured(SMD_DEV_TYPE_META))
369376
nvme_glb.bd_bs_opts.cluster_sz = (1UL << 30); /* 1GB */
370377

371-
D_INFO("MD on SSD is %s\n",
372-
bio_nvme_configured(SMD_DEV_TYPE_META) ? "enabled" : "disabled");
378+
D_INFO("MD on SSD is %s, %u cluster size is used\n",
379+
bio_nvme_configured(SMD_DEV_TYPE_META) ? "enabled" : "disabled",
380+
nvme_glb.bd_bs_opts.cluster_sz);
373381

374382
bio_spdk_inited = true;
375383

src/tests/suite/daos_md_replication.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* (C) Copyright 2017-2022 Intel Corporation.
3+
* (C) Copyright 2026 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -11,6 +12,12 @@
1112
#include <daos/pool.h>
1213
#include "daos_test.h"
1314

15+
/*
16+
* Given the 128MB default blobstore cluster size, the minimal pool scm_size for
17+
* an 8-target engine would be 128MB * 8 = 1GB.
18+
*/
19+
#define MIN_SCM_SIZE (1ULL << 30)
20+
1421
static void
1522
mdr_stop_pool_svc(void **argv)
1623
{
@@ -24,10 +31,8 @@ mdr_stop_pool_svc(void **argv)
2431
/* Create the pool. */
2532
if (arg->myrank == 0) {
2633
print_message("creating pool\n");
27-
rc = dmg_pool_create(dmg_config_file,
28-
geteuid(), getegid(), arg->group,
29-
NULL, 256 * 1024 * 1024, 0,
30-
NULL, arg->pool.svc, uuid);
34+
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL,
35+
MIN_SCM_SIZE, 0, NULL, arg->pool.svc, uuid);
3136
}
3237
par_bcast(PAR_COMM_WORLD, &rc, 1, PAR_INT, 0);
3338
assert_rc_equal(rc, 0);
@@ -134,10 +139,8 @@ mdr_stop_cont_svc(void **argv)
134139
int rc;
135140

136141
print_message("creating pool\n");
137-
rc = dmg_pool_create(dmg_config_file,
138-
geteuid(), getegid(), arg->group,
139-
NULL, 256 * 1024 * 1024, 0,
140-
NULL, arg->pool.svc, pool_uuid);
142+
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL, MIN_SCM_SIZE,
143+
0, NULL, arg->pool.svc, pool_uuid);
141144
assert_rc_equal(rc, 0);
142145

143146
if (arg->pool.svc->rl_nr < 3) {

src/tests/suite/daos_mgmt.c

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2016-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -20,6 +20,12 @@
2020
#include <daos_event.h>
2121
#include <daos/agent.h>
2222

23+
/*
24+
* Given the 128MB default blobstore cluster size, the minimal pool scm_size for
25+
* an 8-target engine would be 128MB * 8 = 1GB.
26+
*/
27+
#define MIN_SCM_SIZE (1ULL << 30)
28+
2329
/** create/destroy pool on all tgts */
2430
static void
2531
pool_create_all(void **state)
@@ -36,11 +42,8 @@ pool_create_all(void **state)
3642

3743
/** create container */
3844
print_message("creating pool synchronously ... ");
39-
rc = dmg_pool_create(dmg_config_file,
40-
geteuid(), getegid(),
41-
arg->group, NULL /* tgts */,
42-
256 * 1024 * 1024 /* minimal size */,
43-
0 /* nvme size */, NULL /* prop */,
45+
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */,
46+
MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */,
4447
arg->pool.svc /* svc */, uuid);
4548
assert_rc_equal(rc, 0);
4649

@@ -341,11 +344,8 @@ pool_create_and_destroy_retry(void **state)
341344

342345
test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_POOL_CREATE_FAIL_CORPC | DAOS_FAIL_ONCE);
343346
print_message("creating pool synchronously ... ");
344-
rc = dmg_pool_create(dmg_config_file,
345-
geteuid(), getegid(),
346-
arg->group, NULL /* tgts */,
347-
256 * 1024 * 1024 /* minimal size */,
348-
0 /* nvme size */, NULL /* prop */,
347+
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */,
348+
MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */,
349349
arg->pool.svc /* svc */, uuid);
350350
assert_rc_equal(rc, 0);
351351
print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid));
@@ -435,8 +435,7 @@ pool_create_steps_down_from_up_empty(void **state)
435435
svc.rl_ranks = &rank;
436436
svc.rl_nr = 1;
437437
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */,
438-
256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */,
439-
NULL /* prop */, &svc, uuid);
438+
MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, &svc, uuid);
440439
assert_rc_equal(rc, 0);
441440
print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid));
442441

@@ -466,8 +465,7 @@ pool_destroy_disconnect_all(void **state)
466465

467466
print_message("creating pool synchronously ... ");
468467
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */,
469-
256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */,
470-
NULL /* prop */, arg->pool.svc, uuid);
468+
MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc, uuid);
471469
assert_rc_equal(rc, 0);
472470
print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid));
473471

@@ -515,8 +513,7 @@ pool_destroy_cancel_rfcheck(void **state)
515513

516514
print_message("creating pool synchronously ... ");
517515
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */,
518-
256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */,
519-
NULL /* prop */, arg->pool.svc, uuid);
516+
MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc, uuid);
520517
assert_rc_equal(rc, 0);
521518
print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid));
522519

@@ -544,8 +541,7 @@ pool_create_query_fail(void **state)
544541

545542
print_message("creating pool synchronously ... ");
546543
rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */,
547-
256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */,
548-
NULL /* prop */, arg->pool.svc, uuid);
544+
MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc, uuid);
549545
assert_rc_equal(rc, 0);
550546
print_message("success uuid = " DF_UUIDF "\n", DP_UUID(uuid));
551547

src/vos/tests/wal_ut.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* (C) Copyright 2023-2024 Intel Corporation.
3+
* (C) Copyright 2026 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -669,8 +670,8 @@ ut_fill_wal(struct bio_ut_args *args, int tx_nr, struct ut_tx_array **txa_ptr)
669670
tx = txa->ta_tx_ptrs[0];
670671

671672
/*
672-
* Each tx is roughly 800k, 40 txs will consume 32000k, which is more than
673-
* half of 50MB WAL size.
673+
* Each tx is roughly 800k, so 100 txs will consume about 80MB, which is more than
674+
* half of the 128MB WAL size.
674675
*/
675676
for (i = 0; i < tx_nr; i++) {
676677
tx = txa->ta_tx_ptrs[i];
@@ -705,11 +706,11 @@ static void
705706
wal_ut_wrap(void **state)
706707
{
707708
struct bio_ut_args *args = *state;
708-
uint64_t meta_sz = (50ULL << 20); /* 50 MB */
709+
uint64_t meta_sz = (128ULL << 20); /* 128 MB */
709710
struct ut_tx_array *txa;
710711
struct umem_wal_tx *tx;
711712
struct ut_fake_tx *fake_tx;
712-
int tx_nr = 40, rc;
713+
int tx_nr = 100, rc;
713714

714715
rc = ut_mc_init(args, meta_sz, meta_sz, meta_sz);
715716
assert_rc_equal(rc, 0);
@@ -745,11 +746,11 @@ static void
745746
wal_ut_wrap_many(void **state)
746747
{
747748
struct bio_ut_args *args = *state;
748-
uint64_t meta_sz = (50ULL << 20); /* 50 MB */
749+
uint64_t meta_sz = (128ULL << 20); /* 128 MB */
749750
struct ut_tx_array *txa;
750751
struct umem_wal_tx *tx;
751752
struct ut_fake_tx *fake_tx;
752-
int tx_nr = 40, rc;
753+
int tx_nr = 100, rc;
753754

754755
rc = ut_mc_init(args, meta_sz, meta_sz, meta_sz);
755756
assert_rc_equal(rc, 0);

0 commit comments

Comments
 (0)