diff --git a/src/bio/bio_device.c b/src/bio/bio_device.c index 07deb6c7b10..06492f95364 100644 --- a/src/bio/bio_device.c +++ b/src/bio/bio_device.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2020-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1067,3 +1067,70 @@ bio_led_manage(struct bio_xs_context *xs_ctxt, char *tr_addr, uuid_t dev_uuid, u return led_manage(xs_ctxt, pci_addr, (Ctl__LedAction)action, (Ctl__LedState *)state, duration); } + +static void +set_power_mgmt_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct bio_bdev *d_bdev = cb_arg; + int sc; + int sct; + uint32_t cdw0; + + spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc); + if (sc) { + D_ERROR("Set power management failed for device %s, NVMe status code/type: %d/%d\n", + d_bdev->bb_name, sc, sct); + } else { + D_INFO("Power management value set on device %s\n", d_bdev->bb_name); + } + + spdk_bdev_free_io(bdev_io); +} + +int +bio_set_power_mgmt(struct bio_bdev *d_bdev, struct spdk_io_channel *channel) +{ + struct spdk_bdev *bdev; + struct spdk_nvme_cmd cmd; + int rc; + + /* If default has not been overwritten, skip setting the value */ + if (bio_spdk_power_mgmt_val == UINT32_MAX) + return 0; + + D_ASSERT(d_bdev != NULL); + D_ASSERT(d_bdev->bb_desc != NULL); + D_ASSERT(channel != NULL); + + bdev = spdk_bdev_desc_get_bdev(d_bdev->bb_desc); + if (bdev == NULL) { + D_ERROR("No bdev associated with device descriptor\n"); + return -DER_INVAL; + } + + if (get_bdev_type(bdev) != BDEV_CLASS_NVME) { + D_DEBUG(DB_MGMT, "Device %s is not NVMe, skipping power management\n", + d_bdev->bb_name); + return -DER_NOTSUPPORTED; + } + + if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) { + D_ERROR("Bdev NVMe admin passthru not supported for %s\n", d_bdev->bb_name); + return -DER_NOTSUPPORTED; + } + + memset(&cmd, 0, sizeof(cmd)); + cmd.opc = SPDK_NVME_OPC_SET_FEATURES; + cmd.cdw10 = SPDK_NVME_FEAT_POWER_MANAGEMENT; + cmd.cdw11 = bio_spdk_power_mgmt_val; + + rc = spdk_bdev_nvme_admin_passthru(d_bdev->bb_desc, channel, &cmd, NULL, 0, + set_power_mgmt_completion, d_bdev); + if (rc) { + D_ERROR("Failed to submit power management command for %s, rc:%d\n", + d_bdev->bb_name, rc); + return daos_errno2der(-rc); + } + + return 0; +} diff --git a/src/bio/bio_internal.h b/src/bio/bio_internal.h index 5b97582d18c..f476e9a8b14 100644 --- a/src/bio/bio_internal.h +++ b/src/bio/bio_internal.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -606,6 +606,7 @@ extern unsigned int bio_numa_node; extern unsigned int bio_spdk_max_unmap_cnt; extern unsigned int bio_max_async_sz; extern unsigned int bio_io_timeout; +extern unsigned int bio_spdk_power_mgmt_val; int xs_poll_completion(struct bio_xs_context *ctxt, unsigned int *inflights, uint64_t timeout); @@ -720,6 +721,8 @@ void trigger_faulty_reaction(struct bio_blobstore *bbs); int fill_in_traddr(struct bio_dev_info *b_info, char *dev_name); struct bio_dev_info * alloc_dev_info(uuid_t dev_id, struct bio_bdev *d_bdev, struct smd_dev_info *s_info); +int +bio_set_power_mgmt(struct bio_bdev *d_bdev, struct spdk_io_channel *channel); /* bio_config.c */ int diff --git a/src/bio/bio_monitor.c b/src/bio/bio_monitor.c index 272e0dc80c5..92680b621ee 100644 --- a/src/bio/bio_monitor.c +++ b/src/bio/bio_monitor.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1014,6 +1014,12 @@ bio_init_health_monitoring(struct bio_blobstore *bb, char *bdev_name) D_ASSERT(channel != NULL); bb->bb_dev_health.bdh_io_channel = channel; + /* Set NVMe power management */ + rc = bio_set_power_mgmt(bb->bb_dev, channel); + if (rc != 0 && rc != -DER_NOTSUPPORTED) + D_WARN("Failed to set power management for device %s: " DF_RC "\n", bdev_name, + DP_RC(rc)); + /* Set the NVMe SSD PCI Vendor ID */ bio_set_vendor_id(bb, bdev_name); /* Register DAOS metrics to export NVMe SSD health stats */ diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 059556ba89a..7f35ed0ff2e 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2018-2024 Intel Corporation. * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -61,6 +61,8 @@ bool bio_spdk_inited; bool bio_vmd_enabled; /* SPDK subsystem fini timeout */ unsigned int bio_spdk_subsys_timeout = 25000; /* ms */ +/* SPDK NVMe power management value */ +unsigned int bio_spdk_power_mgmt_val = UINT32_MAX; /* usually 0-4 */ /* How many blob unmap calls can be called in a row */ unsigned int bio_spdk_max_unmap_cnt = 32; unsigned int bio_max_async_sz = (1UL << 15) /* 32k */; @@ -270,6 +272,11 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size, d_getenv_bool("DAOS_SCM_RDMA_ENABLED", &bio_scm_rdma); D_INFO("RDMA to SCM is %s\n", bio_scm_rdma ? "enabled" : "disabled"); + d_getenv_uint("DAOS_NVME_POWER_MGMT", &bio_spdk_power_mgmt_val); + if (bio_spdk_power_mgmt_val != UINT32_MAX) + D_INFO("NVMe power management setting to be applied is %u\n", + bio_spdk_power_mgmt_val); + d_getenv_uint("DAOS_SPDK_SUBSYS_TIMEOUT", &bio_spdk_subsys_timeout); D_INFO("SPDK subsystem fini timeout is %u ms\n", bio_spdk_subsys_timeout);