-
Notifications
You must be signed in to change notification settings - Fork 338
DAOS-18431 bio: Set power management register on NVMe #17355
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| /** | ||
| * (C) Copyright 2020-2024 Intel Corporation. | ||
| * (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
| * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP | ||
| * | ||
| * SPDX-License-Identifier: BSD-2-Clause-Patent | ||
| */ | ||
|
|
@@ -1067,3 +1067,70 @@ bio_led_manage(struct bio_xs_context *xs_ctxt, char *tr_addr, uuid_t dev_uuid, u | |
| return led_manage(xs_ctxt, pci_addr, (Ctl__LedAction)action, (Ctl__LedState *)state, | ||
| duration); | ||
| } | ||
|
|
||
| static void | ||
| set_power_mgmt_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) | ||
| { | ||
| struct bio_bdev *d_bdev = cb_arg; | ||
| int sc; | ||
| int sct; | ||
| uint32_t cdw0; | ||
|
|
||
| spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc); | ||
| if (sc) { | ||
| D_ERROR("Set power management failed for device %s, NVMe status code/type: %d/%d\n", | ||
| d_bdev->bb_name, sc, sct); | ||
| } else { | ||
| D_INFO("Power management value set on device %s\n", d_bdev->bb_name); | ||
| } | ||
|
|
||
| spdk_bdev_free_io(bdev_io); | ||
| } | ||
|
|
||
| int | ||
| bio_set_power_mgmt(struct bio_bdev *d_bdev, struct spdk_io_channel *channel) | ||
| { | ||
| struct spdk_bdev *bdev; | ||
| struct spdk_nvme_cmd cmd; | ||
| int rc; | ||
|
|
||
| /* If default has not been overwritten, skip setting the value */ | ||
| if (bio_spdk_power_mgmt_val == UINT32_MAX) | ||
| return 0; | ||
|
|
||
| D_ASSERT(d_bdev != NULL); | ||
| D_ASSERT(d_bdev->bb_desc != NULL); | ||
| D_ASSERT(channel != NULL); | ||
|
|
||
| bdev = spdk_bdev_desc_get_bdev(d_bdev->bb_desc); | ||
| if (bdev == NULL) { | ||
| D_ERROR("No bdev associated with device descriptor\n"); | ||
| return -DER_INVAL; | ||
| } | ||
|
|
||
| if (get_bdev_type(bdev) != BDEV_CLASS_NVME) { | ||
| D_DEBUG(DB_MGMT, "Device %s is not NVMe, skipping power management\n", | ||
| d_bdev->bb_name); | ||
| return -DER_NOTSUPPORTED; | ||
| } | ||
|
|
||
| if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) { | ||
| D_ERROR("Bdev NVMe admin passthru not supported for %s\n", d_bdev->bb_name); | ||
| return -DER_NOTSUPPORTED; | ||
| } | ||
|
|
||
| memset(&cmd, 0, sizeof(cmd)); | ||
| cmd.opc = SPDK_NVME_OPC_SET_FEATURES; | ||
| cmd.cdw10 = SPDK_NVME_FEAT_POWER_MANAGEMENT; | ||
| cmd.cdw11 = bio_spdk_power_mgmt_val; | ||
|
|
||
| rc = spdk_bdev_nvme_admin_passthru(d_bdev->bb_desc, channel, &cmd, NULL, 0, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This uses |
||
| set_power_mgmt_completion, d_bdev); | ||
| if (rc) { | ||
| D_ERROR("Failed to submit power management command for %s, rc:%d\n", | ||
| d_bdev->bb_name, rc); | ||
| return daos_errno2der(-rc); | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| /** | ||
| * (C) Copyright 2019-2024 Intel Corporation. | ||
| * (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
| * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP | ||
| * | ||
| * SPDX-License-Identifier: BSD-2-Clause-Patent | ||
| */ | ||
|
|
@@ -1014,6 +1014,12 @@ bio_init_health_monitoring(struct bio_blobstore *bb, char *bdev_name) | |
| D_ASSERT(channel != NULL); | ||
| bb->bb_dev_health.bdh_io_channel = channel; | ||
|
|
||
| /* Set NVMe power management */ | ||
| rc = bio_set_power_mgmt(bb->bb_dev, channel); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we don't want this being skipped when health monitor is disabled? (see |
||
| if (rc != 0 && rc != -DER_NOTSUPPORTED) | ||
| D_WARN("Failed to set power management for device %s: " DF_RC "\n", bdev_name, | ||
| DP_RC(rc)); | ||
|
|
||
| /* Set the NVMe SSD PCI Vendor ID */ | ||
| bio_set_vendor_id(bb, bdev_name); | ||
| /* Register DAOS metrics to export NVMe SSD health stats */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
union spdk_nvme_cmd_cdw11has a memberunion spdk_nvme_feat_power_management feat_power_managementthat has multiple fields, not just the power state. So I think we should be using that to make sure we're setting the value correctly.IMO we should also check the value taken from the env variable is in an acceptable range, preferably when we first ingest it. I had some trouble finding definitions for the power state values. Does SPDK define those somewhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I wait for @Michael-Hennecke to verify the PR. this is more of an enablement change that will unblock some testing. Once verified we can look at tidying things.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems like we at least need to check the value. I was wondering about what would happen if we set the env variable to some value larger than 5 bits, potentially overflowing into the other bit field ("workload hint"). Here's the struct definition I found: https://spdk.io/doc/unionspdk__nvme__feat__power__management.html
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Michael-Hennecke thoughts on acceptable values?