Skip to content
This repository was archived by the owner on Mar 9, 2026. It is now read-only.

Commit 7646db7

Browse files
nolanhergert authored and StevenPontsler committed
Make ipmctl more resilient to unhealthy PMem modules
Only zero out the headers for PCD and LSA when calling delete -pcd. This reduces runtime on non-functional PMem modules from 10 minutes to 10 seconds. In FwCmdSetPlatformConfigData, use the same flow for PCD and LSA when setting PcdSize; there is no reason for them to differ. For non-functional PMem modules, allow delete -goal to work on them and exclude them from the check for existing goals on the socket (since we are not going to provision them with create -goal). Don't bail out when there are errors retrieving PCD during Retrieve Goal Configs. However, preserve the past behavior for when EFI_NO_RESPONSE is returned ("One or more dimms are busy"...).
Signed-off-by: Nolan Hergert <nolan.hergert@intel.com>
1 parent 16b9df5 commit 7646db7

File tree

5 files changed

+60
-62
lines changed

5 files changed

+60
-62
lines changed

DcpmPkg/driver/Core/Dimm.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2857,7 +2857,6 @@ FwCmdSetPlatformConfigData (
28572857
ReturnCode = EFI_INVALID_PARAMETER;
28582858
goto Finish;
28592859
}
2860-
PcdSize = RawDataSize;
28612860
} else if (PartitionId == PCD_LSA_PARTITION_ID) {
28622861
if (gPCDCacheEnabled) {
28632862
if (NULL == pDimm->pPcdLsa) {
@@ -2866,8 +2865,10 @@ FwCmdSetPlatformConfigData (
28662865
pTempCache = pDimm->pPcdLsa;
28672866
pTempCacheSz = pDimm->PcdLsaPartitionSize;
28682867
}
2869-
PcdSize = pDimm->PcdLsaPartitionSize;
28702868
}
2869+
2870+
PcdSize = RawDataSize;
2871+
28712872
if (PcdSize == 0) {
28722873
ReturnCode = EFI_INVALID_PARAMETER;
28732874
goto Finish;

DcpmPkg/driver/Core/Namespace.c

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1526,7 +1526,7 @@ WriteLabelStorageArea(
15261526
}
15271527

15281528
/**
1529-
Zero the Label Storage Area on the specified DIMM.
1529+
Zero the Label Storage Area Header on the specified DIMM.
15301530
15311531
@param[in] DimmPid Dimm ID of DIMM on which to write LSA
15321532
@@ -1535,13 +1535,16 @@ WriteLabelStorageArea(
15351535
@retval EFI_SUCCESS LSA written correctly
15361536
**/
15371537
EFI_STATUS
1538-
ZeroLabelStorageArea(
1538+
ZeroLabelStorageAreaHeader(
15391539
IN UINT16 DimmPid
15401540
)
15411541
{
15421542
EFI_STATUS ReturnCode = EFI_INVALID_PARAMETER;
15431543
DIMM *pDimm = NULL;
1544-
UINT8 *pZeroRawLsa = NULL;
1544+
UINT8 *pZeroRawLsaHeader = NULL;
1545+
// 2 LSA header index blocks corresponding to 256KB partitions. We only
1546+
// have 128KB partitions in PMem (256B index blocks), but rounding up just in case.
1547+
CONST UINT16 BytesToZero = 2 * 512;
15451548

15461549
NVDIMM_ENTRY();
15471550

@@ -1550,22 +1553,22 @@ ZeroLabelStorageArea(
15501553
goto Finish;
15511554
}
15521555

1553-
pZeroRawLsa = AllocateZeroPool(pDimm->PcdLsaPartitionSize);
1554-
if (pZeroRawLsa == NULL) {
1556+
pZeroRawLsaHeader = AllocateZeroPool(BytesToZero);
1557+
if (pZeroRawLsaHeader == NULL) {
15551558
ReturnCode = EFI_OUT_OF_RESOURCES;
15561559
goto Finish;
15571560
}
15581561

1559-
NVDIMM_DBG("Zero-ing the LSA on DIMM 0x%x ...", pDimm->DeviceHandle.AsUint32);
1562+
NVDIMM_DBG("Zero-ing the LSA header on DIMM 0x%x ...", pDimm->DeviceHandle.AsUint32);
15601563
ReturnCode = FwCmdSetPlatformConfigData(pDimm, PCD_LSA_PARTITION_ID,
1561-
pZeroRawLsa, pDimm->PcdLsaPartitionSize);
1564+
pZeroRawLsaHeader, BytesToZero);
15621565
if (EFI_ERROR(ReturnCode)) {
15631566
NVDIMM_DBG("FwCmdSetPlatformConfigData returned: " FORMAT_EFI_STATUS "", ReturnCode);
15641567
goto Finish;
15651568
}
15661569

15671570
Finish:
1568-
FREE_POOL_SAFE(pZeroRawLsa);
1571+
FREE_POOL_SAFE(pZeroRawLsaHeader);
15691572
NVDIMM_EXIT_I64(ReturnCode);
15701573
return ReturnCode;
15711574
}

DcpmPkg/driver/Core/Namespace.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,7 @@ WriteLabelStorageArea(
330330
);
331331

332332
/**
333-
Zero the Label Storage Area on the specified DIMM.
333+
Zero the Label Storage Area Header on the specified DIMM.
334334
335335
@param[in] DimmPid Dimm ID of DIMM on which to write LSA
336336
@@ -339,7 +339,7 @@ WriteLabelStorageArea(
339339
@retval EFI_SUCCESS LSA written correctly
340340
**/
341341
EFI_STATUS
342-
ZeroLabelStorageArea(
342+
ZeroLabelStorageAreaHeader(
343343
IN UINT16 DimmPid
344344
);
345345

DcpmPkg/driver/Core/Region.c

Lines changed: 39 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -860,51 +860,46 @@ RetrieveISsFromPlatformConfigData(
860860
return EFI_INVALID_PARAMETER;
861861
}
862862

863-
for (pDimmNode = GetFirstNode(pDimmList);
864-
!IsNull(pDimmList, pDimmNode);
865-
pDimmNode = GetNextNode(pDimmList, pDimmNode)) {
863+
LIST_FOR_EACH(pDimmNode, pDimmList) {
866864
pDimm = DIMM_FROM_NODE(pDimmNode);
867865

866+
// Set default values
867+
pDimm->ConfigStatus = DIMM_CONFIG_UNDEFINED;
868+
pDimm->IsNew = 0;
869+
pDimm->Configured = FALSE;
870+
868871
if (!IsDimmManageable(pDimm) || DIMM_MEDIA_NOT_ACCESSIBLE(pDimm->BootStatusBitmask)) {
869872
continue;
870873
}
871874

875+
// Free previous use of pcd header if needed
876+
FREE_POOL_SAFE(pPcdConfHeader);
872877
ReturnCode = GetPlatformConfigDataOemPartition(pDimm, FALSE, &pPcdConfHeader);
873878
#ifdef MEMORY_CORRUPTION_WA
874-
if (ReturnCode == EFI_DEVICE_ERROR) {
875-
ReturnCode = GetPlatformConfigDataOemPartition(pDimm, FALSE, &pPcdConfHeader);
876-
}
879+
if (ReturnCode == EFI_DEVICE_ERROR) {
880+
ReturnCode = GetPlatformConfigDataOemPartition(pDimm, FALSE, &pPcdConfHeader);
881+
}
877882
#endif // MEMORY_CORRUPTIO_WA
878883
if (EFI_ERROR(ReturnCode)) {
884+
// Ignore all errors except for PMem module busy with sanitize operation
879885
if (EFI_NO_RESPONSE == ReturnCode) {
880886
/* Save the return code here and continue with the execution for rest of the dimms.
881887
This is done to make the UEFI initialization succeed. During UEFI init,
882888
return code will be ignored but we have to error out when the actual command is executed. */
883889
IReturnCode = ReturnCode;
884890
}
885-
/* set these values like they were never set */
886-
pDimm->ConfigStatus = DIMM_CONFIG_UNDEFINED;
887-
pDimm->IsNew = 0;
888-
pDimm->Configured = FALSE;
891+
ReturnCode = EFI_SUCCESS;
889892
continue;
890893
}
891894

892895
if (pPcdConfHeader->CurrentConfStartOffset == 0 || pPcdConfHeader->CurrentConfDataSize == 0) {
893896
NVDIMM_DBG("There is no Current Config table");
894-
FreePool(pPcdConfHeader);
895-
pPcdConfHeader = NULL;
896-
/* set these values like they were never set */
897-
pDimm->ConfigStatus = DIMM_CONFIG_UNDEFINED;
898-
pDimm->IsNew = 0;
899-
pDimm->Configured = FALSE;
900897
continue;
901898
}
902899

903900
pPcdCurrentConf = GET_NVDIMM_CURRENT_CONFIG(pPcdConfHeader);
904901

905902
if (!IsPcdCurrentConfHeaderValid(pPcdCurrentConf, pDimm->PcdOemPartitionSize)) {
906-
FreePool(pPcdConfHeader);
907-
pPcdConfHeader = NULL;
908903
continue;
909904
}
910905

@@ -1945,6 +1940,7 @@ RetrieveGoalConfigsFromPlatformConfigData(
19451940
)
19461941
{
19471942
EFI_STATUS ReturnCode = EFI_SUCCESS;
1943+
EFI_STATUS IReturnCode = EFI_SUCCESS;
19481944
DIMM *pDimm = NULL;
19491945
LIST_ENTRY *pDimmNode = NULL;
19501946
NVDIMM_CONFIGURATION_HEADER *pPcdConfHeader = NULL;
@@ -1957,7 +1953,6 @@ RetrieveGoalConfigsFromPlatformConfigData(
19571953
UINT32 RegionGoalsNum = 0;
19581954
REGION_GOAL *pNewRegionGoal = NULL;
19591955
BOOLEAN New = FALSE;
1960-
BOOLEAN ValidConfigGoal = TRUE;
19611956
UINT32 SequenceIndex = 0;
19621957
ACPI_REVISION PcdCinRev;
19631958
UINT8 InterleaveChangeStatus = 0;
@@ -1978,56 +1973,61 @@ RetrieveGoalConfigsFromPlatformConfigData(
19781973
LIST_FOR_EACH(pDimmNode, pDimmList) {
19791974
pDimm = DIMM_FROM_NODE(pDimmNode);
19801975

1981-
// Skip PMem modules that we can't read from
1976+
// Set default values
1977+
pDimm->GoalConfigStatus = GOAL_CONFIG_STATUS_NO_GOAL_OR_SUCCESS;
1978+
pDimm->RegionsGoalConfig = FALSE;
1979+
pDimm->PcdSynced = TRUE;
1980+
19821981
if (!IsDimmManageable(pDimm) || DIMM_MEDIA_NOT_ACCESSIBLE(pDimm->BootStatusBitmask)) {
19831982
continue;
19841983
}
19851984

1985+
// Free previous use of pcd header if needed
1986+
FREE_POOL_SAFE(pPcdConfHeader);
19861987
ReturnCode = GetPlatformConfigDataOemPartition(pDimm, RestoreCorrupt, &pPcdConfHeader);
19871988
#ifdef MEMORY_CORRUPTION_WA
1988-
if (ReturnCode == EFI_DEVICE_ERROR) {
1989-
ReturnCode = GetPlatformConfigDataOemPartition(pDimm, RestoreCorrupt, &pPcdConfHeader);
1990-
}
1989+
if (ReturnCode == EFI_DEVICE_ERROR) {
1990+
ReturnCode = GetPlatformConfigDataOemPartition(pDimm, RestoreCorrupt, &pPcdConfHeader);
1991+
}
19911992
#endif // MEMORY_CORRUPTIO_WA
19921993
if (EFI_ERROR(ReturnCode)) {
1993-
goto FinishError;
1994+
// Ignore all errors except for PMem module busy with sanitize operation
1995+
if (EFI_NO_RESPONSE == ReturnCode) {
1996+
/* Save the return code here and continue with the execution for rest of the dimms.
1997+
This is done to make the UEFI initialization succeed. During UEFI init,
1998+
return code will be ignored but we have to error out when the actual command is executed. */
1999+
IReturnCode = ReturnCode;
2000+
}
2001+
ReturnCode = EFI_SUCCESS;
2002+
continue;
19942003
}
19952004

19962005
if (NULL != pPcdConfHeader) {
19972006
pPcdConfInput = GET_NVDIMM_PLATFORM_CONFIG_INPUT(pPcdConfHeader);
19982007
pPcdConfOutput = GET_NVDIMM_PLATFORM_CONFIG_OUTPUT(pPcdConfHeader);
19992008
}
20002009

2001-
ValidConfigGoal = TRUE;
2002-
20032010
// If no PCD Header, CIN record then no goal
20042011
if ((NULL == pPcdConfHeader) || (pPcdConfHeader->ConfInputStartOffset == 0) || (pPcdConfHeader->ConfInputDataSize == 0)) {
20052012
NVDIMM_DBG("There is no Config Input table");
2006-
ValidConfigGoal = FALSE;
2013+
continue;
20072014
}
20082015
// CIN is corrupt
20092016
else if (!IsPcdConfInputHeaderValid(pPcdConfInput, pDimm->PcdOemPartitionSize)) {
20102017
pPcdConfHeader->ConfInputStartOffset = 0;
20112018
pPcdConfHeader->ConfInputDataSize = 0;
20122019
NVDIMM_DBG("The Config Input table is corrupted, Ignoring it");
2013-
ValidConfigGoal = FALSE;
2020+
continue;
20142021
}
20152022
// If CIN and COUT sequence are the same, then goal attempted to be applied already
20162023
else if ((pPcdConfHeader->ConfOutputStartOffset != 0) && (pPcdConfHeader->ConfOutputDataSize != 0) &&
20172024
IsPcdConfOutputHeaderValid(pPcdConfOutput, pDimm->PcdOemPartitionSize) &&
20182025
(pPcdConfInput->SequenceNumber == pPcdConfOutput->SequenceNumber)) {
20192026
NVDIMM_DBG("The config goal is already applied");
2020-
ValidConfigGoal = FALSE;
2021-
}
2022-
2023-
if (!ValidConfigGoal) {
2024-
pDimm->GoalConfigStatus = GOAL_CONFIG_STATUS_NO_GOAL_OR_SUCCESS;
2025-
pDimm->RegionsGoalConfig = FALSE;
2026-
pDimm->PcdSynced = TRUE;
2027-
FREE_POOL_SAFE(pPcdConfHeader);
20282027
continue;
20292028
}
20302029

2030+
// We have a valid goal after this point
20312031
PcdCinRev = pPcdConfInput->Header.Revision;
20322032

20332033
pDimm->PcdSynced = FALSE;
@@ -2252,6 +2252,8 @@ RetrieveGoalConfigsFromPlatformConfigData(
22522252
ClearInternalGoalConfigsInfo(pDimmList);
22532253
Finish:
22542254
FREE_POOL_SAFE(pPcdConfHeader);
2255+
2256+
ReturnCode = (IReturnCode != EFI_SUCCESS) ? IReturnCode : ReturnCode;
22552257
NVDIMM_EXIT_I64(ReturnCode);
22562258
return ReturnCode;
22572259
}
@@ -5043,8 +5045,7 @@ CheckForExistingGoalConfigPerSocket(
50435045
}
50445046
LIST_FOR_EACH(pDimmNode, &gNvmDimmData->PMEMDev.Dimms) {
50455047
pDimm = DIMM_FROM_NODE(pDimmNode);
5046-
5047-
if (!IsDimmManageable(pDimm) || (Socket != pDimm->SocketId)) {
5048+
if (!IsDimmManageable(pDimm) || pDimm->NonFunctional || (Socket != pDimm->SocketId)) {
50485049
continue;
50495050
}
50505051

DcpmPkg/driver/Protocol/Driver/NvmDimmConfig.c

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3729,7 +3729,10 @@ ModifyPcdConfig(
37293729
UINT32 Index = 0;
37303730
UINT32 SecurityState = 0;
37313731
NVDIMM_CONFIGURATION_HEADER *pConfigHeader = NULL;
3732-
UINT32 ConfigSize = 0;
3732+
// Only need to zero out headers, not the whole 64K partition.
3733+
// Adding 256 bytes just to make sure
3734+
UINT32 ConfigSize = sizeof(NVDIMM_CONFIGURATION_HEADER) + sizeof(NVDIMM_PLATFORM_CONFIG_INPUT)
3735+
+ sizeof(NVDIMM_CURRENT_CONFIG) + sizeof(NVDIMM_PLATFORM_CONFIG_OUTPUT) + 256;
37333736

37343737
NVDIMM_ENTRY();
37353738

@@ -3780,7 +3783,7 @@ ModifyPcdConfig(
37803783

37813784
//zero LSA
37823785
if (ConfigIdMask & DELETE_PCD_CONFIG_LSA_MASK) {
3783-
TmpReturnCode = ZeroLabelStorageArea(pDimms[Index]->DimmID);
3786+
TmpReturnCode = ZeroLabelStorageAreaHeader(pDimms[Index]->DimmID);
37843787
if (EFI_ERROR(TmpReturnCode)) {
37853788
KEEP_ERROR(ReturnCode, TmpReturnCode);
37863789
SetObjStatusForDimm(pCommandStatus, pDimms[Index], NVM_ERR_OPERATION_FAILED);
@@ -3806,15 +3809,6 @@ ModifyPcdConfig(
38063809
continue;
38073810
}
38083811

3809-
//determine the size of the PCD partition, which will be used at the end to write the partion back to PCD
3810-
TmpReturnCode = GetPcdOemDataSize(pConfigHeader, &ConfigSize);
3811-
if (EFI_ERROR(TmpReturnCode)) {
3812-
KEEP_ERROR(ReturnCode, TmpReturnCode);
3813-
SetObjStatusForDimm(pCommandStatus, pDimms[Index], NVM_ERR_OPERATION_FAILED);
3814-
NVDIMM_DBG("Failed to get PCD size");
3815-
continue;
3816-
}
3817-
38183812
//clear CIN
38193813
if (ConfigIdMask & DELETE_PCD_CONFIG_CIN_MASK) {
38203814
pConfigHeader->ConfInputDataSize = 0x0;
@@ -6615,7 +6609,6 @@ DeleteGoalConfig (
66156609
/** Verify input parameters and determine a list of DIMMs **/
66166610
ReturnCode = VerifyTargetDimms(pDimmIds, DimmIdsCount, pSocketIds, SocketIdsCount,
66176611
REQUIRE_DCPMMS_MANAGEABLE |
6618-
REQUIRE_DCPMMS_FUNCTIONAL |
66196612
REQUIRE_DCPMMS_MEDIA_ACCESSIBLE,
66206613
pDimms, &DimmsNum, pCommandStatus);
66216614
if (EFI_ERROR(ReturnCode) || pCommandStatus->GeneralStatus != NVM_ERR_OPERATION_NOT_STARTED) {

0 commit comments

Comments
 (0)