Skip to content

Commit c94f75c

Browse files
PowerFlex/ScaleIO - Wait after SDC service start/restart/stop, and retry to fetch SDC id/guid (#11099)
* [PowerFlex/ScaleIO] Added a wait time after SDC service start/restart/stop, and retries to fetch the SDC id/guid
* Added agent property 'powerflex.sdc.service.wait' for the time (in secs) to wait after SDC service start/restart/stop
* Code improvements
1 parent 9688cbb commit c94f75c

File tree

6 files changed

+96
-27
lines changed

6 files changed

+96
-27
lines changed

agent/src/main/java/com/cloud/agent/properties/AgentProperties.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,7 @@ public static class Property <T>{
823823
private T defaultValue;
824824
private Class<T> typeClass;
825825

826-
Property(String name, T value) {
826+
public Property(String name, T value) {
827827
init(name, value);
828828
}
829829

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptor.java

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.cloudstack.utils.qemu.QemuImgException;
3838
import org.apache.cloudstack.utils.qemu.QemuImgFile;
3939
import org.apache.cloudstack.utils.qemu.QemuObject;
40+
import org.apache.commons.collections.MapUtils;
4041
import org.apache.commons.io.filefilter.WildcardFileFilter;
4142
import org.apache.logging.log4j.Logger;
4243
import org.apache.logging.log4j.LogManager;
@@ -581,14 +582,23 @@ public Ternary<Boolean, Map<String, String>, String> prepareStorageClient(Storag
581582
}
582583

583584
if (!ScaleIOUtil.isSDCServiceActive()) {
585+
logger.debug("SDC service is not active on host, starting it");
584586
if (!ScaleIOUtil.startSDCService()) {
585587
return new Ternary<>(false, null, "Couldn't start SDC service on host");
586588
}
587-
} else if (!ScaleIOUtil.restartSDCService()) {
588-
return new Ternary<>(false, null, "Couldn't restart SDC service on host");
589+
} else {
590+
logger.debug("SDC service is active on host, re-starting it");
591+
if (!ScaleIOUtil.restartSDCService()) {
592+
return new Ternary<>(false, null, "Couldn't restart SDC service on host");
593+
}
594+
}
595+
596+
Map<String, String> sdcDetails = getSDCDetails(details);
597+
if (MapUtils.isEmpty(sdcDetails)) {
598+
return new Ternary<>(false, null, "Couldn't get the SDC details on the host");
589599
}
590600

591-
return new Ternary<>( true, getSDCDetails(details), "Prepared client successfully");
601+
return new Ternary<>( true, sdcDetails, "Prepared client successfully");
592602
}
593603

594604
public Pair<Boolean, String> unprepareStorageClient(Storage.StoragePoolType type, String uuid) {
@@ -611,20 +621,40 @@ public Pair<Boolean, String> unprepareStorageClient(Storage.StoragePoolType type
611621

612622
private Map<String, String> getSDCDetails(Map<String, String> details) {
613623
Map<String, String> sdcDetails = new HashMap<String, String>();
614-
if (details == null || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) {
624+
if (MapUtils.isEmpty(details) || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) {
615625
return sdcDetails;
616626
}
617627

618628
String storageSystemId = details.get(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID);
619-
String sdcId = ScaleIOUtil.getSdcId(storageSystemId);
620-
if (sdcId != null) {
621-
sdcDetails.put(ScaleIOGatewayClient.SDC_ID, sdcId);
622-
} else {
623-
String sdcGuId = ScaleIOUtil.getSdcGuid();
624-
if (sdcGuId != null) {
625-
sdcDetails.put(ScaleIOGatewayClient.SDC_GUID, sdcGuId);
626-
}
629+
if (StringUtils.isEmpty(storageSystemId)) {
630+
return sdcDetails;
627631
}
632+
633+
int numberOfTries = 5;
634+
int timeBetweenTries = 1000; // Try more frequently (every sec) and return early when SDC Id or Guid found
635+
int attempt = 1;
636+
do {
637+
logger.debug("Get SDC details, attempt #{}", attempt);
638+
String sdcId = ScaleIOUtil.getSdcId(storageSystemId);
639+
if (sdcId != null) {
640+
sdcDetails.put(ScaleIOGatewayClient.SDC_ID, sdcId);
641+
return sdcDetails;
642+
} else {
643+
String sdcGuId = ScaleIOUtil.getSdcGuid();
644+
if (sdcGuId != null) {
645+
sdcDetails.put(ScaleIOGatewayClient.SDC_GUID, sdcGuId);
646+
return sdcDetails;
647+
}
648+
}
649+
650+
try {
651+
Thread.sleep(timeBetweenTries);
652+
} catch (Exception ignore) {
653+
}
654+
numberOfTries--;
655+
attempt++;
656+
} while (numberOfTries > 0);
657+
628658
return sdcDetails;
629659
}
630660

plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptorTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,9 @@ public void testPrepareStorageClient_SDCServiceRestarted() {
116116

117117
Ternary<Boolean, Map<String, String>, String> result = scaleIOStorageAdaptor.prepareStorageClient(Storage.StoragePoolType.PowerFlex, poolUuid, new HashMap<>());
118118

119-
Assert.assertTrue(result.first());
120-
Assert.assertNotNull(result.second());
121-
Assert.assertTrue(result.second().isEmpty());
119+
Assert.assertFalse(result.first());
120+
Assert.assertNull(result.second());
121+
Assert.assertEquals("Couldn't get the SDC details on the host", result.third());
122122
}
123123

124124
@Test

plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/manager/ScaleIOSDCManagerImpl.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,12 +183,13 @@ public String prepareSDC(Host host, DataStore dataStore) {
183183
storagePoolHost.setLocalPath(sdcId);
184184
storagePoolHostDao.update(storagePoolHost.getId(), storagePoolHost);
185185
}
186-
}
187186

188-
int waitTimeInSecs = 15; // Wait for 15 secs (usual tests with SDC service start took 10-15 secs)
189-
if (hostSdcConnected(sdcId, dataStore, waitTimeInSecs)) {
190-
return sdcId;
187+
int waitTimeInSecs = 15; // Wait for 15 secs (usual tests with SDC service start took 10-15 secs)
188+
if (hostSdcConnected(sdcId, dataStore, waitTimeInSecs)) {
189+
return sdcId;
190+
}
191191
}
192+
192193
return null;
193194
} finally {
194195
if (storageSystemIdLock != null) {
@@ -246,7 +247,7 @@ private String prepareSDCOnHost(Host host, DataStore dataStore, String systemId)
246247
}
247248

248249
if (StringUtils.isBlank(sdcId)) {
249-
logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, try (re)install SDC and restart agent", host);
250+
logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, add MDMs if not or try (re)install SDC & restart agent", host);
250251
return null;
251252
}
252253

@@ -381,6 +382,9 @@ private boolean isHostSdcConnected(String sdcId, long poolId) {
381382

382383
private ScaleIOGatewayClient getScaleIOClient(final Long storagePoolId) throws Exception {
383384
StoragePoolVO storagePool = storagePoolDao.findById(storagePoolId);
385+
if (storagePool == null) {
386+
throw new CloudRuntimeException("Unable to find the storage pool with id " + storagePoolId);
387+
}
384388
return ScaleIOGatewayClientConnectionPool.getInstance().getClient(storagePool, storagePoolDetailsDao);
385389
}
386390

plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/provider/ScaleIOHostListener.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,12 @@ private String getSdcIdOfHost(HostVO host, StoragePool storagePool) {
102102
if (systemId == null) {
103103
throw new CloudRuntimeException("Failed to get the system id for PowerFlex storage pool " + storagePool.getName());
104104
}
105-
Map<String,String> details = new HashMap<>();
105+
Map<String, String> details = new HashMap<>();
106106
details.put(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID, systemId);
107107

108108
ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(true, storagePool, storagePool.getPath(), details);
109109
ModifyStoragePoolAnswer answer = sendModifyStoragePoolCommand(cmd, storagePool, host);
110-
Map<String,String> poolDetails = answer.getPoolInfo().getDetails();
110+
Map<String, String> poolDetails = answer.getPoolInfo().getDetails();
111111
if (MapUtils.isEmpty(poolDetails)) {
112112
String msg = String.format("PowerFlex storage SDC details not found on the host: %s, (re)install SDC and restart agent", host);
113113
logger.warn(msg);
@@ -124,7 +124,7 @@ private String getSdcIdOfHost(HostVO host, StoragePool storagePool) {
124124
}
125125

126126
if (StringUtils.isBlank(sdcId)) {
127-
String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, (re)install SDC and restart agent", host);
127+
String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, add MDMs if not or try (re)install SDC & restart agent", host);
128128
logger.warn(msg);
129129
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "SDC details not found on host: " + host.getUuid(), msg);
130130
return null;

plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/util/ScaleIOUtil.java

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.cloudstack.storage.datastore.util;
1919

20+
import com.cloud.agent.properties.AgentProperties;
21+
import com.cloud.agent.properties.AgentPropertiesFileHandler;
2022
import org.apache.logging.log4j.Logger;
2123
import org.apache.logging.log4j.LogManager;
2224

@@ -60,6 +62,14 @@ public class ScaleIOUtil {
6062
private static final String SDC_SERVICE_ENABLE_CMD = "systemctl enable scini";
6163

6264
public static final String CONNECTED_SDC_COUNT_STAT = "ConnectedSDCCount";
65+
66+
/**
67+
* Time (in seconds) to wait after SDC service 'scini' start/restart/stop.<br>
68+
* Data type: Integer.<br>
69+
* Default value: <code>3</code>
70+
*/
71+
public static final AgentProperties.Property<Integer> SDC_SERVICE_ACTION_WAIT = new AgentProperties.Property<>("powerflex.sdc.service.wait", 3);
72+
6373
/**
6474
* Cmd for querying volumes in SDC
6575
* Sample output for cmd: drv_cfg --query_vols:
@@ -216,16 +226,41 @@ public static boolean enableSDCService() {
216226

217227
public static boolean startSDCService() {
218228
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_START_CMD);
219-
return exitValue == 0;
229+
if (exitValue != 0) {
230+
return false;
231+
}
232+
waitForSdcServiceActionToComplete();
233+
return true;
220234
}
221235

222236
public static boolean stopSDCService() {
223237
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_STOP_CMD);
224-
return exitValue == 0;
238+
if (exitValue != 0) {
239+
return false;
240+
}
241+
waitForSdcServiceActionToComplete();
242+
return true;
225243
}
226244

227245
public static boolean restartSDCService() {
228246
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_RESTART_CMD);
229-
return exitValue == 0;
247+
if (exitValue != 0) {
248+
return false;
249+
}
250+
waitForSdcServiceActionToComplete();
251+
return true;
252+
}
253+
254+
private static void waitForSdcServiceActionToComplete() {
255+
// Wait for the SDC service to settle and reach a stable state after start/restart/stop
256+
int waitTimeInSecs = AgentPropertiesFileHandler.getPropertyValue(SDC_SERVICE_ACTION_WAIT);
257+
if (waitTimeInSecs < 0) {
258+
waitTimeInSecs = SDC_SERVICE_ACTION_WAIT.getDefaultValue();
259+
}
260+
try {
261+
LOGGER.debug(String.format("Waiting for %d secs after SDC service action, to reach a stable state", waitTimeInSecs));
262+
Thread.sleep(waitTimeInSecs * 1000L);
263+
} catch (InterruptedException ignore) {
264+
}
230265
}
231266
}

0 commit comments

Comments
 (0)