Skip to content

Commit 4712856

Browse files
Pearl1594 and Pearl Dsilva authored
FR563 - Capacity recalculation on migration - considering overcommit ratio (#21)
https://shapeblue.atlassian.net/browse/FRO-916 Co-authored-by: Pearl Dsilva <[email protected]>
1 parent ec20fd1 commit 4712856

File tree

4 files changed

+149
-42
lines changed

4 files changed

+149
-42
lines changed

engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2166,6 +2166,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
21662166
}
21672167

21682168
final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, _offeringDao.findById(vm.getId(), vm.getServiceOfferingId()), null, null);
2169+
21692170
_networkMgr.prepareNicForMigration(profile, dest);
21702171
volumeMgr.prepareForMigration(profile, dest);
21712172
profile.setConfigDriveLabel(VmConfigDriveLabel.value());

server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,15 @@
3939

4040
import com.cloud.api.ApiDBUtils;
4141
import com.cloud.api.query.vo.HostJoinVO;
42+
import com.cloud.dc.ClusterDetailsDao;
4243
import com.cloud.gpu.HostGpuGroupsVO;
4344
import com.cloud.gpu.VGPUTypesVO;
4445
import com.cloud.host.Host;
4546
import com.cloud.host.HostStats;
4647
import com.cloud.host.dao.HostDetailsDao;
4748
import com.cloud.hypervisor.Hypervisor;
4849
import com.cloud.storage.StorageStats;
50+
import com.cloud.utils.NumbersUtil;
4951
import com.cloud.utils.db.GenericDaoBase;
5052
import com.cloud.utils.db.SearchBuilder;
5153
import com.cloud.utils.db.SearchCriteria;
@@ -59,6 +61,8 @@ public class HostJoinDaoImpl extends GenericDaoBase<HostJoinVO, Long> implements
5961
@Inject
6062
private HostDetailsDao hostDetailsDao;
6163
@Inject
64+
protected ClusterDetailsDao _clusterDetailsDao;
65+
@Inject
6266
private OutOfBandManagementDao outOfBandManagementDao;
6367

6468
private final SearchBuilder<HostJoinVO> hostSearch;
@@ -168,7 +172,8 @@ public HostResponse newHostResponse(HostJoinVO host, EnumSet<HostDetails> detail
168172

169173
String cpuAlloc = decimalFormat.format(((float)cpu / (float)(host.getCpus() * host.getSpeed())) * 100f) + "%";
170174
hostResponse.setCpuAllocated(cpuAlloc);
171-
String cpuWithOverprovisioning = new Float(host.getCpus() * host.getSpeed() * ApiDBUtils.getCpuOverprovisioningFactor()).toString();
175+
final float clusterCpuOvercommitRatio = NumbersUtil.parseFloat(_clusterDetailsDao.findDetail(host.getClusterId(), "cpuOvercommitRatio").getValue(), ApiDBUtils.getCpuOverprovisioningFactor());
176+
String cpuWithOverprovisioning = Float.toString(host.getCpus() * host.getSpeed() * clusterCpuOvercommitRatio);
172177
hostResponse.setCpuWithOverprovisioning(cpuWithOverprovisioning);
173178
}
174179

@@ -311,7 +316,8 @@ public HostForMigrationResponse newHostForMigrationResponse(HostJoinVO host, Enu
311316

312317
String cpuAlloc = decimalFormat.format(((float)cpu / (float)(host.getCpus() * host.getSpeed())) * 100f) + "%";
313318
hostResponse.setCpuAllocated(cpuAlloc);
314-
String cpuWithOverprovisioning = new Float(host.getCpus() * host.getSpeed() * ApiDBUtils.getCpuOverprovisioningFactor()).toString();
319+
final float clusterCpuOvercommitRatio = NumbersUtil.parseFloat(_clusterDetailsDao.findDetail(host.getClusterId(), "cpuOvercommitRatio").getValue(), ApiDBUtils.getCpuOverprovisioningFactor());
320+
String cpuWithOverprovisioning = Float.toString(host.getCpus() * host.getSpeed() * clusterCpuOvercommitRatio);
315321
hostResponse.setCpuWithOverprovisioning(cpuWithOverprovisioning);
316322
}
317323

server/src/com/cloud/capacity/CapacityManagerImpl.java

Lines changed: 30 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
import com.cloud.configuration.ConfigurationManager;
5656
import com.cloud.dc.ClusterDetailsDao;
5757
import com.cloud.dc.ClusterDetailsVO;
58-
import com.cloud.dc.ClusterVO;
5958
import com.cloud.dc.dao.ClusterDao;
6059
import com.cloud.deploy.DeploymentClusterPlanner;
6160
import com.cloud.event.UsageEventVO;
@@ -277,6 +276,9 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
277276
@Override
278277
public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) {
279278

279+
if (vm == null) {
280+
return;
281+
}
280282
final long hostId = vm.getHostId();
281283
HostVO host = _hostDao.findById(hostId);
282284
final long clusterId = host.getClusterId();
@@ -287,18 +289,22 @@ public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) {
287289

288290
CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU);
289291
CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY);
290-
291292
if (capacityCpu == null || capacityMem == null || svo == null) {
292293
return;
293294
}
294295

295-
final int cpu = svo.getCpu() * svo.getSpeed();
296-
final long ram = svo.getRamSize() * 1024L * 1024L;
296+
final int cpu = (int) (svo.getCpu() * svo.getSpeed());
297+
final long ram = (long) (svo.getRamSize() * 1024L * 1024L);
297298

298299
try {
299300
final long capacityCpuId = capacityCpu.getId();
300301
final long capacityMemId = capacityMem.getId();
301302

303+
// Update the VM's overcommit ratios to match those of the cluster it has been deployed on or migrated to.
304+
VMInstanceVO vmInstanceVO = _vmDao.findById(vm.getId());
305+
_userVmDetailsDao.addDetail(vmInstanceVO.getId(), "cpuOvercommitRatio", String.valueOf(cpuOvercommitRatio), true);
306+
_userVmDetailsDao.addDetail(vmInstanceVO.getId(), "memoryOvercommitRatio", String.valueOf(memoryOvercommitRatio), true);
307+
302308
Transaction.execute(new TransactionCallbackNoReturn() {
303309
@Override
304310
public void doInTransactionWithoutResult(TransactionStatus status) {
@@ -602,33 +608,18 @@ public void updateCapacityForHost(final Host host) {
602608
s_logger.debug("Found " + vms.size() + " VMs on host " + host.getId());
603609
}
604610

605-
ClusterVO cluster = _clusterDao.findById(host.getClusterId());
606-
ClusterDetailsVO clusterDetailCpu = _clusterDetailsDao.findDetail(cluster.getId(), "cpuOvercommitRatio");
607-
ClusterDetailsVO clusterDetailRam = _clusterDetailsDao.findDetail(cluster.getId(), "memoryOvercommitRatio");
608-
Float clusterCpuOvercommitRatio = Float.parseFloat(clusterDetailCpu.getValue());
609-
Float clusterRamOvercommitRatio = Float.parseFloat(clusterDetailRam.getValue());
610-
Float cpuOvercommitRatio = 1f;
611-
Float ramOvercommitRatio = 1f;
611+
612612
for (VMInstanceVO vm : vms) {
613613
Map<String, String> vmDetails = _userVmDetailsDao.listDetailsKeyPairs(vm.getId());
614-
String vmDetailCpu = vmDetails.get("cpuOvercommitRatio");
615-
String vmDetailRam = vmDetails.get("memoryOvercommitRatio");
616-
if (vmDetailCpu != null) {
617-
//if vmDetail_cpu is not null it means it is running in a overcommited cluster.
618-
cpuOvercommitRatio = Float.parseFloat(vmDetailCpu);
619-
ramOvercommitRatio = Float.parseFloat(vmDetailRam);
620-
}
621614
ServiceOffering so = offeringsMap.get(vm.getServiceOfferingId());
622615
if (so.isDynamic()) {
623616
usedMemory +=
624-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L) / ramOvercommitRatio) *
625-
clusterRamOvercommitRatio;
617+
(Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L);
626618
usedCpu +=
627-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name()))) / cpuOvercommitRatio) *
628-
clusterCpuOvercommitRatio;
619+
(Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name())));
629620
} else {
630-
usedMemory += ((so.getRamSize() * 1024L * 1024L) / ramOvercommitRatio) * clusterRamOvercommitRatio;
631-
usedCpu += ((so.getCpu() * so.getSpeed()) / cpuOvercommitRatio) * clusterCpuOvercommitRatio;
621+
usedMemory += (so.getRamSize() * 1024L * 1024L);
622+
usedCpu += (so.getCpu() * so.getSpeed());
632623
}
633624
}
634625

@@ -639,25 +630,16 @@ public void updateCapacityForHost(final Host host) {
639630
for (VMInstanceVO vm : vmsByLastHostId) {
640631
long secondsSinceLastUpdate = (DateUtil.currentGMTTime().getTime() - vm.getUpdateTime().getTime()) / 1000;
641632
if (secondsSinceLastUpdate < _vmCapacityReleaseInterval) {
642-
UserVmDetailVO vmDetailCpu = _userVmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio");
643-
UserVmDetailVO vmDetailRam = _userVmDetailsDao.findDetail(vm.getId(), "memoryOvercommitRatio");
644-
if (vmDetailCpu != null) {
645-
//if vmDetail_cpu is not null it means it is running in a overcommited cluster.
646-
cpuOvercommitRatio = Float.parseFloat(vmDetailCpu.getValue());
647-
ramOvercommitRatio = Float.parseFloat(vmDetailRam.getValue());
648-
}
649633
ServiceOffering so = offeringsMap.get(vm.getServiceOfferingId());
650634
Map<String, String> vmDetails = _userVmDetailsDao.listDetailsKeyPairs(vm.getId());
651635
if (so.isDynamic()) {
652-
reservedMemory +=
653-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L) / ramOvercommitRatio) *
654-
clusterRamOvercommitRatio;
655-
reservedCpu +=
656-
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name()))) / cpuOvercommitRatio) *
657-
clusterCpuOvercommitRatio;
636+
reservedMemory +=
637+
((Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.memory.name())) * 1024L * 1024L));
638+
reservedCpu +=
639+
(Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuNumber.name())) * Integer.parseInt(vmDetails.get(UsageEventVO.DynamicParameters.cpuSpeed.name())));
658640
} else {
659-
reservedMemory += ((so.getRamSize() * 1024L * 1024L) / ramOvercommitRatio) * clusterRamOvercommitRatio;
660-
reservedCpu += (so.getCpu() * so.getSpeed() / cpuOvercommitRatio) * clusterCpuOvercommitRatio;
641+
reservedMemory += (so.getRamSize() * 1024L * 1024L);
642+
reservedCpu += (so.getCpu() * so.getSpeed());
661643
}
662644
} else {
663645
// signal if not done already, that the VM has been stopped for skip.counting.hours,
@@ -843,6 +825,15 @@ public boolean postStateTransitionEvent(StateMachine2.Transition<State, Event> t
843825
allocateVmCapacity(vm, fromLastHost);
844826
}
845827

828+
if (oldState == State.Migrating && newState == State.Running) {
829+
boolean fromLastHost = false;
830+
if (vm.getHostId().equals(vm.getLastHostId())) {
831+
s_logger.debug("VM starting again on the last host it was stopped on");
832+
fromLastHost = true;
833+
}
834+
allocateVmCapacity(vm, fromLastHost);
835+
}
836+
846837
if (newState == State.Stopped) {
847838
if (vm.getType() == VirtualMachine.Type.User) {
848839

test/integration/smoke/test_vm_life_cycle.py

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
""" BVT tests for Virtual Machine Life Cycle
1818
"""
1919
# Import Local Modules
20-
from marvin.cloudstackTestCase import cloudstackTestCase
20+
from marvin.cloudstackTestCase import cloudstackTestCase, unittest
2121
from marvin.cloudstackAPI import (recoverVirtualMachine,
2222
destroyVirtualMachine,
2323
attachIso,
@@ -38,6 +38,9 @@
3838
DiskOffering)
3939
from marvin.lib.common import (get_domain,
4040
get_zone,
41+
list_clusters,
42+
list_hosts,
43+
list_storage_pools,
4144
get_template)
4245
from marvin.codes import FAILED, PASS
4346
from nose.plugins.attrib import attr
@@ -752,6 +755,112 @@ def test_10_attachAndDetach_iso(self):
752755
)
753756
return
754757

758+
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg", "security"], required_hardware="false")
def test_11_livemigrate_VM_across_cluster_vmware(self):
    """Live-migrate a VM between two VMware clusters and verify its overcommit details.

    Steps:
      1. List the VMware clusters; skip unless at least two clusters exist,
         each with at least one host and one storage pool, and the first two
         clusters differ in CPU or memory overcommit ratio.
      2. Deploy a VM on a host of the first cluster.
      3. Migrate the VM to a host of the second cluster.
      4. Check that the VM's 'cpuOvercommitRatio' and 'memoryOvercommitRatio'
         rows in user_vm_details match the second cluster's ratios.
    """
    # NOTE(review): the test is tagged required_hardware="false" although it
    # needs a multi-cluster VMware setup - confirm the intended tagging.
    try:
        self.list_vmware_clusters = list_clusters(self.apiclient, hypervisor="vmware")
    except Exception as e:
        raise unittest.SkipTest(e)

    # Treat a missing result the same as an empty one so len() cannot fail.
    clusters = self.list_vmware_clusters or []
    if len(clusters) < 2:
        # skipTest() raises SkipTest itself; no explicit 'raise' is needed.
        self.skipTest("The setup doesn't have more than one cluster, cannot execute live migration across cluster")

    count_host = 0
    count_pool = 0
    for cluster in clusters:
        if len(list_hosts(self.apiclient, clusterid=cluster.id) or []) >= 1:
            count_host += 1
        if len(list_storage_pools(self.apiclient, clusterid=cluster.id) or []) >= 1:
            count_pool += 1

    if count_host < 2 or count_pool < 2:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the next line's indentation in the message.
        self.skipTest("The setup doesn't have enough pools or enough hosts. "
                      "To run these tests the setup must have atleast 2 clusters, "
                      "each having min 1 host and 1 storage pools")

    source_cluster = clusters[0]
    destination_cluster = clusters[1]

    # get each cluster's cpu and memory over commit ratios
    cluster_1_cpu_oc = source_cluster.cpuovercommitratio
    cluster_1_ram_oc = source_cluster.memoryovercommitratio
    cluster_2_cpu_oc = destination_cluster.cpuovercommitratio
    cluster_2_ram_oc = destination_cluster.memoryovercommitratio

    if cluster_1_cpu_oc == cluster_2_cpu_oc and cluster_1_ram_oc == cluster_2_ram_oc:
        self.skipTest("The 2 clusters have same memory and cpu over commit ratios, skipping test")

    # Filter by the cluster's id (not the cluster object itself).
    hosts_c1 = Host.list(
        self.apiclient,
        zoneid=self.zone.id,
        type='Routing',
        clusterid=source_cluster.id
    )
    hosts_c2 = Host.list(
        self.apiclient,
        zoneid=self.zone.id,
        type='Routing',
        clusterid=destination_cluster.id
    )
    if not hosts_c1 or not hosts_c2:
        self.skipTest("Could not find a routing host in each of the two clusters")

    source_host = hosts_c1[0]
    migrate_host = hosts_c2[0]

    self.vm_to_migrate = VirtualMachine.create(
        self.apiclient,
        self.services["small"],
        accountid=self.account.name,
        domainid=self.account.domainid,
        serviceofferingid=self.small_offering.id,
        mode=self.services["mode"],
        hostid=source_host.id
    )
    # Register for cleanup right away so the VM is removed even if a later
    # step fails.
    self.cleanup.append(self.vm_to_migrate)

    self.debug("Migrating VM-ID: %s to Host: %s" % (
        self.vm_to_migrate.id,
        migrate_host.id
    ))

    try:
        self.vm_to_migrate.migrate(self.apiclient, migrate_host.id)
    except Exception as e:
        self.fail("Failed to migrate instance: %s" % e)

    def fetch_vm_detail(detail_name):
        """Return the value of the named user_vm_details row for the migrated VM."""
        # NOTE(review): this filters user_vm_details.vm_id by the id returned
        # from the API; if that column holds the internal numeric id rather
        # than the UUID, the lookup must go through vm_instance - confirm.
        rows = self.dbclient.execute(
            "select value from user_vm_details where vm_id = '%s' and name = '%s';"
            % (self.vm_to_migrate.id, detail_name)
        )
        # Validate the type before taking len() of the result.
        self.assertEqual(
            isinstance(rows, list),
            True,
            "Check DB query result set for valid data"
        )
        self.assertNotEqual(
            len(rows),
            0,
            "Check DB Query result set"
        )
        self.debug("Query result: %s" % str(rows))
        return rows[0][0]

    # verify if the overcommit ratios are modified; compare numerically so
    # that differing textual forms of the same ratio (e.g. "2" vs "2.0")
    # do not cause a spurious failure.
    self.assertEqual(
        float(fetch_vm_detail("cpuOvercommitRatio")),
        float(cluster_2_cpu_oc),
        "VM's cpu over-commit ratio not updated on migration"
    )
    self.assertEqual(
        float(fetch_vm_detail("memoryOvercommitRatio")),
        float(cluster_2_ram_oc),
        "VM's memory over-commit ratio not updated on migration"
    )
755864

756865
class TestMigrateVMwithVolume(cloudstackTestCase):
757866

0 commit comments

Comments
 (0)