Skip to content

Commit f14e93c

Browse files
committed
Error handling in nas backup plugin
1 parent f8448b3 commit f14e93c

File tree

8 files changed

+101
-40
lines changed

8 files changed

+101
-40
lines changed

core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public class BackupAnswer extends Answer {
2828
private Long size;
2929
private Long virtualSize;
3030
private Map<String, String> volumes;
31+
Boolean needsCleanup;
3132

3233
public BackupAnswer(final Command command, final boolean success, final String details) {
3334
super(command, success, details);
@@ -56,4 +57,15 @@ public Map<String, String> getVolumes() {
5657
public void setVolumes(Map<String, String> volumes) {
5758
this.volumes = volumes;
5859
}
60+
61+
public Boolean getNeedsCleanup() {
62+
if (needsCleanup == null) {
63+
return false;
64+
}
65+
return needsCleanup;
66+
}
67+
68+
public void setNeedsCleanup(Boolean needsCleanup) {
69+
this.needsCleanup = needsCleanup;
70+
}
5971
}

engine/schema/src/main/java/org/apache/cloudstack/backup/dao/BackupDaoImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,15 +238,15 @@ public List<Backup> syncBackups(Long zoneId, Long vmId, List<Backup> externalBac
238238
public Long countBackupsForAccount(long accountId) {
239239
SearchCriteria<Long> sc = CountBackupsByAccount.create();
240240
sc.setParameters("account", accountId);
241-
sc.setParameters("status", Backup.Status.Error, Backup.Status.Failed, Backup.Status.Removed, Backup.Status.Expunged);
241+
sc.setParameters("status", Backup.Status.Failed, Backup.Status.Removed, Backup.Status.Expunged);
242242
return customSearch(sc, null).get(0);
243243
}
244244

245245
@Override
246246
public Long calculateBackupStorageForAccount(long accountId) {
247247
SearchCriteria<SumCount> sc = CalculateBackupStorageByAccount.create();
248248
sc.setParameters("account", accountId);
249-
sc.setParameters("status", Backup.Status.Error, Backup.Status.Failed, Backup.Status.Removed, Backup.Status.Expunged);
249+
sc.setParameters("status", Backup.Status.Failed, Backup.Status.Removed, Backup.Status.Expunged);
250250
return customSearch(sc, null).get(0).sum;
251251
}
252252

plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,14 @@ public Pair<Boolean, Backup> takeBackup(final VirtualMachine vm) {
171171
try {
172172
answer = (BackupAnswer) agentManager.send(host.getId(), command);
173173
} catch (AgentUnavailableException e) {
174-
backupVO.setStatus(Backup.Status.Error);
175-
backupDao.update(backupVO.getId(), backupVO);
174+
logger.error("Unable to contact backend control plane to initiate backup for VM {}", vm.getInstanceName());
175+
backupVO.setStatus(Backup.Status.Failed);
176+
backupDao.remove(backupVO.getId());
176177
throw new CloudRuntimeException("Unable to contact backend control plane to initiate backup");
177178
} catch (OperationTimedoutException e) {
178-
backupVO.setStatus(Backup.Status.Error);
179-
backupDao.update(backupVO.getId(), backupVO);
179+
logger.error("Operation to initiate backup timed out for VM {}", vm.getInstanceName());
180+
backupVO.setStatus(Backup.Status.Failed);
181+
backupDao.remove(backupVO.getId());
180182
throw new CloudRuntimeException("Operation to initiate backup timed out, please try again");
181183
}
182184

@@ -192,8 +194,15 @@ public Pair<Boolean, Backup> takeBackup(final VirtualMachine vm) {
192194
throw new CloudRuntimeException("Failed to update backup");
193195
}
194196
} else {
195-
backupVO.setStatus(Backup.Status.Failed);
196-
backupDao.remove(backupVO.getId());
197+
logger.error("Failed to take backup for VM {}: {}", vm.getInstanceName(), answer != null ? answer.getDetails() : "No answer received");
198+
if (answer.getNeedsCleanup()) {
199+
logger.error("Backup cleanup failed for VM {}. Leaving the backup in Error state.", vm.getInstanceName());
200+
backupVO.setStatus(Backup.Status.Error);
201+
backupDao.update(backupVO.getId(), backupVO);
202+
} else {
203+
backupVO.setStatus(Backup.Status.Failed);
204+
backupDao.remove(backupVO.getId());
205+
}
197206
return new Pair<>(false, null);
198207
}
199208
}

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
@ResourceWrapper(handles = TakeBackupCommand.class)
3838
public class LibvirtTakeBackupCommandWrapper extends CommandWrapper<TakeBackupCommand, Answer, LibvirtComputingResource> {
39+
private static final Integer EXIT_CLEANUP_FAILED = 20;
3940
@Override
4041
public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvirtComputingResource) {
4142
final String vmName = command.getVmName();
@@ -61,7 +62,12 @@ public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvir
6162

6263
if (result.first() != 0) {
6364
logger.debug("Failed to take VM backup: " + result.second());
64-
return new BackupAnswer(command, false, result.second().trim());
65+
BackupAnswer answer = new BackupAnswer(command, false, result.second().trim());
66+
if (result.first() == EXIT_CLEANUP_FAILED) {
67+
logger.debug("Backup cleanup failed");
68+
answer.setNeedsCleanup(true);
69+
}
70+
return answer;
6571
}
6672

6773
long backupSize = 0L;

scripts/vm/hypervisor/kvm/nasbackup.sh

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ BACKUP_DIR=""
3333
DISK_PATHS=""
3434
logFile="/var/log/cloudstack/agent/agent.log"
3535

36+
EXIT_CLEANUP_FAILED=20
37+
3638
log() {
3739
[[ "$verb" -eq 1 ]] && builtin echo "$@"
3840
if [[ "$1" == "-ne" || "$1" == "-e" || "$1" == "-n" ]]; then
@@ -88,7 +90,7 @@ sanity_checks() {
8890

8991
backup_running_vm() {
9092
mount_operation
91-
mkdir -p $dest
93+
mkdir -p "$dest" || { echo "Failed to create backup directory $dest"; exit 1; }
9294

9395
name="root"
9496
echo "<domainbackup mode='push'><disks>" > $dest/backup.xml
@@ -111,8 +113,11 @@ backup_running_vm() {
111113
while true; do
112114
status=$(virsh -c qemu:///system domjobinfo $VM --completed --keep-completed | awk '/Job type:/ {print $3}')
113115
case "$status" in
114-
Completed) break ;;
115-
Failed) log -ne "Virsh backup job failed"; exit 1 ;;
116+
Completed)
117+
break ;;
118+
Failed)
119+
echo "Virsh backup job failed"
120+
cleanup ;;
116121
esac
117122
sleep 5
118123
done
@@ -130,14 +135,18 @@ backup_running_vm() {
130135

131136
backup_stopped_vm() {
132137
mount_operation
133-
mkdir -p $dest
138+
mkdir -p "$dest" || { echo "Failed to create backup directory $dest"; exit 1; }
134139

135140
IFS=","
136141

137142
name="root"
138143
for disk in $DISK_PATHS; do
139144
volUuid="${disk##*/}"
140-
qemu-img convert -O qcow2 $disk $dest/$name.$volUuid.qcow2 | tee -a "$logFile"
145+
output="$dest/$name.$volUuid.qcow2"
146+
if ! qemu-img convert -O qcow2 "$disk" "$output" > "$logFile" 2> >(cat >&2); then
147+
echo "qemu-img convert failed for $disk $output"
148+
cleanup
149+
fi
141150
name="datadisk"
142151
done
143152
sync
@@ -178,6 +187,19 @@ mount_operation() {
178187
fi
179188
}
180189

190+
cleanup() {
191+
local status=0
192+
193+
rm -rf "$dest" || { echo "Failed to delete $dest"; status=1; }
194+
umount "$mount_point" || { echo "Failed to unmount $mount_point"; status=1; }
195+
rmdir "$mount_point" || { echo "Failed to remove mount point $mount_point"; status=1; }
196+
197+
if [[ $status -ne 0 ]]; then
198+
echo "Backup cleanup failed"
199+
exit $EXIT_CLEANUP_FAILED
200+
fi
201+
}
202+
181203
function usage {
182204
echo ""
183205
echo "Usage: $0 -o <operation> -v|--vm <domain name> -t <storage type> -s <storage address> -m <mount options> -p <backup path> -d <disks path>"

server/src/main/java/org/apache/cloudstack/backup/BackupManagerImpl.java

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,11 @@ public boolean createBackup(CreateBackupCmd cmd) throws ResourceAllocationExcept
656656
throw new CloudRuntimeException("VM backup offering not found");
657657
}
658658

659+
final BackupProvider backupProvider = getBackupProvider(offering.getProvider());
660+
if (offering == null) {
661+
throw new CloudRuntimeException("VM backup provider not found for the offering");
662+
}
663+
659664
if (!offering.isUserDrivenBackupAllowed()) {
660665
throw new CloudRuntimeException("The assigned backup offering does not allow ad-hoc user backup");
661666
}
@@ -702,30 +707,26 @@ public boolean createBackup(CreateBackupCmd cmd) throws ResourceAllocationExcept
702707
true, 0);
703708

704709

705-
final BackupProvider backupProvider = getBackupProvider(offering.getProvider());
706-
if (backupProvider != null) {
707-
Pair<Boolean, Backup> result = backupProvider.takeBackup(vm);
708-
if (!result.first()) {
709-
throw new CloudRuntimeException("Failed to create VM backup");
710-
}
711-
Backup backup = result.second();
712-
if (backup != null) {
713-
BackupVO vmBackup = backupDao.findById(result.second().getId());
714-
vmBackup.setBackupIntervalType((short) type.ordinal());
715-
if (cmd.getName() != null) {
716-
vmBackup.setName(cmd.getName());
717-
}
718-
vmBackup.setDescription(cmd.getDescription());
719-
backupDao.update(vmBackup.getId(), vmBackup);
720-
resourceLimitMgr.incrementResourceCount(vm.getAccountId(), Resource.ResourceType.backup);
721-
resourceLimitMgr.incrementResourceCount(vm.getAccountId(), Resource.ResourceType.backup_storage, backup.getSize());
722-
}
723-
if (type != Backup.Type.MANUAL) {
724-
postCreateScheduledBackup(type, vm.getId());
710+
Pair<Boolean, Backup> result = backupProvider.takeBackup(vm);
711+
if (!result.first()) {
712+
throw new CloudRuntimeException("Failed to create VM backup");
713+
}
714+
Backup backup = result.second();
715+
if (backup != null) {
716+
BackupVO vmBackup = backupDao.findById(result.second().getId());
717+
vmBackup.setBackupIntervalType((short) type.ordinal());
718+
if (cmd.getName() != null) {
719+
vmBackup.setName(cmd.getName());
725720
}
726-
return true;
721+
vmBackup.setDescription(cmd.getDescription());
722+
backupDao.update(vmBackup.getId(), vmBackup);
723+
resourceLimitMgr.incrementResourceCount(vm.getAccountId(), Resource.ResourceType.backup);
724+
resourceLimitMgr.incrementResourceCount(vm.getAccountId(), Resource.ResourceType.backup_storage, backup.getSize());
727725
}
728-
throw new CloudRuntimeException("Failed to create VM backup");
726+
if (type != Backup.Type.MANUAL) {
727+
postCreateScheduledBackup(type, vm.getId());
728+
}
729+
return true;
729730
}
730731

731732
@Override
@@ -840,6 +841,9 @@ public boolean restoreBackup(final Long backupId) {
840841
if (backup == null) {
841842
throw new CloudRuntimeException("Backup " + backupId + " does not exist");
842843
}
844+
if (backup.getStatus() != Backup.Status.BackedUp) {
845+
throw new CloudRuntimeException("Backup should be in BackedUp state");
846+
}
843847
validateBackupForZone(backup.getZoneId());
844848

845849
final VMInstanceVO vm = vmInstanceDao.findByIdIncludingRemoved(backup.getVmId());
@@ -1119,6 +1123,9 @@ public boolean restoreBackupToVM(final Long backupId, final Long vmId) throws Re
11191123
if (backup == null) {
11201124
throw new CloudRuntimeException("Backup " + backupId + " does not exist");
11211125
}
1126+
if (backup.getStatus() != Backup.Status.BackedUp) {
1127+
throw new CloudRuntimeException("Backup should be in BackedUp state");
1128+
}
11221129
validateBackupForZone(backup.getZoneId());
11231130

11241131
VMInstanceVO vm = vmInstanceDao.findByIdIncludingRemoved(vmId);
@@ -1200,6 +1207,9 @@ public boolean restoreBackupVolumeAndAttachToVM(final String backedUpVolumeUuid,
12001207
if (backup == null) {
12011208
throw new CloudRuntimeException("Provided backup not found");
12021209
}
1210+
if (backup.getStatus() != Backup.Status.BackedUp) {
1211+
throw new CloudRuntimeException("Backup should be in BackedUp state");
1212+
}
12031213
validateBackupForZone(backup.getZoneId());
12041214

12051215
final VMInstanceVO vm = findVmById(vmId);
@@ -1315,7 +1325,8 @@ public boolean deleteBackup(final Long backupId, final Boolean forced) {
13151325
boolean result = backupProvider.deleteBackup(backup, forced);
13161326
if (result) {
13171327
resourceLimitMgr.decrementResourceCount(backup.getAccountId(), Resource.ResourceType.backup);
1318-
resourceLimitMgr.decrementResourceCount(backup.getAccountId(), Resource.ResourceType.backup_storage, backup.getSize());
1328+
Long backupSize = backup.getSize() != null ? backup.getSize() : 0L;
1329+
resourceLimitMgr.decrementResourceCount(backup.getAccountId(), Resource.ResourceType.backup_storage, backupSize);
13191330
if (backupDao.remove(backup.getId())) {
13201331
checkAndGenerateUsageForLastBackupDeletedAfterOfferingRemove(vm, backup);
13211332
return true;

ui/public/locales/en.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,6 +1531,7 @@
15311531
"label.more.access.dashboard.ui": "More about accessing dashboard UI",
15321532
"label.mount.cks.iso.on.vr": "Use CKS packages from Virtual Router",
15331533
"label.mount.sharedfs": "Mount Shared FileSystem via NFS",
1534+
"label.mountopts": "Mount options",
15341535
"label.move.down.row": "Move down one row",
15351536
"label.move.to.bottom": "Move to bottom",
15361537
"label.move.to.top": "Move to top",

ui/src/config/section/storage.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -432,15 +432,15 @@ export default {
432432
label: 'label.backup.restore',
433433
message: 'message.backup.restore',
434434
dataView: true,
435-
show: (record) => { return record.state !== 'Destroyed' }
435+
show: (record) => { return record.status === 'BackedUp' }
436436
},
437437
{
438438
api: 'restoreVolumeFromBackupAndAttachToVM',
439439
icon: 'paper-clip-outlined',
440440
label: 'label.backup.attach.restore',
441441
message: 'message.backup.attach.restore',
442442
dataView: true,
443-
show: (record) => { return record.state !== 'Destroyed' },
443+
show: (record) => { return record.status === 'BackedUp' },
444444
popup: true,
445445
component: shallowRef(defineAsyncComponent(() => import('@/views/storage/RestoreAttachBackupVolume.vue')))
446446
},
@@ -452,7 +452,7 @@ export default {
452452
message: 'message.backup.restore',
453453
dataView: true,
454454
popup: true,
455-
show: (record) => { return record.state !== 'Destroyed' },
455+
show: (record) => { return record.status === 'BackedUp' },
456456
component: shallowRef(defineAsyncComponent(() => import('@/views/storage/CreateVMFromBackup.vue')))
457457
},
458458
{

0 commit comments

Comments
 (0)