Skip to content

Commit 0385ac2

Browse files
committed
align restore-openstack-ctlplane.sh with the doc
Signed-off-by: Martin Schuppert <[email protected]>
1 parent 1a1f163 commit 0385ac2

File tree

1 file changed

+131
-54
lines changed

1 file changed

+131
-54
lines changed

docs/dev/restore-openstack-ctlplane.sh

Lines changed: 131 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@ set -e
1313
# 5. MariaDBDatabase CRs (needs database password secrets)
1414
# 6. MariaDBAccount CRs (needs MariaDBDatabase CRs)
1515
# 7. Related CRs (NetConfig, OpenStackVersion, Topology)
16-
# 8. OpenStackControlPlane CR (triggers operator reconciliation)
17-
# 9. Operators create Certificate CRs → cert-manager issues fresh certificates
18-
# 10. Manual RabbitMQ user restoration
16+
# 8. OpenStackControlPlane CR with staged deployment annotation
17+
# 9. Wait for InfrastructureReady condition
18+
# 10. Restore database contents (MariaDB and OVN)
19+
# 11. Restore RabbitMQ user credentials
20+
# 12. Resume deployment (remove annotation)
21+
# 13. Operators create Certificate CRs for the resumed services → cert-manager issues fresh certificates (infrastructure certificates are already issued during steps 8-9)
1922
#
2023
# NOTE: Certificate CRs and certificate secrets are NOT restored.
2124
# Operators recreate Certificate CRs during reconciliation, and cert-manager
@@ -396,83 +399,106 @@ fi
396399
echo ""
397400

398401
echo "========================================"
399-
echo "Step 10: Restore OpenStackControlPlane CR"
402+
echo "Step 10: Restore OpenStackControlPlane CR with Staged Deployment"
400403
echo "========================================"
401404
echo ""
402-
echo "When the OpenStackControlPlane CR is restored, operators will:"
403-
echo " 1. Reconcile and create Certificate CRs for all services"
404-
echo " 2. cert-manager will issue fresh certificates from the restored CAs"
405-
echo " 3. Services will use new certificates with fresh expiry dates"
405+
echo "CRITICAL: Using staged deployment annotation to pause after infrastructure creation."
406+
echo "This allows database restore before OpenStack services start."
406407
echo ""
407-
read -p "Ready to restore OpenStackControlPlane CR? This will trigger operator reconciliation. (yes/no): " RESTORE_CONFIRM
408+
echo "When the OpenStackControlPlane CR is restored with annotation, operators will:"
409+
echo " 1. Create infrastructure: Galera, OVN, RabbitMQ, Memcached"
410+
echo " 2. Create Certificate CRs for infrastructure services"
411+
echo " 3. cert-manager issues fresh certificates from the restored CAs"
412+
echo " 4. PAUSE - OpenStack services (Keystone, Nova, etc.) are NOT created yet"
413+
echo " 5. Set InfrastructureReady condition to True"
414+
echo ""
415+
read -p "Ready to restore OpenStackControlPlane CR with staged deployment? (yes/no): " RESTORE_CONFIRM
408416

409417
if [ "${RESTORE_CONFIRM}" != "yes" ]; then
410418
echo "Aborting. You can manually restore later with:"
411419
echo " cd ${BACKUP_DIR}"
412-
echo " oc apply -f openstackcontrolplane-backup.json -n ${NAMESPACE}"
420+
echo " jq '.items[0].metadata.annotations[\"core.openstack.org/deployment-stage\"] = \"infrastructure-only\"' openstackcontrolplane-backup.json > openstackcontrolplane-staged.json"
421+
echo " oc apply -f openstackcontrolplane-staged.json -n ${NAMESPACE}"
413422
popd > /dev/null
414423
rm -rf ${WORK_DIR}
415424
exit 1
416425
fi
417426

418-
oc apply -f openstackcontrolplane-backup.json -n ${NAMESPACE}
419-
echo "✓ OpenStackControlPlane CR restored"
420-
echo ""
427+
echo "Adding deployment-stage annotation..."
428+
CTLPLANE_STAGED=$(mktemp)
429+
jq '.items[0].metadata.annotations["core.openstack.org/deployment-stage"] = "infrastructure-only"' \
430+
openstackcontrolplane-backup.json > ${CTLPLANE_STAGED}
421431

422-
echo "Waiting for operator reconciliation to start..."
423-
sleep 10
432+
oc apply -f ${CTLPLANE_STAGED} -n ${NAMESPACE}
433+
rm -f ${CTLPLANE_STAGED}
434+
echo "✓ OpenStackControlPlane CR restored with staged deployment annotation"
424435
echo ""
425436

426-
echo "Checking RabbitMQ cluster status..."
427-
oc get rabbitmq -n ${NAMESPACE} || echo "No RabbitMQ resources yet"
437+
echo "Waiting for infrastructure to be ready..."
438+
echo "This may take several minutes..."
428439
echo ""
429440

430-
echo "Waiting for RabbitMQ clusters to be created and ready..."
431-
echo "This may take several minutes. Checking every 30 seconds..."
432-
WAIT_COUNT=0
433-
MAX_WAIT=20 # 10 minutes max
434-
435-
while [ ${WAIT_COUNT} -lt ${MAX_WAIT} ]; do
436-
RABBITMQ_COUNT=$(oc get rabbitmq -n ${NAMESPACE} --no-headers 2>/dev/null | wc -l)
441+
echo "Waiting for InfrastructureReady condition..."
442+
if oc wait --for=condition=InfrastructureReady openstackcontrolplane/openstack -n ${NAMESPACE} --timeout=20m; then
443+
echo "✓ Infrastructure is ready!"
444+
else
445+
echo "Warning: Timeout waiting for InfrastructureReady condition"
446+
echo "Check status manually:"
447+
echo " oc get openstackcontrolplane openstack -n ${NAMESPACE} -o jsonpath='{.status.conditions}'"
448+
fi
449+
echo ""
437450

438-
if [ "${RABBITMQ_COUNT}" -gt 0 ]; then
439-
echo ""
440-
echo "RabbitMQ clusters found:"
441-
oc get rabbitmq -n ${NAMESPACE}
442-
echo ""
451+
echo "Verifying infrastructure components..."
452+
echo "Galera clusters:"
453+
oc get galera -n ${NAMESPACE} || echo " No Galera clusters found"
454+
echo ""
455+
echo "OVN database clusters:"
456+
oc get ovndbcluster -n ${NAMESPACE} || echo " No OVN database clusters found"
457+
echo ""
458+
echo "RabbitMQ clusters:"
459+
oc get rabbitmq -n ${NAMESPACE} || echo " No RabbitMQ clusters found"
460+
echo ""
461+
echo "Memcached instances:"
462+
oc get memcached -n ${NAMESPACE} || echo " No Memcached instances found"
463+
echo ""
443464

444-
# Check if all are ready
445-
NOT_READY=$(oc get rabbitmq -n ${NAMESPACE} -o json | jq '[.items[] | select(.status.conditions[] | select(.type=="Ready" and .status!="True"))] | length')
465+
echo "========================================"
466+
echo "Step 11: Restore Database Contents"
467+
echo "========================================"
468+
echo ""
469+
echo "CRITICAL: Restore database contents while services are NOT running."
470+
echo "This is only possible because of the staged deployment pause."
471+
echo ""
472+
echo "You must restore databases using separate procedures:"
473+
echo " - MariaDB: Use backup-mariadb.sh and restore-mariadb.sh"
474+
echo " - OVN Databases: Use OVN database backup/restore procedures"
475+
echo ""
476+
echo "After database restore is complete, press Enter to continue..."
477+
read -p "Have you completed database restore? (yes/no): " DB_RESTORE_CONFIRM
446478

447-
if [ "${NOT_READY}" -eq 0 ]; then
448-
echo "✓ All RabbitMQ clusters are ready!"
449-
break
450-
else
451-
echo "Waiting for ${NOT_READY} RabbitMQ cluster(s) to become ready..."
452-
fi
453-
else
454-
echo " No RabbitMQ clusters yet... (attempt $((WAIT_COUNT+1))/${MAX_WAIT})"
479+
if [ "${DB_RESTORE_CONFIRM}" != "yes" ]; then
480+
echo ""
481+
echo "⚠️ WARNING: Database restore is required before continuing!"
482+
echo "Without database restore, OpenStack services will initialize fresh schemas."
483+
echo ""
484+
read -p "Continue anyway without database restore? (yes/no): " SKIP_DB_CONFIRM
485+
486+
if [ "${SKIP_DB_CONFIRM}" != "yes" ]; then
487+
echo "Aborting. Restore databases and then resume with:"
488+
echo " oc annotate openstackcontrolplane openstack -n ${NAMESPACE} core.openstack.org/deployment-stage-"
489+
popd > /dev/null
490+
rm -rf ${WORK_DIR}
491+
exit 1
455492
fi
456-
457-
sleep 30
458-
WAIT_COUNT=$((WAIT_COUNT+1))
459-
done
460-
461-
if [ ${WAIT_COUNT} -ge ${MAX_WAIT} ]; then
462-
echo "Warning: Timeout waiting for RabbitMQ clusters. Check manually:"
463-
echo " oc get rabbitmq -n ${NAMESPACE}"
464-
echo " oc get pods -n ${NAMESPACE} | grep rabbitmq"
493+
else
494+
echo "✓ Database restore completed"
465495
fi
466496
echo ""
467497

468-
echo "Checking RabbitMQ pods..."
469-
oc get pods -n ${NAMESPACE} | grep rabbitmq || echo "No RabbitMQ pods found yet"
470-
echo ""
471-
472498
# RabbitMQ User Restoration
473499
if [ "${SKIP_RABBITMQ_RESTORE}" != "true" ]; then
474500
echo "========================================"
475-
echo "Step 11: Restore RabbitMQ User Credentials"
501+
echo "Step 12: Restore RabbitMQ User Credentials"
476502
echo "========================================"
477503
echo ""
478504
echo "⚠️ CRITICAL FOR EDPM/DATA PLANE DEPLOYMENTS ⚠️"
@@ -566,12 +592,63 @@ else
566592
fi
567593
echo ""
568594

595+
echo "========================================"
596+
echo "Step 13: Resume Deployment"
597+
echo "========================================"
598+
echo ""
599+
echo "Now that databases and RabbitMQ credentials are restored,"
600+
echo "remove the staged deployment annotation to resume deployment."
601+
echo ""
602+
echo "This will:"
603+
echo " 1. Create all OpenStack services (Keystone, Nova, Neutron, Glance, etc.)"
604+
echo " 2. Services start and connect to the already-restored databases"
605+
echo " 3. Services connect to RabbitMQ using the restored credentials"
606+
echo " 4. No database initialization or db_sync needed (data already restored)"
607+
echo ""
608+
read -p "Ready to resume deployment? (yes/no): " RESUME_CONFIRM
609+
610+
if [ "${RESUME_CONFIRM}" != "yes" ]; then
611+
echo ""
612+
echo "⚠️ Deployment is still paused with annotation."
613+
echo "To resume later, run:"
614+
echo " oc annotate openstackcontrolplane openstack -n ${NAMESPACE} core.openstack.org/deployment-stage-"
615+
popd > /dev/null
616+
rm -rf ${WORK_DIR}
617+
exit 0
618+
fi
619+
620+
echo "Removing deployment-stage annotation..."
621+
oc annotate openstackcontrolplane openstack -n ${NAMESPACE} \
622+
core.openstack.org/deployment-stage-
623+
echo "✓ Annotation removed, deployment resuming"
624+
echo ""
625+
626+
echo "Monitoring deployment progress..."
627+
echo "Services will be created and started with restored data"
628+
echo ""
629+
sleep 5
630+
631+
echo "Current OpenStackControlPlane status:"
632+
oc get openstackcontrolplane openstack -n ${NAMESPACE}
633+
echo ""
634+
635+
echo "Waiting for OpenStackControlPlane to become Ready..."
636+
echo "This may take 10-30 minutes depending on the deployment size..."
637+
if oc wait --for=condition=Ready openstackcontrolplane/openstack -n ${NAMESPACE} --timeout=30m; then
638+
echo "✓ OpenStackControlPlane is Ready!"
639+
else
640+
echo "Warning: Timeout waiting for Ready condition"
641+
echo "Deployment may still be in progress. Check manually:"
642+
echo " oc get openstackcontrolplane openstack -n ${NAMESPACE} --watch"
643+
fi
644+
echo ""
645+
569646
# Return to original directory and cleanup temporary directory
570647
popd > /dev/null
571648
rm -rf ${WORK_DIR}
572649

573650
echo "========================================"
574-
echo "Restore completed!"
651+
echo "Restore Completed!"
575652
echo "========================================"
576653
echo ""
577654
echo "Next steps:"

0 commit comments

Comments
 (0)