@@ -13,9 +13,12 @@ set -e
1313# 5. MariaDBDatabase CRs (needs database password secrets)
1414# 6. MariaDBAccount CRs (needs MariaDBDatabase CRs)
1515# 7. Related CRs (NetConfig, OpenStackVersion, Topology)
16- # 8. OpenStackControlPlane CR (triggers operator reconciliation)
17- # 9. Operators create Certificate CRs → cert-manager issues fresh certificates
18- # 10. Manual RabbitMQ user restoration
16+ # 8. OpenStackControlPlane CR with staged deployment annotation
17+ # 9. Wait for InfrastructureReady condition
18+ # 10. Restore database contents (MariaDB and OVN)
19+ # 11. Restore RabbitMQ user credentials
20+ # 12. Resume deployment (remove annotation)
21+ # 13. Operators create Certificate CRs → cert-manager issues fresh certificates
1922#
2023# NOTE: Certificate CRs and certificate secrets are NOT restored.
2124# Operators recreate Certificate CRs during reconciliation, and cert-manager
@@ -396,83 +399,106 @@ fi
396399echo " "
397400
398401echo " ========================================"
399- echo " Step 10: Restore OpenStackControlPlane CR"
402+ echo " Step 10: Restore OpenStackControlPlane CR with Staged Deployment "
400403echo " ========================================"
401404echo " "
402- echo " When the OpenStackControlPlane CR is restored, operators will:"
403- echo " 1. Reconcile and create Certificate CRs for all services"
404- echo " 2. cert-manager will issue fresh certificates from the restored CAs"
405- echo " 3. Services will use new certificates with fresh expiry dates"
405+ echo " CRITICAL: Using staged deployment annotation to pause after infrastructure creation."
406+ echo " This allows database restore before OpenStack services start."
406407echo " "
407- read -p " Ready to restore OpenStackControlPlane CR? This will trigger operator reconciliation. (yes/no): " RESTORE_CONFIRM
408+ echo " When the OpenStackControlPlane CR is restored with annotation, operators will:"
409+ echo " 1. Create infrastructure: Galera, OVN, RabbitMQ, Memcached"
410+ echo " 2. Create Certificate CRs for infrastructure services"
411+ echo " 3. cert-manager issues fresh certificates from the restored CAs"
412+ echo " 4. PAUSE - OpenStack services (Keystone, Nova, etc.) are NOT created yet"
413+ echo " 5. Set InfrastructureReady condition to True"
414+ echo " "
415+ read -p " Ready to restore OpenStackControlPlane CR with staged deployment? (yes/no): " RESTORE_CONFIRM
408416
409417if [ "${RESTORE_CONFIRM}" != "yes" ]; then
410418 echo " Aborting. You can manually restore later with:"
411419 echo " cd ${BACKUP_DIR} "
412- echo " oc apply -f openstackcontrolplane-backup.json -n ${NAMESPACE} "
420+ echo " jq '.items[0].metadata.annotations[\"core.openstack.org/deployment-stage\"] = \"infrastructure-only\"' openstackcontrolplane-backup.json > openstackcontrolplane-staged.json"
421+ echo " oc apply -f openstackcontrolplane-staged.json -n ${NAMESPACE} "
413422 popd > /dev/null
414423 rm -rf ${WORK_DIR}
415424 exit 1
416425fi
417426
418- oc apply -f openstackcontrolplane-backup.json -n ${NAMESPACE}
419- echo " ✓ OpenStackControlPlane CR restored"
420- echo " "
427+ echo " Adding deployment-stage annotation..."
428+ CTLPLANE_STAGED=$( mktemp)
429+ jq ' .items[0].metadata.annotations["core.openstack.org/deployment-stage"] = "infrastructure-only"' \
430+ openstackcontrolplane-backup.json > ${CTLPLANE_STAGED}
421431
422- echo " Waiting for operator reconciliation to start..."
423- sleep 10
432+ oc apply -f ${CTLPLANE_STAGED} -n ${NAMESPACE}
433+ rm -f ${CTLPLANE_STAGED}
434+ echo " ✓ OpenStackControlPlane CR restored with staged deployment annotation"
424435echo " "
425436
426- echo " Checking RabbitMQ cluster status ..."
427- oc get rabbitmq -n ${NAMESPACE} || echo " No RabbitMQ resources yet "
437+ echo " Waiting for infrastructure to be ready ..."
438+ echo " This may take several minutes... "
428439echo " "
429440
430- echo " Waiting for RabbitMQ clusters to be created and ready..."
431- echo " This may take several minutes. Checking every 30 seconds..."
432- WAIT_COUNT=0
433- MAX_WAIT=20 # 10 minutes max
434-
435- while [ ${WAIT_COUNT} -lt ${MAX_WAIT} ]; do
436- RABBITMQ_COUNT=$( oc get rabbitmq -n ${NAMESPACE} --no-headers 2> /dev/null | wc -l)
441+ echo " Waiting for InfrastructureReady condition..."
442+ if oc wait --for=condition=InfrastructureReady openstackcontrolplane/openstack -n ${NAMESPACE} --timeout=20m; then
443+ echo " ✓ Infrastructure is ready!"
444+ else
445+ echo " Warning: Timeout waiting for InfrastructureReady condition"
446+ echo " Check status manually:"
447+ echo " oc get openstackcontrolplane openstack -n ${NAMESPACE} -o jsonpath='{.status.conditions}'"
448+ fi
449+ echo " "
437450
438- if [ " ${RABBITMQ_COUNT} " -gt 0 ]; then
439- echo " "
440- echo " RabbitMQ clusters found:"
441- oc get rabbitmq -n ${NAMESPACE}
442- echo " "
451+ echo " Verifying infrastructure components..."
452+ echo " Galera clusters:"
453+ oc get galera -n ${NAMESPACE} || echo " No Galera clusters found"
454+ echo " "
455+ echo " OVN database clusters:"
456+ oc get ovndbcluster -n ${NAMESPACE} || echo " No OVN database clusters found"
457+ echo " "
458+ echo " RabbitMQ clusters:"
459+ oc get rabbitmq -n ${NAMESPACE} || echo " No RabbitMQ clusters found"
460+ echo " "
461+ echo " Memcached instances:"
462+ oc get memcached -n ${NAMESPACE} || echo " No Memcached instances found"
463+ echo " "
443464
444- # Check if all are ready
445- NOT_READY=$( oc get rabbitmq -n ${NAMESPACE} -o json | jq ' [.items[] | select(.status.conditions[] | select(.type=="Ready" and .status!="True"))] | length' )
465+ echo " ========================================"
466+ echo " Step 11: Restore Database Contents"
467+ echo " ========================================"
468+ echo " "
469+ echo " CRITICAL: Restore database contents while services are NOT running."
470+ echo " This is only possible because of the staged deployment pause."
471+ echo " "
472+ echo " You must restore databases using separate procedures:"
473+ echo " - MariaDB: Use backup-mariadb.sh and restore-mariadb.sh"
474+ echo " - OVN Databases: Use OVN database backup/restore procedures"
475+ echo " "
476+ echo " After database restore is complete, answer 'yes' at the prompt below to continue..."
477+ read -p " Have you completed database restore? (yes/no): " DB_RESTORE_CONFIRM
446478
447- if [ " ${NOT_READY} " -eq 0 ]; then
448- echo " ✓ All RabbitMQ clusters are ready!"
449- break
450- else
451- echo " Waiting for ${NOT_READY} RabbitMQ cluster(s) to become ready..."
452- fi
453- else
454- echo " No RabbitMQ clusters yet... (attempt $(( WAIT_COUNT+ 1 )) /${MAX_WAIT} )"
479+ if [ "${DB_RESTORE_CONFIRM}" != "yes" ]; then
480+ echo " "
481+ echo " ⚠️ WARNING: Database restore is required before continuing!"
482+ echo " Without database restore, OpenStack services will initialize fresh schemas."
483+ echo " "
484+ read -p " Continue anyway without database restore? (yes/no): " SKIP_DB_CONFIRM
485+
486+ if [ "${SKIP_DB_CONFIRM}" != "yes" ]; then
487+ echo " Aborting. Restore databases and then resume with:"
488+ echo " oc annotate openstackcontrolplane openstack -n ${NAMESPACE} core.openstack.org/deployment-stage-"
489+ popd > /dev/null
490+ rm -rf ${WORK_DIR}
491+ exit 1
455492 fi
456-
457- sleep 30
458- WAIT_COUNT=$(( WAIT_COUNT+ 1 ))
459- done
460-
461- if [ ${WAIT_COUNT} -ge ${MAX_WAIT} ]; then
462- echo " Warning: Timeout waiting for RabbitMQ clusters. Check manually:"
463- echo " oc get rabbitmq -n ${NAMESPACE} "
464- echo " oc get pods -n ${NAMESPACE} | grep rabbitmq"
493+ else
494+ echo " ✓ Database restore completed"
465495fi
466496echo " "
467497
468- echo " Checking RabbitMQ pods..."
469- oc get pods -n ${NAMESPACE} | grep rabbitmq || echo " No RabbitMQ pods found yet"
470- echo " "
471-
472498# RabbitMQ User Restoration
473499if [ "${SKIP_RABBITMQ_RESTORE}" != "true" ]; then
474500 echo " ========================================"
475- echo " Step 11 : Restore RabbitMQ User Credentials"
501+ echo " Step 12 : Restore RabbitMQ User Credentials"
476502 echo " ========================================"
477503 echo " "
478504 echo " ⚠️ CRITICAL FOR EDPM/DATA PLANE DEPLOYMENTS ⚠️"
@@ -566,12 +592,63 @@ else
566592fi
567593echo " "
568594
595+ echo " ========================================"
596+ echo " Step 13: Resume Deployment"
597+ echo " ========================================"
598+ echo " "
599+ echo " Now that databases and RabbitMQ credentials are restored,"
600+ echo " remove the staged deployment annotation to resume deployment."
601+ echo " "
602+ echo " This will:"
603+ echo " 1. Create all OpenStack services (Keystone, Nova, Neutron, Glance, etc.)"
604+ echo " 2. Services start and connect to the already-restored databases"
605+ echo " 3. Services connect to RabbitMQ using the restored credentials"
606+ echo " 4. No database initialization or db_sync needed (data already restored)"
607+ echo " "
608+ read -p " Ready to resume deployment? (yes/no): " RESUME_CONFIRM
609+
610+ if [ "${RESUME_CONFIRM}" != "yes" ]; then
611+ echo " "
612+ echo " ⚠️ Deployment is still paused with annotation."
613+ echo " To resume later, run:"
614+ echo " oc annotate openstackcontrolplane openstack -n ${NAMESPACE} core.openstack.org/deployment-stage-"
615+ popd > /dev/null
616+ rm -rf ${WORK_DIR}
617+ exit 0
618+ fi
619+
620+ echo " Removing deployment-stage annotation..."
621+ oc annotate openstackcontrolplane openstack -n ${NAMESPACE} \
622+ core.openstack.org/deployment-stage-
623+ echo " ✓ Annotation removed, deployment resuming"
624+ echo " "
625+
626+ echo " Monitoring deployment progress..."
627+ echo " Services will be created and started with restored data"
628+ echo " "
629+ sleep 5
630+
631+ echo " Current OpenStackControlPlane status:"
632+ oc get openstackcontrolplane openstack -n ${NAMESPACE}
633+ echo " "
634+
635+ echo " Waiting for OpenStackControlPlane to become Ready..."
636+ echo " This may take 10-30 minutes depending on the deployment size..."
637+ if oc wait --for=condition=Ready openstackcontrolplane/openstack -n ${NAMESPACE} --timeout=30m; then
638+ echo " ✓ OpenStackControlPlane is Ready!"
639+ else
640+ echo " Warning: Timeout waiting for Ready condition"
641+ echo " Deployment may still be in progress. Check manually:"
642+ echo " oc get openstackcontrolplane openstack -n ${NAMESPACE} --watch"
643+ fi
644+ echo " "
645+
569646# Return to original directory and cleanup temporary directory
570647popd > /dev/null
571648rm -rf ${WORK_DIR}
572649
573650echo " ========================================"
574- echo " Restore completed !"
651+ echo " Restore Completed !"
575652echo " ========================================"
576653echo " "
577654echo " Next steps:"
0 commit comments