@@ -404,6 +404,10 @@ func restore(
404404 job := resumer .job
405405 details := job .Details ().(jobspb.RestoreDetails )
406406
407+ // resolvedURIs holds the main URIs after resolving any external:// aliases.
408+ // This is set in the OnlineImpl block below and used by runRestore.
409+ resolvedURIs := details .URIs
410+
407411 if details .OnlineImpl () {
408412 var linkPhaseComplete bool
409413 if err := execCtx .ExecCfg ().InternalDB .Txn (restoreCtx , func (ctx context.Context , txn isql.Txn ) error {
@@ -417,6 +421,18 @@ func restore(
417421 if linkPhaseComplete {
418422 return emptyRowCount , nil
419423 }
424+
425+ // If any URIs utilize external:// aliases, we need to resolve the alias
426+ // to its underlying URI before feeding it into online restore. This
427+ // applies to the main URIs, the locality info URIs, and the backup
428+ // manifest Dir fields.
429+ var err error
430+ resolvedURIs , backupLocalityInfo , err = resolveExternalStorageURIs (
431+ restoreCtx , execCtx , details .URIs , backupLocalityInfo , backupManifests ,
432+ )
433+ if err != nil {
434+ return emptyRowCount , errors .Wrap (err , "resolving external storage URIs for online restore" )
435+ }
420436 }
421437
422438 // If we've already migrated some of the system tables we're about to
@@ -510,6 +526,7 @@ func restore(
510526 filter ,
511527 fsc ,
512528 spanCh ,
529+ false , /* useLink */
513530 ), "generate and send import spans" )
514531 }
515532
@@ -553,9 +570,15 @@ func restore(
553570 tasks = append (tasks , jobProgressLoop )
554571 }
555572
573+ // Check if online restore should use the distributed flow with file linking
574+ // instead of the simpler sendAddRemoteSSTs loop.
575+ useDistFlow := onlineRestoreUseDistFlow .Get (& execCtx .ExecCfg ().Settings .SV )
576+
556577 progCh := make (chan * execinfrapb.RemoteProducerMetadata_BulkProcessorProgress )
557- if ! details .OnlineImpl () {
558- // Online restore tracks progress by pinging requestFinishedCh instead
578+ // Start the progress checkpoint loop if this is a traditional restore OR
579+ // an online restore using the distributed flow (which also sends progress
580+ // updates via progCh).
581+ if ! details .OnlineImpl () || useDistFlow {
559582 generativeCheckpointLoop := func (ctx context.Context ) error {
560583 defer close (requestFinishedCh )
561584 for progress := range progCh {
@@ -599,7 +622,10 @@ func restore(
599622 }
600623
601624 resumeClusterVersion := execCtx .ExecCfg ().Settings .Version .ActiveVersion (restoreCtx ).Version
602- if clusterversion .V24_3 .Version ().LessEq (resumeClusterVersion ) && ! details .OnlineImpl () {
625+ // Start the countCompletedProcLoop if this is a traditional restore OR
626+ // an online restore using the distributed flow (which also sends processor
627+ // completion signals via procCompleteCh).
628+ if clusterversion .V24_3 .Version ().LessEq (resumeClusterVersion ) && (! details .OnlineImpl () || useDistFlow ) {
603629 tasks = append (tasks , countCompletedProcLoop )
604630 }
605631
@@ -628,39 +654,78 @@ func restore(
628654 runRestore := func (ctx context.Context ) error {
629655 if details .OnlineImpl () {
630656 log .Dev .Warningf (ctx , "EXPERIMENTAL ONLINE RESTORE being used" )
631- approxRows , approxDataSize , err := sendAddRemoteSSTs (
632- ctx ,
633- execCtx ,
634- job ,
635- dataToRestore ,
636- encryption ,
637- details .URIs ,
638- backupLocalityInfo ,
639- requestFinishedCh ,
640- tracingAggCh ,
641- genSpan ,
642- )
643- progressTracker .mu .Lock ()
644- defer progressTracker .mu .Unlock ()
645- // During the link phase of online restore, we do not update stats
646- // progress as job occurs. We merely reuse the `progressTracker.mu.res`
647- // var to reduce the number of local vars floating around in `restore`.
648- progressTracker .mu .res = roachpb.RowCount {Rows : approxRows , DataSize : approxDataSize }
649- return errors .Wrap (err , "sending remote AddSSTable requests" )
657+
658+ // Use the bespoke online restore path that directly splits and links from
659+ // the coordinator without a distSQL restore flow.
660+ if ! useDistFlow {
661+ approxRows , approxDataSize , err := sendAddRemoteSSTs (
662+ ctx ,
663+ execCtx ,
664+ job ,
665+ dataToRestore ,
666+ encryption ,
667+ resolvedURIs ,
668+ backupLocalityInfo ,
669+ requestFinishedCh ,
670+ tracingAggCh ,
671+ genSpan ,
672+ )
673+ progressTracker .mu .Lock ()
674+ defer progressTracker .mu .Unlock ()
675+ // During the link phase of online restore, we do not update stats
676+ // progress as job occurs. We merely reuse the `progressTracker.mu.res`
677+ // var to reduce the number of local vars floating around in `restore`.
678+ progressTracker .mu .res = roachpb.RowCount {Rows : approxRows , DataSize : approxDataSize }
679+ return errors .Wrap (err , "sending remote AddSSTable requests" )
680+ }
681+
682+ // If we did not switch to the bespoke online restore path, we'll proceed
683+ // to the normal restore distSQL flow and let its split and scatter
684+ // processor direct the restore data processors to link files rather than
685+ // ingest them. But we do need to pre-split at the top-level logical spans
686+ // that are being restored, as stored in DownloadSpan, as these are what
687+ // we will clear via kv/pebble excises if we fail at any point after we
688+ // start linking files.
689+ //
690+ // TODO(dt): we should record in persisted progress when we are ready to
691+ // enter the linking/ingesting phase, i.e. *after* we make these splits,
692+ // so any failures prior to it can skip cleanup, as that cleanup could
693+ // fail if we failed prior to making these splits.
694+ var prevSplit roachpb.Key
695+ for _ , span := range details .DownloadSpans {
696+ if ! span .Key .Equal (prevSplit ) {
697+ if err := execCtx .ExecCfg ().DB .AdminSplit (ctx , span .Key , hlc .MaxTimestamp ); err != nil {
698+ return errors .Wrapf (err , "pre-splitting at key %s" , span .Key )
699+ }
700+ }
701+ if err := execCtx .ExecCfg ().DB .AdminSplit (ctx , span .EndKey , hlc .MaxTimestamp ); err != nil {
702+ return errors .Wrapf (err , "pre-splitting at key %s" , span .EndKey )
703+ }
704+ prevSplit = span .EndKey
705+ }
706+ }
707+
708+ // Use the distributed restore flow. If this is an online restore (with
709+ // useDistFlow, since we returned above otherwise), link files instead of
710+ // ingesting them.
711+ useLink := details .OnlineImpl ()
712+ if useLink {
713+ log .Dev .Infof (ctx , "online restore using distributed flow with file linking" )
650714 }
651715 md := restoreJobMetadata {
652716 jobID : job .ID (),
653717 dataToRestore : dataToRestore ,
654718 restoreTime : endTime ,
655719 encryption : encryption ,
656720 kmsEnv : kmsEnv ,
657- uris : details . URIs ,
721+ uris : resolvedURIs ,
658722 backupLocalityInfo : backupLocalityInfo ,
659723 spanFilter : filter ,
660724 numImportSpans : numImportSpans ,
661725 execLocality : details .ExecutionLocality ,
662726 exclusiveEndKeys : fsc .isExclusive (),
663727 resumeClusterVersion : resumeClusterVersion ,
728+ useLink : useLink ,
664729 }
665730 return errors .Wrap (distRestore (
666731 ctx ,
0 commit comments