@@ -131,11 +131,23 @@ public virtual bool TryDownloadPrefetchPacks(GitProcess gitProcess, long latestT
131131 {
132132 long bytesDownloaded = 0 ;
133133
134+ /* Distrusting the indexes from the server is a security feature to prevent a compromised server from sending a
135+ * pack file and an index file that do not match.
136+ * Eventually we will make this the default, but it has a high performance cost for the first prefetch after
137+ * cloning a large repository, so it must be explicitly enabled for now. */
138+ bool trustPackIndexes = true ;
139+ if ( gitProcess . TryGetFromConfig ( GVFSConstants . GitConfig . TrustPackIndexes , forceOutsideEnlistment : false , out var valueString )
140+ && bool . TryParse ( valueString , out var trustPackIndexesConfig ) )
141+ {
142+ trustPackIndexes = trustPackIndexesConfig ;
143+ }
144+ metadata . Add ( "trustPackIndexes" , trustPackIndexes ) ;
145+
134146 long requestId = HttpRequestor . GetNewRequestId ( ) ;
135147 List < string > innerPackIndexes = null ;
136148 RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . InvocationResult result = this . GitObjectRequestor . TrySendProtocolRequest (
137149 requestId : requestId ,
138- onSuccess : ( tryCount , response ) => this . DeserializePrefetchPacks ( response , ref latestTimestamp , ref bytesDownloaded , ref innerPackIndexes , gitProcess ) ,
150+ onSuccess : ( tryCount , response ) => this . DeserializePrefetchPacks ( response , ref latestTimestamp , ref bytesDownloaded , ref innerPackIndexes , gitProcess , trustPackIndexes ) ,
139151 onFailure : RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . StandardErrorHandler ( activity , requestId , "TryDownloadPrefetchPacks" ) ,
140152 method : HttpMethod . Get ,
141153 endPointGenerator : ( ) => new Uri (
@@ -662,7 +674,8 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha)
662674 ref long latestTimestamp ,
663675 ref long bytesDownloaded ,
664676 ref List < string > packIndexes ,
665- GitProcess gitProcess )
677+ GitProcess gitProcess ,
678+ bool trustPackIndexes )
666679 {
667680 if ( packIndexes == null )
668681 {
@@ -676,11 +689,16 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha)
676689 string tempPackFolderPath = Path . Combine ( this . Enlistment . GitPackRoot , TempPackFolder ) ;
677690 this . fileSystem . CreateDirectory ( tempPackFolderPath ) ;
678691
679- List < TempPrefetchPackAndIdx > tempPacks = new List < TempPrefetchPackAndIdx > ( ) ;
680- foreach ( PrefetchPacksDeserializer . PackAndIndex pack in deserializer . EnumeratePacks ( ) )
692+ var tempPacksTasks = new List < Task < TempPrefetchPackAndIdx > > ( ) ;
693+ // Future: We could manage cancellation of index building tasks if one fails (to stop indexing of later
694+ // files if an early one fails), but in practice the first pack file takes the majority of the time and
695+ // all the others will finish long before it so there would be no benefit to doing so.
696+ bool allSucceeded = true ;
697+
698+ // Read each pack from the stream to a temp file, and start a task to index it.
699+ foreach ( PrefetchPacksDeserializer . PackAndIndex packHandle in deserializer . EnumeratePacks ( ) )
681700 {
682- // The advertised size may not match the actual, on-disk size.
683- long indexLength = 0 ;
701+ var pack = packHandle ; // Capture packHandle in a new variable to avoid closure issues with async index task
684702 long packLength ;
685703
686704 // Write the temp and index to a temp folder to avoid putting corrupt files in the pack folder
@@ -701,87 +719,131 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha)
701719 if ( ! this . TryWriteTempFile ( activity , pack . PackStream , packTempPath , out packLength , out packFlushTask ) )
702720 {
703721 bytesDownloaded += packLength ;
704- return new RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . CallbackResult ( null , true ) ;
722+ allSucceeded = false ;
723+ break ;
705724 }
706725
707726 bytesDownloaded += packLength ;
708727
709- // We will try to build an index if the server does not send one
710- if ( pack . IndexStream = = null )
728+ if ( trustPackIndexes
729+ && pack . IndexStream ! = null )
711730 {
712- GitProcess . Result result ;
713- if ( ! this . TryBuildIndex ( activity , packTempPath , out result , gitProcess ) )
731+ // The server provided an index stream, we can trust it, and were able to read it successfully, so we just need to wait for the index to flush.
732+ if ( this . TryWriteTempFile ( activity , pack . IndexStream , idxTempPath , out var indexLength , out var indexFlushTask ) )
714733 {
715- if ( packFlushTask != null )
716- {
717- packFlushTask . Wait ( ) ;
718- }
719-
720- // Move whatever has been successfully downloaded so far
721- Exception moveException ;
722- this . TryFlushAndMoveTempPacks ( tempPacks , ref latestTimestamp , out moveException ) ;
723-
724- return new RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . CallbackResult ( null , true ) ;
734+ bytesDownloaded += indexLength ;
735+ tempPacksTasks . Add ( Task . FromResult (
736+ new TempPrefetchPackAndIdx ( pack . Timestamp , packName , packTempPath , packFlushTask , idxName , idxTempPath , indexFlushTask ) ) ) ;
737+ }
738+ else
739+ {
740+ bytesDownloaded += indexLength ;
741+ // we can try to build the index ourself, and if it's successful then on the retry we can pick up from that point.
742+ var indexTask = StartPackIndexAsync ( activity , pack , packName , packTempPath , idxName , idxTempPath , packFlushTask ) ;
743+ tempPacksTasks . Add ( indexTask ) ;
744+ // but we need to stop trying to read from the download stream as that has failed.
745+ allSucceeded = false ;
746+ break ;
725747 }
726-
727- tempPacks . Add ( new TempPrefetchPackAndIdx ( pack . Timestamp , packName , packTempPath , packFlushTask , idxName , idxTempPath , idxFlushTask : null ) ) ;
728748 }
729749 else
730750 {
731- Task indexFlushTask ;
732- if ( this . TryWriteTempFile ( activity , pack . IndexStream , idxTempPath , out indexLength , out indexFlushTask ) )
733- {
734- tempPacks . Add ( new TempPrefetchPackAndIdx ( pack . Timestamp , packName , packTempPath , packFlushTask , idxName , idxTempPath , indexFlushTask ) ) ;
735- }
736- else
751+ // Either we can't trust the index file from the server, or the server didn't provide one, so we will build our own.
752+ // For performance, we run the index build in the background while we continue downloading the next pack.
753+ var indexTask = StartPackIndexAsync ( activity , pack , packName , packTempPath , idxName , idxTempPath , packFlushTask ) ;
754+ tempPacksTasks . Add ( indexTask ) ;
755+
756+ // If the server provided an index stream, we still need to consume and handle any exceptions it even
757+ // though we are otherwise ignoring it.
758+ if ( pack . IndexStream != null )
737759 {
738- bytesDownloaded += indexLength ;
739-
740- // Try to build the index manually, then retry the prefetch
741- GitProcess . Result result ;
742- if ( this . TryBuildIndex ( activity , packTempPath , out result , gitProcess ) )
743- {
744- // If we were able to recreate the failed index
745- // we can start the prefetch at the next timestamp
746- tempPacks . Add ( new TempPrefetchPackAndIdx ( pack . Timestamp , packName , packTempPath , packFlushTask , idxName , idxTempPath , idxFlushTask : null ) ) ;
747- }
748- else
760+ try
749761 {
750- if ( packFlushTask != null )
762+ bytesDownloaded += pack . IndexStream . Length ;
763+ if ( pack . IndexStream . CanSeek )
751764 {
752- packFlushTask . Wait ( ) ;
765+ pack . IndexStream . Seek ( 0 , SeekOrigin . End ) ;
766+ }
767+ else
768+ {
769+ pack . IndexStream . CopyTo ( Stream . Null ) ;
753770 }
754771 }
755-
756- // Move whatever has been successfully downloaded so far
757- Exception moveException ;
758- this . TryFlushAndMoveTempPacks ( tempPacks , ref latestTimestamp , out moveException ) ;
759-
760- // The download stream will not be in a good state if the index download fails.
761- // So we have to restart the prefetch
762- return new RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . CallbackResult ( null , true ) ;
772+ catch ( Exception e )
773+ {
774+ EventMetadata metadata = CreateEventMetadata ( e ) ;
775+ activity . RelatedWarning ( metadata , "Failed to read to end of index stream" ) ;
776+ allSucceeded = false ;
777+ break ;
778+ }
763779 }
764780 }
781+ }
765782
766- bytesDownloaded += indexLength ;
783+ // Wait for the index tasks to complete. If any fail, we still copy the prior successful ones
784+ // to the pack folder so that the retry will be incremental from where the failure occurred.
785+ var tempPacks = new List < TempPrefetchPackAndIdx > ( ) ;
786+ bool indexTasksSucceededSoFar = true ;
787+ foreach ( var task in tempPacksTasks )
788+ {
789+ TempPrefetchPackAndIdx tempPack = task . Result ;
790+ if ( tempPack != null && indexTasksSucceededSoFar )
791+ {
792+ tempPacks . Add ( tempPack ) ;
793+ }
794+ else
795+ {
796+ indexTasksSucceededSoFar = false ;
797+ tempPack ? . PackFlushTask . Wait ( ) ;
798+ break ;
799+ }
767800 }
801+ allSucceeded = allSucceeded && indexTasksSucceededSoFar ;
768802
769803 Exception exception = null ;
770804 if ( ! this . TryFlushAndMoveTempPacks ( tempPacks , ref latestTimestamp , out exception ) )
771805 {
772- return new RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . CallbackResult ( exception , true ) ;
806+ allSucceeded = false ;
773807 }
774808
775809 foreach ( TempPrefetchPackAndIdx tempPack in tempPacks )
776810 {
777811 packIndexes . Add ( tempPack . IdxName ) ;
778812 }
779813
780- return new RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . CallbackResult (
781- new GitObjectsHttpRequestor . GitObjectTaskResult ( success : true ) ) ;
814+ if ( allSucceeded )
815+ {
816+ return new RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . CallbackResult (
817+ new GitObjectsHttpRequestor . GitObjectTaskResult ( success : true ) ) ;
818+ }
819+ else
820+ {
821+ return new RetryWrapper < GitObjectsHttpRequestor . GitObjectTaskResult > . CallbackResult ( exception , shouldRetry : true ) ;
822+ }
782823 }
783824 }
784825
826+ private Task < TempPrefetchPackAndIdx > StartPackIndexAsync ( ITracer activity , PrefetchPacksDeserializer . PackAndIndex pack , string packName , string packTempPath , string idxName , string idxTempPath , Task packFlushTask )
827+ {
828+ var indexTask = Task . Run ( async ( ) =>
829+ {
830+ // GitProcess only permits one process per instance at a time, so we need to duplicate it to run the index build in parallel.
831+ // This is safe because each process is only accessing the pack file we direct it to which is not yet part
832+ // of the enlistment.
833+ GitProcess gitProcessForIndex = new GitProcess ( this . Enlistment ) ;
834+ if ( this . TryBuildIndex ( activity , packTempPath , out var _ , gitProcessForIndex ) )
835+ {
836+ return new TempPrefetchPackAndIdx ( pack . Timestamp , packName , packTempPath , packFlushTask , idxName , idxTempPath , idxFlushTask : null ) ;
837+ }
838+ else
839+ {
840+ await packFlushTask ;
841+ return null ;
842+ }
843+ } ) ;
844+ return indexTask ;
845+ }
846+
785847 private bool TryFlushAndMoveTempPacks ( List < TempPrefetchPackAndIdx > tempPacks , ref long latestTimestamp , out Exception exception )
786848 {
787849 exception = null ;
0 commit comments