Skip to content

Commit 45fc82a

Browse files
authored
Merge pull request #1847 from microsoft/milestones/m259
Release M259
2 parents 5520b05 + 26b88aa commit 45fc82a

File tree

4 files changed

+132
-56
lines changed

4 files changed

+132
-56
lines changed

GVFS/GVFS.Common/GVFSConstants.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ public static class GitConfig
3737
public const string GVFSTelemetryPipe = GitConfig.GVFSPrefix + "telemetry-pipe";
3838
public const string IKey = GitConfig.GVFSPrefix + "ikey";
3939
public const string HooksExtension = ".hooks";
40+
41+
/* Intended to be a temporary config to allow testing of distrusting pack indexes from cache server
42+
* before it is enabled by default. */
43+
public const string TrustPackIndexes = GVFSPrefix + "trust-pack-indexes";
4044
}
4145

4246
public static class LocalGVFSConfig

GVFS/GVFS.Common/Git/GitAuthentication.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ public void ConfigureHttpClientHandlerSslIfNeeded(ITracer tracer, HttpClientHand
233233
{
234234
if (this.GitSsl != null && !this.GitSsl.ShouldVerify)
235235
{
236-
httpClientHandler.ServerCertificateCustomValidationCallback =
236+
httpClientHandler.ServerCertificateCustomValidationCallback = // CodeQL [SM02184] TLS verification can be disabled by Git itself, so this is just mirroring a feature already exposed.
237237
(httpRequestMessage, c, cetChain, policyErrors) =>
238238
{
239239
return true;

GVFS/GVFS.Common/Git/GitObjects.cs

Lines changed: 116 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,23 @@ public virtual bool TryDownloadPrefetchPacks(GitProcess gitProcess, long latestT
131131
{
132132
long bytesDownloaded = 0;
133133

134+
/* Distrusting the indexes from the server is a security feature to prevent a compromised server from sending a
135+
* pack file and an index file that do not match.
136+
* Eventually we will make this the default, but it has a high performance cost for the first prefetch after
137+
* cloning a large repository, so it must be explicitly enabled for now. */
138+
bool trustPackIndexes = true;
139+
if (gitProcess.TryGetFromConfig(GVFSConstants.GitConfig.TrustPackIndexes, forceOutsideEnlistment: false, out var valueString)
140+
&& bool.TryParse(valueString, out var trustPackIndexesConfig))
141+
{
142+
trustPackIndexes = trustPackIndexesConfig;
143+
}
144+
metadata.Add("trustPackIndexes", trustPackIndexes);
145+
134146
long requestId = HttpRequestor.GetNewRequestId();
135147
List<string> innerPackIndexes = null;
136148
RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.InvocationResult result = this.GitObjectRequestor.TrySendProtocolRequest(
137149
requestId: requestId,
138-
onSuccess: (tryCount, response) => this.DeserializePrefetchPacks(response, ref latestTimestamp, ref bytesDownloaded, ref innerPackIndexes, gitProcess),
150+
onSuccess: (tryCount, response) => this.DeserializePrefetchPacks(response, ref latestTimestamp, ref bytesDownloaded, ref innerPackIndexes, gitProcess, trustPackIndexes),
139151
onFailure: RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.StandardErrorHandler(activity, requestId, "TryDownloadPrefetchPacks"),
140152
method: HttpMethod.Get,
141153
endPointGenerator: () => new Uri(
@@ -662,7 +674,8 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha)
662674
ref long latestTimestamp,
663675
ref long bytesDownloaded,
664676
ref List<string> packIndexes,
665-
GitProcess gitProcess)
677+
GitProcess gitProcess,
678+
bool trustPackIndexes)
666679
{
667680
if (packIndexes == null)
668681
{
@@ -676,11 +689,16 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha)
676689
string tempPackFolderPath = Path.Combine(this.Enlistment.GitPackRoot, TempPackFolder);
677690
this.fileSystem.CreateDirectory(tempPackFolderPath);
678691

679-
List<TempPrefetchPackAndIdx> tempPacks = new List<TempPrefetchPackAndIdx>();
680-
foreach (PrefetchPacksDeserializer.PackAndIndex pack in deserializer.EnumeratePacks())
692+
var tempPacksTasks = new List<Task<TempPrefetchPackAndIdx>>();
693+
// Future: We could manage cancellation of index building tasks if one fails (to stop indexing of later
694+
// files if an early one fails), but in practice the first pack file takes the majority of the time and
695+
// all the others will finish long before it so there would be no benefit to doing so.
696+
bool allSucceeded = true;
697+
698+
// Read each pack from the stream to a temp file, and start a task to index it.
699+
foreach (PrefetchPacksDeserializer.PackAndIndex packHandle in deserializer.EnumeratePacks())
681700
{
682-
// The advertised size may not match the actual, on-disk size.
683-
long indexLength = 0;
701+
var pack = packHandle; // Capture packHandle in a new variable to avoid closure issues with async index task
684702
long packLength;
685703

686704
// Write the temp and index to a temp folder to avoid putting corrupt files in the pack folder
@@ -701,87 +719,131 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha)
701719
if (!this.TryWriteTempFile(activity, pack.PackStream, packTempPath, out packLength, out packFlushTask))
702720
{
703721
bytesDownloaded += packLength;
704-
return new RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.CallbackResult(null, true);
722+
allSucceeded = false;
723+
break;
705724
}
706725

707726
bytesDownloaded += packLength;
708727

709-
// We will try to build an index if the server does not send one
710-
if (pack.IndexStream == null)
728+
if (trustPackIndexes
729+
&& pack.IndexStream != null)
711730
{
712-
GitProcess.Result result;
713-
if (!this.TryBuildIndex(activity, packTempPath, out result, gitProcess))
731+
// The server provided an index stream, we can trust it, and were able to read it successfully, so we just need to wait for the index to flush.
732+
if (this.TryWriteTempFile(activity, pack.IndexStream, idxTempPath, out var indexLength, out var indexFlushTask))
714733
{
715-
if (packFlushTask != null)
716-
{
717-
packFlushTask.Wait();
718-
}
719-
720-
// Move whatever has been successfully downloaded so far
721-
Exception moveException;
722-
this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out moveException);
723-
724-
return new RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.CallbackResult(null, true);
734+
bytesDownloaded += indexLength;
735+
tempPacksTasks.Add(Task.FromResult(
736+
new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, indexFlushTask)));
737+
}
738+
else
739+
{
740+
bytesDownloaded += indexLength;
741+
// we can try to build the index ourself, and if it's successful then on the retry we can pick up from that point.
742+
var indexTask = StartPackIndexAsync(activity, pack, packName, packTempPath, idxName, idxTempPath, packFlushTask);
743+
tempPacksTasks.Add(indexTask);
744+
// but we need to stop trying to read from the download stream as that has failed.
745+
allSucceeded = false;
746+
break;
725747
}
726-
727-
tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null));
728748
}
729749
else
730750
{
731-
Task indexFlushTask;
732-
if (this.TryWriteTempFile(activity, pack.IndexStream, idxTempPath, out indexLength, out indexFlushTask))
733-
{
734-
tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, indexFlushTask));
735-
}
736-
else
751+
// Either we can't trust the index file from the server, or the server didn't provide one, so we will build our own.
752+
// For performance, we run the index build in the background while we continue downloading the next pack.
753+
var indexTask = StartPackIndexAsync(activity, pack, packName, packTempPath, idxName, idxTempPath, packFlushTask);
754+
tempPacksTasks.Add(indexTask);
755+
756+
// If the server provided an index stream, we still need to consume and handle any exceptions it even
757+
// though we are otherwise ignoring it.
758+
if (pack.IndexStream != null)
737759
{
738-
bytesDownloaded += indexLength;
739-
740-
// Try to build the index manually, then retry the prefetch
741-
GitProcess.Result result;
742-
if (this.TryBuildIndex(activity, packTempPath, out result, gitProcess))
743-
{
744-
// If we were able to recreate the failed index
745-
// we can start the prefetch at the next timestamp
746-
tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null));
747-
}
748-
else
760+
try
749761
{
750-
if (packFlushTask != null)
762+
bytesDownloaded += pack.IndexStream.Length;
763+
if (pack.IndexStream.CanSeek)
751764
{
752-
packFlushTask.Wait();
765+
pack.IndexStream.Seek(0, SeekOrigin.End);
766+
}
767+
else
768+
{
769+
pack.IndexStream.CopyTo(Stream.Null);
753770
}
754771
}
755-
756-
// Move whatever has been successfully downloaded so far
757-
Exception moveException;
758-
this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out moveException);
759-
760-
// The download stream will not be in a good state if the index download fails.
761-
// So we have to restart the prefetch
762-
return new RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.CallbackResult(null, true);
772+
catch (Exception e)
773+
{
774+
EventMetadata metadata = CreateEventMetadata(e);
775+
activity.RelatedWarning(metadata, "Failed to read to end of index stream");
776+
allSucceeded = false;
777+
break;
778+
}
763779
}
764780
}
781+
}
765782

766-
bytesDownloaded += indexLength;
783+
// Wait for the index tasks to complete. If any fail, we still copy the prior successful ones
784+
// to the pack folder so that the retry will be incremental from where the failure occurred.
785+
var tempPacks = new List<TempPrefetchPackAndIdx>();
786+
bool indexTasksSucceededSoFar = true;
787+
foreach (var task in tempPacksTasks)
788+
{
789+
TempPrefetchPackAndIdx tempPack = task.Result;
790+
if (tempPack != null && indexTasksSucceededSoFar)
791+
{
792+
tempPacks.Add(tempPack);
793+
}
794+
else
795+
{
796+
indexTasksSucceededSoFar = false;
797+
tempPack?.PackFlushTask.Wait();
798+
break;
799+
}
767800
}
801+
allSucceeded = allSucceeded && indexTasksSucceededSoFar;
768802

769803
Exception exception = null;
770804
if (!this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out exception))
771805
{
772-
return new RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.CallbackResult(exception, true);
806+
allSucceeded = false;
773807
}
774808

775809
foreach (TempPrefetchPackAndIdx tempPack in tempPacks)
776810
{
777811
packIndexes.Add(tempPack.IdxName);
778812
}
779813

780-
return new RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.CallbackResult(
781-
new GitObjectsHttpRequestor.GitObjectTaskResult(success: true));
814+
if (allSucceeded)
815+
{
816+
return new RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.CallbackResult(
817+
new GitObjectsHttpRequestor.GitObjectTaskResult(success: true));
818+
}
819+
else
820+
{
821+
return new RetryWrapper<GitObjectsHttpRequestor.GitObjectTaskResult>.CallbackResult(exception, shouldRetry: true);
822+
}
782823
}
783824
}
784825

826+
private Task<TempPrefetchPackAndIdx> StartPackIndexAsync(ITracer activity, PrefetchPacksDeserializer.PackAndIndex pack, string packName, string packTempPath, string idxName, string idxTempPath, Task packFlushTask)
827+
{
828+
var indexTask = Task.Run(async () =>
829+
{
830+
// GitProcess only permits one process per instance at a time, so we need to duplicate it to run the index build in parallel.
831+
// This is safe because each process is only accessing the pack file we direct it to which is not yet part
832+
// of the enlistment.
833+
GitProcess gitProcessForIndex = new GitProcess(this.Enlistment);
834+
if (this.TryBuildIndex(activity, packTempPath, out var _, gitProcessForIndex))
835+
{
836+
return new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null);
837+
}
838+
else
839+
{
840+
await packFlushTask;
841+
return null;
842+
}
843+
});
844+
return indexTask;
845+
}
846+
785847
private bool TryFlushAndMoveTempPacks(List<TempPrefetchPackAndIdx> tempPacks, ref long latestTimestamp, out Exception exception)
786848
{
787849
exception = null;

GVFS/GVFS.Common/Git/GitProcess.cs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,17 @@ public Result VerifyCommitGraph(string objectDir)
580580

581581
public Result IndexPack(string packfilePath, string idxOutputPath)
582582
{
583-
return this.InvokeGitAgainstDotGitFolder($"index-pack -o \"{idxOutputPath}\" \"{packfilePath}\"");
583+
/* Git's default thread count is Environment.ProcessorCount / 2, with a maximum of 20.
584+
* Testing shows that we can get a 5% decrease in gvfs clone time for large repositories by using more threads, but
585+
* we won't go over ProcessorCount or 20. */
586+
var threadCount = Math.Min(Environment.ProcessorCount, 20);
587+
string command = $"index-pack --threads={threadCount} -o \"{idxOutputPath}\" \"{packfilePath}\"";
588+
589+
// If index-pack is invoked within an enlistment, then it reads all the other objects and pack indexes available
590+
// in the enlistment in order to verify references from within this pack file, even if --verify or similar
591+
// options are not passed.
592+
// Since we aren't verifying, we invoke index-pack outside the enlistment for performance.
593+
return this.InvokeGitOutsideEnlistment(command);
584594
}
585595

586596
/// <summary>

0 commit comments

Comments
 (0)