Skip to content

Commit 9d079d8

Browse files
committed
feat: Improved working directory traverse.
1 parent 66f64d4 commit 9d079d8

File tree

3 files changed

+178
-65
lines changed

3 files changed

+178
-65
lines changed

GitReader.Core/Internal/WorkingDirectoryAccessor.cs

Lines changed: 176 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -19,126 +19,175 @@ namespace GitReader.Internal;
1919
internal static class WorkingDirectoryAccessor
2020
{
2121
/// <summary>
22-
/// Scans the working directory for untracked files.
22+
/// Traverse context.
2323
/// </summary>
24-
/// <param name="repository">The repository to scan.</param>
25-
/// <param name="workingDirectoryPath">The path to the working directory.</param>
24+
/// <remarks>
25+
/// This class is used to limit the number of parallel tasks.
26+
/// The limit is enforced only loosely, so the actual number of running tasks may exceed it.
27+
/// </remarks>
28+
private sealed class TraverseContext
29+
{
30+
// Ceiling on the running task count. This is a soft limit; the actual count may exceed it.
31+
private readonly int ceilingRunningCount;
32+
// Current running task count.
33+
private int runningCount = 0;
34+
35+
public readonly Repository Repository;
36+
public readonly string WorkingDirectoryPath;
37+
public readonly HashSet<string> ProcessedPaths;
38+
public readonly GlobFilter OverrideGlobFilter;
39+
public readonly List<PrimitiveWorkingDirectoryFile> UntrackedFiles;
40+
public readonly CancellationToken CancellationToken;
41+
42+
/// <summary>
43+
/// Initializes a new instance.
44+
/// </summary>
45+
/// <param name="repository">The repository to scan.</param>
46+
/// <param name="workingDirectoryPath">The path to the working directory.</param>
47+
/// <param name="processedPaths">The paths that have already been processed.</param>
48+
/// <param name="overrideGlobFilter">The override glob filter.</param>
49+
/// <param name="untrackedFiles">The list of untracked files (output)</param>
50+
/// <param name="cancellationToken">The cancellation token.</param>
51+
/// <param name="ceilingRunningCount">Ceiling task count.</param>
52+
public TraverseContext(
53+
Repository repository,
54+
string workingDirectoryPath,
55+
HashSet<string> processedPaths,
56+
GlobFilter overrideGlobFilter,
57+
List<PrimitiveWorkingDirectoryFile> untrackedFiles,
58+
CancellationToken cancellationToken,
59+
int ceilingRunningCount)
60+
{
61+
this.Repository = repository;
62+
this.WorkingDirectoryPath = workingDirectoryPath;
63+
this.ProcessedPaths = processedPaths;
64+
this.OverrideGlobFilter = overrideGlobFilter;
65+
this.UntrackedFiles = untrackedFiles;
66+
this.CancellationToken = cancellationToken;
67+
this.ceilingRunningCount = ceilingRunningCount;
68+
}
69+
70+
/// <summary>
71+
/// Gets a value indicating whether parallel execution is allowed.
72+
/// </summary>
73+
public bool CanParallel =>
74+
this.runningCount < this.ceilingRunningCount;
75+
76+
/// <summary>
77+
/// Increments the running count.
78+
/// </summary>
79+
/// <param name="count">The count to increment.</param>
80+
public void IncrementRunningCount(int count) =>
81+
Interlocked.Add(ref this.runningCount, count);
82+
83+
/// <summary>
84+
/// Decrements the running count.
85+
/// </summary>
86+
/// <param name="count">The count to decrement.</param>
87+
public void DecrementRunningCount(int count) =>
88+
Interlocked.Add(ref this.runningCount, -count);
89+
}
90+
91+
/// <summary>
92+
/// Scans the working directory for untracked files recursively.
93+
/// </summary>
94+
/// <param name="context">Traverse context</param>
2695
/// <param name="currentPath">The current path to scan.</param>
27-
/// <param name="processedPaths">The paths that have already been processed.</param>
28-
/// <param name="overrideGlobFilter">The override glob filter.</param>
29-
/// <param name="parentPathFilter">The parent path filter.</param>
30-
/// <param name="untrackedFiles">The list of untracked files (output)</param>
31-
/// <param name="ct">The cancellation token.</param>
96+
/// <param name="parentGlobFilter">The parent glob filter.</param>
3297
#if NET45_OR_GREATER || NETSTANDARD || NETCOREAPP2_1_OR_GREATER
33-
public static async ValueTask ScanWorkingDirectoryRecursiveAsync(
34-
Repository repository,
35-
string workingDirectoryPath,
98+
private static async ValueTask ExtractUntrackedFilesRecursiveAsync(
99+
TraverseContext context,
36100
string currentPath,
37-
HashSet<string> processedPaths,
38-
GlobFilter overrideGlobFilter,
39-
GlobFilter parentPathFilter,
40-
List<PrimitiveWorkingDirectoryFile> untrackedFiles,
41-
CancellationToken ct)
101+
GlobFilter parentGlobFilter)
42102
#else
43-
public static async Task ScanWorkingDirectoryRecursiveAsync(
44-
Repository repository,
45-
string workingDirectoryPath,
103+
private static async Task ExtractUntrackedFilesRecursiveAsync(
104+
TraverseContext context,
46105
string currentPath,
47-
HashSet<string> processedPaths,
48-
GlobFilter overrideGlobFilter,
49-
GlobFilter parentPathFilter,
50-
List<PrimitiveWorkingDirectoryFile> untrackedFiles,
51-
CancellationToken ct)
106+
GlobFilter parentGlobFilter)
52107
#endif
53108
{
54109
// Skip .git directory/file (hardcoded exclusion, same as Git official behavior)
55-
var currentName = repository.fileSystem.GetFileName(currentPath);
110+
var currentName = context.Repository.fileSystem.GetFileName(currentPath);
56111
if (currentName.Equals(".git", StringComparison.OrdinalIgnoreCase))
57112
{
58113
return;
59114
}
60115

61116
try
62117
{
63-
if (!await repository.fileSystem.IsDirectoryExistsAsync(currentPath, ct))
118+
if (!await context.Repository.fileSystem.IsDirectoryExistsAsync(currentPath, context.CancellationToken))
64119
{
65120
return;
66121
}
67122

68123
// Read .gitignore in current directory and combine with pathFilter.
69-
GlobFilter candidatePathFilter;
70-
GlobFilter exactlyPathFilter;
71-
var gitignorePath = repository.fileSystem.Combine(currentPath, ".gitignore");
124+
GlobFilter candidateGlobFilter;
125+
var gitignorePath = context.Repository.fileSystem.Combine(currentPath, ".gitignore");
72126
try
73127
{
74128
// When .gitignore exists
75-
if (await repository.fileSystem.IsFileExistsAsync(gitignorePath, ct))
129+
if (await context.Repository.fileSystem.IsFileExistsAsync(gitignorePath, context.CancellationToken))
76130
{
77131
// Generate .gitignore filter
78-
using var gitignoreStream = await repository.fileSystem.OpenAsync(gitignorePath, false, ct);
79-
var gitignoreFilter = await Glob.CreateExcludeFilterFromGitignoreAsync(gitignoreStream, ct);
132+
using var gitignoreStream = await context.Repository.fileSystem.OpenAsync(gitignorePath, false, context.CancellationToken);
133+
var gitignoreFilter = await Glob.CreateExcludeFilterFromGitignoreAsync(gitignoreStream, context.CancellationToken);
80134

81135
// Combine filters with correct order: parent filter, .gitignore filter, override filter
82-
candidatePathFilter = Glob.Combine([parentPathFilter, gitignoreFilter]);
83-
exactlyPathFilter = Glob.Combine([parentPathFilter, gitignoreFilter, overrideGlobFilter]);
136+
candidateGlobFilter = Glob.Combine([parentGlobFilter, gitignoreFilter]);
84137
}
85138
else
86139
{
87140
// When .gitignore does not exist, continue with parent filter
88-
candidatePathFilter = parentPathFilter;
89-
exactlyPathFilter = Glob.Combine([parentPathFilter, overrideGlobFilter]);
141+
candidateGlobFilter = parentGlobFilter;
90142
}
91143
}
92144
catch
93145
{
94146
// If .gitignore cannot be read, continue with parent filter
95-
candidatePathFilter = parentPathFilter;
96-
exactlyPathFilter = Glob.Combine([parentPathFilter, overrideGlobFilter]);
147+
candidateGlobFilter = parentGlobFilter;
97148
}
98-
149+
99150
// Scan directory entries
100-
var entries = await repository.fileSystem.GetDirectoryEntriesAsync(currentPath, ct);
101-
await Utilities.WhenAll(entries.Select(async entry =>
151+
var entries = await context.Repository.fileSystem.GetDirectoryEntriesAsync(currentPath, context.CancellationToken);
152+
153+
// Build the sequence of sub-tasks, one per directory entry
154+
var tasks = entries.Select(async entry =>
102155
{
103156
// Skip .git directory/files (hardcoded exclusion matching Git's behavior)
104-
var fileName = repository.fileSystem.GetFileName(entry);
157+
var fileName = context.Repository.fileSystem.GetFileName(entry);
105158
if (fileName.Equals(".git", StringComparison.OrdinalIgnoreCase))
106159
{
107160
return;
108161
}
109162

110163
// Get relative path and filter it
111-
var relativePath = repository.fileSystem.ToPosixPath(
112-
repository.fileSystem.GetRelativePath(workingDirectoryPath, entry));
113-
var filterDecision = exactlyPathFilter(
114-
GlobFilterStates.NotExclude, // Start from neutral.
115-
relativePath);
164+
var relativePath = context.Repository.fileSystem.ToPosixPath(
165+
context.Repository.fileSystem.GetRelativePath(context.WorkingDirectoryPath, entry));
166+
167+
var exactlyPathFilter = Glob.Combine([candidateGlobFilter, context.OverrideGlobFilter]);
168+
var filterResult = Glob.ApplyFilter(exactlyPathFilter, relativePath);
116169

117170
// When entry is excluded, ignore it.
118-
if (filterDecision == GlobFilterStates.Exclude)
171+
if (filterResult == GlobFilterStates.Exclude)
119172
{
120173
return;
121174
}
122175

123176
// When entry is a directory
124-
if (await repository.fileSystem.IsDirectoryExistsAsync(entry, ct))
177+
if (await context.Repository.fileSystem.IsDirectoryExistsAsync(entry, context.CancellationToken))
125178
{
126179
// Recursively scan subdirectories with the current candidate filter
127-
await ScanWorkingDirectoryRecursiveAsync(
128-
repository, workingDirectoryPath, entry,
129-
processedPaths,
130-
overrideGlobFilter, candidatePathFilter,
131-
untrackedFiles,
132-
ct);
180+
await ExtractUntrackedFilesRecursiveAsync(
181+
context, entry, candidateGlobFilter);
133182
}
134183
// When entry is a file
135-
else if (await repository.fileSystem.IsFileExistsAsync(entry, ct))
184+
else if (await context.Repository.fileSystem.IsFileExistsAsync(entry, context.CancellationToken))
136185
{
137186
// When entry is a file, add it to untracked files if it is not processed yet
138-
if (!processedPaths.Contains(relativePath))
187+
if (!context.ProcessedPaths.Contains(relativePath))
139188
{
140189
// This is an untracked file that passes the filter
141-
var fileHash = await CalculateFileHashAsync(repository, entry, ct);
190+
var fileHash = await CalculateFileHashAsync(context.Repository, entry, context.CancellationToken);
142191

143192
var untrackedFile = new PrimitiveWorkingDirectoryFile(
144193
relativePath,
@@ -147,13 +196,37 @@ await ScanWorkingDirectoryRecursiveAsync(
147196
fileHash);
148197

149198
// Avoid race condition
150-
lock (untrackedFiles)
199+
lock (context.UntrackedFiles)
151200
{
152-
untrackedFiles.Add(untrackedFile);
201+
context.UntrackedFiles.Add(untrackedFile);
153202
}
154203
}
155204
}
156-
}));
205+
});
206+
207+
// Limits the number of parallel tasks
208+
var canParallel = context.CanParallel;
209+
context.IncrementRunningCount(entries.Length);
210+
try
211+
{
212+
// When parallel is allowed, run all tasks in parallel
213+
if (canParallel)
214+
{
215+
await Utilities.WhenAll(tasks);
216+
}
217+
else
218+
{
219+
// When parallel is not allowed, run all tasks sequentially
220+
foreach (var task in tasks)
221+
{
222+
await task;
223+
}
224+
}
225+
}
226+
finally
227+
{
228+
context.DecrementRunningCount(entries.Length);
229+
}
157230
}
158231
catch (UnauthorizedAccessException)
159232
{
@@ -165,6 +238,48 @@ await ScanWorkingDirectoryRecursiveAsync(
165238
}
166239
}
167240

241+
/// <summary>
242+
/// Scans the working directory for untracked files.
243+
/// </summary>
244+
/// <param name="repository">The repository to scan.</param>
245+
/// <param name="workingDirectoryPath">The path to the working directory.</param>
246+
/// <param name="processedPaths">The paths that have already been processed.</param>
247+
/// <param name="overrideGlobFilter">The override glob filter.</param>
248+
/// <param name="untrackedFiles">The list of untracked files (output)</param>
249+
/// <param name="ct">The cancellation token.</param>
250+
#if NET45_OR_GREATER || NETSTANDARD || NETCOREAPP2_1_OR_GREATER
251+
public static ValueTask ExtractUntrackedFilesAsync(
252+
Repository repository,
253+
string workingDirectoryPath,
254+
HashSet<string> processedPaths,
255+
GlobFilter overrideGlobFilter,
256+
List<PrimitiveWorkingDirectoryFile> untrackedFiles,
257+
CancellationToken ct) =>
258+
#else
259+
public static Task ExtractUntrackedFilesAsync(
260+
Repository repository,
261+
string workingDirectoryPath,
262+
HashSet<string> processedPaths,
263+
GlobFilter overrideGlobFilter,
264+
List<PrimitiveWorkingDirectoryFile> untrackedFiles,
265+
CancellationToken ct) =>
266+
#endif
267+
ExtractUntrackedFilesRecursiveAsync(
268+
new TraverseContext(
269+
repository,
270+
workingDirectoryPath,
271+
processedPaths,
272+
overrideGlobFilter,
273+
untrackedFiles,
274+
ct,
275+
// Strictly speaking, this should be a value that depends on the degree of I/O parallelism the machine can sustain,
276+
// but it is very difficult to determine it mechanically.
277+
// Therefore, we use the number of processors as a substitute.
278+
Environment.ProcessorCount),
279+
workingDirectoryPath,
280+
// Initial glob filter (always nothing)
281+
Glob.nothingFilter);
282+
168283
/// <summary>
169284
/// Builds a dictionary of file paths and their hashes from a tree.
170285
/// </summary>

GitReader.Core/Primitive/PrimitiveRepositoryFacade.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,13 +354,11 @@ public static async Task<ReadOnlyArray<PrimitiveWorkingDirectoryFile>> GetUntrac
354354
var untrackedFiles = new List<PrimitiveWorkingDirectoryFile>();
355355

356356
// Find untracked files in working directory
357-
await WorkingDirectoryAccessor.ScanWorkingDirectoryRecursiveAsync(
357+
await WorkingDirectoryAccessor.ExtractUntrackedFilesAsync(
358358
repository,
359359
workingDirectoryStatus.workingDirectoryPath,
360-
workingDirectoryStatus.workingDirectoryPath,
361360
new(workingDirectoryStatus.processedPaths),
362361
overrideGlobFilter, // Override path filter
363-
Glob.nothingFilter, // Initial path filter (always nothing)
364362
untrackedFiles, // Results
365363
ct);
366364

GitReader.Core/Primitive/PrimitiveWorkingDirectoryStatus.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,4 @@ public override int GetHashCode()
9595
/// <returns>A string that represents the current instance.</returns>
9696
public override string ToString() =>
9797
$"Staged: {this.StagedFiles.Count}, Unstaged: {this.UnstagedFiles.Count}";
98-
}
98+
}

0 commit comments

Comments
 (0)