@@ -19,126 +19,175 @@ namespace GitReader.Internal;
1919internal static class WorkingDirectoryAccessor
2020{
2121 /// <summary>
22- /// Scans the working directory for untracked files .
22+ /// Traverse context .
2323 /// </summary>
24- /// <param name="repository">The repository to scan.</param>
25- /// <param name="workingDirectoryPath">The path to the working directory.</param>
24+ /// <remarks>
25+ /// Carries the state shared by the whole scan and tracks a running task count used to limit parallelism.
26+ /// The limit is deliberately loose: the ceiling is only advisory and the running count may exceed it.
27+ /// </remarks>
28+ private sealed class TraverseContext
29+ {
30+ // Soft ceiling for the running task count. It is advisory only: nothing blocks when it is reached, so the count may exceed it.
31+ private readonly int ceilingRunningCount ;
32+ // Current running task count; modified only through Interlocked.Add in the methods below.
33+ private int runningCount = 0 ;
34+
35+ public readonly Repository Repository ;
36+ public readonly string WorkingDirectoryPath ;
37+ public readonly HashSet < string > ProcessedPaths ;
38+ public readonly GlobFilter OverrideGlobFilter ;
39+ public readonly List < PrimitiveWorkingDirectoryFile > UntrackedFiles ;
40+ public readonly CancellationToken CancellationToken ;
41+
42+ /// <summary>
43+ /// Initializes a new instance; every argument is stored as-is and the running count starts at zero.
44+ /// </summary>
45+ /// <param name="repository">The repository to scan.</param>
46+ /// <param name="workingDirectoryPath">The path to the working directory.</param>
47+ /// <param name="processedPaths">The paths that have already been processed.</param>
48+ /// <param name="overrideGlobFilter">The override glob filter.</param>
49+ /// <param name="untrackedFiles">The list that receives the untracked files (output).</param>
50+ /// <param name="cancellationToken">The cancellation token.</param>
51+ /// <param name="ceilingRunningCount">Soft ceiling for the running task count; parallel execution is allowed only while the count is below it.</param>
52+ public TraverseContext (
53+ Repository repository ,
54+ string workingDirectoryPath ,
55+ HashSet < string > processedPaths ,
56+ GlobFilter overrideGlobFilter ,
57+ List < PrimitiveWorkingDirectoryFile > untrackedFiles ,
58+ CancellationToken cancellationToken ,
59+ int ceilingRunningCount )
60+ {
61+ this . Repository = repository ;
62+ this . WorkingDirectoryPath = workingDirectoryPath ;
63+ this . ProcessedPaths = processedPaths ;
64+ this . OverrideGlobFilter = overrideGlobFilter ;
65+ this . UntrackedFiles = untrackedFiles ;
66+ this . CancellationToken = cancellationToken ;
67+ this . ceilingRunningCount = ceilingRunningCount ;
68+ }
69+
70+ /// <summary>
71+ /// Gets a value indicating whether parallel execution is allowed, i.e. the running count (read with a plain field read) is below the ceiling.
72+ /// </summary>
73+ public bool CanParallel =>
74+ this . runningCount < this . ceilingRunningCount ;
75+
76+ /// <summary>
77+ /// Atomically adds the given amount to the running count.
78+ /// </summary>
79+ /// <param name="count">The amount to add to the running count.</param>
80+ public void IncrementRunningCount ( int count ) =>
81+ Interlocked . Add ( ref this . runningCount , count ) ;
82+
83+ /// <summary>
84+ /// Atomically subtracts the given amount from the running count.
85+ /// </summary>
86+ /// <param name="count">The amount to subtract from the running count.</param>
87+ public void DecrementRunningCount ( int count ) =>
88+ Interlocked . Add ( ref this . runningCount , - count ) ;
89+ }
90+
91+ /// <summary>
92+ /// Scans the working directory for untracked files recursively.
93+ /// </summary>
94+ /// <param name="context">Traverse context</param>
2695 /// <param name="currentPath">The current path to scan.</param>
27- /// <param name="processedPaths">The paths that have already been processed.</param>
28- /// <param name="overrideGlobFilter">The override glob filter.</param>
29- /// <param name="parentPathFilter">The parent path filter.</param>
30- /// <param name="untrackedFiles">The list of untracked files (output)</param>
31- /// <param name="ct">The cancellation token.</param>
96+ /// <param name="parentGlobFilter">The parent glob filter.</param>
3297#if NET45_OR_GREATER || NETSTANDARD || NETCOREAPP2_1_OR_GREATER
33- public static async ValueTask ScanWorkingDirectoryRecursiveAsync (
34- Repository repository ,
35- string workingDirectoryPath ,
98+ private static async ValueTask ExtractUntrackedFilesRecursiveAsync (
99+ TraverseContext context ,
36100 string currentPath ,
37- HashSet < string > processedPaths ,
38- GlobFilter overrideGlobFilter ,
39- GlobFilter parentPathFilter ,
40- List < PrimitiveWorkingDirectoryFile > untrackedFiles ,
41- CancellationToken ct )
101+ GlobFilter parentGlobFilter )
42102#else
43- public static async Task ScanWorkingDirectoryRecursiveAsync (
44- Repository repository ,
45- string workingDirectoryPath ,
103+ private static async Task ExtractUntrackedFilesRecursiveAsync (
104+ TraverseContext context ,
46105 string currentPath ,
47- HashSet < string > processedPaths ,
48- GlobFilter overrideGlobFilter ,
49- GlobFilter parentPathFilter ,
50- List < PrimitiveWorkingDirectoryFile > untrackedFiles ,
51- CancellationToken ct )
106+ GlobFilter parentGlobFilter )
52107#endif
53108 {
54109 // Skip .git directory/file (hardcoded exclusion, same as Git official behavior)
55- var currentName = repository . fileSystem . GetFileName ( currentPath ) ;
110+ var currentName = context . Repository . fileSystem . GetFileName ( currentPath ) ;
56111 if ( currentName . Equals ( ".git" , StringComparison . OrdinalIgnoreCase ) )
57112 {
58113 return ;
59114 }
60115
61116 try
62117 {
63- if ( ! await repository . fileSystem . IsDirectoryExistsAsync ( currentPath , ct ) )
118+ if ( ! await context . Repository . fileSystem . IsDirectoryExistsAsync ( currentPath , context . CancellationToken ) )
64119 {
65120 return ;
66121 }
67122
68123 // Read .gitignore in current directory and combine with pathFilter.
69- GlobFilter candidatePathFilter ;
70- GlobFilter exactlyPathFilter ;
71- var gitignorePath = repository . fileSystem . Combine ( currentPath , ".gitignore" ) ;
124+ GlobFilter candidateGlobFilter ;
125+ var gitignorePath = context . Repository . fileSystem . Combine ( currentPath , ".gitignore" ) ;
72126 try
73127 {
74128 // When .gitignore exists
75- if ( await repository . fileSystem . IsFileExistsAsync ( gitignorePath , ct ) )
129+ if ( await context . Repository . fileSystem . IsFileExistsAsync ( gitignorePath , context . CancellationToken ) )
76130 {
77131 // Generate .gitignore filter
78- using var gitignoreStream = await repository . fileSystem . OpenAsync ( gitignorePath , false , ct ) ;
79- var gitignoreFilter = await Glob . CreateExcludeFilterFromGitignoreAsync ( gitignoreStream , ct ) ;
132+ using var gitignoreStream = await context . Repository . fileSystem . OpenAsync ( gitignorePath , false , context . CancellationToken ) ;
133+ var gitignoreFilter = await Glob . CreateExcludeFilterFromGitignoreAsync ( gitignoreStream , context . CancellationToken ) ;
80134
81135 // Combine filters in order: parent filter, then .gitignore filter (the override filter is appended per entry below)
82- candidatePathFilter = Glob . Combine ( [ parentPathFilter , gitignoreFilter ] ) ;
83- exactlyPathFilter = Glob . Combine ( [ parentPathFilter , gitignoreFilter , overrideGlobFilter ] ) ;
136+ candidateGlobFilter = Glob . Combine ( [ parentGlobFilter , gitignoreFilter ] ) ;
84137 }
85138 else
86139 {
87140 // When .gitignore does not exist, continue with parent filter
88- candidatePathFilter = parentPathFilter ;
89- exactlyPathFilter = Glob . Combine ( [ parentPathFilter , overrideGlobFilter ] ) ;
141+ candidateGlobFilter = parentGlobFilter ;
90142 }
91143 }
92144 catch
93145 {
94146 // If .gitignore cannot be read, continue with parent filter
95- candidatePathFilter = parentPathFilter ;
96- exactlyPathFilter = Glob . Combine ( [ parentPathFilter , overrideGlobFilter ] ) ;
147+ candidateGlobFilter = parentGlobFilter ;
97148 }
98-
149+
99150 // Scan directory entries
100- var entries = await repository . fileSystem . GetDirectoryEntriesAsync ( currentPath , ct ) ;
101- await Utilities . WhenAll ( entries . Select ( async entry =>
151+ var entries = await context . Repository . fileSystem . GetDirectoryEntriesAsync ( currentPath , context . CancellationToken ) ;
152+
153+ // Build the per-entry sub-tasks as a lazy sequence (each sub-task starts only when the sequence is enumerated)
154+ var tasks = entries . Select ( async entry =>
102155 {
103156 // Skip .git directory/files (hardcoded exclusion matching Git's behavior)
104- var fileName = repository . fileSystem . GetFileName ( entry ) ;
157+ var fileName = context . Repository . fileSystem . GetFileName ( entry ) ;
105158 if ( fileName . Equals ( ".git" , StringComparison . OrdinalIgnoreCase ) )
106159 {
107160 return ;
108161 }
109162
110163 // Get relative path and filter it
111- var relativePath = repository . fileSystem . ToPosixPath (
112- repository . fileSystem . GetRelativePath ( workingDirectoryPath , entry ) ) ;
113- var filterDecision = exactlyPathFilter (
114- GlobFilterStates . NotExclude , // Start from neutral.
115- relativePath ) ;
164+ var relativePath = context . Repository . fileSystem . ToPosixPath (
165+ context . Repository . fileSystem . GetRelativePath ( context . WorkingDirectoryPath , entry ) ) ;
166+
167+ var exactlyPathFilter = Glob . Combine ( [ candidateGlobFilter , context . OverrideGlobFilter ] ) ;
168+ var filterResult = Glob . ApplyFilter ( exactlyPathFilter , relativePath ) ;
116169
117170 // When entry is excluded, ignore it.
118- if ( filterDecision == GlobFilterStates . Exclude )
171+ if ( filterResult == GlobFilterStates . Exclude )
119172 {
120173 return ;
121174 }
122175
123176 // When entry is a directory
124- if ( await repository . fileSystem . IsDirectoryExistsAsync ( entry , ct ) )
177+ if ( await context . Repository . fileSystem . IsDirectoryExistsAsync ( entry , context . CancellationToken ) )
125178 {
126179 // Recursively scan subdirectories with the current candidate filter
127- await ScanWorkingDirectoryRecursiveAsync (
128- repository , workingDirectoryPath , entry ,
129- processedPaths ,
130- overrideGlobFilter , candidatePathFilter ,
131- untrackedFiles ,
132- ct ) ;
180+ await ExtractUntrackedFilesRecursiveAsync (
181+ context , entry , candidateGlobFilter ) ;
133182 }
134183 // When entry is a file
135- else if ( await repository . fileSystem . IsFileExistsAsync ( entry , ct ) )
184+ else if ( await context . Repository . fileSystem . IsFileExistsAsync ( entry , context . CancellationToken ) )
136185 {
137186 // When entry is a file, add it to untracked files if it is not processed yet
138- if ( ! processedPaths . Contains ( relativePath ) )
187+ if ( ! context . ProcessedPaths . Contains ( relativePath ) )
139188 {
140189 // This is an untracked file that passes the filter
141- var fileHash = await CalculateFileHashAsync ( repository , entry , ct ) ;
190+ var fileHash = await CalculateFileHashAsync ( context . Repository , entry , context . CancellationToken ) ;
142191
143192 var untrackedFile = new PrimitiveWorkingDirectoryFile (
144193 relativePath ,
@@ -147,13 +196,37 @@ await ScanWorkingDirectoryRecursiveAsync(
147196 fileHash ) ;
148197
149198 // Avoid race condition
150- lock ( untrackedFiles )
199+ lock ( context . UntrackedFiles )
151200 {
152- untrackedFiles . Add ( untrackedFile ) ;
201+ context . UntrackedFiles . Add ( untrackedFile ) ;
153202 }
154203 }
155204 }
156- } ) ) ;
205+ } ) ;
206+
207+ // Limits the number of parallel tasks
208+ var canParallel = context . CanParallel ;
209+ context . IncrementRunningCount ( entries . Length ) ;
210+ try
211+ {
212+ // When parallel is allowed, run all tasks in parallel
213+ if ( canParallel )
214+ {
215+ await Utilities . WhenAll ( tasks ) ;
216+ }
217+ else
218+ {
219+ // When parallel is not allowed, run all tasks sequentially
220+ foreach ( var task in tasks )
221+ {
222+ await task ;
223+ }
224+ }
225+ }
226+ finally
227+ {
228+ context . DecrementRunningCount ( entries . Length ) ;
229+ }
157230 }
158231 catch ( UnauthorizedAccessException )
159232 {
@@ -165,6 +238,48 @@ await ScanWorkingDirectoryRecursiveAsync(
165238 }
166239 }
167240
241+ /// <summary>
242+ /// Scans the working directory for untracked files, starting the recursive scan at the working directory path itself.
243+ /// </summary>
244+ /// <param name="repository">The repository to scan.</param>
245+ /// <param name="workingDirectoryPath">The path to the working directory; also used as the first path to scan.</param>
246+ /// <param name="processedPaths">The paths that have already been processed.</param>
247+ /// <param name="overrideGlobFilter">The override glob filter.</param>
248+ /// <param name="untrackedFiles">The list that receives the untracked files (output).</param>
249+ /// <param name="ct">The cancellation token.</param>
250+ #if NET45_OR_GREATER || NETSTANDARD || NETCOREAPP2_1_OR_GREATER
251+ public static ValueTask ExtractUntrackedFilesAsync (
252+ Repository repository ,
253+ string workingDirectoryPath ,
254+ HashSet < string > processedPaths ,
255+ GlobFilter overrideGlobFilter ,
256+ List < PrimitiveWorkingDirectoryFile > untrackedFiles ,
257+ CancellationToken ct ) =>
258+ #else
259+ public static Task ExtractUntrackedFilesAsync (
260+ Repository repository ,
261+ string workingDirectoryPath ,
262+ HashSet < string > processedPaths ,
263+ GlobFilter overrideGlobFilter ,
264+ List < PrimitiveWorkingDirectoryFile > untrackedFiles ,
265+ CancellationToken ct ) =>
266+ #endif
267+ ExtractUntrackedFilesRecursiveAsync (
268+ new TraverseContext (
269+ repository ,
270+ workingDirectoryPath ,
271+ processedPaths ,
272+ overrideGlobFilter ,
273+ untrackedFiles ,
274+ ct ,
275+ // Strictly speaking, the ceiling should depend on the degree of I/O parallelism the machine can sustain,
276+ // but that is very difficult to determine mechanically.
277+ // Therefore, the number of processors is used as a substitute.
278+ Environment . ProcessorCount ) ,
279+ workingDirectoryPath ,
280+ // Initial parent glob filter for the top-level call (always Glob.nothingFilter)
281+ Glob . nothingFilter ) ;
282+
168283 /// <summary>
169284 /// Builds a dictionary of file paths and their hashes from a tree.
170285 /// </summary>
0 commit comments