Skip to content

Commit c95d219

Browse files
committed
C#: Re-factor the processing of all files into a helper class and make sure to only do one pass of the files.
1 parent db685e5 commit c95d219

File tree

2 files changed

+153
-112
lines changed

2 files changed

+153
-112
lines changed

csharp/extractor/Semmle.Extraction.CSharp.Standalone/BuildAnalysis.cs

Lines changed: 7 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@
88
using System.Collections.Concurrent;
99
using System.Text;
1010
using System.Security.Cryptography;
11-
using System.Text.RegularExpressions;
1211

1312
namespace Semmle.BuildAnalyser
1413
{
1514
/// <summary>
1615
/// Main implementation of the build analysis.
1716
/// </summary>
18-
internal sealed partial class BuildAnalysis : IDisposable
17+
internal sealed class BuildAnalysis : IDisposable
1918
{
2019
private readonly AssemblyCache assemblyCache;
2120
private readonly ProgressMonitor progressMonitor;
@@ -29,7 +28,7 @@ internal sealed partial class BuildAnalysis : IDisposable
2928
private readonly Options options;
3029
private readonly DirectoryInfo sourceDir;
3130
private readonly DotNet dotnet;
32-
private readonly Lazy<IEnumerable<string>> allFiles;
31+
private readonly FileContent fileContent;
3332
private readonly TemporaryDirectory packageDirectory;
3433

3534

@@ -58,7 +57,9 @@ public BuildAnalysis(Options options, ProgressMonitor progressMonitor)
5857

5958
this.progressMonitor.FindingFiles(options.SrcDir);
6059

61-
this.allFiles = new(() => GetFiles("*.*"));
60+
packageDirectory = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName));
61+
62+
this.fileContent = new FileContent(packageDirectory, progressMonitor, () => GetFiles("*.*"));
6263
this.allSources = GetFiles("*.cs").ToArray();
6364
var allProjects = GetFiles("*.csproj");
6465
var solutions = options.SolutionFile is not null
@@ -75,7 +76,7 @@ public BuildAnalysis(Options options, ProgressMonitor progressMonitor)
7576
progressMonitor.LogInfo($".NET runtime location selected: {runtimeLocation}");
7677
dllDirNames.Add(runtimeLocation);
7778

78-
if (UseAspNetDlls() && runtime.GetAspRuntime() is string aspRuntime)
79+
if (fileContent.UseAspNetDlls && runtime.GetAspRuntime() is string aspRuntime)
7980
{
8081
progressMonitor.LogInfo($"ASP.NET runtime location selected: {aspRuntime}");
8182
dllDirNames.Add(aspRuntime);
@@ -87,8 +88,6 @@ public BuildAnalysis(Options options, ProgressMonitor progressMonitor)
8788
UseReference(typeof(object).Assembly.Location);
8889
}
8990

90-
packageDirectory = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName));
91-
9291
if (options.UseNuGet)
9392
{
9493
dllDirNames.Add(packageDirectory.DirInfo.FullName);
@@ -230,11 +229,6 @@ private void ResolveConflicts()
230229
/// <param name="sourceFile">The source file.</param>
231230
private void UseSource(FileInfo sourceFile) => sources[sourceFile.FullName] = sourceFile.Exists;
232231

233-
/// <summary>
234-
/// All files in the source directory.
235-
/// </summary>
236-
private IEnumerable<string> AllFiles => allFiles.Value;
237-
238232
/// <summary>
239233
/// The list of resolved reference files.
240234
/// </summary>
@@ -335,73 +329,9 @@ private void Restore(IEnumerable<string> targets, string? pathToNugetConfig = nu
335329
}
336330
}
337331

338-
private static string GetGroup(ReadOnlySpan<char> input, ValueMatch valueMatch, string groupPrefix)
339-
{
340-
var match = input.Slice(valueMatch.Index, valueMatch.Length);
341-
var includeIndex = match.IndexOf(groupPrefix, StringComparison.InvariantCultureIgnoreCase);
342-
if (includeIndex == -1)
343-
{
344-
return string.Empty;
345-
}
346-
347-
match = match.Slice(includeIndex + groupPrefix.Length + 1);
348-
349-
var quoteIndex1 = match.IndexOf("\"");
350-
var quoteIndex2 = match.Slice(quoteIndex1 + 1).IndexOf("\"");
351-
352-
return match.Slice(quoteIndex1 + 1, quoteIndex2).ToString().ToLowerInvariant();
353-
}
354-
355-
private static bool IsGroupMatch(ReadOnlySpan<char> line, Regex regex, string groupPrefix, string value)
356-
{
357-
foreach (var valueMatch in regex.EnumerateMatches(line))
358-
{
359-
// We can't get the group from the ValueMatch, so doing it manually:
360-
if (GetGroup(line, valueMatch, groupPrefix) == value.ToLowerInvariant())
361-
{
362-
return true;
363-
}
364-
}
365-
return false;
366-
}
367-
368-
/// <summary>
369-
/// Returns true if any file in the source directory indicates that ASP.NET is used.
370-
/// The following heuristic is used to decide, if ASP.NET is used:
371-
/// If any file in the source directory contains something like (this will most likely be a .csproj file)
372-
/// <Project Sdk="Microsoft.NET.Sdk.Web">
373-
/// <FrameworkReference Include="Microsoft.AspNetCore.App"/>
374-
/// </summary>
375-
private bool UseAspNetDlls()
376-
{
377-
foreach (var file in AllFiles)
378-
{
379-
try
380-
{
381-
using var sr = new StreamReader(file);
382-
ReadOnlySpan<char> line;
383-
while ((line = sr.ReadLine()) != null)
384-
{
385-
if (IsGroupMatch(line, ProjectSdk(), "Sdk", "Microsoft.NET.Sdk.Web") ||
386-
IsGroupMatch(line, FrameworkReference(), "Include", "Microsoft.AspNetCore.App"))
387-
{
388-
return true;
389-
}
390-
}
391-
}
392-
catch (Exception ex)
393-
{
394-
progressMonitor.FailedToReadFile(file, ex);
395-
}
396-
}
397-
return false;
398-
}
399332

400333
private void DownloadMissingPackages(IEnumerable<string> restoreTargets)
401334
{
402-
var alreadyDownloadedPackages = Directory.GetDirectories(packageDirectory.DirInfo.FullName).Select(d => Path.GetFileName(d).ToLowerInvariant()).ToHashSet();
403-
var notYetDownloadedPackages = new HashSet<string>();
404-
405335
var nugetConfigs = GetFiles("nuget.config", recurseSubdirectories: true).ToArray();
406336
string? nugetConfig = null;
407337
if (nugetConfigs.Length > 1)
@@ -418,32 +348,7 @@ private void DownloadMissingPackages(IEnumerable<string> restoreTargets)
418348
nugetConfig = nugetConfigs.FirstOrDefault();
419349
}
420350

421-
foreach (var file in AllFiles)
422-
{
423-
try
424-
{
425-
using var sr = new StreamReader(file);
426-
ReadOnlySpan<char> line;
427-
while ((line = sr.ReadLine()) != null)
428-
{
429-
foreach (var valueMatch in PackageReference().EnumerateMatches(line))
430-
{
431-
// We can't get the group from the ValueMatch, so doing it manually:
432-
var packageName = GetGroup(line, valueMatch, "Include");
433-
if (!string.IsNullOrEmpty(packageName) && !alreadyDownloadedPackages.Contains(packageName))
434-
{
435-
notYetDownloadedPackages.Add(packageName);
436-
}
437-
}
438-
}
439-
}
440-
catch (Exception ex)
441-
{
442-
progressMonitor.FailedToReadFile(file, ex);
443-
}
444-
}
445-
446-
foreach (var package in notYetDownloadedPackages)
351+
foreach (var package in fileContent.NotYetDownloadedPackages)
447352
{
448353
progressMonitor.NugetInstall(package);
449354
using var tempDir = new TemporaryDirectory(ComputeTempDirectory(package));
@@ -487,15 +392,5 @@ private void AnalyseSolutions(IEnumerable<string> solutions)
487392
}
488393

489394
public void Dispose() => packageDirectory?.Dispose();
490-
491-
[GeneratedRegex("<PackageReference\\s+Include=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
492-
private static partial Regex PackageReference();
493-
494-
[GeneratedRegex("<FrameworkReference\\s+Include=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
495-
private static partial Regex FrameworkReference();
496-
497-
[GeneratedRegex("<Project\\s+Sdk=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
498-
private static partial Regex ProjectSdk();
499-
500395
}
501396
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
using Semmle.Util;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.IO;
5+
using System.Linq;
6+
using System.Text.RegularExpressions;
7+
8+
namespace Semmle.BuildAnalyser
9+
{
10+
11+
/// <summary>
/// Reads a set of files and derives several properties from their content,
/// reading the content of every file only once.
/// The implementation is lazy: no file is read until the first property is
/// accessed, and that first access computes all properties in a single pass.
/// </summary>
internal partial class FileContent
{
    private readonly ProgressMonitor progressMonitor;
    private readonly TemporaryDirectory packageDirectory;
    private readonly Func<IEnumerable<string>> getFiles;
    private readonly HashSet<string> notYetDownloadedPackages = new HashSet<string>();

    /// <summary>
    /// True once the single pass over the files has completed.
    /// </summary>
    private bool IsInitialized { get; set; } = false;

    /// <summary>
    /// The lower-cased names of all packages that are referenced through a
    /// <c>PackageReference</c> element in any of the files, but for which no
    /// sub directory with the same name exists in the package directory.
    /// Accessing this property triggers the file scan if it has not run yet.
    /// </summary>
    public HashSet<string> NotYetDownloadedPackages
    {
        get
        {
            Initialize();
            return notYetDownloadedPackages;
        }
    }

    private bool useAspNetDlls = false;

    /// <summary>
    /// True if any file in the source directory indicates that ASP.NET is used.
    /// The following heuristic is used to decide if ASP.NET is used:
    /// any file in the source directory contains something like (this will most
    /// likely be a .csproj file)
    /// <code>
    ///   &lt;Project Sdk="Microsoft.NET.Sdk.Web"&gt;
    ///   &lt;FrameworkReference Include="Microsoft.AspNetCore.App"/&gt;
    /// </code>
    /// Accessing this property triggers the file scan if it has not run yet.
    /// </summary>
    public bool UseAspNetDlls
    {
        get
        {
            Initialize();
            return useAspNetDlls;
        }
    }

    /// <summary>
    /// Creates a lazy file content analysis. No file is read by the constructor.
    /// </summary>
    /// <param name="packageDirectory">Directory whose sub directory names are treated as already downloaded packages.</param>
    /// <param name="progressMonitor">Receives a notification for every file that could not be read.</param>
    /// <param name="getFiles">Produces the paths of the files to scan; invoked when the scan runs.</param>
    public FileContent(TemporaryDirectory packageDirectory, ProgressMonitor progressMonitor, Func<IEnumerable<string>> getFiles)
    {
        this.progressMonitor = progressMonitor;
        this.packageDirectory = packageDirectory;
        this.getFiles = getFiles;
    }

    /// <summary>
    /// Extracts the quoted attribute value that follows <paramref name="groupPrefix"/>
    /// inside a regex match. <see cref="ValueMatch"/> does not expose capture groups,
    /// so the value is recovered by hand from the matched text.
    /// </summary>
    /// <param name="input">The line that was matched.</param>
    /// <param name="valueMatch">The position of the match within <paramref name="input"/>.</param>
    /// <param name="groupPrefix">The attribute name preceding the value, e.g. <c>Include</c>.</param>
    /// <returns>
    /// The lower-cased attribute value, or the empty string if the attribute name
    /// or a pair of surrounding quotes cannot be found.
    /// </returns>
    private static string GetGroup(ReadOnlySpan<char> input, ValueMatch valueMatch, string groupPrefix)
    {
        var match = input.Slice(valueMatch.Index, valueMatch.Length);

        // Ordinal search: the attribute name is an identifier, and an ordinal hit
        // always spans exactly groupPrefix.Length characters.
        var prefixIndex = match.IndexOf(groupPrefix, StringComparison.OrdinalIgnoreCase);
        if (prefixIndex == -1)
        {
            return string.Empty;
        }

        // Skip the attribute name and the '=' that follows it.
        var valueStart = prefixIndex + groupPrefix.Length + 1;
        if (valueStart > match.Length)
        {
            return string.Empty;
        }
        match = match.Slice(valueStart);

        var openingQuote = match.IndexOf('"');
        if (openingQuote == -1)
        {
            return string.Empty;
        }

        var afterOpeningQuote = match.Slice(openingQuote + 1);
        var closingQuote = afterOpeningQuote.IndexOf('"');
        if (closingQuote == -1)
        {
            return string.Empty;
        }

        return afterOpeningQuote.Slice(0, closingQuote).ToString().ToLowerInvariant();
    }

    /// <summary>
    /// Returns true if any match of <paramref name="regex"/> in <paramref name="line"/>
    /// carries the attribute <paramref name="groupPrefix"/> with the value
    /// <paramref name="value"/>, compared case-insensitively.
    /// </summary>
    private static bool IsGroupMatch(ReadOnlySpan<char> line, Regex regex, string groupPrefix, string value)
    {
        // GetGroup returns lower-cased values, so normalise the expected value once.
        var expected = value.ToLowerInvariant();

        foreach (var valueMatch in regex.EnumerateMatches(line))
        {
            // We can't get the group from the ValueMatch, so doing it manually:
            if (GetGroup(line, valueMatch, groupPrefix) == expected)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Performs the single pass over all files and fills in every property.
    /// Does nothing if a previous call already completed the pass.
    /// </summary>
    private void Initialize()
    {
        if (IsInitialized)
        {
            return;
        }

        // Packages count as downloaded if the package directory has a sub directory with their name.
        var alreadyDownloadedPackages = Directory.GetDirectories(packageDirectory.DirInfo.FullName)
            .Select(d => Path.GetFileName(d).ToLowerInvariant())
            .ToHashSet();

        foreach (var file in getFiles())
        {
            try
            {
                using var sr = new StreamReader(file);
                string? line;
                while ((line = sr.ReadLine()) is not null)
                {
                    // Find the not yet downloaded packages.
                    foreach (var valueMatch in PackageReference().EnumerateMatches(line))
                    {
                        // We can't get the group from the ValueMatch, so doing it manually:
                        var packageName = GetGroup(line, valueMatch, "Include");
                        if (!string.IsNullOrEmpty(packageName) && !alreadyDownloadedPackages.Contains(packageName))
                        {
                            notYetDownloadedPackages.Add(packageName);
                        }
                    }

                    // Determine if ASP.NET is used. Once detected, the check is skipped for all remaining lines.
                    if (!useAspNetDlls)
                    {
                        useAspNetDlls =
                            IsGroupMatch(line, ProjectSdk(), "Sdk", "Microsoft.NET.Sdk.Web") ||
                            IsGroupMatch(line, FrameworkReference(), "Include", "Microsoft.AspNetCore.App");
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the file and carry on with the next one.
                progressMonitor.FailedToReadFile(file, ex);
            }
        }

        IsInitialized = true;
    }

    // The tail of each pattern is lazy (".*?") so that a match ends at the first '>'.
    // With a greedy tail, a line containing several elements produced a single match
    // spanning up to the last '>', and all elements but the first were ignored.

    [GeneratedRegex("<PackageReference\\s+Include=\"(.*?)\".*?/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
    private static partial Regex PackageReference();

    [GeneratedRegex("<FrameworkReference\\s+Include=\"(.*?)\".*?/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
    private static partial Regex FrameworkReference();

    [GeneratedRegex("<Project\\s+Sdk=\"(.*?)\".*?/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
    private static partial Regex ProjectSdk();
}
146+
}

0 commit comments

Comments
 (0)