Skip to content

Commit ef0e102

Browse files
committed
Retrieve package IDs from files and restore the not yet restored ones
Read all files in the repo and look for `PackageReference` XML elements to extract the package IDs, then restore the packages that are not yet restored. This change improves the percentage of found assemblies on the PowerShell repo from 95% to 97% compared to a traced extraction. Also, it increases the number of assemblies only referenced in the standalone extraction from 79 to 134.
1 parent cd64195 commit ef0e102

File tree

3 files changed

+111
-5
lines changed

3 files changed

+111
-5
lines changed

csharp/extractor/Semmle.Extraction.CSharp.Standalone/BuildAnalysis.cs

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
using System.Collections.Concurrent;
99
using System.Text;
1010
using System.Security.Cryptography;
11+
using System.Text.RegularExpressions;
1112

1213
namespace Semmle.BuildAnalyser
1314
{
1415
/// <summary>
1516
/// Main implementation of the build analysis.
1617
/// </summary>
17-
internal sealed class BuildAnalysis : IDisposable
18+
internal sealed partial class BuildAnalysis : IDisposable
1819
{
1920
private readonly AssemblyCache assemblyCache;
2021
private readonly ProgressMonitor progressMonitor;
@@ -95,6 +96,7 @@ public BuildAnalysis(Options options, ProgressMonitor progressMonitor)
9596
{
9697
Restore(solutions);
9798
Restore(allProjects);
99+
DownloadMissingPackages(allProjects);
98100
}
99101
}
100102

@@ -316,9 +318,9 @@ private void AnalyseProject(FileInfo project)
316318

317319
}
318320

319-
private void Restore(string target)
321+
private bool Restore(string target)
320322
{
321-
dotnet.RestoreToDirectory(target, packageDirectory.DirInfo.FullName);
323+
return dotnet.RestoreToDirectory(target, packageDirectory.DirInfo.FullName);
322324
}
323325

324326
private void Restore(IEnumerable<string> targets)
@@ -329,6 +331,76 @@ private void Restore(IEnumerable<string> targets)
329331
}
330332
}
331333

334+
/// <summary>
/// Looks for <c>PackageReference</c> elements in every file returned by <c>GetFiles("*.*")</c>
/// and restores each referenced package that does not yet have a folder in the package directory.
/// Every missing package is restored on its own, through a temporary console project.
/// </summary>
/// <param name="restoreTargets">Not used by this method.</param>
private void DownloadMissingPackages(IEnumerable<string> restoreTargets)
{
    // Package folders are compared by their lower-cased directory name.
    var alreadyDownloadedPackages = Directory.GetDirectories(packageDirectory.DirInfo.FullName)
        .Select(d => Path.GetFileName(d).ToLowerInvariant())
        .ToHashSet();

    var notYetDownloadedPackages = FindReferencedPackagesNotIn(alreadyDownloadedPackages);

    foreach (var package in notYetDownloadedPackages)
    {
        progressMonitor.NugetInstall(package);
        using var tempDir = new TemporaryDirectory(ComputeTempDirectory(package));

        // Create a throw-away project and reference the package from it; both steps skip the restore.
        if (!dotnet.New(tempDir.DirInfo.FullName))
        {
            continue;
        }

        if (!dotnet.AddPackage(tempDir.DirInfo.FullName, package))
        {
            continue;
        }

        // TODO: the restore might fail, we could retry with a prerelease (*-* instead of *) version of the package.
        if (!Restore(tempDir.DirInfo.FullName))
        {
            progressMonitor.FailedToRestoreNugetPackage(package);
        }
    }
}

/// <summary>
/// Reads every file line by line and collects the lower-cased package IDs captured by the
/// <c>PackageReference()</c> regex that are not contained in <paramref name="alreadyDownloadedPackages"/>.
/// Files that cannot be read are reported through the progress monitor and skipped.
/// </summary>
private HashSet<string> FindReferencedPackagesNotIn(HashSet<string> alreadyDownloadedPackages)
{
    var missingPackages = new HashSet<string>();

    foreach (var file in GetFiles("*.*").ToArray())
    {
        try
        {
            using var sr = new StreamReader(file);
            while (sr.ReadLine() is string line)
            {
                foreach (Match match in PackageReference().Matches(line))
                {
                    // Take the ID from the regex capture group instead of searching for the first
                    // "Include" text: attributes such as IncludeAssets="..." may precede Include="...".
                    var packageName = match.Groups[1].Value.ToLowerInvariant();

                    // An empty Include attribute does not name a package; there is nothing to restore.
                    if (string.IsNullOrWhiteSpace(packageName))
                    {
                        continue;
                    }

                    if (!alreadyDownloadedPackages.Contains(packageName))
                    {
                        missingPackages.Add(packageName);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            progressMonitor.FailedToReadFile(file, ex);
        }
    }

    return missingPackages;
}
403+
332404
private void AnalyseSolutions(IEnumerable<string> solutions)
333405
{
334406
Parallel.ForEach(solutions, new ParallelOptions { MaxDegreeOfParallelism = 4 }, solutionFile =>
@@ -350,5 +422,8 @@ public void Dispose()
350422
{
351423
packageDirectory?.Dispose();
352424
}
425+
426+
[GeneratedRegex("<PackageReference .*Include=\"(.*?)\".*/>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
427+
private static partial Regex PackageReference();
353428
}
354429
}

csharp/extractor/Semmle.Extraction.CSharp.Standalone/DotNet.cs

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,36 @@ private void Info()
3636
}
3737
}
3838

39-
public void RestoreToDirectory(string projectOrSolutionFile, string packageDirectory)
39+
private bool RunCommand(string args)
4040
{
41-
var args = $"restore --no-dependencies \"{projectOrSolutionFile}\" --packages \"{packageDirectory}\" /p:DisableImplicitNuGetFallbackFolder=true";
4241
progressMonitor.RunningProcess($"dotnet {args}");
4342
using var proc = Process.Start("dotnet", args);
4443
proc.WaitForExit();
4544
if (proc.ExitCode != 0)
4645
{
4746
progressMonitor.CommandFailed("dotnet", args, proc.ExitCode);
47+
return false;
4848
}
49+
50+
return true;
51+
}
52+
53+
/// <summary>
/// Runs <c>dotnet restore --no-dependencies</c> for the given project or solution file,
/// placing the packages in <paramref name="packageDirectory"/>.
/// </summary>
/// <returns>The result of <c>RunCommand</c> for the restore invocation.</returns>
public bool RestoreToDirectory(string projectOrSolutionFile, string packageDirectory) =>
    RunCommand($"restore --no-dependencies \"{projectOrSolutionFile}\" --packages \"{packageDirectory}\" /p:DisableImplicitNuGetFallbackFolder=true");
58+
59+
/// <summary>
/// Runs <c>dotnet new console --no-restore</c> with <paramref name="folder"/> as the output folder.
/// </summary>
/// <returns>The result of <c>RunCommand</c> for the invocation.</returns>
public bool New(string folder) =>
    RunCommand($"new console --no-restore --output \"{folder}\"");
64+
65+
/// <summary>
/// Runs <c>dotnet add ... package ... --no-restore</c> to reference <paramref name="package"/>
/// from the project in <paramref name="folder"/>.
/// </summary>
/// <returns>The result of <c>RunCommand</c> for the invocation.</returns>
public bool AddPackage(string folder, string package) =>
    RunCommand($"add \"{folder}\" package \"{package}\" --no-restore");
5070
}
5171
}

csharp/extractor/Semmle.Extraction.CSharp.Standalone/ProgressMonitor.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,5 +107,16 @@ public void RunningProcess(string command)
107107
{
108108
logger.Log(Severity.Info, $"Running {command}");
109109
}
110+
111+
/// <summary>
/// Logs, at info severity, that the given NuGet package could not be restored.
/// </summary>
public void FailedToRestoreNugetPackage(string package) =>
    logger.Log(Severity.Info, $"Failed to restore nuget package {package}");
115+
116+
/// <summary>
/// Logs that a file could not be read: a short message at info severity,
/// followed by the same message with the exception details at debug severity.
/// </summary>
public void FailedToReadFile(string file, Exception ex)
{
    var summary = $"Failed to read file {file}";
    logger.Log(Severity.Info, summary);
    logger.Log(Severity.Debug, $"{summary}, exception: {ex}");
}
110121
}
111122
}

0 commit comments

Comments
 (0)