Skip to content

Commit b853e6e

Browse files
[util/package-info] tool for creating report on packages (#1189)
I mostly "vibe-coded" this with Copilot. Summary of changes: sorted by downloads; includes a "Used By" column; backoff/retry logic when rate limited; saves `package-analysis.md`.
1 parent c37afa0 commit b853e6e

File tree

5 files changed

+1315
-0
lines changed

5 files changed

+1315
-0
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<!-- Console executable targeting .NET 9 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net9.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
</PropertyGroup>
9+
10+
<!-- HtmlAgilityPack supplies the HtmlDocument type that Program.cs uses to read the NuGet.org page text. -->
<!-- NOTE(review): Program.cs as shown parses config.json with System.Text.Json; confirm Newtonsoft.Json is still needed. -->
<ItemGroup>
11+
<PackageReference Include="HtmlAgilityPack" Version="1.11.54" />
12+
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
13+
</ItemGroup>
14+
15+
</Project>

util/package-info/Program.cs

Lines changed: 376 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,376 @@
1+
using System.Text.Json;
2+
using System.Text.RegularExpressions;
3+
using HtmlAgilityPack;
4+
5+
namespace PackageAnalyzer;
6+
7+
/// <summary>
/// One row of the generated report: a NuGet package together with the statistics
/// scraped from its NuGet.org page.
/// </summary>
public class PackageInfo
{
    /// <summary>NuGet package identifier, copied from <c>ConfigArtifact.nugetId</c>.</summary>
    public string NugetId { get; set; } = string.Empty;

    /// <summary>Copied from <c>ConfigArtifact.version</c>.</summary>
    public string Version { get; set; } = string.Empty;

    /// <summary>Copied from <c>ConfigArtifact.nugetVersion</c>.</summary>
    public string NugetVersion { get; set; } = string.Empty;

    /// <summary>
    /// Scraped download count as text (for example "52.9M"), or a status such as
    /// "N/A", "Rate Limited", "HTTP &lt;status&gt;" or "Error: ..." when no count was obtained.
    /// Null until a fetch has filled it in.
    /// </summary>
    public string? Downloads { get; set; }

    /// <summary>
    /// Scraped "NuGet packages (N)" count as text, or the same kind of status text as
    /// <see cref="Downloads"/>. Null until a fetch has filled it in.
    /// </summary>
    public string? UsedBy { get; set; }

    /// <summary>URL of the package page on NuGet.org.</summary>
    public string PackageUrl { get; set; } = string.Empty;
}
16+
17+
/// <summary>
/// One entry of the <c>artifacts</c> list in config.json. Property names are lower camel case
/// to mirror the JSON keys.
/// </summary>
/// <remarks>
/// The report code in this file reads only <see cref="nugetId"/>, <see cref="version"/> and
/// <see cref="nugetVersion"/>; the remaining properties are carried along from the JSON.
/// </remarks>
public class ConfigArtifact
{
    // NOTE(review): groupId/artifactId look like Maven coordinates - confirm against config.json.
    public string groupId { get; set; } = string.Empty;

    public string artifactId { get; set; } = string.Empty;

    /// <summary>Shown in the report's "Version" column.</summary>
    public string version { get; set; } = string.Empty;

    /// <summary>Shown in the report's "NuGet Version" column.</summary>
    public string nugetVersion { get; set; } = string.Empty;

    /// <summary>NuGet package identifier; used to build the NuGet.org package URL.</summary>
    public string nugetId { get; set; } = string.Empty;

    public bool dependencyOnly { get; set; }
}
26+
27+
/// <summary>
/// Shape of one element of the top-level array in config.json. Property names are lower
/// camel case to mirror the JSON keys.
/// </summary>
/// <remarks>
/// Only <see cref="artifacts"/> is read by the report code in this file.
/// </remarks>
public class Config
{
    public string slnFile { get; set; } = string.Empty;

    public bool strictRuntimeDependencies { get; set; }

    public List<string> additionalProjects { get; set; } = new List<string>();

    /// <summary>Packages to look up on NuGet.org; starts as an empty list.</summary>
    public List<ConfigArtifact> artifacts { get; set; } = new List<ConfigArtifact>();
}
34+
35+
class Program
36+
{
37+
/// <summary>
/// Single HttpClient shared by every request this tool makes.
/// </summary>
private static readonly HttpClient httpClient;

/// <summary>
/// Creates the shared client with a 30 second timeout (shorter than HttpClient's
/// 100 second default, so a stalled request fails sooner) and a User-Agent header
/// that identifies this tool to the server.
/// </summary>
static Program()
{
    httpClient = new HttpClient
    {
        Timeout = TimeSpan.FromSeconds(30)
    };
    httpClient.DefaultRequestHeaders.Add("User-Agent", "NuGet-Package-Analyzer/1.0 (https://github.com/xamarin/AndroidX)");
}
47+
48+
/// <summary>
/// Entry point. Reads the artifact list from config.json, fetches NuGet.org statistics for
/// each artifact with a small cap on concurrent requests, and writes a markdown report.
/// </summary>
/// <param name="args">
/// When the first argument is <c>--test</c>, only the first five artifacts are processed,
/// concurrency is reduced, and the report is written to package-analysis-test.md.
/// </param>
static async Task Main(string[] args)
{
    bool testMode = args.Length > 0 && args[0] == "--test";

    // The path is relative to the current working directory, which is why the hint
    // below asks the user to run from the tool's own directory.
    var configPath = Path.Combine("..", "..", "config.json");
    if (!File.Exists(configPath))
    {
        Console.WriteLine("config.json not found. Please run from util/package-info directory.");
        return;
    }

    var configJson = await File.ReadAllTextAsync(configPath);
    var configs = JsonSerializer.Deserialize<List<Config>>(configJson, new JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true
    });

    if (configs == null || configs.Count == 0 || configs[0].artifacts == null)
    {
        Console.WriteLine("Could not parse config.json or no artifacts found");
        return;
    }

    var config = configs[0]; // Only the first config object is used

    var artifactsToProcess = testMode ? config.artifacts.Take(5).ToList() : config.artifacts;

    Console.WriteLine($"Found {config.artifacts.Count} packages in config.json");
    if (testMode)
    {
        Console.WriteLine($"Running in test mode - processing first {artifactsToProcess.Count} packages only");
    }
    Console.WriteLine("Fetching package information from NuGet.org...\n");

    var packages = new List<PackageInfo>();

    // Caps the number of in-flight requests to reduce rate limiting. Disposed when Main
    // exits, which is after Task.WhenAll has observed every worker finishing.
    using var semaphore = new SemaphoreSlim(testMode ? 2 : 3);

    var tasks = artifactsToProcess.Select(async artifact =>
    {
        await semaphore.WaitAsync();
        try
        {
            var packageInfo = await FetchPackageInfo(artifact, testMode);

            // Workers complete concurrently; the lock guards both the shared list and the
            // progress output derived from it.
            lock (packages)
            {
                packages.Add(packageInfo);
                if (testMode)
                {
                    Console.WriteLine($"Processed {packages.Count}/{artifactsToProcess.Count}: {packageInfo.NugetId}");
                    Console.WriteLine($"  Downloads: {packageInfo.Downloads}, Used By: {packageInfo.UsedBy}");
                }
                else if (packages.Count % 10 == 0)
                {
                    var rateLimited = packages.Count(p => p.Downloads == "Rate Limited");
                    var errors = packages.Count(p => p.Downloads != null && p.Downloads.Contains("Error"));
                    Console.WriteLine($"Processed {packages.Count}/{artifactsToProcess.Count} packages... ({packages.Count * 100.0 / artifactsToProcess.Count:F1}%) [Rate Limited: {rateLimited}, Errors: {errors}]");
                }
            }
            return packageInfo;
        }
        finally
        {
            semaphore.Release();
        }
    });

    await Task.WhenAll(tasks);

    // Generate markdown tables
    var outputFile = testMode ? "package-analysis-test.md" : "package-analysis.md";
    await GenerateMarkdownTables(packages, outputFile);

    Console.WriteLine($"\nCompleted! Generated {outputFile} with information for {packages.Count} packages.");
}
123+
124+
/// <summary>
/// Downloads the NuGet.org page for <paramref name="artifact"/> and scrapes the total
/// download count and the "NuGet packages (N)" used-by count from the page text.
/// </summary>
/// <param name="artifact">Source of the package id and version columns.</param>
/// <param name="testMode">
/// Halves the base backoff delay, uses a fixed 500 ms pause after a successful fetch and
/// suppresses the initial "Fetching" log line.
/// </param>
/// <returns>
/// A populated <see cref="PackageInfo"/>. Request failures are not thrown: after the retries
/// are exhausted, <c>Downloads</c> and <c>UsedBy</c> carry "Rate Limited" or "Error: ...";
/// a non-success, non-429 status is reported immediately as "HTTP &lt;status&gt;".
/// </returns>
static async Task<PackageInfo> FetchPackageInfo(ConfigArtifact artifact, bool testMode = false)
{
    var packageInfo = new PackageInfo
    {
        NugetId = artifact.nugetId,
        Version = artifact.version,
        NugetVersion = artifact.nugetVersion,
        PackageUrl = $"https://www.nuget.org/packages/{artifact.nugetId}"
    };

    const int maxRetries = 3;
    var baseDelay = TimeSpan.FromSeconds(testMode ? 1 : 2);

    // attempt 0 is the initial request; attempts 1..maxRetries are retries.
    for (int attempt = 0; attempt <= maxRetries; attempt++)
    {
        try
        {
            if (!testMode && attempt == 0)
            {
                Console.WriteLine($"  Fetching: {packageInfo.PackageUrl}");
            }
            else if (attempt > 0)
            {
                Console.WriteLine($"  Retry {attempt}/{maxRetries}: {packageInfo.NugetId}");
            }

            // Disposed on every way out of this try block (return, continue or exception).
            using var response = await httpClient.GetAsync(packageInfo.PackageUrl);

            if (response.StatusCode == System.Net.HttpStatusCode.TooManyRequests)
            {
                if (attempt >= maxRetries)
                {
                    packageInfo.Downloads = "Rate Limited";
                    packageInfo.UsedBy = "Rate Limited";
                    return packageInfo;
                }

                // Exponential backoff, unless the server supplies its own Retry-After.
                var delay = TimeSpan.FromSeconds(baseDelay.TotalSeconds * Math.Pow(2, attempt));
                if (response.Headers.RetryAfter is { } retryAfter)
                {
                    if (retryAfter.Delta is { } delta)
                    {
                        delay = delta;
                    }
                    else if (retryAfter.Date is { } date)
                    {
                        delay = date - DateTimeOffset.UtcNow;
                    }
                }

                // A Retry-After date that has already passed yields a negative span, which
                // Task.Delay rejects with an exception; retry immediately instead.
                if (delay < TimeSpan.Zero)
                {
                    delay = TimeSpan.Zero;
                }

                Console.WriteLine($"  Rate limited. Waiting {delay.TotalSeconds:F1}s before retry {attempt + 1}/{maxRetries}...");
                await Task.Delay(delay);
                continue;
            }

            if (!response.IsSuccessStatusCode)
            {
                packageInfo.Downloads = $"HTTP {response.StatusCode}";
                packageInfo.UsedBy = $"HTTP {response.StatusCode}";
                return packageInfo;
            }

            var html = await response.Content.ReadAsStringAsync();
            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            // Both statistics are matched against the page's text content, not specific elements.
            var allText = doc.DocumentNode.InnerText;

            // Look for "Total X.XM" pattern in the text
            var downloadMatch = Regex.Match(allText, @"Total\s+([\d,.]+[KMB]?)", RegexOptions.IgnoreCase | RegexOptions.Multiline);
            packageInfo.Downloads = CleanStatValue(downloadMatch.Success ? downloadMatch.Groups[1].Value : "N/A");

            // Look for "NuGet packages (89)" pattern in the text
            var usedByMatch = Regex.Match(allText, @"NuGet packages[^\d]*\((\d+)\)", RegexOptions.IgnoreCase);
            packageInfo.UsedBy = CleanStatValue(usedByMatch.Success ? usedByMatch.Groups[1].Value : "N/A");

            // Small pause before releasing the caller's concurrency slot (longer after a retry).
            var normalDelay = testMode ? 500 : (attempt > 0 ? 1000 : 300);
            await Task.Delay(normalDelay);

            return packageInfo; // Success!
        }
        catch (Exception ex)
        {
            if (attempt >= maxRetries)
            {
                packageInfo.Downloads = $"Error: {ex.Message}";
                packageInfo.UsedBy = $"Error: {ex.Message}";
                return packageInfo;
            }

            var delay = TimeSpan.FromSeconds(baseDelay.TotalSeconds * Math.Pow(2, attempt));
            Console.WriteLine($"  Error: {ex.Message}. Retrying in {delay.TotalSeconds:F1}s...");
            await Task.Delay(delay);
        }
    }

    // Every loop iteration returns or retries, so this is only reached if the loop bounds change.
    return packageInfo;
}
251+
252+
/// <summary>
/// Normalizes a scraped statistic to its bare number token.
/// </summary>
/// <param name="value">Raw text taken from the page, or a placeholder such as "N/A".</param>
/// <returns>
/// "N/A" for null, empty or whitespace-only input; the whitespace-collapsed input when it is
/// already a number token such as "52.9M" or "1,234"; otherwise the first number token found
/// inside it; otherwise the whitespace-collapsed input unchanged.
/// </returns>
static string CleanStatValue(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return "N/A";
    }

    // Collapse every run of whitespace (including newlines) to a single space.
    var collapsed = Regex.Replace(value, @"\s+", " ").Trim();

    // Already a formatted number on its own: nothing to extract.
    var isBareNumber = Regex.IsMatch(collapsed, @"^[\d,]+(\.\d+)?[KMB]?$", RegexOptions.IgnoreCase);
    if (isBareNumber)
    {
        return collapsed;
    }

    // Otherwise pull the first number out of text like "1,234 downloads".
    var embedded = Regex.Match(collapsed, @"([\d,]+(\.\d+)?[KMB]?)", RegexOptions.IgnoreCase);
    return embedded.Success ? embedded.Groups[1].Value : collapsed;
}
271+
272+
/// <summary>
/// Writes the markdown report: a header, one table of all packages sorted by download count
/// (descending, then by package id), and a summary statistics section.
/// </summary>
/// <param name="packages">Rows to report; the list itself is not modified.</param>
/// <param name="outputFile">Path of the markdown file to create or overwrite.</param>
static async Task GenerateMarkdownTables(List<PackageInfo> packages, string outputFile)
{
    // Keeps free-form cell text (for example an exception message stored in Downloads)
    // from breaking the table: pipes are escaped and line breaks become spaces.
    static string EscapeCell(string? text) =>
        (text ?? "").Replace("|", "\\|").Replace("\r", " ").Replace("\n", " ");

    // True only when Downloads holds an actual number token such as "52.9M" or "1,234";
    // null, "N/A", "Rate Limited", "HTTP ..." and "Error: ..." all fail this check.
    static bool HasDownloadStat(PackageInfo p) =>
        p.Downloads is { } text && Regex.IsMatch(text, @"^[\d,]+(\.\d+)?[KMB]?$", RegexOptions.IgnoreCase);

    // Captured once so the header and the summary report the same instant.
    var generatedAt = DateTime.Now;
    var markdown = new System.Text.StringBuilder();

    markdown.AppendLine("# NuGet Package Analysis");
    markdown.AppendLine($"Generated on: {generatedAt:yyyy-MM-dd HH:mm:ss}");
    markdown.AppendLine($"Total packages analyzed: {packages.Count}");
    markdown.AppendLine();

    // Create a single table sorted by downloads
    markdown.AppendLine("| Package ID | Version | NuGet Version | Downloads | Used By | Package URL |");
    markdown.AppendLine("|------------|---------|---------------|-----------|---------|-------------|");

    // Sort by numeric download count; rows without a parsable count sort as 0.
    var sortedPackages = packages
        .OrderByDescending(p => ConvertDownloadsToNumber(p.Downloads))
        .ThenBy(p => p.NugetId);

    foreach (var package in sortedPackages)
    {
        var packageLink = $"[{EscapeCell(package.NugetId)}]({package.PackageUrl})";

        markdown.AppendLine($"| {packageLink} | {EscapeCell(package.Version)} | {EscapeCell(package.NugetVersion)} | {EscapeCell(package.Downloads)} | {EscapeCell(package.UsedBy)} | {package.PackageUrl} |");
    }

    markdown.AppendLine();

    // Add summary statistics
    markdown.AppendLine("## Summary Statistics");
    markdown.AppendLine();

    var validDownloads = packages
        .Select(p => ConvertDownloadsToNumber(p.Downloads))
        .Where(d => d > 0)
        .ToList();

    var totalDownloads = validDownloads.Sum();
    var packagesWithStats = packages.Count(HasDownloadStat);

    markdown.AppendLine($"- **Total packages analyzed:** {packages.Count}");
    markdown.AppendLine($"- **Packages with download stats:** {packagesWithStats}");
    markdown.AppendLine($"- **Total downloads (estimated):** {FormatLargeNumber(totalDownloads)}");
    markdown.AppendLine($"- **Average downloads per package:** {(validDownloads.Any() ? FormatLargeNumber((long)validDownloads.Average()) : "0")}");
    markdown.AppendLine($"- **Analysis date:** {generatedAt:yyyy-MM-dd HH:mm:ss}");

    await File.WriteAllTextAsync(outputFile, markdown.ToString());
}
320+
321+
/// <summary>
/// Converts a download-count string such as "1,234", "52.9K", "3.1M" or "2B" to a number.
/// </summary>
/// <param name="downloads">Scraped count or a status placeholder; may be null.</param>
/// <returns>
/// The numeric value, with any fraction truncated; 0 for null, empty, "N/A", text containing
/// "Error", or anything else that is not a number that fits in a <see cref="long"/>.
/// </returns>
static long ConvertDownloadsToNumber(string? downloads)
{
    if (string.IsNullOrEmpty(downloads) || downloads == "N/A" || downloads.Contains("Error"))
    {
        return 0;
    }

    // Drop thousands separators and surrounding whitespace.
    var cleanValue = downloads.Replace(",", "").Trim();
    if (cleanValue.Length == 0)
    {
        return 0;
    }

    // Handle K, M, B suffixes (either case).
    var multiplier = char.ToUpperInvariant(cleanValue[^1]) switch
    {
        'K' => 1_000L,
        'M' => 1_000_000L,
        'B' => 1_000_000_000L,
        _ => 1L,
    };
    if (multiplier != 1L)
    {
        cleanValue = cleanValue[..^1];
    }

    // The scraped text always uses '.' as the decimal point, so parse with the invariant
    // culture; the current culture may treat '.' as a group separator ("52.9" -> 529).
    var parsed = double.TryParse(
        cleanValue,
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
        System.Globalization.CultureInfo.InvariantCulture,
        out var number);
    if (!parsed)
    {
        return 0;
    }

    // Reject NaN, infinities and magnitudes outside long's range before casting.
    var scaled = number * multiplier;
    if (!double.IsFinite(scaled) || Math.Abs(scaled) >= 9.2e18)
    {
        return 0;
    }

    return (long)scaled;
}
354+
355+
/// <summary>
/// Formats a count compactly with one decimal and a K/M/B suffix, e.g. 1500 -> "1.5K".
/// Values below 1,000 are written as a plain integer.
/// </summary>
/// <param name="number">Count to format.</param>
/// <returns>
/// The formatted text. Output always uses '.' as the decimal point, matching the style of
/// the scraped download figures, regardless of the current culture.
/// </returns>
static string FormatLargeNumber(long number)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Each cutoff sits at 999.95 of the unit below: from there on, one-decimal rounding
    // would print "1000.0" of that unit, so the value moves up a unit ("1.0M", not "1000.0K").
    if (number >= 999_950_000)
    {
        return (number / 1_000_000_000.0).ToString("F1", invariant) + "B";
    }
    if (number >= 999_950)
    {
        return (number / 1_000_000.0).ToString("F1", invariant) + "M";
    }
    if (number >= 1_000)
    {
        return (number / 1_000.0).ToString("F1", invariant) + "K";
    }

    return number.ToString("N0", invariant);
}
366+
367+
/// <summary>
/// Returns the first three dot-separated segments of a package id, e.g.
/// "Xamarin.AndroidX.Activity.Ktx" -> "Xamarin.AndroidX.Activity". Ids with fewer than
/// three segments are returned unchanged.
/// </summary>
/// <param name="packageId">Package id to shorten.</param>
// NOTE(review): nothing in this class as shown calls this helper - confirm it is still needed.
static string GetPackagePrefix(string packageId)
{
    var segments = packageId.Split('.');
    return segments.Length < 3
        ? packageId
        : string.Join(".", segments, 0, 3);
}
376+
}

0 commit comments

Comments
 (0)