|
| 1 | +using System.Text.Json; |
| 2 | +using System.Text.RegularExpressions; |
| 3 | +using HtmlAgilityPack; |
| 4 | + |
| 5 | +namespace PackageAnalyzer; |
| 6 | + |
/// <summary>
/// Report row for a single NuGet package: identifiers and versions copied from the
/// configuration, plus the statistics text read from the package page.
/// </summary>
public class PackageInfo
{
    /// <summary>NuGet package identifier.</summary>
    public string NugetId { get; set; } = string.Empty;

    /// <summary>Value of the artifact's <c>version</c> configuration entry.</summary>
    public string Version { get; set; } = string.Empty;

    /// <summary>Value of the artifact's <c>nugetVersion</c> configuration entry.</summary>
    public string NugetVersion { get; set; } = string.Empty;

    /// <summary>
    /// Download count as text (for example "52.9M"), or a status text when the count
    /// could not be obtained. <c>null</c> until a fetch has filled it in.
    /// </summary>
    public string? Downloads { get; set; }

    /// <summary>
    /// Number of dependent NuGet packages as text, or a status text when it could not
    /// be obtained. <c>null</c> until a fetch has filled it in.
    /// </summary>
    public string? UsedBy { get; set; }

    /// <summary>Address of the package page.</summary>
    public string PackageUrl { get; set; } = string.Empty;
}
| 16 | + |
/// <summary>
/// One entry of the <c>artifacts</c> list of a configuration object.
/// Member names are kept in lower camel case because the rest of the tool
/// accesses them under these names.
/// </summary>
public class ConfigArtifact
{
    /// <summary>Group identifier of the artifact (presumably the Maven group - confirm against config.json).</summary>
    public string groupId { get; set; } = string.Empty;

    /// <summary>Artifact identifier (presumably the Maven artifact name - confirm against config.json).</summary>
    public string artifactId { get; set; } = string.Empty;

    /// <summary>Version text of the artifact.</summary>
    public string version { get; set; } = string.Empty;

    /// <summary>Version text of the corresponding NuGet package.</summary>
    public string nugetVersion { get; set; } = string.Empty;

    /// <summary>Identifier of the corresponding NuGet package; used to build the package page address.</summary>
    public string nugetId { get; set; } = string.Empty;

    /// <summary>Flag read from the configuration; not consulted by the code in this file.</summary>
    public bool dependencyOnly { get; set; }
}
| 26 | + |
/// <summary>
/// One configuration object of config.json (the file holds a list of these).
/// Member names are kept in lower camel case because the rest of the tool
/// accesses them under these names.
/// </summary>
public class Config
{
    /// <summary>Solution file named by the configuration; not consulted by the code in this file.</summary>
    public string slnFile { get; set; } = string.Empty;

    /// <summary>Flag read from the configuration; not consulted by the code in this file.</summary>
    public bool strictRuntimeDependencies { get; set; }

    /// <summary>Additional project entries; starts out as an empty list.</summary>
    public List<string> additionalProjects { get; set; } = new List<string>();

    /// <summary>Artifacts whose NuGet packages are analysed; starts out as an empty list.</summary>
    public List<ConfigArtifact> artifacts { get; set; } = new List<ConfigArtifact>();
}
| 34 | + |
| 35 | +class Program |
| 36 | +{ |
// Single client shared by every request made by this tool.
private static readonly HttpClient httpClient = new HttpClient();

static Program()
{
    // 30 seconds is shorter than HttpClient's default of 100 seconds, so a request
    // that hangs is abandoned (and retried by the caller) sooner.
    httpClient.Timeout = TimeSpan.FromSeconds(30);

    // Identify the tool to the server on every request.
    httpClient.DefaultRequestHeaders.Add(
        "User-Agent",
        "NuGet-Package-Analyzer/1.0 (https://github.com/xamarin/AndroidX)");
}
| 47 | + |
/// <summary>
/// Entry point. Reads <c>../../config.json</c> (relative to the working directory), fetches
/// package statistics for every artifact of the first configuration object, and writes the
/// result to a markdown report in the working directory.
/// </summary>
/// <param name="args">
/// Command line arguments. When the first argument is <c>--test</c> only the first five
/// artifacts are processed and the report is written to <c>package-analysis-test.md</c>.
/// </param>
static async Task Main(string[] args)
{
    bool testMode = args.Length > 0 && args[0] == "--test";

    // Read and parse config.json
    var configPath = Path.Combine("..", "..", "config.json");
    if (!File.Exists(configPath))
    {
        Console.WriteLine("config.json not found. Please run from tools/package-info directory.");
        return;
    }

    var configJson = await File.ReadAllTextAsync(configPath);

    List<Config>? configs;
    try
    {
        configs = JsonSerializer.Deserialize<List<Config>>(configJson, new JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true
        });
    }
    catch (JsonException ex)
    {
        // Malformed JSON is reported like the other configuration problems instead of
        // terminating the process with an unhandled exception.
        Console.WriteLine($"Could not parse config.json: {ex.Message}");
        return;
    }

    // Only the first configuration object is used. FirstOrDefault also covers an empty
    // list and a null first element.
    var config = configs?.FirstOrDefault();
    if (config?.artifacts is null)
    {
        Console.WriteLine("Could not parse config.json or no artifacts found");
        return;
    }

    var artifactsToProcess = testMode ? config.artifacts.Take(5).ToList() : config.artifacts;

    Console.WriteLine($"Found {config.artifacts.Count} packages in config.json");
    if (testMode)
    {
        Console.WriteLine($"Running in test mode - processing first {artifactsToProcess.Count} packages only");
    }
    Console.WriteLine("Fetching package information from NuGet.org...\n");

    var packages = new List<PackageInfo>();
    var packagesGate = new object(); // guards 'packages' and keeps progress lines consistent

    // Limits the number of requests in flight to avoid rate limiting. Disposed when Main
    // returns, which is after every task has released it (see Task.WhenAll below).
    using var semaphore = new SemaphoreSlim(testMode ? 2 : 3);

    var tasks = artifactsToProcess.Select(async artifact =>
    {
        await semaphore.WaitAsync();
        try
        {
            var packageInfo = await FetchPackageInfo(artifact, testMode);
            lock (packagesGate)
            {
                packages.Add(packageInfo);
                if (testMode)
                {
                    Console.WriteLine($"Processed {packages.Count}/{artifactsToProcess.Count}: {packageInfo.NugetId}");
                    Console.WriteLine($" Downloads: {packageInfo.Downloads}, Used By: {packageInfo.UsedBy}");
                }
                else if (packages.Count % 10 == 0)
                {
                    var rateLimited = packages.Count(p => p.Downloads == "Rate Limited");
                    var errors = packages.Count(p => p.Downloads != null && p.Downloads.Contains("Error"));
                    Console.WriteLine($"Processed {packages.Count}/{artifactsToProcess.Count} packages... ({packages.Count * 100.0 / artifactsToProcess.Count:F1}%) [Rate Limited: {rateLimited}, Errors: {errors}]");
                }
            }
            return packageInfo;
        }
        finally
        {
            semaphore.Release();
        }
    }).ToList(); // start every task exactly once

    await Task.WhenAll(tasks);

    // Generate markdown tables
    var outputFile = testMode ? "package-analysis-test.md" : "package-analysis.md";
    await GenerateMarkdownTables(packages, outputFile);

    Console.WriteLine($"\nCompleted! Generated {outputFile} with information for {packages.Count} packages.");
}
| 123 | + |
/// <summary>
/// Downloads the package page of one artifact and extracts its download count and
/// "used by" count from the page text.
/// </summary>
/// <remarks>
/// HTTP 429 responses and thrown exceptions are retried up to three times with exponential
/// back-off; for 429 a Retry-After header takes precedence over the computed delay.
/// Failures are not thrown to the caller: they are reported through the
/// <see cref="PackageInfo.Downloads"/> and <see cref="PackageInfo.UsedBy"/> text
/// ("Rate Limited", "HTTP {status}" or "Error: {message}").
/// </remarks>
/// <param name="artifact">Configuration entry supplying the package id and versions.</param>
/// <param name="testMode">Selects the shorter back-off base (1 s instead of 2 s), a 500 ms pause after success, and suppresses the initial "Fetching" line.</param>
/// <returns>The populated package information; never <c>null</c>.</returns>
static async Task<PackageInfo> FetchPackageInfo(ConfigArtifact artifact, bool testMode = false)
{
    var packageInfo = new PackageInfo
    {
        NugetId = artifact.nugetId,
        Version = artifact.version,
        NugetVersion = artifact.nugetVersion,
        PackageUrl = $"https://www.nuget.org/packages/{artifact.nugetId}"
    };

    const int maxRetries = 3;
    var baseDelay = TimeSpan.FromSeconds(testMode ? 1 : 2);

    // Longest wait Task.Delay is asked for; larger server-supplied values are clamped
    // so that they cannot make Task.Delay throw.
    var longestDelay = TimeSpan.FromMilliseconds(int.MaxValue);

    // Exponential back-off: baseDelay, 2x, 4x, ...
    TimeSpan BackoffDelay(int attempt) =>
        TimeSpan.FromSeconds(baseDelay.TotalSeconds * Math.Pow(2, attempt));

    for (int attempt = 0; attempt <= maxRetries; attempt++)
    {
        try
        {
            if (!testMode && attempt == 0)
            {
                Console.WriteLine($" Fetching: {packageInfo.PackageUrl}");
            }
            else if (attempt > 0)
            {
                Console.WriteLine($" Retry {attempt}/{maxRetries}: {packageInfo.NugetId}");
            }

            // Disposed on every way out of this try block (return, continue, exception).
            using var response = await httpClient.GetAsync(packageInfo.PackageUrl);

            if (response.StatusCode == System.Net.HttpStatusCode.TooManyRequests)
            {
                if (attempt >= maxRetries)
                {
                    packageInfo.Downloads = "Rate Limited";
                    packageInfo.UsedBy = "Rate Limited";
                    return packageInfo;
                }

                // Prefer the server's Retry-After value over the computed back-off.
                var delay = BackoffDelay(attempt);
                var retryAfter = response.Headers.RetryAfter;
                if (retryAfter?.Delta is TimeSpan delta)
                {
                    delay = delta;
                }
                else if (retryAfter?.Date is DateTimeOffset retryAt)
                {
                    delay = retryAt - DateTimeOffset.UtcNow;
                }

                // A Retry-After date in the past yields a negative span, which Task.Delay
                // rejects; retry immediately in that case.
                if (delay < TimeSpan.Zero)
                {
                    delay = TimeSpan.Zero;
                }
                else if (delay > longestDelay)
                {
                    delay = longestDelay;
                }

                Console.WriteLine($" Rate limited. Waiting {delay.TotalSeconds:F1}s before retry {attempt + 1}/{maxRetries}...");
                await Task.Delay(delay);
                continue;
            }

            if (!response.IsSuccessStatusCode)
            {
                // Other HTTP failures are reported as-is and not retried.
                packageInfo.Downloads = $"HTTP {response.StatusCode}";
                packageInfo.UsedBy = $"HTTP {response.StatusCode}";
                return packageInfo;
            }

            var html = await response.Content.ReadAsStringAsync();
            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            // Both figures are located in the page's plain text rather than through
            // element selectors.
            var allText = doc.DocumentNode.InnerText;

            // Download count: "Total 52.9M" style text.
            var downloadMatch = Regex.Match(allText, @"Total\s+([\d,.]+[KMB]?)", RegexOptions.IgnoreCase | RegexOptions.Multiline);
            packageInfo.Downloads = CleanStatValue(downloadMatch.Success ? downloadMatch.Groups[1].Value : "N/A");

            // Dependent package count: "NuGet packages (89)" style text.
            var usedByMatch = Regex.Match(allText, @"NuGet packages[^\d]*\((\d+)\)", RegexOptions.IgnoreCase);
            packageInfo.UsedBy = CleanStatValue(usedByMatch.Success ? usedByMatch.Groups[1].Value : "N/A");

            // Add a small delay to be respectful (longer after rate limiting)
            var normalDelay = testMode ? 500 : (attempt > 0 ? 1000 : 300);
            await Task.Delay(normalDelay);

            return packageInfo; // Success!
        }
        catch (Exception ex)
        {
            // Best effort: any failure (network error, timeout, ...) is retried and, once the
            // retries are used up, reported in the result instead of being thrown.
            if (attempt >= maxRetries)
            {
                packageInfo.Downloads = $"Error: {ex.Message}";
                packageInfo.UsedBy = $"Error: {ex.Message}";
                return packageInfo;
            }

            var delay = BackoffDelay(attempt);
            Console.WriteLine($" Error: {ex.Message}. Retrying in {delay.TotalSeconds:F1}s...");
            await Task.Delay(delay);
        }
    }

    // Every path through the final attempt returns above; kept to satisfy the compiler.
    return packageInfo;
}
| 251 | + |
/// <summary>
/// Normalises a statistic scraped from the package page.
/// </summary>
/// <param name="value">Raw text, for example "52.9M" or "1,234 downloads".</param>
/// <returns>
/// "N/A" for null, empty or whitespace-only input; otherwise the whitespace-collapsed text when it
/// already is a bare number (optionally with a K/M/B suffix), else the first such number
/// found in it, else the whitespace-collapsed text unchanged.
/// </returns>
static string CleanStatValue(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return "N/A";
    }

    // Collapse every run of whitespace (including newlines) to a single space.
    var collapsed = Regex.Replace(value, @"\s+", " ").Trim();

    // Already a formatted number such as "52.9M" or "1,234": nothing to extract.
    var isBareNumber = Regex.IsMatch(collapsed, @"^[\d,]+(\.\d+)?[KMB]?$", RegexOptions.IgnoreCase);
    if (isBareNumber)
    {
        return collapsed;
    }

    // Otherwise take the first number embedded in the text, e.g. "1,234 downloads".
    var embedded = Regex.Match(collapsed, @"([\d,]+(\.\d+)?[KMB]?)", RegexOptions.IgnoreCase);
    return embedded.Success ? embedded.Groups[1].Value : collapsed;
}
| 271 | + |
/// <summary>
/// Writes the markdown report: a header, one table of all packages sorted by download
/// count (descending, then by package id), and a summary statistics section.
/// </summary>
/// <param name="packages">Packages to report. Entries whose download text is not a number (status texts, <c>null</c>) sort last and are excluded from the totals.</param>
/// <param name="outputFile">Path of the file to write; an existing file is overwritten.</param>
static async Task GenerateMarkdownTables(List<PackageInfo> packages, string outputFile)
{
    // Keeps free-form text (error messages in particular) from breaking a table row:
    // pipes are escaped and line breaks become spaces.
    static string Cell(string? text) =>
        (text ?? string.Empty)
            .Replace("|", "\\|")
            .Replace("\r\n", " ")
            .Replace('\n', ' ')
            .Replace('\r', ' ');

    // True when the download text is an actual figure rather than one of the status
    // texts produced when fetching fails ("N/A", "Rate Limited", "HTTP ...", "Error: ...").
    static bool HasDownloadStats(string? downloads) =>
        !string.IsNullOrEmpty(downloads)
        && downloads != "N/A"
        && downloads != "Rate Limited"
        && !downloads.Contains("Error")
        && !downloads.StartsWith("HTTP ", StringComparison.Ordinal);

    // One timestamp for the whole report so that header and summary agree.
    var generatedAt = DateTime.Now;

    var markdown = new System.Text.StringBuilder();

    markdown.AppendLine("# NuGet Package Analysis");
    markdown.AppendLine($"Generated on: {generatedAt:yyyy-MM-dd HH:mm:ss}");
    markdown.AppendLine($"Total packages analyzed: {packages.Count}");
    markdown.AppendLine();

    // Create a single table sorted by downloads
    markdown.AppendLine("| Package ID | Version | NuGet Version | Downloads | Used By | Package URL |");
    markdown.AppendLine("|------------|---------|---------------|-----------|---------|-------------|");

    // Sort packages by download count (convert to numeric for proper sorting)
    var sortedPackages = packages
        .OrderByDescending(p => ConvertDownloadsToNumber(p.Downloads))
        .ThenBy(p => p.NugetId);

    foreach (var package in sortedPackages)
    {
        var packageLink = $"[{Cell(package.NugetId)}]({package.PackageUrl})";

        markdown.AppendLine($"| {packageLink} | {Cell(package.Version)} | {Cell(package.NugetVersion)} | {Cell(package.Downloads)} | {Cell(package.UsedBy)} | {package.PackageUrl} |");
    }

    markdown.AppendLine();

    // Add summary statistics
    markdown.AppendLine("## Summary Statistics");
    markdown.AppendLine();

    var validDownloads = packages
        .Select(p => ConvertDownloadsToNumber(p.Downloads))
        .Where(d => d > 0)
        .ToList();

    var totalDownloads = validDownloads.Sum();
    var packagesWithStats = packages.Count(p => HasDownloadStats(p.Downloads));

    markdown.AppendLine($"- **Total packages analyzed:** {packages.Count}");
    markdown.AppendLine($"- **Packages with download stats:** {packagesWithStats}");
    markdown.AppendLine($"- **Total downloads (estimated):** {FormatLargeNumber(totalDownloads)}");
    markdown.AppendLine($"- **Average downloads per package:** {(validDownloads.Any() ? FormatLargeNumber((long)validDownloads.Average()) : "0")}");
    markdown.AppendLine($"- **Analysis date:** {generatedAt:yyyy-MM-dd HH:mm:ss}");

    await File.WriteAllTextAsync(outputFile, markdown.ToString());
}
| 320 | + |
/// <summary>
/// Converts a download count such as "1,234", "52.9M" or "3K" to a number.
/// </summary>
/// <param name="downloads">Download text; may be <c>null</c> or a status text.</param>
/// <returns>
/// The numeric value with the K/M/B suffix applied (fractions are truncated), or 0 when the
/// text is <c>null</c>, empty, "N/A", contains "Error", or is not a number.
/// </returns>
static long ConvertDownloadsToNumber(string? downloads)
{
    if (string.IsNullOrEmpty(downloads) || downloads == "N/A" || downloads.Contains("Error"))
    {
        return 0;
    }

    // Drop thousands separators; what remains is "<number>[K|M|B]".
    var text = downloads.Replace(",", "").Trim();
    if (text.Length == 0)
    {
        return 0;
    }

    // Handle K, M, B suffixes (either case).
    long multiplier = char.ToUpperInvariant(text[^1]) switch
    {
        'K' => 1_000L,
        'M' => 1_000_000L,
        'B' => 1_000_000_000L,
        _ => 1L,
    };
    if (multiplier != 1L)
    {
        text = text[..^1];
    }

    // The page uses '.' as the decimal separator, so parse with the invariant culture
    // rather than the culture of the machine running the tool. decimal keeps values such
    // as 8.2 exact; with double, 8.2 * 1,000,000 truncates to 8,199,999.
    if (!decimal.TryParse(
            text,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture,
            out var number))
    {
        return 0;
    }

    // Saturate instead of overflowing for values that do not fit into a long.
    var limit = (decimal)long.MaxValue / multiplier;
    if (number >= limit)
    {
        return long.MaxValue;
    }
    if (number <= -limit)
    {
        return long.MinValue;
    }

    return (long)(number * multiplier);
}
| 354 | + |
/// <summary>
/// Formats a count compactly with one decimal and a K, M or B suffix, e.g. 1,500,000 as "1.5M".
/// Values below 1,000 are written in full.
/// </summary>
/// <param name="number">Count to format.</param>
/// <returns>The formatted text, always using '.' as the decimal separator.</returns>
static string FormatLargeNumber(long number)
{
    // The report is a generated document, so its number format must not depend on the
    // culture of the machine producing it.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // The M and B thresholds sit where rounding to one decimal would otherwise produce
    // "1000.0" of the next smaller unit (e.g. 999,999 is "1.0M", not "1000.0K").
    if (number >= 999_950_000)
    {
        return (number / 1_000_000_000.0).ToString("F1", invariant) + "B";
    }
    if (number >= 999_950)
    {
        return (number / 1_000_000.0).ToString("F1", invariant) + "M";
    }
    if (number >= 1_000)
    {
        return (number / 1_000.0).ToString("F1", invariant) + "K";
    }

    return number.ToString("N0", invariant);
}
| 366 | + |
/// <summary>
/// Returns the first three dot-separated segments of a package id,
/// e.g. "Xamarin.AndroidX.Activity" for "Xamarin.AndroidX.Activity.Ktx".
/// </summary>
/// <param name="packageId">Package id to shorten.</param>
/// <returns>The three-segment prefix, or <paramref name="packageId"/> unchanged when it has fewer than three segments.</returns>
static string GetPackagePrefix(string packageId)
{
    var segments = packageId.Split('.');

    // Ids with fewer than three segments have no shorter prefix to return.
    return segments.Length < 3
        ? packageId
        : string.Join(".", segments[..3]);
}
| 376 | +} |
0 commit comments