|
| 1 | +using System.Text.RegularExpressions; |
| 2 | +using LibGit2Sharp; |
| 3 | + |
// Entry point: GitRepoAnonymizer <source-repo-path> <destination-repo-path>
// Exit code 0 means the anonymized repository was written; 1 means bad usage or a failure,
// so that shells and CI scripts can detect that nothing usable was produced.
if (args.Length < 2)
{
    Console.Error.WriteLine("Usage: GitRepoAnonymizer <source-repo-path> <destination-repo-path>");
    return 1;
}

var sourcePath = args[0];
var destPath = args[1];

try
{
    AnonymizeRepository(sourcePath, destPath);
    Console.WriteLine("Repository anonymization completed successfully!");
}
catch (Exception ex)
{
    // Last-chance handler for the CLI: report the failure on stderr and signal it via the exit code.
    Console.Error.WriteLine($"Error: {ex.Message}");
    Console.Error.WriteLine(ex.StackTrace);
    return 1;
}

return 0;
| 25 | + |
// Creates a new repository at destPath that reproduces the commit graph, local branches and tags
// of the repository at sourcePath, with:
//   - every author/committer/tagger replaced by a fixed anonymous identity (original timestamps kept),
//   - commit and tag messages passed through TransformCommitMessage,
//   - branch and tag names passed through TransformBranchName / TransformTagName,
//   - every commit pointing at the same empty tree (no file content is copied).
// Throws IOException when destPath already exists; exceptions raised while opening or
// initialising either repository propagate to the caller.
static void AnonymizeRepository(string sourcePath, string destPath)
{
    // Fixed identity written into every signature of the destination repository.
    // example.com is a domain reserved for documentation, so the address can never belong to a real person.
    const string AnonymousName = "Anonymous";
    const string AnonymousEmail = "anonymous@example.com";

    // Fail loudly instead of returning: a silent return makes the caller report success
    // even though nothing was written.
    if (Directory.Exists(destPath))
    {
        throw new IOException("Destination directory already exists. Please remove it first.");
    }

    // Open the source first so that an invalid source path does not leave behind a freshly
    // initialised (and now blocking) destination directory.
    using var sourceRepo = new Repository(sourcePath);

    Repository.Init(destPath);
    using var destRepo = new Repository(destPath);

    // Maps each source commit SHA to the commit created for it in the destination.
    var commitMap = new Dictionary<string, Commit>();

    // All commits reachable from any ref, sorted topologically and reversed so that
    // parents are rewritten before their children.
    var commits = sourceRepo.Commits.QueryBy(new CommitFilter
    {
        IncludeReachableFrom = sourceRepo.Refs,
        SortBy = CommitSortStrategies.Topological | CommitSortStrategies.Reverse
    }).ToList();

    Console.WriteLine($"Processing {commits.Count} commits...");

    // Every rewritten commit shares one empty tree (no blobs), so build it once.
    var emptyTree = destRepo.ObjectDatabase.CreateTree(new TreeDefinition());

    foreach (var commit in commits)
    {
        var transformedMessage = TransformCommitMessage(commit.Message);

        Console.WriteLine($"Processing commit: {commit.Sha[..7]} - {commit.MessageShort}");
        if (commit.Message != transformedMessage)
        {
            Console.WriteLine(" Message transformed");
        }

        // Translate the parents into their rewritten counterparts; a parent that was never
        // rewritten is dropped rather than failing the whole run.
        var newParents = new List<Commit>();
        foreach (var parent in commit.Parents)
        {
            if (commitMap.TryGetValue(parent.Sha, out var rewrittenParent))
            {
                newParents.Add(rewrittenParent);
            }
        }

        // Keep the original author/committer dates, replace the identities.
        var anonAuthor = new Signature(AnonymousName, AnonymousEmail, commit.Author.When);
        var anonCommitter = new Signature(AnonymousName, AnonymousEmail, commit.Committer.When);

        // prettifyMessage is false so the transformed message is stored as-is.
        commitMap[commit.Sha] = destRepo.ObjectDatabase.CreateCommit(
            anonAuthor,
            anonCommitter,
            transformedMessage,
            emptyTree,
            newParents,
            false);
    }

    // Recreate local branches (remote-tracking branches are skipped for simplicity).
    Console.WriteLine("\nRecreating branches...");
    foreach (var branch in sourceRepo.Branches)
    {
        if (branch.IsRemote
            || branch.Tip is null
            || !commitMap.TryGetValue(branch.Tip.Sha, out var newTip))
        {
            continue;
        }

        var transformedBranchName = TransformBranchName(branch.FriendlyName);

        try
        {
            destRepo.Refs.Add($"refs/heads/{transformedBranchName}", newTip.Sha);

            // master/main becomes the destination's HEAD.
            if (branch.FriendlyName is "master" or "main")
            {
                destRepo.Refs.UpdateTarget(destRepo.Refs.Head, $"refs/heads/{transformedBranchName}");
            }

            Console.WriteLine(branch.FriendlyName != transformedBranchName
                ? $" Created branch: {branch.FriendlyName} → {transformedBranchName}"
                : $" Created branch: {transformedBranchName}");
        }
        catch (Exception ex)
        {
            // Best effort: e.g. two source branches may collapse to the same anonymized name.
            Console.WriteLine($" Warning: Could not create branch {transformedBranchName}: {ex.Message}");
        }
    }

    // Recreate tags that point (directly, or through one annotation) at a rewritten commit.
    Console.WriteLine("\nRecreating tags...");
    foreach (var tag in sourceRepo.Tags)
    {
        var targetCommit = tag.Target as Commit ?? (tag.Target as TagAnnotation)?.Target as Commit;
        if (targetCommit is null || !commitMap.TryGetValue(targetCommit.Sha, out var newTarget))
        {
            continue;
        }

        var transformedTagName = TransformTagName(tag.FriendlyName);

        try
        {
            if (tag.IsAnnotated)
            {
                var transformedTagMessage = TransformCommitMessage(tag.Annotation.Message);

                // NOTE(review): assumes an annotated tag may lack a tagger; in that case fall back
                // to the target commit's committer date instead of failing the tag.
                var taggedAt = tag.Annotation.Tagger?.When ?? targetCommit.Committer.When;
                var anonTagger = new Signature(AnonymousName, AnonymousEmail, taggedAt);

                destRepo.ApplyTag(transformedTagName, newTarget.Sha, anonTagger, transformedTagMessage);
            }
            else
            {
                destRepo.ApplyTag(transformedTagName, newTarget.Sha);
            }

            Console.WriteLine(tag.FriendlyName != transformedTagName
                ? $" Created tag: {tag.FriendlyName} → {transformedTagName}"
                : $" Created tag: {transformedTagName}");
        }
        catch (Exception ex)
        {
            // Best effort: a single problematic tag must not abort the run.
            Console.WriteLine($" Warning: Could not create tag {transformedTagName}: {ex.Message}");
        }
    }

    Console.WriteLine($"\nTotal commits created: {commitMap.Count}");
}
| 168 | + |
// Replaces potentially identifying fragments of a commit (or tag) message with fixed placeholders.
// A null, empty or whitespace-only message is returned exactly as received.
static string TransformCommitMessage(string originalMessage)
{
    if (string.IsNullOrWhiteSpace(originalMessage))
    {
        return originalMessage;
    }

    // Scrubbing rules, applied strictly in this order; each rule sees the previous rule's output.
    // File paths are intentionally left alone (a rule such as [/\\][\w/\\.-]+ -> [PATH] could be
    // appended here if that is ever needed).
    (Regex Pattern, string Placeholder)[] rules =
    [
        (RegexPatterns.EmailPattern, "[EMAIL]"),        // e-mail addresses
        (RegexPatterns.UsernamePattern, "@[USER]"),     // @mentions
        (RegexPatterns.UrlPattern, "[URL]"),            // http/https URLs
        (RegexPatterns.IpAddressPattern, "[IP]"),       // dotted-quad IP addresses
        (RegexPatterns.TokenPattern, "[TOKEN]"),        // long alphanumeric runs (API keys, tokens)
        (RegexPatterns.TicketPattern, "[TICKET]"),      // ticket references such as PROJ-123
    ];

    var scrubbed = originalMessage;
    foreach (var (pattern, placeholder) in rules)
    {
        scrubbed = pattern.Replace(scrubbed, placeholder);
    }

    return scrubbed;
}
| 199 | + |
// Anonymizes a branch name.
// - A null, empty or whitespace-only name is returned exactly as received.
// - Well-known branch names (master, main, develop, ...) are returned unchanged, in any casing.
// - Otherwise path segments enclosed in slashes, ticket references and e-mail-like fragments
//   are replaced with neutral placeholders.
static string TransformBranchName(string originalName)
{
    if (string.IsNullOrWhiteSpace(originalName))
    {
        return originalName;
    }

    // Compared with an ordinal, culture-independent comparer: ToLower() depends on the current
    // culture (e.g. "MAIN" lower-cases to "maın" under Turkish rules) and would miss these names.
    string[] commonBranches = ["master", "main", "develop", "development", "staging", "production"];
    if (commonBranches.Contains(originalName, StringComparer.OrdinalIgnoreCase))
    {
        return originalName;
    }

    // Usernames embedded as a path segment (e.g. feature/john/my-feature).
    var transformed = RegexPatterns.BranchUsernamePattern.Replace(originalName, "/user/");

    // JIRA/ticket references.
    transformed = RegexPatterns.TicketPatternIgnoreCase.Replace(transformed, "TICKET");

    // E-mail-like fragments.
    transformed = RegexPatterns.EmailLikePattern.Replace(transformed, "user");

    return transformed;
}
| 230 | + |
// Anonymizes a tag name.
// Null/blank names and plain version tags (e.g. v1.0.0, 1.2.3, v2.0.0-beta) are handed back verbatim;
// any other name has ticket references, separator-delimited words and e-mail-like fragments
// replaced with neutral placeholders, in that order.
static string TransformTagName(string originalName)
{
    var keepAsIs = string.IsNullOrWhiteSpace(originalName)
        || RegexPatterns.VersionTagPattern.IsMatch(originalName);
    if (keepAsIs)
    {
        return originalName;
    }

    var withoutTickets = RegexPatterns.TicketPatternIgnoreCase.Replace(originalName, "TICKET");
    var withoutUsernames = RegexPatterns.TagUsernamePattern.Replace(withoutTickets, "-user-");
    var withoutEmails = RegexPatterns.EmailLikePattern.Replace(withoutUsernames, "user");

    return withoutEmails;
}
| 259 | + |
/// <summary>
/// Source-generated regular expressions used to scrub commit messages, branch names and tag names.
/// </summary>
internal static partial class RegexPatterns
{
    /// <summary>E-mail addresses (local part, <c>@</c>, domain, and a TLD of at least two letters).</summary>
    // The TLD class is [A-Za-z]; a '|' inside a character class is a literal pipe, not an alternation.
    [GeneratedRegex(@"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")]
    public static partial Regex EmailPattern { get; }

    /// <summary>Mentions of the form <c>@username</c>.</summary>
    [GeneratedRegex(@"@\w+")]
    public static partial Regex UsernamePattern { get; }

    /// <summary>http/https URLs, up to the next whitespace character.</summary>
    [GeneratedRegex(@"https?://[^\s]+")]
    public static partial Regex UrlPattern { get; }

    /// <summary>Dotted-quad sequences of 1-3 digit groups (octet ranges are not validated).</summary>
    [GeneratedRegex(@"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")]
    public static partial Regex IpAddressPattern { get; }

    /// <summary>Alphanumeric runs of 32 or more characters (API keys, tokens).</summary>
    [GeneratedRegex(@"\b[A-Za-z0-9]{32,}\b")]
    public static partial Regex TokenPattern { get; }

    /// <summary>Upper-case ticket references such as <c>PROJ-123</c> or <c>ABC-456</c>.</summary>
    [GeneratedRegex(@"\b[A-Z]{2,}-\d+\b")]
    public static partial Regex TicketPattern { get; }

    /// <summary>Ticket references matched case-insensitively (used for branch and tag names).</summary>
    [GeneratedRegex(@"\b[A-Z]{2,}-\d+\b", RegexOptions.IgnoreCase)]
    public static partial Regex TicketPatternIgnoreCase { get; }

    /// <summary>A path segment enclosed in slashes, e.g. the <c>/john/</c> in <c>feature/john/my-feature</c>.</summary>
    [GeneratedRegex(@"/([\w.-]+)/")]
    public static partial Regex BranchUsernamePattern { get; }

    /// <summary>E-mail-like fragments in branch/tag names (no TLD required).</summary>
    [GeneratedRegex("[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+")]
    public static partial Regex EmailLikePattern { get; }

    /// <summary>Version tags such as <c>v1.0.0</c>, <c>1.2.3</c> or <c>v2.0.0-beta</c>.</summary>
    [GeneratedRegex(@"^v?\d+\.\d+(\.\d+)?(-[\w.]+)?$")]
    public static partial Regex VersionTagPattern { get; }

    /// <summary>A word enclosed by <c>-</c>, <c>_</c> or <c>/</c> separators in a tag name.</summary>
    [GeneratedRegex(@"[-_/]([\w]+)[-_/]")]
    public static partial Regex TagUsernamePattern { get; }
}
0 commit comments