Skip to content

Commit 4d71bde

Browse files
committed
add repository anonymizer project
Adds a new `Anonymize` project to the solution for anonymizing repositories. Implements logic to transform commit metadata (messages, authors, tags, branches) and remove sensitive information while preserving repository history.
1 parent b874c3f commit 4d71bde

File tree

3 files changed

+321
-1
lines changed

3 files changed

+321
-1
lines changed

src/Anonymize/Anonymize.csproj

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<!-- Console executable targeting .NET 9 with implicit global usings and nullable reference types enabled. -->
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net9.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
</PropertyGroup>
9+
10+
<ItemGroup>
11+
<!-- NOTE(review): no Version attribute here; presumably the version is supplied centrally (e.g. Directory.Packages.props) - confirm. -->
<PackageReference Include="LibGit2Sharp" />
12+
</ItemGroup>
13+
14+
</Project>

src/Anonymize/Program.cs

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
using System.Text.RegularExpressions;
2+
using LibGit2Sharp;
3+
4+
// Entry point: expects the source repository path and the destination path as the
// first two command-line arguments.
if (args.Length < 2)
{
    // Usage problems go to stderr with a non-zero exit code so that calling scripts
    // can tell the tool did not run.
    Console.Error.WriteLine("Usage: GitRepoAnonymizer <source-repo-path> <destination-repo-path>");
    Environment.ExitCode = 1;
    return;
}

var sourcePath = args[0];
var destPath = args[1];

try
{
    AnonymizeRepository(sourcePath, destPath);
    Console.WriteLine("Repository anonymization completed successfully!");
}
catch (Exception ex)
{
    // Deliberately broad: this is the last-chance handler of a command-line tool.
    // The failure is reported on stderr and reflected in the exit code instead of
    // letting the process finish with a misleading exit code of 0.
    Console.Error.WriteLine($"Error: {ex.Message}");
    Console.Error.WriteLine(ex.StackTrace);
    Environment.ExitCode = 1;
}

return;
25+
26+
// Rebuilds the history of the repository at sourcePath inside a brand-new repository
// created at destPath. The commit graph, commit/tag dates, local branches and tags are
// kept, while identifying data is dropped:
//   * every rewritten commit points at an empty tree (no file content is copied),
//   * author, committer and tagger identities are replaced by one anonymous identity,
//   * commit/tag messages and branch/tag names go through the Transform* helpers.
//
// Throws InvalidOperationException when destPath already exists. Exceptions raised by
// LibGit2Sharp (for example when sourcePath is not a repository) propagate to the caller.
static void AnonymizeRepository(string sourcePath, string destPath)
{
    // Fail loudly instead of printing and returning: a silent return made the caller
    // report success although nothing had been anonymized.
    if (Directory.Exists(destPath))
    {
        throw new InvalidOperationException("Destination directory already exists. Please remove it first.");
    }

    // Open the source before creating anything on disk, so an invalid source path does
    // not leave behind a half-initialised destination that blocks the next attempt.
    using var sourceRepo = new Repository(sourcePath);

    Repository.Init(destPath);
    using var destRepo = new Repository(destPath);

    // Identity stamped on every rewritten commit and annotated tag. example.com is a
    // domain reserved for documentation, so the address cannot belong to a real person.
    const string anonymousName = "Anonymous";
    const string anonymousEmail = "anonymous@example.com";

    // Maps each source commit SHA to the SHA of its rewritten counterpart.
    var commitMap = new Dictionary<string, string>();

    // All commits reachable from any ref (branches, tags, remotes). Reverse topological
    // order is requested so that parents are rewritten before their children.
    var commits = sourceRepo.Commits.QueryBy(new CommitFilter
    {
        IncludeReachableFrom = sourceRepo.Refs,
        SortBy = CommitSortStrategies.Topological | CommitSortStrategies.Reverse
    }).ToList();

    Console.WriteLine($"Processing {commits.Count} commits...");

    // Every rewritten commit shares the same empty tree; it is created on first use
    // rather than once per commit.
    Tree? emptyTree = null;

    foreach (var commit in commits)
    {
        var transformedMessage = TransformCommitMessage(commit.Message);

        Console.WriteLine($"Processing commit: {commit.Sha[..7]} - {commit.MessageShort}");
        if (commit.Message != transformedMessage)
        {
            Console.WriteLine(" Message transformed");
        }

        // Translate the parents into their rewritten counterparts; parents that were
        // not rewritten (absent from the map) are dropped.
        var newParents = commit.Parents
            .Select(parent => commitMap.TryGetValue(parent.Sha, out var mappedSha)
                ? destRepo.Lookup<Commit>(mappedSha)
                : null)
            .OfType<Commit>()
            .ToList();

        emptyTree ??= destRepo.ObjectDatabase.CreateTree(new TreeDefinition());

        // Only the identity is anonymized; the original author/committer dates are kept.
        var anonAuthor = new Signature(anonymousName, anonymousEmail, commit.Author.When);
        var anonCommitter = new Signature(anonymousName, anonymousEmail, commit.Committer.When);
        var newCommit = destRepo.ObjectDatabase.CreateCommit(
            anonAuthor,
            anonCommitter,
            transformedMessage,
            emptyTree,
            newParents,
            false);

        commitMap[commit.Sha] = newCommit.Sha;
    }

    // Recreate local branches (remote-tracking branches are skipped for simplicity).
    Console.WriteLine("\nRecreating branches...");
    foreach (var branch in sourceRepo.Branches)
    {
        if (branch.IsRemote)
        {
            continue;
        }

        if (branch.Tip is null || !commitMap.TryGetValue(branch.Tip.Sha, out var newTipSha))
        {
            continue;
        }

        var transformedBranchName = TransformBranchName(branch.FriendlyName);
        var canonicalName = $"refs/heads/{transformedBranchName}";

        try
        {
            destRepo.Refs.Add(canonicalName, newTipSha);

            // HEAD is repointed only for branches literally named master or main.
            if (branch.FriendlyName is "master" or "main")
            {
                destRepo.Refs.UpdateTarget(destRepo.Refs.Head, canonicalName);
            }

            Console.WriteLine(branch.FriendlyName != transformedBranchName
                ? $" Created branch: {branch.FriendlyName} -> {transformedBranchName}"
                : $" Created branch: {transformedBranchName}");
        }
        catch (Exception ex)
        {
            // Best effort: e.g. two source branches may collapse to the same anonymized
            // name. Report it and carry on with the remaining branches.
            Console.WriteLine($" Warning: Could not create branch {transformedBranchName}: {ex.Message}");
        }
    }

    // Recreate tags that (directly, or through one annotation) point at a rewritten commit.
    Console.WriteLine("\nRecreating tags...");
    foreach (var tag in sourceRepo.Tags)
    {
        var targetCommit = tag.Target as Commit ?? (tag.Target as TagAnnotation)?.Target as Commit;

        if (targetCommit is null || !commitMap.TryGetValue(targetCommit.Sha, out var newTargetSha))
        {
            continue;
        }

        var transformedTagName = TransformTagName(tag.FriendlyName);

        try
        {
            if (tag.IsAnnotated)
            {
                var transformedTagMessage = TransformCommitMessage(tag.Annotation.Message);

                // NOTE(review): tag objects are not required to carry a tagger; assuming
                // LibGit2Sharp surfaces that as a null Tagger, fall back to the target
                // commit's committer date instead of losing the tag - confirm.
                var taggedAt = tag.Annotation.Tagger?.When ?? targetCommit.Committer.When;
                var anonTagger = new Signature(anonymousName, anonymousEmail, taggedAt);
                destRepo.ApplyTag(transformedTagName, newTargetSha, anonTagger, transformedTagMessage);
            }
            else
            {
                destRepo.ApplyTag(transformedTagName, newTargetSha);
            }

            Console.WriteLine(tag.FriendlyName != transformedTagName
                ? $" Created tag: {tag.FriendlyName} -> {transformedTagName}"
                : $" Created tag: {transformedTagName}");
        }
        catch (Exception ex)
        {
            // Best effort, same reasoning as for branches.
            Console.WriteLine($" Warning: Could not create tag {transformedTagName}: {ex.Message}");
        }
    }

    Console.WriteLine($"\nTotal commits created: {commitMap.Count}");
}
168+
169+
// Scrubs identifying details from a commit or tag message by substituting fixed
// placeholders for e-mail addresses, @mentions, URLs, IP addresses, long alphanumeric
// tokens and ticket references. A null/blank message is returned as-is.
// File paths are intentionally left untouched.
static string TransformCommitMessage(string originalMessage)
{
    if (string.IsNullOrWhiteSpace(originalMessage))
    {
        return originalMessage;
    }

    // Applied strictly in this order; each step sees the output of the previous one.
    (Regex Pattern, string Placeholder)[] scrubbers =
    [
        (RegexPatterns.EmailPattern, "[EMAIL]"),
        (RegexPatterns.UsernamePattern, "@[USER]"),
        (RegexPatterns.UrlPattern, "[URL]"),
        (RegexPatterns.IpAddressPattern, "[IP]"),
        (RegexPatterns.TokenPattern, "[TOKEN]"),
        (RegexPatterns.TicketPattern, "[TICKET]"),
    ];

    return scrubbers.Aggregate(
        originalMessage,
        (text, scrubber) => scrubber.Pattern.Replace(text, scrubber.Placeholder));
}
199+
200+
// Anonymizes a branch name.
//   * Null/blank names and well-known branch names (master, main, develop, development,
//     staging, production - compared case-insensitively) are returned unchanged.
//   * Every path segment enclosed by two slashes that matches BranchUsernamePattern is
//     replaced by "user" (feature/john/my-feature -> feature/user/my-feature).
//   * Ticket references become "TICKET" and e-mail-like fragments become "user".
static string TransformBranchName(string originalName)
{
    if (string.IsNullOrWhiteSpace(originalName))
    {
        return originalName;
    }

    // Ordinal, culture-independent comparison: ToLower() depends on the current culture
    // (e.g. "MAIN" does not lower-case to "main" under a Turkish culture).
    string[] commonBranches = ["master", "main", "develop", "development", "staging", "production"];
    if (commonBranches.Contains(originalName, StringComparer.OrdinalIgnoreCase))
    {
        return originalName;
    }

    var transformed = originalName;

    // A plain Regex.Replace consumes the closing slash of each match, so in a/b/c/d the
    // segment "c" was skipped and leaked. Matching resumes from the closing slash of the
    // replacement so that it can also open the next segment.
    const string userSegment = "/user/";
    var match = RegexPatterns.BranchUsernamePattern.Match(transformed);
    while (match.Success)
    {
        transformed = transformed[..match.Index] + userSegment + transformed[(match.Index + match.Length)..];
        match = RegexPatterns.BranchUsernamePattern.Match(transformed, match.Index + userSegment.Length - 1);
    }

    // Remove JIRA/ticket references
    transformed = RegexPatterns.TicketPatternIgnoreCase.Replace(transformed, "TICKET");

    // Replace email-like patterns in branch names
    transformed = RegexPatterns.EmailLikePattern.Replace(transformed, "user");

    return transformed;
}
230+
231+
// Anonymizes a tag name.
//   * Null/blank names and version tags (v1.0.0, 1.2.3, v2.0.0-beta) are returned unchanged.
//   * Ticket references become "TICKET".
//   * Every segment enclosed by two separators (-, _ or /) that matches
//     TagUsernamePattern is replaced by "-user-" (both separators become hyphens).
//   * E-mail-like fragments become "user".
static string TransformTagName(string originalName)
{
    if (string.IsNullOrWhiteSpace(originalName))
    {
        return originalName;
    }

    // Keep version tags unchanged
    if (RegexPatterns.VersionTagPattern.IsMatch(originalName))
    {
        return originalName;
    }

    // Remove JIRA/ticket references
    var transformed = RegexPatterns.TicketPatternIgnoreCase.Replace(originalName, "TICKET");

    // A plain Regex.Replace consumes the closing separator of each match, so in a-b-c-d
    // the segment "c" was skipped and leaked. Matching resumes from the closing hyphen
    // of the replacement so that it can also open the next segment.
    const string userSegment = "-user-";
    var match = RegexPatterns.TagUsernamePattern.Match(transformed);
    while (match.Success)
    {
        transformed = transformed[..match.Index] + userSegment + transformed[(match.Index + match.Length)..];
        match = RegexPatterns.TagUsernamePattern.Match(transformed, match.Index + userSegment.Length - 1);
    }

    // Replace email-like patterns
    transformed = RegexPatterns.EmailLikePattern.Replace(transformed, "user");

    return transformed;
}
259+
260+
/// <summary>
/// Source-generated regular expressions used to scrub commit messages, branch names
/// and tag names.
/// </summary>
internal static partial class RegexPatterns
{
    /// <summary>
    /// E-mail addresses. The top-level domain is letters only; a <c>|</c> inside a
    /// character class is a literal pipe, so the former <c>[A-Z|a-z]</c> let the match
    /// run across pipe characters.
    /// </summary>
    [GeneratedRegex(@"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")]
    public static partial Regex EmailPattern { get; }

    /// <summary>Mentions such as <c>@username</c>.</summary>
    [GeneratedRegex(@"@\w+")]
    public static partial Regex UsernamePattern { get; }

    /// <summary>http/https URLs, up to the next whitespace character.</summary>
    [GeneratedRegex(@"https?://[^\s]+")]
    public static partial Regex UrlPattern { get; }

    /// <summary>Dotted-quad sequences of 1-3 digits (octet ranges are not validated).</summary>
    [GeneratedRegex(@"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")]
    public static partial Regex IpAddressPattern { get; }

    /// <summary>Possible API keys or tokens: runs of 32 or more alphanumeric characters.</summary>
    [GeneratedRegex(@"\b[A-Za-z0-9]{32,}\b")]
    public static partial Regex TokenPattern { get; }

    /// <summary>Upper-case JIRA-style ticket references (e.g., PROJ-123, ABC-456).</summary>
    [GeneratedRegex(@"\b[A-Z]{2,}-\d+\b")]
    public static partial Regex TicketPattern { get; }

    /// <summary>Ticket references matched case-insensitively (used for branch and tag names).</summary>
    [GeneratedRegex(@"\b[A-Z]{2,}-\d+\b", RegexOptions.IgnoreCase)]
    public static partial Regex TicketPatternIgnoreCase { get; }

    /// <summary>A path segment enclosed by slashes in a branch name (e.g., feature/john/my-feature).</summary>
    [GeneratedRegex(@"/([\w.-]+)/")]
    public static partial Regex BranchUsernamePattern { get; }

    /// <summary>E-mail-like fragments in branch/tag names (no top-level domain required).</summary>
    [GeneratedRegex("[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+")]
    public static partial Regex EmailLikePattern { get; }

    /// <summary>Version tags (e.g., v1.0.0, 1.2.3, v2.0.0-beta).</summary>
    [GeneratedRegex(@"^v?\d+\.\d+(\.\d+)?(-[\w.]+)?$")]
    public static partial Regex VersionTagPattern { get; }

    /// <summary>A word segment enclosed by <c>-</c>, <c>_</c> or <c>/</c> in a tag name (e.g., -username-).</summary>
    [GeneratedRegex(@"[-_/]([\w]+)[-_/]")]
    public static partial Regex TagUsernamePattern { get; }
}

src/GitVersion.slnx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
<File Path="Directory.Build.props" />
2929
<File Path="Directory.Packages.props" />
3030
</Folder>
31+
<Project Path="Anonymize/Anonymize.csproj" />
3132
<Project Path="GitVersion.App.Tests/GitVersion.App.Tests.csproj" />
3233
<Project Path="GitVersion.App/GitVersion.App.csproj" />
3334
<Project Path="GitVersion.Core.Tests/GitVersion.Core.Tests.csproj" />
@@ -36,4 +37,4 @@
3637
<Project Path="GitVersion.MsBuild/GitVersion.MsBuild.csproj" />
3738
<Project Path="GitVersion.Schema/GitVersion.Schema.csproj" />
3839
<Project Path="GitVersion.Testing/GitVersion.Testing.csproj" />
39-
</Solution>
40+
</Solution>

0 commit comments

Comments
 (0)