Skip to content

Commit b9ce896

Browse files
konardclaude
andcommitted
Implement code duplication detection and automated refactoring bot
Added comprehensive code duplication detection capabilities: - CodeDuplicationAnalysisService: Analyzes repositories for duplicate code fragments * Supports multiple programming languages (.cs, .js, .ts, .py, .java, etc.) * Computes normalized hashes for similarity detection * Groups duplicated code fragments and suggests method names - CodeDuplicationDetectionTrigger: Responds to issues requesting analysis * Triggered by issues containing "find repeated code" * Creates pull requests for significant duplications * Posts analysis results and asks for reviewer feedback - CodeDuplicationBranchMonitorTrigger: Monitors refactoring pull requests * Detects default branch updates * Notifies about potential conflicts * Tracks pull request status - FileStorageHelperService: Simple file storage for bot state * Stores duplication metadata * Tracks branch update timestamps * Persists pull request information 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent 28b9565 commit b9ce896

File tree

6 files changed

+1103
-2
lines changed

6 files changed

+1103
-2
lines changed

csharp/Platform.Bot/Platform.Bot.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
<PackageReference Include="Platform.Data.Doublets.Sequences" Version="0.1.1" />
1414
<PackageReference Include="System.CommandLine" Version="2.0.0-beta4.22272.1" />
1515
<PackageReference Include="System.CommandLine.Parser" Version="0.1.1" />
16+
<PackageReference Include="System.Text.Json" Version="8.0.4" />
1617
</ItemGroup>
1718

1819
<ItemGroup>

csharp/Platform.Bot/Program.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
using Platform.Bot.Trackers;
1616
using Platform.Bot.Triggers;
1717
using Platform.Bot.Triggers.Decorators;
18+
using Platform.Bot.Services;
1819

1920
namespace Platform.Bot
2021
{
@@ -95,8 +96,8 @@ private static async Task<int> Main(string[] args)
9596
var dbContext = new FileStorage(databaseFilePath?.FullName ?? new TemporaryFile().Filename);
9697
Console.WriteLine($"Bot has been started. {Environment.NewLine}Press CTRL+C to close");
9798
var githubStorage = new GitHubStorage(githubUserName, githubApiToken, githubApplicationName);
98-
var issueTracker = new IssueTracker(githubStorage, new HelloWorldTrigger(githubStorage, dbContext, fileSetName), new OrganizationLastMonthActivityTrigger(githubStorage), new LastCommitActivityTrigger(githubStorage), new AdminAuthorIssueTriggerDecorator(new ProtectDefaultBranchTrigger(githubStorage), githubStorage), new AdminAuthorIssueTriggerDecorator(new ChangeOrganizationRepositoriesDefaultBranchTrigger(githubStorage, dbContext), githubStorage), new AdminAuthorIssueTriggerDecorator(new ChangeOrganizationPullRequestsBaseBranchTrigger(githubStorage, dbContext), githubStorage));
99-
var pullRequenstTracker = new PullRequestTracker(githubStorage, new MergeDependabotBumpsTrigger(githubStorage));
99+
var issueTracker = new IssueTracker(githubStorage, new HelloWorldTrigger(githubStorage, dbContext, fileSetName), new OrganizationLastMonthActivityTrigger(githubStorage), new LastCommitActivityTrigger(githubStorage), new AdminAuthorIssueTriggerDecorator(new ProtectDefaultBranchTrigger(githubStorage), githubStorage), new AdminAuthorIssueTriggerDecorator(new ChangeOrganizationRepositoriesDefaultBranchTrigger(githubStorage, dbContext), githubStorage), new AdminAuthorIssueTriggerDecorator(new ChangeOrganizationPullRequestsBaseBranchTrigger(githubStorage, dbContext), githubStorage), new CodeDuplicationDetectionTrigger(githubStorage, dbContext));
100+
var pullRequenstTracker = new PullRequestTracker(githubStorage, new MergeDependabotBumpsTrigger(githubStorage), new CodeDuplicationBranchMonitorTrigger(githubStorage, dbContext));
100101
var timestampTracker = new DateTimeTracker(githubStorage, new CreateAndSaveOrganizationRepositoriesMigrationTrigger(githubStorage, dbContext, Path.Combine(Directory.GetCurrentDirectory(), "/github-migrations")));
101102
var cancellation = new CancellationTokenSource();
102103
while (true)
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Text;
6+
using System.Text.RegularExpressions;
7+
using System.Threading.Tasks;
8+
using Octokit;
9+
using Storage.Remote.GitHub;
10+
11+
namespace Platform.Bot.Services
12+
{
13+
/// <summary>
14+
/// <para>
15+
/// Represents the code duplication analysis service.
16+
/// </para>
17+
/// <para></para>
18+
/// </summary>
19+
public class CodeDuplicationAnalysisService
20+
{
21+
private readonly GitHubStorage _storage;
22+
private const int MinimumCodeFragmentLength = 3;
23+
private const int MinimumSimilarityThreshold = 80;
24+
25+
/// <summary>
/// <para>
/// Creates a <see cref="CodeDuplicationAnalysisService"/> bound to the given GitHub storage.
/// </para>
/// <para></para>
/// </summary>
/// <param name="storage">
/// <para>The GitHub storage kept by this instance; its client is used for every repository content request.</para>
/// <para></para>
/// </param>
public CodeDuplicationAnalysisService(GitHubStorage storage) => _storage = storage;
39+
40+
/// <summary>
41+
/// <para>
42+
/// Represents code fragment information.
43+
/// </para>
44+
/// <para></para>
45+
/// </summary>
46+
public class CodeFragment
47+
{
48+
public string Content { get; set; }
49+
public string FilePath { get; set; }
50+
public int StartLine { get; set; }
51+
public int EndLine { get; set; }
52+
public string Hash { get; set; }
53+
54+
public CodeFragment(string content, string filePath, int startLine, int endLine)
55+
{
56+
Content = content;
57+
FilePath = filePath;
58+
StartLine = startLine;
59+
EndLine = endLine;
60+
Hash = ComputeHash(content);
61+
}
62+
63+
private static string ComputeHash(string content)
64+
{
65+
var normalized = NormalizeCode(content);
66+
using var sha256 = System.Security.Cryptography.SHA256.Create();
67+
var hash = sha256.ComputeHash(Encoding.UTF8.GetBytes(normalized));
68+
return Convert.ToBase64String(hash);
69+
}
70+
71+
private static string NormalizeCode(string code)
72+
{
73+
code = Regex.Replace(code, @"\s+", " ");
74+
code = Regex.Replace(code, @"//.*", "");
75+
code = Regex.Replace(code, @"/\*.*?\*/", "", RegexOptions.Singleline);
76+
return code.Trim();
77+
}
78+
}
79+
80+
/// <summary>
81+
/// <para>
82+
/// Represents duplication group information.
83+
/// </para>
84+
/// <para></para>
85+
/// </summary>
86+
public class DuplicationGroup
87+
{
88+
public List<CodeFragment> Fragments { get; set; } = new();
89+
public int Count => Fragments.Count;
90+
public double SimilarityScore { get; set; }
91+
public string SuggestedMethodName { get; set; } = string.Empty;
92+
}
93+
94+
/// <summary>
/// <para>
/// Collects code fragments from the files of the given repository and returns the
/// groups of fragments that occur more than once.
/// </para>
/// <para></para>
/// </summary>
/// <param name="repository">
/// <para>The repository to analyze.</para>
/// <para></para>
/// </param>
/// <returns>
/// <para>The duplication groups found; the list is empty when there are none.</para>
/// <para></para>
/// </returns>
public async Task<List<DuplicationGroup>> AnalyzeRepositoryAsync(Repository repository)
{
    // Fetch and slice the files first, then group the resulting fragments by hash.
    return FindDuplications(await ExtractCodeFragmentsAsync(repository));
}
114+
115+
// Lists every file of the repository, fetches each one whose name looks like source code,
// and returns the fragments cut from all of them. Files are fetched one after another.
private async Task<List<CodeFragment>> ExtractCodeFragmentsAsync(Repository repository)
{
    var repositoryFiles = await GetRepositoryContentsAsync(repository);
    var collected = new List<CodeFragment>();

    foreach (var file in repositoryFiles)
    {
        if (!IsCodeFile(file.Name))
        {
            continue;
        }

        var text = await GetFileContentAsync(repository, file.Path);
        collected.AddRange(ExtractFragmentsFromFile(text, file.Path));
    }

    return collected;
}
132+
133+
// Returns every file entry of the repository, starting the walk at the root path "".
// If the walk throws, the error is written to the console and an empty list is returned.
private async Task<IReadOnlyList<RepositoryContent>> GetRepositoryContentsAsync(Repository repository)
{
    IReadOnlyList<RepositoryContent> files;

    try
    {
        files = await GetAllContentsRecursively(repository, "");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error getting repository contents: {ex.Message}");
        files = new List<RepositoryContent>();
    }

    return files;
}
145+
146+
// Walks the repository tree below the given path and returns all file entries found.
// Directories are descended into; entries that are neither a file nor a directory are ignored.
// A failure while listing a path is logged to the console and whatever was gathered
// for that path up to that point is returned, so one bad directory does not abort the walk.
private async Task<List<RepositoryContent>> GetAllContentsRecursively(Repository repository, string path)
{
    var files = new List<RepositoryContent>();

    try
    {
        var entries = await _storage.Client.Repository.Content.GetAllContents(repository.Id, path);

        foreach (var entry in entries)
        {
            if (entry.Type == ContentType.Dir)
            {
                files.AddRange(await GetAllContentsRecursively(repository, entry.Path));
                continue;
            }

            if (entry.Type == ContentType.File)
            {
                files.Add(entry);
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error getting contents for path {path}: {ex.Message}");
    }

    return files;
}
174+
175+
// Fetches the text of the file at the given path in the repository.
// Always returns a non-null string: string.Empty is returned when the request fails,
// when the response has no entries, or when the first entry carries no content.
// The caller splits the result into lines, so null must never escape from here.
private async Task<string> GetFileContentAsync(Repository repository, string path)
{
    try
    {
        var contents = await _storage.Client.Repository.Content.GetAllContents(repository.Id, path);
        var file = contents.FirstOrDefault();

        // NOTE(review): Content is presumably null when the API response has no encoded
        // payload for the entry (e.g. very large files) - confirm against Octokit.
        if (file?.Content is null)
        {
            Console.WriteLine($"Error getting file content for {path}: no content returned");
            return string.Empty;
        }

        return file.Content;
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error getting file content for {path}: {ex.Message}");
        return string.Empty;
    }
}
188+
189+
// Tells whether the file name ends with one of the recognized source-code extensions.
// The comparison ignores case, so "Script.PY" is accepted just like "script.py".
private static bool IsCodeFile(string fileName)
{
    string[] recognizedSuffixes = { ".cs", ".js", ".ts", ".py", ".java", ".cpp", ".c", ".h", ".php", ".rb", ".go", ".rs", ".swift" };

    foreach (var suffix in recognizedSuffixes)
    {
        if (fileName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
194+
195+
// Cuts the file text into overlapping windows of consecutive lines and returns those
// that pass IsValidCodeFragment. For every start line, windows of MinimumCodeFragmentLength
// up to 10 lines (or fewer, when the file ends sooner) are produced.
// Reported line numbers are 1-based and inclusive.
// Null or empty content yields an empty list.
private List<CodeFragment> ExtractFragmentsFromFile(string content, string filePath)
{
    const int maximumFragmentLength = 10;
    var fragments = new List<CodeFragment>();

    if (string.IsNullOrEmpty(content))
    {
        return fragments;
    }

    var lines = content.Split('\n');

    for (var start = 0; start + MinimumCodeFragmentLength <= lines.Length; start++)
    {
        var longest = Math.Min(maximumFragmentLength, lines.Length - start);

        for (var length = MinimumCodeFragmentLength; length <= longest; length++)
        {
            // Join the window straight out of the array instead of materializing a copy per window.
            var fragmentContent = string.Join("\n", lines, start, length);

            if (IsValidCodeFragment(fragmentContent))
            {
                fragments.Add(new CodeFragment(fragmentContent, filePath, start + 1, start + length));
            }
        }
    }

    return fragments;
}
216+
217+
// Decides whether a candidate fragment is worth comparing.
// A fragment is accepted when its trimmed text has at least 50 characters and it contains
// at least MinimumCodeFragmentLength lines that are not blank, not a lone brace, and do not
// start with a comment marker ("//", "/*" or "*"). Null or blank input is rejected.
private static bool IsValidCodeFragment(string content)
{
    const int minimumCharacters = 50;

    if (string.IsNullOrWhiteSpace(content))
    {
        return false;
    }

    content = content.Trim();
    if (content.Length < minimumCharacters)
    {
        return false;
    }

    // Each line is trimmed once; the marker checks are ordinal because they are
    // syntax tokens, not linguistic text.
    var codeLineCount = content.Split('\n')
        .Select(line => line.Trim())
        .Count(line =>
            line.Length > 0 &&
            !line.StartsWith("//", StringComparison.Ordinal) &&
            !line.StartsWith("/*", StringComparison.Ordinal) &&
            !line.StartsWith("*", StringComparison.Ordinal) &&
            line != "{" &&
            line != "}");

    // Code lines are a subset of the non-blank lines, so this single comparison also
    // guarantees that there are enough non-blank lines.
    return codeLineCount >= MinimumCodeFragmentLength;
}
235+
236+
// Groups fragments by their hash and returns the groups that contain more than one fragment.
// Matching is exact on the hash, so every returned group gets a similarity score of 100.
// Groups are ordered by fragment count (largest first), then by the length of their first
// fragment's content (longest first). Each returned group receives a suggested method name
// derived from its first fragment.
private List<DuplicationGroup> FindDuplications(List<CodeFragment> fragments)
{
    // GroupBy yields groups in order of first appearance and keeps element order inside
    // each group, so ties in the sort below are resolved deterministically.
    var duplications = fragments
        .GroupBy(fragment => fragment.Hash)
        .Select(sameHash => sameHash.ToList())
        .Where(members => members.Count > 1)
        .Select(members => new DuplicationGroup
        {
            Fragments = members,
            SimilarityScore = 100.0
        })
        .OrderByDescending(group => group.Count)
        .ThenByDescending(group => group.Fragments[0].Content.Length)
        .ToList();

    foreach (var group in duplications)
    {
        group.SuggestedMethodName = GenerateMethodName(group.Fragments[0].Content);
    }

    return duplications;
}
269+
270+
// Builds a method-name suggestion from a piece of code.
// The name starts with the first verb from a fixed list that occurs anywhere in the
// lower-cased text (as a plain substring), or "Process" when none occurs. It is followed by
// up to two of the first capitalized single words found in the text that are not already part
// of the name. When nothing could be appended, "Data" is used as the second part.
private static string GenerateMethodName(string content)
{
    string[] verbs = { "get", "set", "create", "update", "delete", "find", "search", "calculate", "process", "validate", "convert" };
    var lowered = content.ToLowerInvariant();

    // The list order decides which verb wins, not the position inside the text.
    var leading = "Process";
    foreach (var verb in verbs)
    {
        if (lowered.Contains(verb))
        {
            leading = char.ToUpper(verb[0]) + verb.Substring(1);
            break;
        }
    }

    var parts = new List<string> { leading };

    // Only the first two matches are looked at, even if one of them is skipped as a repeat.
    var capitalizedWords = Regex.Matches(content, @"\b[A-Z][a-z]+\b");
    for (var index = 0; index < capitalizedWords.Count && index < 2; index++)
    {
        var word = capitalizedWords[index].Value;
        if (!parts.Contains(word))
        {
            parts.Add(word);
        }
    }

    if (parts.Count == 1)
    {
        parts.Add("Data");
    }

    return string.Concat(parts);
}
303+
}
304+
}

0 commit comments

Comments
 (0)