Skip to content

Commit 7f64e98

Browse files
authored
Add script to remove duplicate issues on declarations repository (#1115)
2 parents 776815a + 903f399 commit 7f64e98

File tree

3 files changed

+118
-0
lines changed

3 files changed

+118
-0
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22

33
All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
44

5+
## Unreleased [minor]
6+
7+
> Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs.
8+
9+
### Added
10+
11+
- Add script to remove duplicate issues in GitHub reports
12+
513
## 2.4.0 - 2024-10-24
614

715
_Full changeset and discussions: [#1114](https://github.com/OpenTermsArchive/engine/pull/1114)._
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Duplicate issues removal script
2+
3+
This script helps remove duplicate issues from a GitHub repository by closing every open issue that has the same title as an older open issue.
4+
5+
## Prerequisites
6+
7+
1. Set up environment variables:
8+
- Create a `.env` file in the root directory
9+
- Add the GitHub personal access token of the bot that manages issues on your collection, with `repo` permissions:
10+
11+
```shell
12+
OTA_ENGINE_GITHUB_TOKEN=your_github_token
13+
```
14+
15+
2. Configure the target repository in your chosen configuration file within the `config` folder:
16+
17+
```json
18+
{
19+
"@opentermsarchive/engine": {
20+
"reporter": {
21+
"githubIssues": {
22+
"repositories": {
23+
"declarations": "owner/repository"
24+
}
25+
}
26+
}
27+
}
28+
}
29+
```
30+
31+
## Usage
32+
33+
Run the script using:
34+
35+
```shell
36+
node scripts/reporter/duplicate/index.js
37+
```
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import 'dotenv/config';
2+
import config from 'config';
3+
import { Octokit } from 'octokit';
4+
5+
/**
 * Groups issues by their exact title, preserving the order in which they are given.
 *
 * @param {Array<{title: string}>} issues - Issues to group.
 * @returns {Map<string, Array<object>>} Map from title to the issues sharing that title.
 */
function groupIssuesByTitle(issues) {
  const issuesByTitle = new Map();

  for (const issue of issues) {
    const sameTitleIssues = issuesByTitle.get(issue.title);

    if (sameTitleIssues) {
      sameTitleIssues.push(issue);
    } else {
      issuesByTitle.set(issue.title, [issue]);
    }
  }

  return issuesByTitle;
}

/**
 * Picks the issue to keep among issues sharing a title: the one with the earliest `created_at`.
 * `created_at` only has a one-second resolution, so ties are broken by the lowest issue number
 * instead of depending on the order in which the API returned the issues.
 *
 * @param {Array<{number: number, created_at: string}>} issues - Non-empty list of issues.
 * @returns {object} The issue considered as the original.
 */
function findOriginalIssue(issues) {
  return issues.reduce((oldest, current) => {
    const ageDifference = new Date(current.created_at) - new Date(oldest.created_at);

    if (ageDifference < 0) return current;
    if (ageDifference === 0 && current.number < oldest.number) return current;

    return oldest;
  });
}

/**
 * Closes every open issue of the configured declarations repository that has the same title
 * as an older open issue, and posts a comment on it that references the issue that is kept.
 *
 * The repository is read from the `@opentermsarchive/engine.reporter.githubIssues.repositories.declarations`
 * configuration entry and the token from the `OTA_ENGINE_GITHUB_TOKEN` environment variable.
 *
 * @returns {Promise<void>} Resolves once all duplicates have been processed.
 * @throws {Error} If the repository is not in the `<owner>/<repo>` format, if the token is missing,
 *   or if any GitHub API call fails.
 */
async function removeDuplicateIssues() {
  const repository = config.get('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations');

  // Exactly two non-empty segments separated by a single slash; this also rejects full URLs
  if (typeof repository !== 'string' || !/^[^/\s]+\/[^/\s]+$/.test(repository)) {
    throw new Error(`Configuration entry "reporter.githubIssues.repositories.declarations" is expected to be a string in the format <owner>/<repo>, but received: "${repository}"`);
  }

  const token = process.env.OTA_ENGINE_GITHUB_TOKEN;

  // Fail before any network call rather than with an opaque API error on the first write request
  if (!token) {
    throw new Error('Environment variable "OTA_ENGINE_GITHUB_TOKEN" is required to comment on and close issues');
  }

  const [ owner, repo ] = repository.split('/');
  const octokit = new Octokit({ auth: token });

  console.log(`Getting issues from repository ${repository}…`);

  const issues = await octokit.paginate('GET /repos/{owner}/{repo}/issues', {
    owner,
    repo,
    state: 'open',
    per_page: 100,
  });

  // The issues endpoint also returns pull requests, which carry a `pull_request` property
  const onlyIssues = issues.filter(issue => !issue.pull_request);
  let counter = 0;

  console.log(`Found ${onlyIssues.length} issues`);

  for (const [ title, sameTitleIssues ] of groupIssuesByTitle(onlyIssues)) {
    if (sameTitleIssues.length < 2) continue;

    const originalIssue = findOriginalIssue(sameTitleIssues);

    console.log(`\nFound ${sameTitleIssues.length - 1} duplicates for issue #${originalIssue.number} "${title}"`);

    for (const issue of sameTitleIssues) {
      if (issue.number === originalIssue.number) {
        continue;
      }

      // Comment before closing: if a request fails in between, the issue is still open and will be picked up by the next run
      await octokit.request('POST /repos/{owner}/{repo}/issues/{issue_number}/comments', { /* eslint-disable-line no-await-in-loop */
        owner,
        repo,
        issue_number: issue.number,
        body: `This issue is detected as duplicate as it has the same title as #${originalIssue.number}. It most likely was created accidentally by an engine older than [v2.3.2](https://github.com/OpenTermsArchive/engine/releases/tag/v2.3.2). Closing automatically.`,
      });

      // Without an explicit reason, GitHub marks the issue as "completed", which is misleading for a duplicate
      await octokit.request('PATCH /repos/{owner}/{repo}/issues/{issue_number}', { /* eslint-disable-line no-await-in-loop */
        owner,
        repo,
        issue_number: issue.number,
        state: 'closed',
        state_reason: 'not_planned',
      });

      counter++;
      console.log(`Closed issue #${issue.number}: ${issue.html_url}`);
    }
  }

  console.log(`\nDuplicate removal process completed; ${counter} issues closed`);
}
72+
73+
// Do not leave the promise floating: report the failure and signal it to the calling shell with a non-zero exit code
removeDuplicateIssues().catch(error => {
  console.error(error);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)