Skip to content

Commit 451a58f

Browse files
committed
implement script for parsing diff
1 parent 43299f3 commit 451a58f

File tree

3 files changed

+125
-42
lines changed

3 files changed

+125
-42
lines changed

.github/workflows/copyPasteDetection.yml

Lines changed: 28 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -26,61 +26,47 @@ jobs:
2626
with:
2727
node-version: ${{ matrix.node-version }}
2828

29+
- name: Determine if local
30+
run: echo "IS_LOCAL=false" >> $GITHUB_ENV
31+
2932
- name: Fetch fork upstream
33+
if: ${{ env.IS_LOCAL == 'false' }}
3034
run: |
3135
git remote add forkUpstream https://github.com/${{ github.event.pull_request.head.repo.full_name }} # URL of the fork
3236
git fetch forkUpstream # Fetch fork
3337
3438
- name: Determine base and target branches for comparison.
3539
run: |
36-
echo "CURRENT_BRANCH=${{ github.head_ref }}" >> $GITHUB_ENV
37-
echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV
38-
- run: git diff --name-only origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt
39-
- run: |
40-
npm install -g jscpd
40+
if [[ $IS_LOCAL == 'false' ]]; then
41+
echo "CURRENT_BRANCH=${{ github.head_ref }}" >> $GITHUB_ENV
42+
echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV
43+
else
44+
echo "CURRENT_BRANCH=${{ github.ref_name }}" >> $GITHUB_ENV
45+
echo "TARGET_BRANCH=master" >> $GITHUB_ENV
46+
fi
47+
48+
- name: Print base and target branches for comparison.
49+
run: |
50+
echo "CURRENT_BRANCH=$CURRENT_BRANCH"
51+
echo "TARGET_BRANCH=$TARGET_BRANCH"
52+
53+
- name: Compare target and current branches.
54+
run: |
55+
if [[ $IS_LOCAL == 'false' ]]; then
56+
git diff origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt
57+
else
58+
git diff origin/$TARGET_BRANCH $CURRENT_BRANCH > diff_output.txt
59+
fi
60+
61+
- run: npm install -g jscpd
4162

4263
- run: jscpd --config "$GITHUB_WORKSPACE/.github/workflows/jscpd.json"
4364

44-
- if: always()
65+
- if: ${{ env.IS_LOCAL == 'false' }}
4566
uses: actions/upload-artifact@v4
4667
with:
4768
name: unfiltered-jscpd-report
4869
path: ./jscpd-report.json
4970

50-
- name: Filter jscpd report for changed files
51-
run: |
52-
if [ ! -f ./jscpd-report.json ]; then
53-
echo "jscpd-report.json not found"
54-
exit 1
55-
fi
56-
echo "Filtering jscpd report for changed files..."
57-
CHANGED_FILES=$(jq -R -s -c 'split("\n")[:-1]' diff_output.txt)
58-
echo "Changed files: $CHANGED_FILES"
59-
jq --argjson changed_files "$CHANGED_FILES" '
60-
.duplicates | map(select(
61-
(.firstFile?.name as $fname | $changed_files | any(. == $fname)) or
62-
(.secondFile?.name as $sname | $changed_files | any(. == $sname))
63-
))
64-
' ./jscpd-report.json > filtered-jscpd-report.json
65-
cat filtered-jscpd-report.json
66-
6771
- name: Check for duplicates
68-
run: |
69-
if [ $(wc -l < ./filtered-jscpd-report.json) -gt 1 ]; then
70-
echo "filtered_report_exists=true" >> $GITHUB_ENV
71-
else
72-
echo "filtered_report_exists=false" >> $GITHUB_ENV
73-
fi
74-
- name: upload filtered report (if applicable)
75-
if: env.filtered_report_exists == 'true'
76-
uses: actions/upload-artifact@v4
77-
with:
78-
name: filtered-jscpd-report
79-
path: ./filtered-jscpd-report.json
80-
81-
- name: Fail and log found duplicates.
82-
if: env.filtered_report_exists == 'true'
83-
run: |
84-
cat ./filtered-jscpd-report.json
85-
echo "Duplications found, failing the check."
86-
exit 1
72+
run: node "$GITHUB_WORKSPACE/.github/workflows/filterDuplicates.js" diff_output.txt jscpd-report.json
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
const fs = require('fs/promises')
2+
3+
/**
 * Extracts the repository-relative path of the changed file from a
 * `diff --git a/<old> b/<new>` header line.
 *
 * The destination (`b/`) path is returned because the hunk ranges collected
 * from the same diff describe line numbers in the new version of the file.
 *
 * @param {string} filePathLine - A diff header line, e.g.
 *   `diff --git a/src/index.js b/src/index.js`.
 * @returns {string} The file path without the `a/` / `b/` prefix.
 * @throws {TypeError} If the line has fewer than three space-separated tokens
 *   and is not a recognizable `diff --git` header.
 */
function parseDiffFilePath(filePathLine) {
  const gitPrefix = 'diff --git '
  // Drop a trailing "\r" (CRLF diffs) so it never ends up in the path.
  const header = filePathLine.trimEnd()

  if (header.startsWith(gitPrefix)) {
    const paths = header.slice(gitPrefix.length)

    // Common case: the file was not renamed, so the header is
    // "a/<path> b/<path>" with the same path twice. Splitting by length
    // keeps paths that contain spaces intact.
    const pathLength = (paths.length - 'a/'.length - ' b/'.length) / 2
    if (Number.isInteger(pathLength) && pathLength > 0) {
      const candidate = paths.slice(2, 2 + pathLength)
      if (paths === `a/${candidate} b/${candidate}`) {
        return candidate
      }
    }

    // Rename/copy: the two paths differ; take the destination path.
    const renamed = /^a\/.+? b\/(.+)$/.exec(paths)
    if (renamed) {
      return renamed[1]
    }
  }

  // Fallback for any other header shape: third token without its first
  // path segment.
  return filePathLine.split(' ')[2].split('/').slice(1).join('/')
}
6+
7+
/**
 * Parses the "to" side of a unified-diff hunk header such as
 * `@@ -26,61 +26,47 @@` into a line range of the new file.
 *
 * @param {string} rangeLine - A hunk header line starting with `@@`.
 * @returns {[number, number]} `[startLine, startLine + lineCount]`, i.e. the
 *   first line of the hunk and the first line after it.
 * @throws {TypeError} If the line has no third space-separated token.
 */
function parseDiffRange(rangeLine) {
  // Tokens are: "@@", "-<from>", "+<to>", "@@", ...
  const toRange = rangeLine.split(' ')[2]
  // The line count is omitted from the header when it equals 1 ("+5"),
  // so default it instead of producing NaN.
  const [startLine, numLines = 1] = toRange.slice(1).split(',').map(Number)
  return [startLine, startLine + numLines]
}
13+
14+
/**
 * Reads a diff file and collects, per changed file, the line ranges touched
 * by each hunk.
 *
 * @param {string} diffPath - Path to a file containing diff output.
 * @returns {Promise<Map<string, Array<[number, number]>>>} Map from file path
 *   (as returned by `parseDiffFilePath`) to the ranges returned by
 *   `parseDiffRange` for every hunk of that file.
 */
async function parseDiff(diffPath) {
  const diff = await fs.readFile(diffPath, 'utf8')
  const fileChanges = new Map()
  let currentFileChanges = null

  for (const line of diff.split('\n')) {
    if (line.startsWith('diff')) {
      // Register the file as soon as its header is seen, so the last file in
      // the diff is recorded too (not only files followed by another header).
      currentFileChanges = []
      fileChanges.set(parseDiffFilePath(line), currentFileChanges)
    } else if (line.startsWith('@@') && currentFileChanges) {
      // Hunk headers before any file header have no file to attach to.
      currentFileChanges.push(parseDiffRange(line))
    }
  }

  return fileChanges
}
36+
37+
/**
 * Tells whether two line ranges share at least one line. Both ends of each
 * range are treated as inclusive.
 *
 * @param {[number, number]} range1 - `[start, end]` of the first range.
 * @param {[number, number]} range2 - `[start, end]` of the second range.
 * @returns {boolean} `true` when the ranges intersect, including when one
 *   range fully contains the other.
 */
function doesOverlap(range1, range2) {
  const [start1, end1] = range1
  const [start2, end2] = range2
  // Two intervals intersect exactly when each one starts before the other
  // ends. Checking only whether range1's endpoints fall inside range2 misses
  // the case where range1 strictly contains range2.
  return start1 <= end2 && start2 <= end1
}
42+
43+
/**
 * Checks whether one side of a reported duplicate touches any changed range
 * of its file.
 *
 * @param {Map<string, Array<[number, number]>>} changes - Changed line ranges
 *   keyed by file name.
 * @param {{name: string, start: number, end: number}} cloneInstance - File
 *   name and line span of the clone.
 * @returns {boolean} `true` if the clone's `[start, end]` span overlaps at
 *   least one changed range recorded for `cloneInstance.name`; `false` when
 *   the file has no entry in `changes` or nothing overlaps.
 */
function isCloneInChanges(changes, cloneInstance) {
  const { name, start, end } = cloneInstance
  const changedRanges = changes.get(name)

  if (changedRanges) {
    for (const changedRange of changedRanges) {
      if (doesOverlap([start, end], changedRange)) {
        return true
      }
    }
  }

  return false
}
55+
56+
/**
 * Checks whether either side of a duplicate pair falls inside the changes.
 *
 * @param {Map<string, Array<[number, number]>>} changes - Changed line ranges
 *   keyed by file name.
 * @param {{firstFile: object, secondFile: object}} dupe - A duplicate entry
 *   whose two sides are each passed to `isCloneInChanges`.
 * @returns {boolean} `true` if the first or the second side is in the changes.
 */
function isInChanges(changes, dupe) {
  if (isCloneInChanges(changes, dupe.firstFile)) {
    return true
  }
  return isCloneInChanges(changes, dupe.secondFile)
}
59+
60+
/**
 * Keeps only the duplicates from a report that touch changed lines.
 *
 * @param {{duplicates: Array<object>}} report - Parsed report whose
 *   `duplicates` array is filtered.
 * @param {Map<string, Array<[number, number]>>} changes - Changed line ranges
 *   keyed by file name.
 * @returns {Array<object>} A new array with the duplicates for which
 *   `isInChanges` returns `true`, in their original order.
 */
function filterDuplicates(report, changes) {
  // Built with `filter` into a local result: the previous undeclared
  // `duplicates = []` assignment created an implicit global (and throws a
  // ReferenceError in strict mode / ES modules).
  return report.duplicates.filter((dupe) => isInChanges(changes, dupe))
}
69+
70+
/**
 * Command-line entry point.
 *
 * Usage: `node filterDuplicates.js <diff-file> <jscpd-report.json>`
 *
 * Parses the diff, filters the report's duplicates down to those touching
 * changed lines, prints them together with summary counts, and marks the
 * process as failed (exit code 1) when any remain or when an error occurs.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const [rawDiffPath, jscpdReportPath] = process.argv.slice(2)
    if (!rawDiffPath || !jscpdReportPath) {
      throw new Error('Usage: node filterDuplicates.js <diff-file> <jscpd-report.json>')
    }

    const changes = await parseDiff(rawDiffPath)
    const jscpdReport = JSON.parse(await fs.readFile(jscpdReportPath, 'utf8'))
    const filteredDuplicates = filterDuplicates(jscpdReport, changes)

    console.log(filteredDuplicates)
    console.log('%s files changes', changes.size)
    console.log('%s duplicates found', filteredDuplicates.length)
    if (filteredDuplicates.length > 0) {
      // Set the exit code instead of calling process.exit(): a forced exit
      // can cut off stdout writes that have not been flushed yet.
      process.exitCode = 1
    }
  } catch (err) {
    // Fail explicitly rather than relying on the runtime's
    // unhandled-rejection policy to produce a non-zero exit code.
    console.error(err)
    process.exitCode = 1
  }
}
84+
85+
// Script entry point: start main(). `void` discards the returned promise,
// marking it as intentionally not awaited.
void main()

.github/workflows/filterJscpd.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
const fs = require('fs/promises')
2+
3+
/**
 * Command-line entry point.
 *
 * Reads the report path and the diff path from the first two command-line
 * arguments, logs both, then reads the report file and parses it as JSON.
 * The parsed report is not used further.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const [jscpdReportPath, diffPath] = process.argv.slice(2)
  console.log('Recieved jscpd path: %s', jscpdReportPath)
  console.log('Recieved diff path: %s', diffPath)

  const rawReport = await fs.readFile(jscpdReportPath, 'utf8')
  // The result is discarded; parsing still throws on malformed JSON.
  JSON.parse(rawReport)
}
11+
12+
// Script entry point: start main(). `void` discards the returned promise,
// marking it as intentionally not awaited.
void main()

0 commit comments

Comments
 (0)