Skip to content

Commit 5da958b

Browse files
committed
add chars threshold
1 parent b4c7528 commit 5da958b

File tree

2 files changed

+204
-2
lines changed

2 files changed

+204
-2
lines changed

.github/workflows/deploy-web.yml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,18 @@ jobs:
4949
if [ -z "$CHANGED_FILES" ]; then
5050
echo "has_changes=false" >> "$GITHUB_OUTPUT"
5151
else
52-
printf '%s\n' "$CHANGED_FILES" > changed_english_docs.txt
53-
echo "has_changes=true" >> "$GITHUB_OUTPUT"
52+
if [ -n "$DIFF_BASE" ]; then
53+
printf '%s\n' "$CHANGED_FILES" > changed_english_docs_raw.txt
54+
node .github/workflows/scripts/filter-small-doc-changes.js "$DIFF_BASE" "${{ github.sha }}" changed_english_docs_raw.txt changed_english_docs.txt
55+
if [ -s changed_english_docs.txt ]; then
56+
echo "has_changes=true" >> "$GITHUB_OUTPUT"
57+
else
58+
echo "has_changes=false" >> "$GITHUB_OUTPUT"
59+
fi
60+
else
61+
printf '%s\n' "$CHANGED_FILES" > changed_english_docs.txt
62+
echo "has_changes=true" >> "$GITHUB_OUTPUT"
63+
fi
5464
fi
5565
5666
- name: Mark translations as outdated
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
#!/usr/bin/env node
2+
3+
const fs = require('fs');
const { execFileSync, execSync } = require('child_process');
5+
6+
// A file is kept only when its accumulated changed-character count reaches this value.
const MEANINGFUL_CHAR_THRESHOLD = 10;

// Positional CLI arguments; the first two argv entries (node binary, script path) are dropped.
const [baseRef, headRef, inputPath, outputPath] = process.argv.slice(2);

// baseRef is intentionally not validated here: an empty base is handled later
// by keeping every listed file.
const missingRequiredArg = [headRef, inputPath, outputPath].some(arg => !arg);
if (missingRequiredArg) {
  console.error('Usage: node filter-small-doc-changes.js <baseRef> <headRef> <inputList> <outputList>');
  process.exit(1);
}

// One path per line in the input list; surrounding whitespace is trimmed and
// blank lines are dropped.
const fileList = [];
for (const rawLine of fs.readFileSync(inputPath, 'utf8').split('\n')) {
  const entry = rawLine.trim();
  if (entry) {
    fileList.push(entry);
  }
}

// Paths whose diff was judged meaningful; written to outputPath at the end of the script.
const keptFiles = [];
22+
23+
/**
 * Reads a file as it exists at a given git revision and splits it into lines.
 *
 * git is invoked with an argument array (no shell), so paths containing
 * spaces or shell metacharacters are passed through verbatim instead of being
 * word-split or interpreted.
 *
 * @param {string} ref - Git revision to read from; a falsy value short-circuits to null.
 * @param {string} file - Repository-relative path of the file.
 * @returns {string[] | null} The file's lines with carriage returns removed,
 *   or null when no ref was given or `git show` failed (for example, the file
 *   does not exist at that revision).
 */
const getFileLines = (ref, file) => {
  if (!ref) {
    return null;
  }

  try {
    const content = execFileSync('git', ['show', `${ref}:${file}`], {
      encoding: 'utf8',
      // A missing path is an expected outcome (added/deleted files), so git's
      // stderr is discarded, matching how the git diff call in this script runs.
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    return content.replace(/\r/g, '').split('\n');
  } catch {
    // Best effort: callers treat null as "file not available at this revision".
    return null;
  }
};
35+
36+
/**
 * Locates the end of a leading `---` delimited frontmatter block.
 *
 * @param {string[] | null} lines - File content split into lines.
 * @returns {number} 1-based line number of the closing `---`, or 0 when the
 *   input is missing, does not open with `---`, or the block is never closed.
 */
const frontmatterEndLine = lines => {
  const opensWithDelimiter = Boolean(lines) && lines[0] === '---';
  if (!opensWithDelimiter) {
    return 0;
  }

  // Search from index 1 so the opening delimiter is not matched as the closing one.
  const closingIndex = lines.indexOf('---', 1);
  return closingIndex === -1 ? 0 : closingIndex + 1;
};
48+
49+
/**
 * Decides whether a changed line should be ignored when measuring a diff.
 *
 * @param {string} lineText - Content of the line.
 * @param {number} lineNumber - 1-based line number within its file.
 * @param {number} frontmatterLimit - Last line number of the frontmatter block, or 0 if none.
 * @returns {boolean} True for lines inside the frontmatter block and for
 *   lines that are empty or whitespace-only.
 */
const isTrivialLine = (lineText, lineNumber, frontmatterLimit) => {
  const insideFrontmatter =
    frontmatterLimit > 0 && lineNumber > 0 && lineNumber <= frontmatterLimit;

  return insideFrontmatter || lineText.trim().length === 0;
};
55+
56+
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-unit insertions, deletions and substitutions that turn one
 * into the other. Strings are compared by UTF-16 code unit.
 *
 * Only two rows of the dynamic-programming table are kept, sized by the
 * shorter input, so memory use is proportional to the shorter string rather
 * than to the product of both lengths.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The edit distance; 0 when the strings are identical.
 */
const levenshtein = (a, b) => {
  if (a === b) {
    return 0;
  }
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }

  // The distance is symmetric, so the shorter string can always supply the columns.
  const [rowText, colText] = a.length >= b.length ? [a, b] : [b, a];
  const width = colText.length;

  let previousRow = Array.from({ length: width + 1 }, (_, j) => j);
  let currentRow = new Array(width + 1).fill(0);

  for (let i = 1; i <= rowText.length; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= width; j++) {
      const substitutionCost = rowText[i - 1] === colText[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j] + 1,
        currentRow[j - 1] + 1,
        previousRow[j - 1] + substitutionCost
      );
    }
    // Reuse the two buffers instead of allocating a new row per iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  return previousRow[width];
};
94+
95+
/**
 * Decides whether the diff of one file amounts to at least
 * MEANINGFUL_CHAR_THRESHOLD changed characters, ignoring frontmatter lines
 * and blank lines.
 *
 * Within each hunk, removed lines are paired in order with added lines and
 * scored by edit distance; unpaired added or removed lines are scored by
 * their trimmed length. Removed lines left unpaired at the end of a hunk are
 * scored before the next hunk starts, so a removal is never matched against
 * an addition from a different part of the file.
 *
 * @param {string} file - Repository-relative path of the file.
 * @param {string} diffOutput - Output of `git diff --unified=0` for that file.
 * @returns {boolean} False for an empty diff; true when the file cannot be
 *   read at the base ref; otherwise whether the threshold was reached.
 */
const hasMeaningfulDiff = (file, diffOutput) => {
  // Check the diff first so an empty diff does not cost two `git show` calls.
  if (!diffOutput.trim()) {
    return false;
  }

  const baseLines = getFileLines(baseRef, file);
  if (!baseLines) {
    // No base content available (e.g. newly added file): always keep.
    return true;
  }
  const headLines = getFileLines(headRef, file);

  const baseFrontmatterLimit = frontmatterEndLine(baseLines);
  const headFrontmatterLimit = frontmatterEndLine(headLines);

  const pendingRemoved = [];
  let totalChangedChars = 0;
  let currentOldLine = 0;
  let currentNewLine = 0;
  // File headers (`diff --git`, `index`, `---`, `+++`) only appear before the
  // first hunk. Tracking this avoids mistaking a removed `---` line (rendered
  // as `----`) or an added `++...` line for a header.
  let inHunk = false;

  // Scores removed lines that found no added counterpart as pure deletions.
  const flushPendingRemoved = () => {
    while (pendingRemoved.length > 0) {
      totalChangedChars += pendingRemoved.shift().trim().length;
    }
  };

  for (const diffLine of diffOutput.split('\n')) {
    if (diffLine.startsWith('diff --git ')) {
      inHunk = false;
      continue;
    }

    if (diffLine.startsWith('@@')) {
      flushPendingRemoved();
      if (totalChangedChars >= MEANINGFUL_CHAR_THRESHOLD) {
        return true;
      }
      const hunkHeader = diffLine.match(/^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
      if (hunkHeader) {
        currentOldLine = Number(hunkHeader[1]);
        currentNewLine = Number(hunkHeader[2]);
      }
      inHunk = true;
      continue;
    }

    if (!inHunk) {
      continue;
    }

    if (diffLine.startsWith('-')) {
      const removedText = baseLines[currentOldLine - 1] ?? '';
      if (!isTrivialLine(removedText, currentOldLine, baseFrontmatterLimit)) {
        pendingRemoved.push(removedText);
      }
      currentOldLine++;
      continue;
    }

    if (diffLine.startsWith('+')) {
      const addedText = headLines?.[currentNewLine - 1] ?? '';
      if (!isTrivialLine(addedText, currentNewLine, headFrontmatterLimit)) {
        totalChangedChars +=
          pendingRemoved.length > 0
            ? levenshtein(pendingRemoved.shift(), addedText)
            : addedText.trim().length;
        if (totalChangedChars >= MEANINGFUL_CHAR_THRESHOLD) {
          return true;
        }
      }
      currentNewLine++;
    }
    // Anything else inside a hunk (e.g. "\ No newline at end of file") is ignored.
  }

  flushPendingRemoved();
  return totalChangedChars >= MEANINGFUL_CHAR_THRESHOLD;
};
166+
167+
// Filter the input list: keep a file when no base ref is available, when its
// diff cannot be computed, or when the diff is judged meaningful.
for (const file of fileList) {
  if (!baseRef) {
    keptFiles.push(file);
    continue;
  }

  let diffOutput = '';
  try {
    // Argument array, no shell: a path with spaces or shell metacharacters is
    // passed to git as a single literal pathspec.
    diffOutput = execFileSync(
      'git',
      ['diff', '--unified=0', baseRef, headRef, '--', file],
      {
        encoding: 'utf8',
        stdio: ['ignore', 'pipe', 'ignore'],
      }
    );
  } catch (error) {
    diffOutput = error.stdout?.toString() ?? '';
    if (!diffOutput) {
      // git failed and produced nothing to analyse: err on the side of keeping the file.
      keptFiles.push(file);
      continue;
    }
  }

  if (hasMeaningfulDiff(file, diffOutput)) {
    keptFiles.push(file);
  }
}

// Terminate every entry with a newline, matching the `printf '%s\n'` format
// the workflow uses for the unfiltered list, so line-oriented readers do not
// drop the last path. An empty result stays a zero-byte file because the
// workflow tests the output with `[ -s ... ]`.
const outputText = keptFiles.length > 0 ? `${keptFiles.join('\n')}\n` : '';
fs.writeFileSync(outputPath, outputText, 'utf8');

0 commit comments

Comments
 (0)