Skip to content

Commit 33d2b9e

Browse files
committed
#3414 webpage: add mechanism to deal with corrupted translation files
Signed-off-by: Patrizio Bekerle <[email protected]>
1 parent f08bfdb commit 33d2b9e

File tree

2 files changed

+90
-3
lines changed

2 files changed

+90
-3
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/bin/sh
#
# Identify translated markdown files with severe HTML corruption.
# The build script replaces or removes the files reported here.
#
# Scans every *.md file below src/<lang>/ relative to the current directory,
# where <lang> is a two-letter language code with an optional -XX region
# suffix (for example src/ko/ or src/pt-BR/).
#
# Outputs:
#   stdout - one corrupted file path per line, for the build script to capture
#   stderr - progress plus a per-file summary of the patterns that matched

#######################################
# Describe the corruption patterns found in a single file.
# Arguments: $1 - path of the markdown file to inspect
# Outputs:   the matched pattern descriptions on stdout, each terminated by
#            "; "; nothing at all when the file looks clean
# Note:      grep errors (e.g. unreadable file) are deliberately silenced and
#            count as "pattern not found".
#######################################
describe_corruption() {
  md_file=$1
  details=""

  # Pattern 1: numeric tags such as <0>, <1 >, </0>, </0 > or </ 0 >.
  # [[:space:]] is used instead of \s, which is a GNU extension to ERE and
  # therefore not guaranteed for a /bin/sh script.
  if grep -qE '<(/[[:space:]]*)?[0-9]+[[:space:]]*>' -- "$md_file" 2>/dev/null; then
    details="${details}numeric tag (<0>, </0>, </0 >, etc); "
  fi

  # Pattern 2: unclosed <code> tag followed by table syntax.
  if grep -q '<code>[^<]*`.*|' -- "$md_file" 2>/dev/null; then
    details="${details}unclosed <code> with table syntax; "
  fi

  # Pattern 3: HTML table mixed with markdown table syntax.
  if grep -q '<table' -- "$md_file" 2>/dev/null \
    && grep -q '<code>.*|.*|' -- "$md_file" 2>/dev/null; then
    details="${details}mixed HTML/markdown table; "
  fi

  # Pattern 4: orphaned </p> or other malformed closing tags.
  if grep -qE '^[^<]*</p>|</[0-9 ]+>' -- "$md_file" 2>/dev/null; then
    details="${details}orphaned/malformed closing tag; "
  fi

  # Pattern 5: opening tags that start with a number.
  if grep -qE '<[0-9]+[^>]*>' -- "$md_file" 2>/dev/null; then
    details="${details}numeric opening tag; "
  fi

  printf '%s' "$details"
}

echo "🔍 Scanning for corrupted translation files..." >&2

# The loop runs in a pipeline subshell, so results are streamed to stdout
# instead of being collected in a variable (which would be lost afterwards).
# The filter is anchored at ^src/ so only paths the build script can map back
# to an English file (src/<lang>/...) are reported.
find src -name "*.md" -type f 2>/dev/null \
  | grep -E '^src/[a-z]{2}(-[A-Z]{2})?/' \
  | while IFS= read -r file; do
    error_details=$(describe_corruption "$file")
    if [ -n "$error_details" ]; then
      echo " ⚠️ $file" >&2
      echo " └─ Errors: $error_details" >&2
      # Output to stdout for the build script to capture
      printf '%s\n' "$file"
    fi
  done

webpage/scripts/run-build.sh

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,41 @@
22

33
set -e
44

5-
echo "⚠️ Removing severely corrupted translation files..."
6-
# Remove files that are too corrupted to fix automatically
7-
rm -f src/ko/getting-started/markdown.md && echo " ✓ Removed src/ko/getting-started/markdown.md" || true
5+
# Corrupted translations are overwritten with their English counterparts
# rather than deleted, so the build can finish successfully while the broken
# translations are being repaired.
printf '%s\n' "⚠️ Replacing corrupted translation files with English versions..."
8+
9+
#######################################
# Replace a corrupted translation file with its English original.
#
# The English file is looked up by dropping the language directory from the
# path (src/ko/editor/time-formats.md -> src/editor/time-formats.md). When no
# English file exists, the corrupted translation is removed instead.
#
# Paths that do not start with src/<lang>/ (two lowercase letters, optionally
# followed by -XX) are skipped with a warning: without a language directory
# the lookup would resolve to a non-existent src/src/... path and the file
# itself, possibly an English source, would be deleted.
#
# Variables are not declared `local` because that keyword is not POSIX and the
# interpreter line of this script is not visible from this section.
#
# Arguments: $1 - path of the corrupted file, relative to the working directory
# Outputs:   one status line on stdout; a warning on stderr when skipped
# Returns:   0 on success or skip, the status of cp/rm if either fails
#######################################
replace_with_english() {
  corrupted_file="$1"

  # Strip "src/<lang>/" using parameter expansion instead of spawning sed.
  case "$corrupted_file" in
    src/[a-z][a-z]/*)
      relative_path=${corrupted_file#src/??/}
      ;;
    src/[a-z][a-z]-[A-Z][A-Z]/*)
      relative_path=${corrupted_file#src/??-??/}
      ;;
    *)
      echo " ⚠ Skipped $corrupted_file (not inside a src/<lang>/ directory)" >&2
      return 0
      ;;
  esac
  english_file="src/$relative_path"

  if [ -f "$english_file" ]; then
    cp -- "$english_file" "$corrupted_file" || return
    echo " ✓ Replaced $corrupted_file with English version"
  else
    rm -f -- "$corrupted_file" || return
    echo " ✓ Removed $corrupted_file (no English version found)"
  fi
}
21+
22+
# Run the detection script (when it exists and is executable) and replace
# every file it reports; otherwise fall back to a fixed list of known-bad files.
if [ -x "./scripts/find-corrupted-files.sh" ]; then
  # grep exits with status 1 when nothing matches, i.e. when no translation is
  # corrupted. This script runs under `set -e`, so without `|| true` that
  # normal, healthy case would abort the whole build at this assignment.
  corrupted_files=$(./scripts/find-corrupted-files.sh | grep "^src/" || true)
  if [ -n "$corrupted_files" ]; then
    # IFS= keeps leading/trailing whitespace in file names intact.
    printf '%s\n' "$corrupted_files" | while IFS= read -r file; do
      if [ -f "$file" ]; then
        replace_with_english "$file"
      fi
    done
  fi
else
  # Fallback: manually specified files
  for known_corrupted_file in \
    "src/ko/getting-started/markdown.md" \
    "src/ko/editor/time-formats.md" \
    "src/fa/editor/time-formats.md" \
    "src/fa/contributing/translation.md" \
    "src/it/getting-started/overview.md"; do
    if [ -f "$known_corrupted_file" ]; then
      replace_with_english "$known_corrupted_file"
    fi
  done
fi
840

941
#echo ""
1042
#echo "🔧 Fixing HTML issues in markdown files..."

0 commit comments

Comments
 (0)