Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/workflows/pre-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
with:
token: ${{ secrets.QLTY_COVERAGE_TOKEN }}
files: coverage.xml
# Check translation tags
- name: Check translation tags
  id: translation_check
  run: |
    # check_translations.py writes translation_check_report.md itself, so its
    # stdout is left on the job log instead of being redirected onto the same
    # path (which would give the file two independent writers).
    pipenv run python scripts/check_translations.py || echo "Translation issues found, continuing..."
    # Later steps read the report; make sure it exists even if the script
    # stopped before writing it.
    [ -f translation_check_report.md ] || echo "Translation check report was not generated." > translation_check_report.md
- name: Generate new output files
run: |
#
Expand Down Expand Up @@ -112,12 +117,26 @@

cp output/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml output/cornucopia_webapp/
zip -r output/owasp_cornucopia_webapp_3.0_en.zip output/cornucopia_webapp/Links/* output/cornucopia_webapp/Fonts/* output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml ./resources/templates/owasp_cornucopia_webapp_scoresheet.pdf
- name: Prepare release body with translation report
id: prepare_release
run: |
# Read the translation report and create a combined release body
cat > release_body.md << 'EOF'
## OWASP Cornucopia Pre-Release

This is an automated pre-release build from the latest master branch.

---

EOF
cat translation_check_report.md >> release_body.md
- uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0
name: "Create pre-release"
with:
tag_name: pre-release
prerelease: true
name: Latest pre-release
body_path: release_body.md
files: |
CHANGELOG.md
LICENSE.md
Expand Down
43 changes: 37 additions & 6 deletions .github/workflows/run-tests-generate-output.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ jobs:
run: |
pip install -r requirements.txt --require-hashes
pipenv install -d
- name: Check translation tags
  run: |
    # check_translations.py writes translation_check_report.md itself, so its
    # stdout is left on the job log instead of being redirected onto the same
    # path (which would give the file two independent writers).
    pipenv run python scripts/check_translations.py || echo "Translation issues found, continuing..."
    # The report is uploaded as an artifact afterwards; make sure it exists
    # even if the script stopped before writing it.
    [ -f translation_check_report.md ] || echo "Translation check report was not generated." > translation_check_report.md
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you redirect the output? The script already creates the file translation_check_report.md, right?
You shouldn't write errors to that file.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, thank you for mentioning it. I just noticed it, and I will make sure to change that.

- name: Upload translation check report
if: always()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
retention-days: 5
name: translation-check-report.${{ github.sha }}.md
path: translation_check_report.md
- name: Generate new output files
run: |
#
Expand Down Expand Up @@ -146,23 +156,44 @@ jobs:
contents: read
needs: uploadoutputfiles
steps:
- name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
ref: ${{ github.event.pull_request.head.ref }}
- name: Download translation check report
uses: actions/download-artifact@v6 # v6
with:
name: translation-check-report.${{ github.sha }}.md
path: .
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
env:
PR_NUMBER: ${{ github.event.number }}
PR_NOTES: |
[badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge
ARTIFACT_URL: ${{needs.uploadoutputfiles.outputs.artifact-url}}
with:
script: |
const fs = require('fs');
let translationReport = '';
try {
translationReport = fs.readFileSync('translation_check_report.md', 'utf8');
} catch (error) {
translationReport = 'Translation check report not found.';
}

const prNotes = `[badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge

## Build artifacts:

| Name | Link |
|------|------|
| Output files | [cornucopia-build-files.${{ github.sha }}.zip](${{needs.uploadoutputfiles.outputs.artifact-url}}) |
| Output files | [cornucopia-build-files.${{ github.sha }}.zip](${process.env.ARTIFACT_URL}) |

---

${translationReport}`;

with:
script: |
github.rest.issues.createComment({
issue_number: process.env.PR_NUMBER,
owner: context.repo.owner,
repo: context.repo.repo,
body: process.env.PR_NOTES
body: prNotes
})
229 changes: 229 additions & 0 deletions scripts/check_translations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
"""
Translation Tag Checker for OWASP Cornucopia

This script checks that translation files have the same T0xxx tags as the English version.
It detects:
- Missing tags in translations
- Untranslated tags (text identical to English)
- Empty tag values
"""

import sys
import yaml
from pathlib import Path
from typing import Dict, List
from collections import defaultdict


class TranslationChecker:
    """Check translations for missing, untranslated, or empty tags.

    Card YAML files found directly in ``source_dir`` are grouped by base name.
    Within each group the ``en`` file is the reference and every other
    language file is compared against it tag by tag.
    """

    # Display names for the language codes taken from file names. Codes that
    # are not listed here are shown upper-cased in the report.
    LANGUAGE_NAMES = {
        'es': 'Spanish',
        'fr': 'French',
        'hu': 'Hungarian',
        'it': 'Italian',
        'nl': 'Dutch',
        'no_nb': 'Norwegian',
        'pt_br': 'Portuguese (Brazil)',
        'pt_pt': 'Portuguese (Portugal)',
        'ru': 'Russian'
    }

    def __init__(self, source_dir: Path):
        """Remember the directory to scan and start with no recorded issues."""
        self.source_dir = source_dir
        # results[base_name][lang] -> issue record for one translation file
        self.results = defaultdict(lambda: defaultdict(dict))

    @staticmethod
    def _collect_tags(node, tags: Dict[str, str]) -> None:
        """Recursively add every ``{id: 'T0...', text: ...}`` mapping in *node* to *tags*."""
        if isinstance(node, dict):
            tag_id = node.get('id')
            # Ids that are missing or not strings are skipped instead of
            # raising on ``.startswith``.
            if isinstance(tag_id, str) and tag_id.startswith('T0'):
                tags[tag_id] = node.get('text', '')
            for value in node.values():
                TranslationChecker._collect_tags(value, tags)
        elif isinstance(node, list):
            for item in node:
                TranslationChecker._collect_tags(item, tags)

    @staticmethod
    def _is_language_code(lang: str) -> bool:
        """Return True for codes such as ``en`` or ``pt_br`` (two-letter parts joined by ``_``)."""
        return all(len(part) == 2 and part.isalpha() for part in lang.split('_'))

    @staticmethod
    def _language_of(yaml_file: Path) -> str:
        """Return the language code, i.e. the last ``-``-separated part of the file stem."""
        return yaml_file.stem.split('-')[-1]

    @staticmethod
    def _is_blank(text) -> bool:
        """Return True when *text* is falsy or a whitespace-only string."""
        return not text or (isinstance(text, str) and not text.strip())

    def extract_tags(self, yaml_file: Path) -> Dict[str, str]:
        """Extract T0xxx tags and their text from a YAML file.

        The whole parsed document is searched, so tag entries are found no
        matter which section holds them (a top-level ``common_ids`` list is
        still covered).

        NOTE(review): a reviewer reported that the source files have no
        ``common_ids`` section; confirm the real layout against a source file.

        Args:
            yaml_file: Path of the YAML file to read.

        Returns:
            Mapping of tag id to its text. Empty when the file cannot be read
            or parsed (the error is reported on stderr) or holds no tags.
        """
        tags: Dict[str, str] = {}
        try:
            with open(yaml_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            print(f"Error reading {yaml_file}: {e}", file=sys.stderr)
            return tags

        self._collect_tags(data, tags)
        return tags

    def get_file_groups(self) -> Dict[str, List[Path]]:
        """Group YAML files by their base name (e.g., webapp-cards-2.2).

        Expected file name format: ``{edition}-{component}-{version}-{lang}.yaml``.
        Only files whose base name contains ``cards`` and whose last part looks
        like a language code are kept.

        Returns:
            Mapping of base name to the matching files, in sorted path order.
        """
        file_groups = defaultdict(list)

        # Sorted so the grouping does not depend on directory listing order.
        for yaml_file in sorted(self.source_dir.glob('*-*.yaml')):
            # Skip archived files. Only the file name is inspected: testing the
            # full path would drop every file whenever a parent directory
            # happens to contain "archive".
            if 'archive' in yaml_file.name:
                continue

            parts = yaml_file.stem.split('-')
            if len(parts) < 3:
                continue

            lang = parts[-1]
            base_name = '-'.join(parts[:-1])

            # Only process card files with language codes
            if 'cards' in base_name and self._is_language_code(lang):
                file_groups[base_name].append(yaml_file)

        return file_groups

    def check_translations(self) -> Dict[str, Dict[str, Dict[str, List[str]]]]:
        """
        Check all translation files against English versions.

        Groups without an English file, or whose English file yields no tags,
        are skipped with a warning on stderr.

        Returns:
            Dict with structure:
            {
                'base_name': {
                    'language': {
                        'missing': ['T00145', ...],
                        'untranslated': ['T00100', ...],
                        'empty': ['T00200', ...],
                        'file': 'name-of-translation-file.yaml'
                    }
                }
            }
            Only translations with at least one issue are included.
        """
        for base_name, files in self.get_file_groups().items():
            # Split the group into the English reference and the translations.
            english_file = None
            translation_files = []
            for candidate in files:
                if self._language_of(candidate) == 'en':
                    english_file = candidate
                else:
                    translation_files.append(candidate)

            if english_file is None:
                print(f"Warning: No English file found for {base_name}", file=sys.stderr)
                continue

            english_tags = self.extract_tags(english_file)
            if not english_tags:
                # Without reference tags nothing can be compared; say so
                # instead of silently reporting the group as clean.
                print(
                    f"Warning: No T0xxx tags found in {english_file.name}; "
                    f"skipping {base_name}",
                    file=sys.stderr,
                )
                continue

            for trans_file in translation_files:
                lang = self._language_of(trans_file)
                trans_tags = self.extract_tags(trans_file)

                missing = []
                untranslated = []
                empty = []

                for tag_id, eng_text in english_tags.items():
                    if tag_id not in trans_tags:
                        missing.append(tag_id)
                        continue
                    trans_text = trans_tags[tag_id]
                    if self._is_blank(trans_text):
                        empty.append(tag_id)
                    elif trans_text == eng_text:
                        untranslated.append(tag_id)

                # Store results only when there is something to report.
                if missing or untranslated or empty:
                    self.results[base_name][lang] = {
                        'missing': sorted(missing),
                        'untranslated': sorted(untranslated),
                        'empty': sorted(empty),
                        'file': str(trans_file.name)
                    }

        return dict(self.results)

    def generate_markdown_report(self) -> str:
        """Generate a Markdown report of translation issues.

        Returns:
            A report built from ``self.results``: a short success message when
            no issues are recorded, otherwise one section per translation file
            listing its missing, untranslated and empty tags.
        """
        report_lines = ["# Translation Check Report\n"]

        if not self.results:
            report_lines.append("✅ All existing translations have been completed.\n")
            return '\n'.join(report_lines)

        report_lines.append("The following sentences/tags have issues in the translations:\n")

        # (result key, section heading, introductory sentence)
        sections = (
            (
                'missing',
                "### Missing Tags\n",
                "The following tags are present in the English version but missing in this translation:\n",
            ),
            (
                'untranslated',
                "### Untranslated Tags\n",
                "The following tags have identical text to English (not translated):\n",
            ),
            (
                'empty',
                "### Empty Tags\n",
                "The following tags are empty:\n",
            ),
        )

        for base_name in sorted(self.results):
            languages = self.results[base_name]

            for lang in sorted(languages):
                issues = languages[lang]
                # Unknown codes fall back to the upper-cased code.
                lang_name = self.LANGUAGE_NAMES.get(lang, lang.upper())

                report_lines.append(f"\n## {lang_name}\n")
                report_lines.append(f"**File:** `{issues.get('file', '')}`\n")

                for key, heading, intro in sections:
                    tag_ids = issues.get(key)
                    if tag_ids:
                        report_lines.append(heading)
                        report_lines.append(intro)
                        report_lines.append(f"{', '.join(tag_ids)}\n")

        return '\n'.join(report_lines)


def main():
    """Run the translation check and emit the report.

    The ``source`` directory is looked up next to this script's parent
    directory. The report is printed to stdout and also written to
    ``translation_check_report.md`` in that same parent directory; a short
    notice with the output path goes to stderr.

    Exits with status 1 when the source directory is missing or when any
    translation issue was found, and with status 0 otherwise.
    """
    # Locate the directories relative to this script.
    project_dir = Path(__file__).parent.parent
    source_dir = project_dir / 'source'

    if not source_dir.exists():
        print(f"Error: Source directory not found: {source_dir}", file=sys.stderr)
        sys.exit(1)

    # Collect the issues first; the report is rendered from the checker state.
    checker = TranslationChecker(source_dir)
    found_issues = checker.check_translations()
    report = checker.generate_markdown_report()

    # Emit the report on stdout and persist it to disk.
    print(report)
    report_path = project_dir / 'translation_check_report.md'
    report_path.write_text(report, encoding='utf-8')

    print(f"\n---\nReport written to: {report_path}", file=sys.stderr)

    # A non-zero status signals that issues were found.
    sys.exit(1 if found_issues else 0)


if __name__ == '__main__':
    main()
Loading