Skip to content

Merge pull request #230 from searabbitx/master #64

Merge pull request #230 from searabbitx/master

Merge pull request #230 from searabbitx/master #64

Workflow file for this run

name: Translator All
on:
push:
branches:
- master
paths-ignore:
- 'scripts/**'
- '.gitignore'
- '.github/**'
- Dockerfile
workflow_dispatch:
permissions:
packages: write
id-token: write
contents: write
jobs:
translate:
name: Translate → ${{ matrix.name }} (${{ matrix.branch }})
runs-on: ubuntu-latest
environment: prod
# Run N languages in parallel (tune max-parallel if needed)
strategy:
fail-fast: false
# max-parallel: 3 #Nothing to run all in parallel
matrix:
include:
- { name: "Afrikaans", language: "Afrikaans", branch: "af" }
- { name: "German", language: "German", branch: "de" }
- { name: "Greek", language: "Greek", branch: "el" }
- { name: "Spanish", language: "Spanish", branch: "es" }
- { name: "French", language: "French", branch: "fr" }
- { name: "Hindi", language: "Hindi", branch: "hi" }
- { name: "Italian", language: "Italian", branch: "it" }
- { name: "Japanese", language: "Japanese", branch: "ja" }
- { name: "Korean", language: "Korean", branch: "ko" }
- { name: "Polish", language: "Polish", branch: "pl" }
- { name: "Portuguese", language: "Portuguese", branch: "pt" }
- { name: "Serbian", language: "Serbian", branch: "sr" }
- { name: "Swahili", language: "Swahili", branch: "sw" }
- { name: "Turkish", language: "Turkish", branch: "tr" }
- { name: "Ukrainian", language: "Ukrainian", branch: "uk" }
- { name: "Chinese", language: "Chinese", branch: "zh" }
# Ensure only one job per branch runs at a time (even across workflow runs)
concurrency:
group: translate-${{ matrix.branch }}
cancel-in-progress: false
container:
image: ghcr.io/hacktricks-wiki/hacktricks-cloud/translator-image:latest
env:
LANGUAGE: ${{ matrix.language }}
BRANCH: ${{ matrix.branch }}
steps:
- name: Wait for build_master to start
run: |
echo "Waiting 30 seconds to let build_master.yml initialize and reset the searchindex repo..."
sleep 30
echo "Continuing with translation workflow"
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Update and download scripts
run: |
sudo apt-get update
# Install GitHub CLI properly
curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
&& sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
&& sudo apt update \
&& sudo apt install gh -y \
&& sudo apt-get install -y wget
wget -O /tmp/get_and_save_refs.py https://raw.githubusercontent.com/HackTricks-wiki/hacktricks-cloud/master/scripts/get_and_save_refs.py
wget -O /tmp/compare_and_fix_refs.py https://raw.githubusercontent.com/HackTricks-wiki/hacktricks-cloud/master/scripts/compare_and_fix_refs.py
wget -O /tmp/translator.py https://raw.githubusercontent.com/HackTricks-wiki/hacktricks-cloud/master/scripts/translator.py
- name: Run get_and_save_refs.py
run: |
python /tmp/get_and_save_refs.py
- name: Download language branch & update refs
run: |
pwd
ls -la
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git config --global user.name 'Translator'
git config --global user.email '[email protected]'
git config pull.rebase false
git checkout $BRANCH
git pull
python /tmp/compare_and_fix_refs.py --files-unmatched-paths /tmp/file_paths.txt
git add .
git commit -m "Fix unmatched refs" || echo "No changes to commit"
git push || echo "No changes to push"
- name: Run translation script on changed files
run: |
git checkout master
cp src/SUMMARY.md /tmp/master-summary.md
export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
git diff --name-only HEAD~1 | grep -v "SUMMARY.md" | while read -r file; do
if echo "$file" | grep -qE '\.md$'; then
echo -n ",$file" >> /tmp/file_paths.txt
fi
done
echo "Files to translate (`wc -l < /tmp/file_paths.txt`):"
cat /tmp/file_paths.txt
echo ""
echo ""
touch /tmp/file_paths.txt
if [ -s /tmp/file_paths.txt ]; then
python /tmp/translator.py \
--language "$LANGUAGE" \
--branch "$BRANCH" \
--api-key "$OPENAI_API_KEY" \
-f "$(cat /tmp/file_paths.txt)" \
-t 3
else
echo "No markdown files changed, skipping translation."
fi
- name: Sync SUMMARY.md from master
run: |
git checkout "$BRANCH"
git pull
if [ -f /tmp/master-summary.md ]; then
cp /tmp/master-summary.md src/SUMMARY.md
git add src/SUMMARY.md
git commit -m "Sync SUMMARY.md with master" || echo "SUMMARY already up to date"
git push || echo "No SUMMARY updates to push"
else
echo "master summary not exported; failing"
exit 1
fi
- name: Build mdBook
run: |
git checkout "$BRANCH"
git pull
MDBOOK_BOOK__LANGUAGE=$BRANCH mdbook build || (echo "Error logs" && cat hacktricks-preprocessor-error.log && echo "" && echo "" && echo "Debug logs" && (cat hacktricks-preprocessor.log | tail -n 20) && exit 1)
- name: Push search index to hacktricks-searchindex repo
shell: bash
env:
PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
run: |
set -euo pipefail
ASSET="book/searchindex.js"
TARGET_REPO="HackTricks-wiki/hacktricks-searchindex"
FILENAME="searchindex-cloud-${BRANCH}.js"
if [ ! -f "$ASSET" ]; then
echo "Expected $ASSET to exist after build" >&2
exit 1
fi
TOKEN="${PAT_TOKEN}"
if [ -z "$TOKEN" ]; then
echo "No PAT_TOKEN available" >&2
exit 1
fi
# Clone the searchindex repo
git clone https://x-access-token:${TOKEN}@github.com/${TARGET_REPO}.git /tmp/searchindex-repo
# Compress the searchindex file
gzip -9 -k -f "$ASSET"
# Show compression stats
ORIGINAL_SIZE=$(wc -c < "$ASSET")
COMPRESSED_SIZE=$(wc -c < "${ASSET}.gz")
RATIO=$(awk "BEGIN {printf \"%.1f\", ($COMPRESSED_SIZE / $ORIGINAL_SIZE) * 100}")
echo "Compression: ${ORIGINAL_SIZE} bytes -> ${COMPRESSED_SIZE} bytes (${RATIO}%)"
# XOR encrypt the compressed file
KEY='Prevent_Online_AVs_From_Flagging_HackTricks_Search_Gzip_As_Malicious_394h7gt8rf9u3rf9g'
cat > /tmp/xor_encrypt.py << 'EOF'
import sys
key = sys.argv[1]
input_file = sys.argv[2]
output_file = sys.argv[3]
with open(input_file, 'rb') as f:
data = f.read()
key_bytes = key.encode('utf-8')
encrypted = bytearray(len(data))
for i in range(len(data)):
encrypted[i] = data[i] ^ key_bytes[i % len(key_bytes)]
with open(output_file, 'wb') as f:
f.write(encrypted)
print(f"Encrypted: {len(data)} bytes")
EOF
python3 /tmp/xor_encrypt.py "$KEY" "${ASSET}.gz" "${ASSET}.gz.enc"
# Copy ONLY the encrypted .gz version to the searchindex repo (no uncompressed .js)
cp "${ASSET}.gz.enc" "/tmp/searchindex-repo/${FILENAME}.gz"
# Commit and push with retry logic
cd /tmp/searchindex-repo
git config user.name "GitHub Actions"
git config user.email "[email protected]"
git add "${FILENAME}.gz"
if git diff --staged --quiet; then
echo "No changes to commit"
else
git commit -m "Update ${FILENAME} from hacktricks-cloud build"
# Retry push up to 20 times with pull --rebase between attempts
MAX_RETRIES=20
RETRY_COUNT=0
while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
if git push origin master; then
echo "Successfully pushed on attempt $((RETRY_COUNT + 1))"
break
else
RETRY_COUNT=$((RETRY_COUNT + 1))
if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then
echo "Push failed, attempt $RETRY_COUNT/$MAX_RETRIES. Pulling and retrying..."
# Try normal rebase first
if git pull --rebase origin master 2>&1 | tee /tmp/pull_output.txt; then
echo "Rebase successful, retrying push..."
else
# If rebase fails due to divergent histories (orphan branch reset), re-clone
if grep -q "unrelated histories\|refusing to merge\|fatal: invalid upstream\|couldn't find remote ref" /tmp/pull_output.txt; then
echo "Detected history rewrite, re-cloning repository..."
cd /tmp
rm -rf searchindex-repo
git clone https://x-access-token:${TOKEN}@github.com/${TARGET_REPO}.git searchindex-repo
cd searchindex-repo
git config user.name "GitHub Actions"
git config user.email "[email protected]"
# Re-copy ONLY the encrypted .gz version (no uncompressed .js)
cp "${ASSET}.gz.enc" "${FILENAME}.gz"
git add "${FILENAME}.gz"
git commit -m "Update ${FILENAME}.gz from hacktricks-cloud build"
echo "Re-cloned and re-committed, will retry push..."
else
echo "Rebase failed for unknown reason, retrying anyway..."
fi
fi
sleep 1
else
echo "Failed to push after $MAX_RETRIES attempts"
exit 1
fi
fi
done
fi
# Login in AWs
- name: Configure AWS credentials using OIDC
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
aws-region: us-east-1
# Sync the build to S3
- name: Sync to S3
run: |
echo "Current branch:"
git rev-parse --abbrev-ref HEAD
echo "Syncing $BRANCH to S3"
aws s3 sync ./book s3://hacktricks-cloud/$BRANCH --delete
echo "Sync completed"
echo "Cat 3 files from the book"
find . -type f -name 'index.html' -print | head -n 3 | xargs -r cat