-
Notifications
You must be signed in to change notification settings - Fork 424
docs: LLM translation. #2113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
docs: LLM translation. #2113
Changes from all commits
1420230
01bcaaf
d2721e2
d5776e5
6eef23f
6fa23d3
947a9a5
8bea108
f8db906
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
# Regenerates the translated docs whenever an English doc changes on `main`.
name: Translate Docs
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
on:
  push:
    branches:
      - main
    paths:
      - "docs/*"
permissions:
  contents: write
  pull-requests: write
jobs:
  build:
    name: Translate Docs
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Code
        uses: actions/checkout@v6
        with:
          # Full history is needed to diff against the commit preceding the push.
          fetch-depth: 0
          persist-credentials: false
      # Collects the base names of the English markdown files touched by this push.
      - id: md_files
        env:
          BEFORE_SHA: ${{ github.event.before }}
          AFTER_SHA: ${{ github.sha }}
        run: |
          # ':(glob)' stops `*` from matching '/', so only docs/<file>.md is listed;
          # a plain pathspec would also match the translations in docs/<lang>/.
          # The trailing `xargs` joins the names with single spaces and no trailing blank.
          FILES=$(git diff --name-only "$BEFORE_SHA" "$AFTER_SHA" -- ':(glob)docs/*.md' | xargs -r -n1 basename | xargs)
          if [ -n "$FILES" ]; then
            echo "found=true" >> "$GITHUB_OUTPUT"
          else
            echo "found=false" >> "$GITHUB_OUTPUT"
          fi
          echo "files=$FILES" >> "$GITHUB_OUTPUT"
      - name: Set up PHP
        if: steps.md_files.outputs.found == 'true'
        uses: shivammathur/setup-php@v2
        with:
          php-version: '8.5'
      - name: run translation script
        if: steps.md_files.outputs.found == 'true'
        env:
          GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
          MD_FILES: '${{ steps.md_files.outputs.files }}'
        run: |
          # MD_FILES is a space-separated list: split it so that every file name
          # reaches the script as its own argument.
          read -r -a files <<< "$MD_FILES"
          php ./docs/translate.php "${files[@]}"
| - name: Run Linter | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should run
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You want to use prettier instead of the superlinter to fix formatting issues?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Super-linter Prettier does use prettier under the hood. |
||
| if: steps.md_files.outputs.found == 'true' | ||
| uses: super-linter/super-linter/slim@v8 | ||
| env: | ||
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
| LINTER_RULES_PATH: / | ||
| MARKDOWN_CONFIG_FILE: .markdown-lint.yaml | ||
| FIX_NATURAL_LANGUAGE: true | ||
| FIX_MARKDOWN: true | ||
# Opens a pull request with whatever the previous steps changed in the working tree.
- name: Create Pull Request
  if: steps.md_files.outputs.found == 'true'
  uses: peter-evans/create-pull-request@v8
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  with:
    # One branch per workflow run.
    branch: translations/${{ github.run_id }}
    delete-branch: true
    draft: false
    title: 'docs: update translations'
    commit-message: 'docs: update translations'
    author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com>
    committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
    labels: |
      translations
      bot
    body: |
      Translation updates for: ${{ steps.md_files.outputs.files }}.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| <?php | ||
|
|
||
| # Update all translations to match the English docs. | ||
| # Usage: php docs/translate.php [file.md ...] | ||
| # Requires: PHP with the openssl extension and a Gemini API key ($GEMINI_API_KEY). | ||
|
|
||
/** Gemini model that handles every translation request. */
const MODEL = 'gemini-2.5-flash';

/** Pause, in seconds, taken after each translation and before each retry. */
const SLEEP_SECONDS_BETWEEN_REQUESTS = 10;

/** Target languages: docs sub-directory => language name used in the prompt. */
const LANGUAGES = [
    'cn'    => 'Chinese',
    'fr'    => 'French',
    'ja'    => 'Japanese',
    'pt-br' => 'Portuguese (Brazilian)',
    'ru'    => 'Russian',
    'tr'    => 'Turkish',
];
|
|
||
/**
 * Sends a two-message conversation to the Gemini `generateContent` endpoint and
 * returns the text of the first part of the first candidate.
 *
 * A failed request or an empty answer is retried after a pause of
 * SLEEP_SECONDS_BETWEEN_REQUESTS seconds. Once the retries are used up, the
 * script terminates with exit code 1.
 *
 * @param string $systemPrompt Instructions, sent as a leading "model" message.
 * @param string $userPrompt   The request itself, sent as the "user" message.
 * @param string $model        Model name inserted into the endpoint URL.
 * @param string $apiKey       Key sent in the X-Goog-Api-Key header.
 * @param int    $reties       Number of additional attempts after the first one.
 *
 * @return string The generated text; never empty.
 */
function makeGeminiRequest(string $systemPrompt, string $userPrompt, string $model, string $apiKey, int $reties = 2): string
{
    $url = "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent";
    $body = json_encode([
        'contents' => [
            // `parts` is a list of Part objects, hence the extra level of nesting.
            ['role' => 'model', 'parts' => [['text' => $systemPrompt]]],
            ['role' => 'user', 'parts' => [['text' => $userPrompt]]],
        ],
    ]);
    if ($body === false) {
        // Typically invalid UTF-8 in a prompt; retrying cannot fix that.
        echo 'Could not encode the request body: ' . json_last_error_msg() . "\n";
        exit(1);
    }

    $context = stream_context_create([
        'http' => [
            'method' => 'POST',
            'header' => "Content-Type: application/json\r\nX-Goog-Api-Key: $apiKey\r\nContent-Length: " . strlen($body) . "\r\n",
            'content' => $body,
            'timeout' => 300,
            // Also fetch the body of 4xx/5xx responses so the API's error message gets printed below.
            'ignore_errors' => true,
        ],
    ]);

    for ($retriesLeft = $reties; ; $retriesLeft--) {
        // Warnings are silenced here because the failure is reported right below via error_get_last().
        $response = @file_get_contents($url, false, $context);
        $generatedDocs = is_string($response)
            ? (json_decode($response, true)['candidates'][0]['content']['parts'][0]['text'] ?? '')
            : '';
        if (is_string($generatedDocs) && $generatedDocs !== '') {
            return $generatedDocs;
        }

        print_r(error_get_last());
        print_r($response);
        if ($retriesLeft <= 0) {
            exit(1);
        }
        echo "Retrying... ($retriesLeft retries left)\n";
        sleep(SLEEP_SECONDS_BETWEEN_REQUESTS);
    }
}
|
|
||
/**
 * Builds the prompt pair used to translate one documentation page.
 *
 * @param string $language           Key of LANGUAGES selecting the target language.
 * @param string $englishFile        Content of the English page.
 * @param string $currentTranslation Content of the existing translation.
 *
 * @return array{0: string, 1: string} The system prompt followed by the user prompt.
 */
function createPrompt(string $language, string $englishFile, string $currentTranslation): array
{
    $target = LANGUAGES[$language];

    // Fixed instructions: nothing is interpolated, so a nowdoc is enough.
    $instructions = <<<'PROMPT'
        You are translating the docs of the FrankenPHP server from english to other languages.
        You will receive the english version (authoritative) and a translation (possibly incomplete or incorrect).
        Your task is to produce a corrected and complete translation in the target language.
        You must strictly follow these rules:
        - You must not change the structure of the document (headings, code blocks, etc.)
        - You must not translate code, only comments and strings inside the code.
        - You must not translate links to other documentation pages, only the link text.
        - You must not add or remove any content, only translate what is present.
        - You must ensure that the translation is accurate and faithful to the original meaning.
        - You must write in a natural and fluent style, appropriate for technical documentation.
        - You must use the correct terminology for technical terms in the target language, don't translate if unsure.
        - You must not include any explanations or notes, only the translated document.
        PROMPT;

    // Both documents are fenced as markdown; the text ends with an opening ```markdown fence.
    $request = <<<PROMPT
        Here is the english version of the document:

        ```markdown
        {$englishFile}
        ```

        Here is the current translation in {$target}:

        ```markdown
        {$currentTranslation}
        ```

        Here is the corrected and completed translation in {$target}:

        ```markdown
        PROMPT;

    return [$instructions, $request];
}
|
|
||
/**
 * Strips the markdown code fence a model reply may be wrapped in.
 *
 * Removes a leading "```markdown" marker and any trailing backticks, then
 * normalizes the result to end with exactly one newline.
 *
 * @param string $markdown Raw text returned by the model.
 *
 * @return string The bare document, terminated by a single "\n".
 */
function sanitizeMarkdown(string $markdown): string
{
    // Trim first: whitespace around the reply (typically a newline after the
    // closing fence) would otherwise hide the fences from the checks below.
    $markdown = trim($markdown);
    if (str_starts_with($markdown, '```markdown')) {
        $markdown = substr($markdown, strlen('```markdown'));
    }
    $markdown = rtrim($markdown, '`');

    return trim($markdown) . "\n";
}
|
|
||
// File names given on the command line restrict the run to those pages; without
// any argument every page is translated. Each argument is kept as given and is
// additionally split on whitespace, so a space-separated list passed as a single
// argument (e.g. a quoted shell variable) selects the same files as separate arguments.
$fileToTranslate = [];
foreach (array_slice($argv, 1) as $argument) {
    $fileToTranslate[] = $argument;
    array_push($fileToTranslate, ...preg_split('/\s+/', $argument, -1, PREG_SPLIT_NO_EMPTY));
}

$apiKey = $_SERVER['GEMINI_API_KEY'] ?? $_ENV['GEMINI_API_KEY'] ?? '';
if (!$apiKey) {
    echo 'Enter gemini api key ($GEMINI_API_KEY): ';
    // fgets() returns false when STDIN is closed (non-interactive runs).
    $apiKey = trim((string) fgets(STDIN));
}
if ($apiKey === '') {
    // Fail right away instead of sending requests that cannot succeed.
    fwrite(STDERR, "No gemini api key provided\n");
    exit(1);
}

// The English pages are the markdown files sitting next to this script.
$files = array_filter(scandir(__DIR__), fn($filename) => str_ends_with($filename, '.md'));
foreach ($files as $file) {
    if ($fileToTranslate && !in_array($file, $fileToTranslate, true)) {
        continue;
    }
    $englishFile = file_get_contents(__DIR__ . "/$file");
    if ($englishFile === false) {
        fwrite(STDERR, "Could not read $file, skipping\n");
        continue;
    }
    foreach (LANGUAGES as $language => $languageName) {
        echo "Translating $file to $languageName\n";
        $translationPath = __DIR__ . "/$language/$file";
        // A page without a translation yet starts from an empty one.
        $currentTranslation = is_file($translationPath) ? (string) file_get_contents($translationPath) : '';
        [$systemPrompt, $userPrompt] = createPrompt($language, $englishFile, $currentTranslation);
        $markdown = makeGeminiRequest($systemPrompt, $userPrompt, MODEL, $apiKey);

        echo "Writing translated file to $language/$file\n";
        if (!is_dir(dirname($translationPath))) {
            mkdir(dirname($translationPath), 0777, true);
        }
        if (file_put_contents($translationPath, sanitizeMarkdown($markdown)) === false) {
            fwrite(STDERR, "Could not write $language/$file\n");
            exit(1);
        }

        echo "sleeping to avoid rate limiting...\n";
        sleep(SLEEP_SECONDS_BETWEEN_REQUESTS);
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we could use `gemini-cli` or `copilot-cli` directly, and have it run the linter and fix the issues?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The script doesn't need to be PHP; it could also be Bash. You'll usually get more consistent results if the input and output of the LLM are minimized, though. Prompting the CLI directly makes this too unpredictable and noisy IMO, but we can try it as an experiment if you want.