44import argparse
55from openai import OpenAI
66from concurrent .futures import ThreadPoolExecutor
7- from typing import Any , Dict , List , Optional
8-
9- try :
10- import yaml # type: ignore
11- except Exception :
12- yaml = None # PyYAML may not be available in some environments
137
148# import logging
159# logging.basicConfig(level=logging.INFO)
3630 # Add more languages here, e.g., "fr": "French"
3731}
3832
# Optional language filter read from the environment: ONLY_LANG takes precedence over
# LANGS. The value is a comma-separated list of language codes (e.g., "ko" or "ja,ko");
# blank entries are dropped, so an unset or empty variable yields an empty list.
ONLY_LANGS = [
    code
    for code in map(
        str.strip,
        (os.environ.get("ONLY_LANG") or os.environ.get("LANGS") or "").split(","),
    )
    if code
]
45-
4633# Initialize OpenAI client
4734api_key = os .getenv ("PROD_OPENAI_API_KEY" ) or os .getenv ("OPENAI_API_KEY" )
4835openai_client = OpenAI (api_key = api_key )
9077 "file search" : "ファイル検索" ,
9178 "streaming" : "ストリーミング" ,
9279 "system prompt" : "システムプロンプト" ,
93- "Python- first" : "Python ファースト" ,
80+ "Python first" : "Python ファースト" ,
9481 # Add more Japanese mappings here
9582 },
9683 "ko" : {
153140}
154141
155142
def _extract_sidebar_translations(lang_code: str) -> Dict[str, Dict[str, Optional[str]]]:
    """Extract mapping of doc file paths to labels/translations from mkdocs.yml.

    The file is read from the directory two levels above this file's directory.
    Per-locale navs are taken from the ``languages`` list of the first ``i18n``
    plugin entry: the "en" nav supplies the labels and the ``lang_code`` nav
    supplies the translations.

    Args:
        lang_code: Locale whose nav labels are reported as translations.

    Returns:
        A map ``{ path: { "label": str, "translation": str | None } }`` with one
        entry per page path found in the English nav. The map is empty when
        PyYAML is unavailable, mkdocs.yml cannot be read or parsed, or no usable
        i18n ``languages`` list is present.

    This is a best-effort lookup: a missing or malformed configuration never
    raises; malformed entries are skipped instead of discarding the whole result.
    """
    sidebar_map: Dict[str, Dict[str, Optional[str]]] = {}
    if yaml is None:
        return sidebar_map

    repo_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    mkdocs_path = os.path.join(repo_root, "mkdocs.yml")
    try:
        with open(mkdocs_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError):
        # Unreadable file, undecodable bytes (UnicodeDecodeError is a ValueError),
        # or YAML that safe_load rejects.
        return sidebar_map
    if not isinstance(data, dict):
        # An empty document loads as None; any non-mapping document has no plugins.
        return sidebar_map

    plugins = data.get("plugins")
    languages_block: Any = None
    for plugin in plugins if isinstance(plugins, list) else []:
        # Plugins may be listed as bare strings; only mapping entries carry config.
        if isinstance(plugin, dict) and "i18n" in plugin:
            i18n_config = plugin["i18n"]
            if isinstance(i18n_config, dict):
                languages_block = i18n_config.get("languages")
            break
    if not isinstance(languages_block, list):
        return sidebar_map

    nav_by_locale: Dict[str, Any] = {}
    for lang in languages_block:
        if isinstance(lang, dict) and isinstance(lang.get("locale"), str):
            nav_by_locale[lang["locale"]] = lang.get("nav")

    def collect(nav: Any) -> Dict[str, str]:
        """Flatten a nav tree into ``{page path: label}``, recursing into sections."""
        labels: Dict[str, str] = {}
        if not isinstance(nav, list):
            return labels
        for item in nav:
            if not isinstance(item, dict):
                # Bare path entries carry no label, so there is nothing to record.
                continue
            for label, value in item.items():
                if isinstance(value, str):
                    labels[value] = str(label)
                else:
                    labels.update(collect(value))
        return labels

    en_map = collect(nav_by_locale.get("en"))
    tgt_map = collect(nav_by_locale.get(lang_code))
    for path_key, en_label in en_map.items():
        sidebar_map[path_key] = {
            "label": en_label,
            "translation": tgt_map.get(path_key),
        }
    return sidebar_map
214-
215-
216- def built_instructions (
217- target_language : str ,
218- lang_code : str ,
219- sidebar_map : Optional [Dict [str , Dict [str , Optional [str ]]]] = None ,
220- ) -> str :
143+ def built_instructions (target_language : str , lang_code : str ) -> str :
221144 do_not_translate_terms = "\n " .join (do_not_translate )
222145 specific_terms = "\n " .join (
223146 [f"* { k } -> { v } " for k , v in eng_to_non_eng_mapping .get (lang_code , {}).items ()]
@@ -226,23 +149,6 @@ def built_instructions(
226149 eng_to_non_eng_instructions .get ("common" , [])
227150 + eng_to_non_eng_instructions .get (lang_code , [])
228151 )
229- sidebar_labels_block = ""
230- if sidebar_map :
231- label_lines : List [str ] = []
232- for link , entry in sidebar_map .items ():
233- if entry .get ("translation" ):
234- label_lines .append (
235- f"- { link } : { entry ['translation' ]} (sidebar translation)"
236- )
237- elif entry .get ("label" ):
238- label_lines .append (f"- { link } : { entry ['label' ]} (sidebar label)" )
239- if label_lines :
240- sidebar_labels_block = (
241- "\n \n #########################\n ## PAGE TITLES ##\n #########################\n "
242- "When you see links to another page, consistently use the following labels:\n "
243- + "\n " .join (label_lines )
244- + "\n \n Always use these canonical translations for page titles and references."
245- )
246152 return f"""You are an expert technical translator.
247153
248154Your task: translate the markdown passed as a user input from English into { target_language } .
@@ -270,8 +176,6 @@ def built_instructions(
270176 - Inline code surrounded by single back‑ticks ( `like_this` ).
271177 - Fenced code blocks delimited by ``` or ~~~, including all comments inside them.
272178 - Link URLs inside `[label](URL)` – translate the label, never the URL.
273- - When translating Markdown tables, preserve the exact table structure, including all delimiters (|), header separators (---), and row/column counts. Only translate the cell contents. Do not add, remove, or reorder columns or rows.
274- { sidebar_labels_block }
275179
276180#########################
277181## HARD CONSTRAINTS ##
@@ -362,15 +266,6 @@ def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
362266 code_blocks : list [str ] = []
363267 code_block_chunks : list [str ] = []
364268 for line in lines :
365- # Treat single-line import statements as code blocks to avoid accidental translation
366- if (
367- ENABLE_CODE_SNIPPET_EXCLUSION is True
368- and (in_code_block is False )
369- and line .startswith ("import " )
370- ):
371- code_blocks .append (line )
372- current_chunk .append (f"CODE_BLOCK_{ (len (code_blocks ) - 1 ):02} " )
373- continue
374269 if (
375270 ENABLE_SMALL_CHUNK_TRANSLATION is True
376271 and len (current_chunk ) >= 120 # required for gpt-4.5
@@ -397,11 +292,7 @@ def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
397292 # Translate each chunk separately and combine results
398293 translated_content : list [str ] = []
399294 for chunk in chunks :
400- instructions = built_instructions (
401- languages [lang_code ],
402- lang_code ,
403- sidebar_map = _extract_sidebar_translations (lang_code ),
404- )
295+ instructions = built_instructions (languages [lang_code ], lang_code )
405296 if OPENAI_MODEL .startswith ("gpt-5" ):
406297 response = openai_client .responses .create (
407298 model = OPENAI_MODEL ,
@@ -440,18 +331,10 @@ def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
440331
441332def translate_single_source_file (file_path : str ) -> None :
442333 relative_path = os .path .relpath (file_path , source_dir )
443- if "ref/" in relative_path or not (
444- file_path .endswith (".md" ) or file_path .endswith (".mdx" )
445- ):
334+ if "ref/" in relative_path or not file_path .endswith (".md" ):
446335 return
447336
448- # Determine target languages
449- target_langs = (
450- [code for code in ONLY_LANGS if code in languages ]
451- if ONLY_LANGS
452- else list (languages .keys ())
453- )
454- for lang_code in target_langs :
337+ for lang_code in languages :
455338 target_dir = os .path .join (source_dir , lang_code )
456339 target_path = os .path .join (target_dir , relative_path )
457340
0 commit comments