4
4
import argparse
5
5
from openai import OpenAI
6
6
from concurrent .futures import ThreadPoolExecutor
7
- from typing import Any , Dict , List , Optional
8
-
9
- try :
10
- import yaml # type: ignore
11
- except Exception :
12
- yaml = None # PyYAML may not be available in some environments
13
7
14
8
# import logging
15
9
# logging.basicConfig(level=logging.INFO)
36
30
# Add more languages here, e.g., "fr": "French"
37
31
}
38
32
39
# Optional allow-list of language codes to translate, read once at import time.
# Set ONLY_LANG (checked first) or LANGS to a comma-separated list, e.g. "ko" or "ja,ko".
# Surrounding whitespace is stripped and blank entries are dropped; an empty list
# means no restriction was requested.
# Duplicates are removed (first occurrence wins) so a value such as "ja,ja" does
# not make the same language get processed twice.
ONLY_LANGS = list(
    dict.fromkeys(
        code
        for code in (
            part.strip()
            for part in (
                os.environ.get("ONLY_LANG") or os.environ.get("LANGS") or ""
            ).split(",")
        )
        if code
    )
)
45
-
46
33
# Build the shared OpenAI client. PROD_OPENAI_API_KEY takes precedence;
# OPENAI_API_KEY is used when the former is unset or empty.
api_key = os.environ.get("PROD_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
openai_client = OpenAI(api_key=api_key)
90
77
"file search" : "ファイル検索" ,
91
78
"streaming" : "ストリーミング" ,
92
79
"system prompt" : "システムプロンプト" ,
93
- "Python- first" : "Python ファースト" ,
80
+ "Python first" : "Python ファースト" ,
94
81
# Add more Japanese mappings here
95
82
},
96
83
"ko" : {
153
140
}
154
141
155
142
156
def _extract_sidebar_translations(lang_code: str) -> Dict[str, Dict[str, Optional[str]]]:
    """Map doc paths from the mkdocs.yml nav to their English and translated labels.

    Reads ``mkdocs.yml`` located three directory levels above this file, takes the
    first plugin entry keyed ``i18n``, and pairs the ``nav`` of its ``en`` language
    entry with the ``nav`` of the ``lang_code`` entry.

    The lookup is best-effort: a missing, unreadable, unparsable, or unexpectedly
    shaped config yields an empty dict instead of an exception.

    Args:
        lang_code: Locale whose nav labels are used as the translations.

    Returns:
        ``{path: {"label": <English label>, "translation": <label in lang_code or None>}}``
        for every path found in the English nav. Empty when PyYAML is unavailable,
        the file cannot be loaded, or no i18n language list is configured.
    """
    if yaml is None:
        return {}

    repo_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    mkdocs_path = os.path.join(repo_root, "mkdocs.yml")
    try:
        with open(mkdocs_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        # Missing/unreadable file or YAML that safe_load cannot parse.
        return {}

    # safe_load returns None for an empty document and may return a non-mapping.
    if not isinstance(data, dict):
        return {}
    plugins = data.get("plugins")
    if not isinstance(plugins, list):
        return {}

    # Only the first plugin entry that carries an "i18n" key is considered.
    languages_block: Any = None
    for plugin in plugins:
        if isinstance(plugin, dict) and "i18n" in plugin:
            i18n_config = plugin["i18n"]
            if isinstance(i18n_config, dict):
                languages_block = i18n_config.get("languages")
            break
    if not isinstance(languages_block, list):
        return {}

    # Malformed entries are skipped individually so that one bad entry does not
    # discard the labels of the well-formed ones.
    nav_by_locale: Dict[str, Any] = {}
    for lang in languages_block:
        if not isinstance(lang, dict):
            continue
        locale = lang.get("locale")
        if isinstance(locale, str):
            nav_by_locale[locale] = lang.get("nav")

    def collect(nav: Any, found: Dict[str, str]) -> Dict[str, str]:
        """Walk a nav tree in order, recording ``path -> label`` into ``found``."""
        if not isinstance(nav, list):
            return found
        for item in nav:
            if not isinstance(item, dict):
                # Bare string entries carry no label, so there is nothing to record.
                continue
            for label, value in item.items():
                if isinstance(value, str):
                    found[value] = str(label)
                else:
                    # Nested section: descend into its children.
                    collect(value, found)
        return found

    en_map = collect(nav_by_locale.get("en"), {})
    tgt_map = collect(nav_by_locale.get(lang_code), {})
    return {
        path_key: {"label": en_label, "translation": tgt_map.get(path_key)}
        for path_key, en_label in en_map.items()
    }
214
-
215
-
216
- def built_instructions (
217
- target_language : str ,
218
- lang_code : str ,
219
- sidebar_map : Optional [Dict [str , Dict [str , Optional [str ]]]] = None ,
220
- ) -> str :
143
+ def built_instructions (target_language : str , lang_code : str ) -> str :
221
144
do_not_translate_terms = "\n " .join (do_not_translate )
222
145
specific_terms = "\n " .join (
223
146
[f"* { k } -> { v } " for k , v in eng_to_non_eng_mapping .get (lang_code , {}).items ()]
@@ -226,23 +149,6 @@ def built_instructions(
226
149
eng_to_non_eng_instructions .get ("common" , [])
227
150
+ eng_to_non_eng_instructions .get (lang_code , [])
228
151
)
229
- sidebar_labels_block = ""
230
- if sidebar_map :
231
- label_lines : List [str ] = []
232
- for link , entry in sidebar_map .items ():
233
- if entry .get ("translation" ):
234
- label_lines .append (
235
- f"- { link } : { entry ['translation' ]} (sidebar translation)"
236
- )
237
- elif entry .get ("label" ):
238
- label_lines .append (f"- { link } : { entry ['label' ]} (sidebar label)" )
239
- if label_lines :
240
- sidebar_labels_block = (
241
- "\n \n #########################\n ## PAGE TITLES ##\n #########################\n "
242
- "When you see links to another page, consistently use the following labels:\n "
243
- + "\n " .join (label_lines )
244
- + "\n \n Always use these canonical translations for page titles and references."
245
- )
246
152
return f"""You are an expert technical translator.
247
153
248
154
Your task: translate the markdown passed as a user input from English into { target_language } .
@@ -270,8 +176,6 @@ def built_instructions(
270
176
- Inline code surrounded by single back‑ticks ( `like_this` ).
271
177
- Fenced code blocks delimited by ``` or ~~~, including all comments inside them.
272
178
- Link URLs inside `[label](URL)` – translate the label, never the URL.
273
- - When translating Markdown tables, preserve the exact table structure, including all delimiters (|), header separators (---), and row/column counts. Only translate the cell contents. Do not add, remove, or reorder columns or rows.
274
- { sidebar_labels_block }
275
179
276
180
#########################
277
181
## HARD CONSTRAINTS ##
@@ -362,15 +266,6 @@ def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
362
266
code_blocks : list [str ] = []
363
267
code_block_chunks : list [str ] = []
364
268
for line in lines :
365
- # Treat single-line import statements as code blocks to avoid accidental translation
366
- if (
367
- ENABLE_CODE_SNIPPET_EXCLUSION is True
368
- and (in_code_block is False )
369
- and line .startswith ("import " )
370
- ):
371
- code_blocks .append (line )
372
- current_chunk .append (f"CODE_BLOCK_{ (len (code_blocks ) - 1 ):02} " )
373
- continue
374
269
if (
375
270
ENABLE_SMALL_CHUNK_TRANSLATION is True
376
271
and len (current_chunk ) >= 120 # required for gpt-4.5
@@ -397,11 +292,7 @@ def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
397
292
# Translate each chunk separately and combine results
398
293
translated_content : list [str ] = []
399
294
for chunk in chunks :
400
- instructions = built_instructions (
401
- languages [lang_code ],
402
- lang_code ,
403
- sidebar_map = _extract_sidebar_translations (lang_code ),
404
- )
295
+ instructions = built_instructions (languages [lang_code ], lang_code )
405
296
if OPENAI_MODEL .startswith ("gpt-5" ):
406
297
response = openai_client .responses .create (
407
298
model = OPENAI_MODEL ,
@@ -440,18 +331,10 @@ def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
440
331
441
332
def translate_single_source_file (file_path : str ) -> None :
442
333
relative_path = os .path .relpath (file_path , source_dir )
443
- if "ref/" in relative_path or not (
444
- file_path .endswith (".md" ) or file_path .endswith (".mdx" )
445
- ):
334
+ if "ref/" in relative_path or not file_path .endswith (".md" ):
446
335
return
447
336
448
- # Determine target languages
449
- target_langs = (
450
- [code for code in ONLY_LANGS if code in languages ]
451
- if ONLY_LANGS
452
- else list (languages .keys ())
453
- )
454
- for lang_code in target_langs :
337
+ for lang_code in languages :
455
338
target_dir = os .path .join (source_dir , lang_code )
456
339
target_path = os .path .join (target_dir , relative_path )
457
340
0 commit comments