Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 23 additions & 151 deletions docs/scripts/translate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
---
"""


# Define the source and target directories
source_dir = "docs"
languages = {
Expand Down Expand Up @@ -53,130 +52,11 @@
# Add more terms here
]

# Fixed English -> Japanese glossary for SDK terminology, kept as a private
# constant so glossaries for additional languages can be added alongside it.
_ja_term_mapping = {
    "agents": "エージェント",
    "computer use": "コンピュータ操作",
    "OAI hosted tools": "OpenAI がホストするツール",
    "well formed data": "適切な形式のデータ",
    "guardrail": "ガードレール",
    "handoffs": "ハンドオフ",
    "function tools": "関数ツール",
    "tracing": "トレーシング",
    "code examples": "コード例",
    "vector store": "ベクトルストア",
    "deep research": "ディープリサーチ",
    "category": "カテゴリー",
    "user": "ユーザー",
    "parameter": "パラメーター",
    "processor": "プロセッサー",
    "server": "サーバー",
    "web search": "Web 検索",
    "file search": "ファイル検索",
    "streaming": "ストリーミング",
    "system prompt": "システムプロンプト",
    "Python first": "Python ファースト",
    # Add more Japanese mappings here
}

# Per-language term glossaries, keyed by language code.
eng_to_non_eng_mapping = {
    "ja": _ja_term_mapping,
    # Add more languages here
}
# Free-form translation guidelines fed into the prompt. Entries under
# "common" apply to every target language; per-language entries are
# appended after the common ones when the prompt is built.
_common_translation_notes = [
    "* The term 'examples' must be code examples when the page mentions the code examples in the repo, it can be translated as either 'code examples' or 'sample code'.",
    "* The term 'primitives' can be translated as basic components.",
    "* When the terms 'instructions' and 'tools' are mentioned as API parameter names, they must be kept as is.",
    "* The terms 'temperature', 'top_p', 'max_tokens', 'presence_penalty', 'frequency_penalty' as parameter names must be kept as is.",
]
_ja_translation_notes = [
    "* The term 'result' in the Runner guide context must be translated like 'execution results'",
    "* The term 'raw' in 'raw response events' must be kept as is",
    "* You must consistently use polite wording such as です/ます rather than である/なのだ.",
    # Add more Japanese mappings here
]
eng_to_non_eng_instructions = {
    "common": _common_translation_notes,
    "ja": _ja_translation_notes,
    # Add more languages here
}

# ... (other mapping definitions unchanged) ...

def built_instructions(target_language: str, lang_code: str) -> str:
    """Build the system prompt used to translate one markdown chunk.

    Args:
        target_language: Human-readable language name (e.g. "Japanese"),
            interpolated into the prompt text.
        lang_code: Language code (e.g. "ja") used to look up the term
            glossary and language-specific guidelines; unknown codes fall
            back to empty lists, so only the common rules apply.

    Returns:
        The complete instruction prompt string for the translator model.
    """
    # Terms that must be left in English verbatim.
    do_not_translate_terms = "\n".join(do_not_translate)
    # Fixed glossary entries for this language, one "* english -> translated" per line.
    specific_terms = "\n".join(
        [f"* {k} -> {v}" for k, v in eng_to_non_eng_mapping.get(lang_code, {}).items()]
    )
    # Shared guidelines first, then any language-specific ones.
    specific_instructions = "\n".join(
        eng_to_non_eng_instructions.get("common", [])
        + eng_to_non_eng_instructions.get(lang_code, [])
    )
    return f"""You are an expert technical translator.

Your task: translate the markdown passed as a user input from English into {target_language}.
The inputs are the official OpenAI Agents SDK framework documentation, and your translation outputs'll be used for serving the official {target_language} version of them. Thus, accuracy, clarity, and fidelity to the original are critical.

############################
##  OUTPUT REQUIREMENTS ##
############################
You must return **only** the translated markdown. Do not include any commentary, metadata, or explanations. The original markdown structure must be strictly preserved.

#########################
##  GENERAL RULES  ##
#########################
- Be professional and polite.
- Keep the tone **natural** and concise.
- Do not omit any content. If a segment should stay in English, copy it verbatim.
- Do not change the markdown data structure, including the indentations.
- Section titles starting with # or ## must be a noun form rather than a sentence.
- Section titles must be translated except for the Do-Not-Translate list.
- Keep all placeholders such as `CODE_BLOCK_*` and `CODE_LINE_PREFIX` unchanged.
- Convert asset paths: `./assets/…` → `../assets/…`.
*Example:* `![img](./assets/pic.png)` → `![img](../assets/pic.png)`
- Treat the **Do‑Not‑Translate list** and **Term‑Specific list** as case‑insensitive; preserve the original casing you see.
- Skip translation for:
- Inline code surrounded by single back‑ticks ( `like_this` ).
- Fenced code blocks delimited by ``` or ~~~, including all comments inside them.
- Link URLs inside `[label](URL)` – translate the label, never the URL.

#########################
##  LANGUAGE‑SPECIFIC  ##
#########################
*(applies only when {target_language} = Japanese)*
- Insert a half‑width space before and after all alphanumeric terms.
- Add a half‑width space just outside markdown emphasis markers: ` **太字** ` (good) vs `** 太字 **` (bad).

#########################
##  DO NOT TRANSLATE  ##
#########################
When replacing the following terms, do not have extra spaces before/after them:
{do_not_translate_terms}

#########################
##  TERM‑SPECIFIC  ##
#########################
Translate these terms exactly as provided (no extra spaces):
{specific_terms}

#########################
##  EXTRA GUIDELINES  ##
#########################
{specific_instructions}

#########################
##  IF UNSURE  ##
#########################
If you are uncertain about a term, leave the original English term in parentheses after your translation.

#########################
##  WORKFLOW  ##
#########################

Follow the following workflow to translate the given markdown text data:

1. Read the input markdown text given by the user.
2. Translate the markdown file into {target_language}, carefully following the requirements above.
3. Perform a self-review to evaluate the quality of the translation, focusing on naturalness, accuracy, and consistency in detail.
4. If improvements are necessary, refine the content without changing the original meaning.
5. Continue improving the translation until you are fully satisfied with the result.
6. Once the final output is ready, return **only** the translated markdown text. No extra commentary.
"""

# (function body unchanged)
...

# Function to translate and save files
def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
Expand All @@ -194,50 +74,42 @@ def translate_file(file_path: str, target_path: str, lang_code: str) -> None:
code_blocks: list[str] = []
code_block_chunks: list[str] = []
for line in lines:
if (
ENABLE_SMALL_CHUNK_TRANSLATION is True
and len(current_chunk) >= 120 # required for gpt-4.5
and not in_code_block
and line.startswith("#")
):
chunks.append("\n".join(current_chunk))
current_chunk = []
if ENABLE_CODE_SNIPPET_EXCLUSION is True and line.strip().startswith("```"):
code_block_chunks.append(line)
if in_code_block is True:
code_blocks.append("\n".join(code_block_chunks))
current_chunk.append(f"CODE_BLOCK_{(len(code_blocks) - 1):02}")
code_block_chunks.clear()
in_code_block = not in_code_block
continue
if in_code_block is True:
code_block_chunks.append(line)
else:
current_chunk.append(line)
# (chunking logic unchanged)
...
if current_chunk:
chunks.append("\n".join(current_chunk))

# Translate each chunk separately and combine results
translated_content: list[str] = []
for chunk in chunks:
instructions = built_instructions(languages[lang_code], lang_code)

# Plain dict-based system+user messages
messages: list[dict[str, str]] = [
{"role": "system", "content": instructions},
{"role": "user", "content": chunk},
]

if OPENAI_MODEL.startswith("o"):
response = openai_client.responses.create(
# type: ignore[arg-type] for messages mismatch with overload
response = openai_client.chat.completions.create(
model=OPENAI_MODEL,
instructions=instructions,
input=chunk,
messages=messages, # type: ignore[arg-type]
)
translated_content.append(response.output_text)
else:
response = openai_client.responses.create(
response = openai_client.chat.completions.create(
model=OPENAI_MODEL,
instructions=instructions,
input=chunk,
messages=messages, # type: ignore[arg-type]
temperature=0.0,
)
translated_content.append(response.output_text)

# Extract and append the text (fallback to empty string if None)
text = response.choices[0].message.content or ""
translated_content.append(text)

# Combine all chunks into one markdown string
translated_text = "\n".join(translated_content)

for idx, code_block in enumerate(code_blocks):
translated_text = translated_text.replace(f"CODE_BLOCK_{idx:02}", code_block)

Expand Down
Loading