# Source: llm-ppt-translator/main.py (GitHub repository engchina/llm-ppt-translator, branch main)
import os
import re
import time

import gradio as gr
import oci
from dotenv import load_dotenv, find_dotenv
from openai import OpenAI
from pptx import Presentation
from pptx.enum.shapes import PP_PLACEHOLDER_TYPE

# Load settings from the .env file located by find_dotenv().
_ = load_dotenv(dotenv_path=find_dotenv())

# Translated decks are written to an "outputs" folder next to this script;
# create it up front so saving never fails on a missing directory.
_script_dir = os.path.dirname(__file__)
output_dir = os.path.join(_script_dir, "outputs")
os.makedirs(output_dir, exist_ok=True)

# Shared OpenAI-compatible client. Both variables are required: a missing
# OPENAI_API_KEY or OPENAI_BASE_URL raises KeyError at import time.
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_BASE_URL"],
)


def _request_openai_translation(text, target_lang):
    """Send one translation request through the module-level OpenAI client.

    The model is read from the OPENAI_MODEL_NAME environment variable.
    Returns the translated string; raises if the request fails or the reply
    carries no text content.
    """
    # NOTE: the whitespace inside the prompt literal is kept exactly as it was
    # so the request payload sent to the model is unchanged.
    completion = client.chat.completions.create(
        model=os.environ["OPENAI_MODEL_NAME"],
        messages=[
            {"role": "system",
             "content": "You are a translation assistant specialized in maintaining the concise and professional style often used in presentation slides. Your task is to translate text while preserving placeholders and respecting language-specific style conventions."},
            {"role": "user",
             "content": f"""
                             Translate the text below into {target_lang}, adhering to the following rules:
                             1. Keep the original tone and style, ensuring the translation is concise and suitable for presentation slides.
                             2. Avoid overly formal or verbose expressions. For example:
                                - In Japanese, avoid 'です' and 'ます' unless absolutely necessary.
                                - In Chinese, use straightforward and professional wording.
                                - In English, prioritize brevity and clarity.
                             3. Do not translate or modify placeholders in the format [PLACEHOLDER_X]. Ensure placeholders remain in their original positions and with the same quantity as in the original text.
                             4. Only output the translated text without explanations or extra comments.

                             Text: {text}
                         """}
        ]
    )
    translated = completion.choices[0].message.content
    if translated is None:
        # A reply without text content is useless to callers (they assign the
        # result to text frames), so surface it as a failed attempt.
        raise ValueError("OpenAI response contained no text content")
    return translated


def _request_oci_translation(text, target_lang, model_name):
    """Send one translation request to OCI Generative AI (Cohere chat API).

    Reads the OCI profile named by CONFIG_PROFILE from ~/.oci/config and the
    compartment from COMPARTMENT_ID. ``model_name`` is used as the on-demand
    model id. Returns the translated string; raises if the request fails or
    the reply carries no text.
    """
    config = oci.config.from_file('~/.oci/config', os.environ.get("CONFIG_PROFILE"))
    endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"

    # SDK-level retries are switched off; translate_text owns the retry loop.
    generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(
        config=config,
        service_endpoint=endpoint,
        retry_strategy=oci.retry.NoneRetryStrategy(),
        timeout=(10, 240)  # presumably (connect, read) seconds - confirm against the OCI SDK docs
    )

    chat_request = oci.generative_ai_inference.models.CohereChatRequest()
    chat_request.preamble_override = "You are a translation assistant specialized in maintaining the concise and professional style often used in presentation slides. Your task is to translate text while preserving placeholders and respecting language-specific style conventions."
    # NOTE: whitespace inside the prompt literal is kept exactly as it was.
    chat_request.message = f"""
                    Translate the text below into {target_lang}, adhering to the following rules:
                    1. Keep the original tone and style, ensuring the translation is concise and suitable for presentation slides.
                    2. Avoid overly formal or verbose expressions. For example:
                       - In Japanese, avoid 'です' and 'ます' unless absolutely necessary.
                       - In Chinese, use straightforward and professional wording.
                       - In English, prioritize brevity and clarity.
                    3. Do not translate or modify placeholders in the format [PLACEHOLDER_X]. Ensure placeholders remain in their original positions and with the same quantity as in the original text.
                    4. Only output the translated text without explanations or extra comments.

                    Text: {text}
                """

    # Generation parameters.
    chat_request.max_tokens = 2000
    chat_request.temperature = 0
    chat_request.frequency_penalty = 0
    chat_request.top_p = 0.75
    chat_request.top_k = 0
    chat_request.is_stream = False

    chat_detail = oci.generative_ai_inference.models.ChatDetails()
    chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(
        model_id=model_name
    )
    chat_detail.chat_request = chat_request
    chat_detail.compartment_id = os.environ.get("COMPARTMENT_ID")

    chat_response = generative_ai_inference_client.chat(chat_detail)
    translated = chat_response.data.chat_response.text
    if translated is None:
        raise ValueError("OCI response contained no text content")
    return translated


def translate_text(text, target_lang, model_name="gpt-4"):
    """Translate *text* into *target_lang* with the selected LLM backend.

    Args:
        text: Text to translate. May contain ``[PLACEHOLDER_X]`` markers,
            which the prompt instructs the model to leave untouched.
        target_lang: Name of the target language, inserted into the prompt.
        model_name: ``"gpt-4"`` routes the request through the module-level
            OpenAI client; any other value is treated as an OCI Generative AI
            model id.

    Returns:
        The translated text. Empty or whitespace-only input is returned
        unchanged without calling any API. If every attempt fails, the
        original *text* is returned so callers can keep the source wording.
    """
    max_attempts = 5  # maximum number of API attempts per text

    # Nothing to translate: skip the API round-trip entirely.
    if not text or not text.strip():
        return text

    for attempt in range(max_attempts):
        try:
            if model_name == "gpt-4":
                translated = _request_openai_translation(text, target_lang)
            else:
                translated = _request_oci_translation(text, target_lang, model_name)
        except Exception as e:
            # Best-effort translation: log, pause briefly, and try again.
            if attempt < max_attempts - 1:
                print(f"Attempt {attempt + 1} failed with error: {str(e)}. Retrying...")
                time.sleep(1)
            else:
                print(f"All {max_attempts} attempts failed with error: {str(e)}")
        else:
            print(f"{text=}")
            print(f"translated: {translated}\n")
            return translated

    # All attempts failed: fall back to the untranslated input.
    return text


# Matches text that is only a number: integer, negative, or decimal.
_NUMERIC_TEXT_RE = re.compile(r'^-?\d+\.?\d*$')


def _is_plain_number(text):
    """Return True when *text*, ignoring surrounding whitespace, is just a number."""
    return _NUMERIC_TEXT_RE.match(text.strip()) is not None


def _translate_table(table, target_lang, model_name):
    """Translate every non-empty, non-numeric cell of *table* in place."""
    for row in table.rows:
        for cell in row.cells:
            original_text = cell.text_frame.text
            if not original_text or not original_text.strip():
                continue
            if _is_plain_number(original_text):
                continue
            translated_text = translate_text(original_text, target_lang, model_name)
            # Never write a missing result into the cell; keep the source text.
            if translated_text is not None:
                cell.text_frame.text = translated_text


def _translate_paragraph(paragraph, target_lang, model_name):
    """Translate one paragraph in place, run by run.

    Each non-empty run is prefixed with a unique ``[PLACEHOLDER_<idx>]`` marker
    and the whole paragraph is translated in a single request; the reply is
    then cut at the markers and each piece is written back to its run.
    """
    # Step 1: collect the non-empty runs together with their markers.
    tagged_runs = []
    for idx, run in enumerate(paragraph.runs):
        run_text = run.text.strip()
        if run_text:
            tagged_runs.append((run, f"[PLACEHOLDER_{idx}]", run_text))

    if not tagged_runs:
        return

    # Purely numeric paragraphs need no translation. The check must look at
    # the text WITHOUT markers: the tagged text always starts with
    # "[PLACEHOLDER_", so it can never match the numeric pattern.
    if _is_plain_number("".join(run_text for _, _, run_text in tagged_runs)):
        return

    # Step 2: translate the whole paragraph, markers included.
    tagged_text = "".join(f"{marker}{run_text}" for _, marker, run_text in tagged_runs)
    translated = translate_text(tagged_text, target_lang, model_name)
    if not translated:
        return

    # Step 3: split the reply at the markers and write each piece back.
    for run, marker, _ in tagged_runs:
        start_idx = translated.find(marker)
        if start_idx == -1:
            # The model dropped this marker; leave the run untouched.
            continue
        after_marker = translated[start_idx + len(marker):]
        # The run's text extends up to the next marker (or the end).
        run.text = after_marker.split("[PLACEHOLDER_", 1)[0]


def _translate_notes(slide, target_lang, model_name):
    """Translate the speaker notes of *slide* in place, if it has any."""
    if not slide.has_notes_slide:
        return
    notes_frame = slide.notes_slide.notes_text_frame
    if notes_frame is None:
        # python-pptx returns None when the notes slide has no notes placeholder.
        return
    original_text = notes_frame.text
    if not original_text or not original_text.strip():
        return
    if _is_plain_number(original_text):
        return
    translated_text = translate_text(original_text, target_lang, model_name)
    if translated_text is not None:
        notes_frame.text = translated_text


def translate_ppt(model_name, input_ppt, target_lang):
    """Translate a PPTX file and save the result into the outputs folder.

    Tables, text frames and speaker notes on every slide are translated via
    ``translate_text``. Footer, slide-number and date placeholders as well as
    purely numeric texts are left as they are.

    Args:
        model_name: Backend/model selector forwarded to ``translate_text``.
        input_ppt: Path of the presentation to translate.
        target_lang: Target language name; also appended to the output file name.

    Returns:
        Path of the saved, translated presentation inside ``output_dir``.
    """
    ppt = Presentation(input_ppt)
    input_file_name = os.path.basename(input_ppt)

    # Placeholder kinds that are skipped.
    skipped_placeholder_types = (
        PP_PLACEHOLDER_TYPE.FOOTER,
        PP_PLACEHOLDER_TYPE.SLIDE_NUMBER,
        PP_PLACEHOLDER_TYPE.DATE,
    )

    for slide_index, slide in enumerate(ppt.slides, start=1):
        print(f'Translate slide {slide_index}/{len(ppt.slides)}')
        print('-------------------------------------------')
        for shape in slide.shapes:
            if shape.is_placeholder and shape.placeholder_format.type in skipped_placeholder_types:
                continue

            if shape.has_table:
                _translate_table(shape.table, target_lang, model_name)
            elif shape.has_text_frame:
                for paragraph in shape.text_frame.paragraphs:
                    _translate_paragraph(paragraph, target_lang, model_name)

        _translate_notes(slide, target_lang, model_name)

    # Build "<stem>_<lang>.<ext>"; a name without any dot gets just the suffix
    # instead of having the whole name repeated as a fake extension.
    stem, dot, extension = input_file_name.rpartition('.')
    if dot:
        output_file_name = f"{stem}_{target_lang}.{extension}"
    else:
        output_file_name = f"{input_file_name}_{target_lang}"

    # Save the translated presentation.
    output_file_path = os.path.join(output_dir, output_file_name)
    print(f"{output_file_path=}")
    ppt.save(output_file_path)
    print("Translation completed.")
    return output_file_path


# Gradio widgets. The three inputs are passed to translate_ppt positionally,
# so their order in `inputs` below must stay (model, file, language).
model_type = gr.Radio(
    label="Model Type",
    choices=["cohere.command-r-08-2024", "cohere.command-r-plus-08-2024", "gpt-4"],
    value="gpt-4",
)
input_ppt = gr.File(file_types=[".pptx"], label="Upload PPTX file")
target_lang = gr.Dropdown(
    label="Target Language",
    choices=["English", "Japanese", "Chinese"],
    value="Japanese",
)
output_ppt = gr.File(label="Download Translated PPTX")

# Build the single-function web UI and start serving it on port 8080.
gr.Interface(
    title="PPT Translator",
    description="Upload a PPTX file and specify target language to get a translated PPTX file.",
    fn=translate_ppt,
    inputs=[model_type, input_ppt, target_lang],
    outputs=output_ppt,
    flagging_mode="never",
).launch(server_port=8080)