-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathauto_translate_razor.py
More file actions
235 lines (219 loc) · 8.95 KB
/
auto_translate_razor.py
File metadata and controls
235 lines (219 loc) · 8.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import hashlib
import time
import logging
import requests
import xml.etree.ElementTree as ET
from collections import OrderedDict
import argparse
# === Configuration ===
# Directives that a localized .cshtml view needs at the top of the file
USING_DIRECTIVE = "@using Microsoft.Extensions.Localization"
INJECT_DIRECTIVE = "@inject IStringLocalizer<CPG_CaptchaWebPentestGame.SharedResource> Localizer"
# Paths
ROOT_DIR = "."  # project root directory
PROJECT_DIR = os.path.join(ROOT_DIR, "CaptchaWebPentestGame")
RESOURCES_DIR = os.path.join(PROJECT_DIR, "Resources")
BASE_NAME = "SharedResource"
DEFAULT_RESX = os.path.join(RESOURCES_DIR, f"{BASE_NAME}.resx")
RU_RESX = os.path.join(RESOURCES_DIR, f"{BASE_NAME}.ru.resx")
# Translation languages
SOURCE_LANG = "EN"
TARGET_LANG = "RU"
# DeepL API: endpoint (overridable through DEEPL_URL) and the mandatory key
DEEPL_URL = os.getenv("DEEPL_URL", "https://api-free.deepl.com/v2/translate")
DEEPL_AUTH_KEY = os.getenv("DEEPL_AUTH_KEY")
if not DEEPL_AUTH_KEY:
    print("ERROR: Не задан ключ DeepL. Установите переменную окружения DEEPL_AUTH_KEY.")
    # The bare `exit` name is installed by the `site` module for interactive
    # sessions and is missing when the interpreter runs without it, so the
    # exit is requested through the built-in exception instead.
    raise SystemExit(1)
# === Extraction patterns ===
# Each pattern exposes the candidate text as capture group 1.
# Page title assignment of the form: ViewData["Title"] = "..."
TITLE_PATTERN = re.compile(r'ViewData\["Title"\]\s*=\s*"([^"]+)"')
# Shortest run of characters (newlines included) between a '>' and the next
# '<'; the '<' itself is only looked ahead at, not consumed.
# NOTE(review): re.MULTILINE only changes '^'/'$', which this pattern does not use.
HTML_TEXT_PATTERN = re.compile(r'>([\s\S]*?)(?=<)', re.MULTILINE)
# Non-empty value of a double-quoted placeholder="..." attribute
PLACEHOLDER_PATTERN = re.compile(r'placeholder="([^"]+)"')
# === Helper: read a file while detecting its encoding ===
def read_file(path):
    """
    Read a text file whose encoding is not known in advance.

    The encodings are tried in this order: UTF-8 (a leading byte-order mark,
    if present, is dropped), cp1251, and finally latin1. latin1 assigns a
    character to every byte value, so the last attempt cannot fail with a
    decoding error and the function always returns text.

    Args:
        path: path of the file to read.

    Returns:
        The decoded content of the file as a string.

    Raises:
        OSError: if the file cannot be opened.
    """
    # 'utf-8-sig' decodes ordinary UTF-8 as well, but strips a BOM so that
    # U+FEFF does not end up in the middle of the text once something is
    # prepended to the returned content.
    for enc in ('utf-8-sig', 'cp1251'):
        try:
            with open(path, 'r', encoding=enc) as f:
                return f.read()
        except UnicodeDecodeError:
            continue
    # Guaranteed fallback: every byte sequence is valid latin1.
    with open(path, 'r', encoding='latin1') as f:
        return f.read()
# === DeepL helpers ===
def deepl_translate(texts, source, target, auth_key, batch_size=20, pause=1.0, timeout=30.0):
    """
    Translate a list of strings through the DeepL HTTP API.

    The texts are sent to DEEPL_URL in consecutive batches and the translated
    strings are returned in the same order as the input.

    Args:
        texts: list of strings to translate.
        source: source language code (upper-cased before sending).
        target: target language code (upper-cased before sending).
        auth_key: DeepL authentication key placed in the Authorization header.
        batch_size: maximum number of strings per request.
        pause: seconds to wait between two consecutive requests.
        timeout: seconds to wait for each HTTP request before giving up.

    Returns:
        A list with one translated string per input string; an empty list
        when `texts` is empty (no request is made in that case).

    Raises:
        requests.HTTPError: if the API answers with an error status.
        ValueError: if a response does not contain exactly one translation
            per submitted string.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"DeepL-Auth-Key {auth_key}"
    }
    translated = []
    for start in range(0, len(texts), batch_size):
        if start:
            # Pause only between requests; waiting after the last batch
            # would just delay the caller.
            time.sleep(pause)
        batch = texts[start:start + batch_size]
        payload = {
            "text": batch,
            "source_lang": source.upper(),
            "target_lang": target.upper(),
            "preserve_formatting": True
        }
        # Without a timeout a stalled connection would block the script forever.
        resp = requests.post(DEEPL_URL, headers=headers, json=payload, timeout=timeout)
        resp.raise_for_status()
        items = resp.json()["translations"]
        # Callers pair results with inputs by position, so a short or long
        # response must not be accepted silently.
        if len(items) != len(batch):
            raise ValueError(
                f"DeepL returned {len(items)} translations for {len(batch)} texts"
            )
        translated.extend(item['text'] for item in items)
    return translated
# === String collection ===
def extract_strings_from_file(path):
    """
    Collect candidate UI strings from one .cshtml file.

    Candidates are taken, in this order, from page-title assignments, from
    text found between tags, and from placeholder attributes. Each candidate
    is stripped of surrounding whitespace and kept only if it does not look
    like code or markup.

    Args:
        path: path of the file to scan.

    Returns:
        A list of strings (duplicates possible); an empty list when `path`
        does not end with '.cshtml'.
    """
    if not path.endswith('.cshtml'):
        return []
    content = read_file(path)

    # Filter: substrings that mark a fragment as code or markup, not UI text.
    # The multiplication-sign entity is matched as '&times' rather than the
    # bare word 'times', which also rejected ordinary sentences such as
    # "Sometimes" or "3 times".
    code_markers = ('@', '&times', '{', '}', 'case ', '=>', 'Localizer', ';', 'Model.')

    def should_translate(txt):
        """Return True for non-empty text that carries none of the code markers."""
        if not txt or txt.startswith(('if ', 'if(')):
            return False
        return not any(marker in txt for marker in code_markers)

    texts = []
    # The pattern order fixes the order of the result: titles, tag text, placeholders.
    for pattern in (TITLE_PATTERN, HTML_TEXT_PATTERN, PLACEHOLDER_PATTERN):
        for m in pattern.finditer(content):
            txt = m.group(1).strip()
            if should_translate(txt):
                texts.append(txt)
    return texts
# === Utilities ===
def make_key(text):
    """
    Derive a resource key from a source string.

    The key has the form '<slug>_<digest>': the slug is the text with every
    run of characters outside [0-9a-zA-Z] collapsed to '_', outer underscores
    trimmed, cut to 20 characters; the digest is the first 10 hex characters
    of the MD5 of the UTF-8 encoded text.
    """
    digest = hashlib.md5(text.encode('utf-8')).hexdigest()
    slug = re.sub(r'[^0-9a-zA-Z]+', '_', text)
    slug = slug.strip('_')
    return '_'.join((slug[:20], digest[:10]))
# Working with .resx files
def load_resx(path):
    """
    Read the name -> value pairs of a .resx file.

    Every <data> element below the root that has a non-empty 'name'
    attribute and a <value> child contributes one entry; a missing value
    text is stored as an empty string. A path that does not exist yields an
    empty dict.
    """
    if not os.path.exists(path):
        return {}
    entries = {}
    for node in ET.parse(path).findall('.//data'):
        name = node.get('name')
        value = node.find('value')
        if not name or value is None:
            continue
        entries[name] = value.text if value.text else ''
    return entries
def write_resx_file(path, mapping):
    """
    Write a name -> value mapping to a .resx file, replacing any existing file.

    The document consists of a <root> element holding four <resheader>
    entries followed by one <data name="..."><value>...</value></data>
    element per mapping item, in the mapping's iteration order. Missing
    parent directories are created.

    Args:
        path: destination file path.
        mapping: dict of resource key -> resource text.
    """
    root = ET.Element('root')
    headers = [
        ('resmimetype', 'text/microsoft-resx'),
        ('version', '2.0'),
        ('reader', 'System.Resources.ResXResourceReader, System.Windows.Forms'),
        ('writer', 'System.Resources.ResXResourceWriter, System.Windows.Forms')
    ]
    for name, val in headers:
        ET.SubElement(root, 'resheader', name=name).text = val
    for key, val in mapping.items():
        data = ET.SubElement(root, 'data', name=key)
        ET.SubElement(data, 'value').text = val
    # A bare file name has an empty dirname, and os.makedirs('') raises.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    ET.ElementTree(root).write(path, encoding='utf-8', xml_declaration=True)
    logging.info(f"Written resources to {path}")
# === Replacement in .cshtml ===
def replace_in_file(path, string_to_key):
    """
    Rewrite one .cshtml file so that known strings go through the localizer.

    The using/inject directives are added when absent, then page titles,
    text between tags and placeholder attributes whose stripped text is a
    key of `string_to_key` are replaced by a Localizer["<key>"] lookup. The
    file is written back (as UTF-8) only if its content changed.

    Args:
        path: path of the .cshtml file to update.
        string_to_key: dict mapping source text -> resource key.
    """
    content = read_file(path)
    modified = content
    # Add the directives if they are missing
    if USING_DIRECTIVE not in content:
        modified = USING_DIRECTIVE + "\n" + modified
    if INJECT_DIRECTIVE not in content:
        # Limit to the first occurrence so the inject line is added once even
        # if the using directive text appears several times in the file.
        modified = modified.replace(USING_DIRECTIVE, USING_DIRECTIVE + "\n" + INJECT_DIRECTIVE, 1)

    def lookup(raw):
        """Return the key for the stripped text, or None (never for blank text)."""
        stripped = raw.strip()
        # Blank text must not match, even if the mapping has an entry for ''.
        return string_to_key.get(stripped) if stripped else None

    # Title replacement
    def repl_title(m):
        key = lookup(m.group(1))
        return f'ViewData["Title"] = Localizer["{key}"]' if key else m.group(0)
    modified = TITLE_PATTERN.sub(repl_title, modified)

    # HTML text replacement
    def repl_html(m):
        raw = m.group(1)
        key = lookup(raw)
        if not key:
            return m.group(0)
        pre = m.group(0)[:m.start(1) - m.start(0)]
        post = m.group(0)[m.end(1) - m.start(0):]
        # Keep the whitespace that surrounded the text: dropping it would
        # glue inline fragments together and flatten the file's layout.
        lead = raw[:len(raw) - len(raw.lstrip())]
        trail = raw[len(raw.rstrip()):]
        return f'{pre}{lead}@Localizer["{key}"]{trail}{post}'
    modified = HTML_TEXT_PATTERN.sub(repl_html, modified)

    # Placeholder replacement
    def repl_placeholder(m):
        key = lookup(m.group(1))
        if key:
            return f'placeholder="@Localizer["{key}"]"'
        return m.group(0)
    modified = PLACEHOLDER_PATTERN.sub(repl_placeholder, modified)

    if modified != content:
        with open(path, 'w', encoding='utf-8') as fw:
            fw.write(modified)
        logging.info(f"Updated file: {path}")
# === Main workflow ===
def main():
    """
    Command-line entry point.

    Steps: parse arguments, load both .resx files, pick the .cshtml files to
    process (the one given with -f/--file, or every .cshtml under ROOT_DIR),
    extract their strings, translate the ones that still lack a Russian
    entry, write both .resx files, and finally rewrite the .cshtml files.
    Returns early, without writing anything, when the given file is invalid
    or when no strings are found.
    """
    parser = argparse.ArgumentParser(description='Translate .cshtml files with DeepL and update resources.')
    parser.add_argument('-f', '--file', help='Path to a single .cshtml file to translate')
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

    default_map = load_resx(DEFAULT_RESX)
    ru_map = load_resx(RU_RESX)
    # Reverse index: source text -> existing resource key
    text_to_key = {v: k for k, v in default_map.items()}

    files_to_process = []
    if args.file:
        if not (os.path.exists(args.file) and args.file.endswith('.cshtml')):
            logging.error(f"Файл не найден или не .cshtml: {args.file}")
            return
        files_to_process = [args.file]
    else:
        for root, _, files in os.walk(ROOT_DIR):
            for fname in files:
                if fname.endswith('.cshtml'):
                    files_to_process.append(os.path.join(root, fname))

    all_strings = []
    for path in files_to_process:
        all_strings.extend(extract_strings_from_file(path))
    # De-duplicate while keeping first-seen order
    unique_strings = list(OrderedDict.fromkeys(all_strings))
    logging.info(f"Found {len(unique_strings)} unique strings to translate.")
    if not unique_strings:
        logging.warning("No strings found.")
        return

    # A string needs translating when it has no key yet, or when its key
    # exists in the default resources but has no Russian entry; otherwise
    # such a key would stay untranslated on every later run.
    to_translate = [
        s for s in unique_strings
        if s not in text_to_key or text_to_key[s] not in ru_map
    ]
    logging.info(f"{len(to_translate)} new strings to translate.")
    translations = deepl_translate(to_translate, SOURCE_LANG, TARGET_LANG, DEEPL_AUTH_KEY) if to_translate else []
    # Pair each source string with its translation once, instead of
    # searching the list for every string.
    translated = dict(zip(to_translate, translations))

    for orig in unique_strings:
        key = text_to_key.get(orig)
        if key is None:
            key = make_key(orig)
            default_map[key] = orig
            text_to_key[orig] = key
        if orig in translated:
            ru_map[key] = translated[orig]

    write_resx_file(DEFAULT_RESX, default_map)
    write_resx_file(RU_RESX, ru_map)
    for path in files_to_process:
        replace_in_file(path, text_to_key)
    logging.info("All done.")


# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()