-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathtag_extractor.py
More file actions
272 lines (230 loc) · 9.23 KB
/
tag_extractor.py
File metadata and controls
272 lines (230 loc) · 9.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
import hashlib
import hmac
import json
import random
import time
from datetime import datetime
import requests
from flask import Blueprint, request, jsonify
import yaml
# Load configuration from a YAML file
def load_config(yaml_file):
    """
    Read a YAML file and return its parsed contents.

    Args:
        yaml_file: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so the same file is read identically on every operating system.
    with open(yaml_file, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
# Configuration is read once, at import time, from 'config.yaml' (a relative
# path, so it is resolved against the process's current working directory).
config = load_config('config.yaml')
# Baidu Translate API settings.
# BAIDU_TRANSLATE_CREDENTIALS is indexed by position, and each entry is read
# with the keys 'app_id' and 'secret_key'.
BAIDU_TRANSLATE_URL = config['baidu_translate_url']
BAIDU_TRANSLATE_CREDENTIALS = config['baidu_translate_credentials']
# Tencent Translate API settings
TENCENT_SECRET_ID = config['tencent_secret_id']
TENCENT_SECRET_KEY = config['tencent_secret_key']
TENCENT_TRANSLATE_URL = config['tencent_translate_url']
# Round-robin cursor: position in BAIDU_TRANSLATE_CREDENTIALS of the entry
# that get_next_credentials() returns next
current_index = 0
def get_next_credentials():
    """
    Return the next Baidu credential entry, rotating through the configured list.

    Returns:
        The entry of BAIDU_TRANSLATE_CREDENTIALS at the current cursor
        position. The module-level ``current_index`` is then advanced by one,
        wrapping back to 0 after the last entry.
    """
    global current_index
    selected = BAIDU_TRANSLATE_CREDENTIALS[current_index]
    pool_size = len(BAIDU_TRANSLATE_CREDENTIALS)
    # Advance the cursor only after the lookup above has succeeded
    current_index = (current_index + 1) % pool_size
    return selected
def sign(key, msg):
    """
    Compute the raw HMAC-SHA256 digest of a text message.

    Args:
        key: Secret key as bytes.
        msg: Message as str; it is UTF-8 encoded before being hashed.

    Returns:
        The digest as bytes (not hex-encoded).
    """
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode("utf-8"))
    return mac.digest()
def generate_tc3_signature(secret_key, date, service, string_to_sign):
    """
    Derive the Tencent Cloud TC3-HMAC-SHA256 signature for a string to sign.

    Args:
        secret_key: Account secret key as str.
        date: Date string used as the first key-derivation step.
        service: Service name used as the second key-derivation step.
        string_to_sign: The assembled string to sign.

    Returns:
        The signature as a lowercase hexadecimal string.
    """
    derived_key = ("TC3" + secret_key).encode("utf-8")
    # Chain the key through date -> service -> the fixed "tc3_request" marker;
    # each step's digest becomes the HMAC key of the next step.
    for scope_part in (date, service, "tc3_request"):
        derived_key = sign(derived_key, scope_part)
    final_mac = hmac.new(derived_key, string_to_sign.encode("utf-8"), hashlib.sha256)
    return final_mac.hexdigest()
def translate_with_tencent(texts, from_lang='auto', to_lang='zh', timeout=10):
    """
    Translate a list of texts with the Tencent TextTranslate API.

    The texts are joined with newlines into one ``SourceText``, the request is
    signed with TC3-HMAC-SHA256, and the returned ``TargetText`` is split on
    newlines again.

    Args:
        texts: List of strings to translate.
        from_lang: Source language code, sent as ``Source``.
        to_lang: Target language code, sent as ``Target``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of translated strings, or None when the request fails or the
        response contains no ``Response.TargetText``.
    """
    service = "tmt"
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslate"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    timestamp = int(time.time())
    # UTC calendar date of the timestamp. time.gmtime is used instead of
    # datetime.utcfromtimestamp, which is deprecated since Python 3.12.
    date = time.strftime("%Y-%m-%d", time.gmtime(timestamp))

    # Build the request payload; the exact serialized string is both hashed
    # for the signature and sent as the body, so it is serialized only once.
    payload = {
        "SourceText": "\n".join(texts),
        "Source": from_lang,
        "Target": to_lang,
        "ProjectId": 0,
    }
    payload_str = json.dumps(payload)

    # ************* Step 1: build the canonical request *************
    http_request_method = "POST"
    canonical_uri = "/"
    canonical_querystring = ""
    ct = "application/json; charset=utf-8"
    canonical_headers = f"content-type:{ct}\nhost:{host}\nx-tc-action:{action.lower()}\n"
    signed_headers = "content-type;host;x-tc-action"
    hashed_request_payload = hashlib.sha256(payload_str.encode("utf-8")).hexdigest()
    canonical_request = "\n".join([
        http_request_method,
        canonical_uri,
        canonical_querystring,
        canonical_headers,
        signed_headers,
        hashed_request_payload,
    ])

    # ************* Step 2: build the string to sign *************
    credential_scope = date + "/" + service + "/" + "tc3_request"
    hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = "\n".join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashed_canonical_request,
    ])

    # ************* Step 3: compute the signature *************
    signature = generate_tc3_signature(TENCENT_SECRET_KEY, date, service, string_to_sign)

    # ************* Step 4: build the Authorization header *************
    authorization = (algorithm + " " +
                     "Credential=" + TENCENT_SECRET_ID + "/" + credential_scope + ", " +
                     "SignedHeaders=" + signed_headers + ", " +
                     "Signature=" + signature)

    # ************* Step 5: build and send the request *************
    headers = {
        "Authorization": authorization,
        "Content-Type": ct,
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }
    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely; a timeout surfaces as an exception handled below.
        response = requests.post(
            TENCENT_TRANSLATE_URL,
            headers=headers,
            data=payload_str,
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
        if "Response" in result and "TargetText" in result["Response"]:
            return result["Response"]["TargetText"].split("\n")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported as None so
        # that the caller can fall back to another backend.
        print(f"腾讯翻译API请求失败: {e}")
        return None
def translate_with_baidu(texts, from_lang='auto', to_lang='zh', timeout=10):
    """
    Translate a list of texts with the Baidu Translate API.

    The texts are joined with newlines into a single query, signed with the
    next credential pair from the rotation, and sent as a GET request.

    Args:
        texts: List of strings to translate.
        from_lang: Source language code, sent as ``from``.
        to_lang: Target language code, sent as ``to``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list with the ``dst`` field of every ``trans_result`` entry, or None
        when the request fails or the response has no ``trans_result``.
    """
    credentials = get_next_credentials()
    # A YAML loader yields int for an unquoted numeric value; coerce to str so
    # the concatenation below cannot raise TypeError.
    app_id = str(credentials['app_id'])
    secret_key = str(credentials['secret_key'])
    salt = random.randint(32768, 65536)
    query = '\n'.join(texts)
    # The request signature is the MD5 hex digest of appid + q + salt + secret key.
    # Named `signature` so it does not shadow the module-level sign() function.
    sign_source = app_id + query + str(salt) + secret_key
    signature = hashlib.md5(sign_source.encode('utf-8')).hexdigest()
    params = {
        'q': query,
        'from': from_lang,
        'to': to_lang,
        'appid': app_id,
        'salt': salt,
        'sign': signature,
    }
    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely; a timeout surfaces as an exception handled below.
        response = requests.get(BAIDU_TRANSLATE_URL, params=params, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        if 'trans_result' in result:
            return [item['dst'] for item in result['trans_result']]
        return None
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported as None so
        # that the caller can fall back to the untranslated texts.
        print(f"百度翻译API请求失败: {e}")
        return None
def translate_texts(texts, from_lang='auto', to_lang='zh'):
    """
    Translate a list of texts, trying Tencent first and Baidu second.

    Args:
        texts: List of strings to translate.
        from_lang: Source language code passed to each backend.
        to_lang: Target language code passed to each backend.

    Returns:
        The result of the first backend that does not return None; if both
        backends return None, the original ``texts`` untranslated.
    """
    # Backends in order of preference; each signals failure by returning None
    for backend in (translate_with_tencent, translate_with_baidu):
        outcome = backend(texts, from_lang, to_lang)
        if outcome is not None:
            return outcome
    # Both backends failed: hand back the original texts
    return texts
# Create the Flask blueprint that this module's routes are registered on
tag_extractorbp = Blueprint('tag_extractor', __name__)
# The former extract_tags endpoint was removed: the frontend now fetches and parses the HTML itself
@tag_extractorbp.route('/Tagtranslate', methods=['POST'])
def translate():
    """
    Endpoint that translates a list of texts.

    Request body:  {"texts": ["text1", "text2", ...]}
    Response body: {"translated_texts": ["translation1", "translation2", ...]}

    Entries that are not strings, or that are empty after stripping
    whitespace, are dropped before translation, so the returned list
    corresponds to the remaining entries only.

    Returns:
        200 with the translations (an empty list when nothing is left to
        translate), 400 with an "error" message for an invalid request, or
        500 with an "error" message for an unexpected failure.
    """
    try:
        # silent=True returns None for a missing or malformed JSON body, so it
        # is answered with a 400 below instead of being caught as a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "请求体为空"}), 400

        # A JSON body that is not an object cannot carry a "texts" key
        texts = data.get('texts') if isinstance(data, dict) else None
        if texts is None:
            return jsonify({"error": "缺少texts参数"}), 400
        if not isinstance(texts, list):
            return jsonify({"error": "texts参数必须是数组"}), 400
        # An empty list is a valid request with nothing to translate; testing
        # for None above (rather than falsiness) keeps this branch reachable.
        if not texts:
            return jsonify({"translated_texts": []}), 200

        # Drop non-string and blank entries
        valid_texts = [
            text.strip()
            for text in texts
            if isinstance(text, str) and text.strip()
        ]
        if not valid_texts:
            return jsonify({"translated_texts": []}), 200

        print(f"开始翻译 {len(valid_texts)} 个文本...")
        translated_texts = translate_texts(valid_texts)
        print("翻译完成")
        return jsonify({"translated_texts": translated_texts})
    except Exception as e:
        # Top-level boundary of the endpoint: report any unexpected failure
        print(f"翻译接口错误: {e}")
        return jsonify({"error": f"服务器内部错误: {str(e)}"}), 500
@tag_extractorbp.route('/translate_batch', methods=['POST'])
def translate_batch():
    """
    Batch translation endpoint that also accepts language parameters.

    Request body:
        {
            "texts": ["text1", "text2", ...],
            "from_lang": "auto",   (optional, defaults to "auto")
            "to_lang": "zh"        (optional, defaults to "zh")
        }
    Response body:
        {
            "translated_texts": ["translation1", "translation2", ...],
            "from_lang": ..., "to_lang": ..., "count": ...
        }

    Entries that are not strings, or that are empty after stripping
    whitespace, are dropped before translation, so the returned list
    corresponds to the remaining entries only. When nothing is left to
    translate the response contains only an empty "translated_texts" list.

    Returns:
        200 with the translations, 400 with an "error" message for an invalid
        request, or 500 with an "error" message for an unexpected failure.
    """
    try:
        # silent=True returns None for a missing or malformed JSON body, so it
        # is answered with a 400 below instead of being caught as a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({"error": "请求体为空"}), 400

        # A JSON body that is not an object cannot carry a "texts" key
        texts = data.get('texts') if isinstance(data, dict) else None
        if texts is None:
            return jsonify({"error": "缺少texts参数"}), 400
        if not isinstance(texts, list):
            return jsonify({"error": "texts参数必须是数组"}), 400

        from_lang = data.get('from_lang', 'auto')
        to_lang = data.get('to_lang', 'zh')

        # An empty list is a valid request with nothing to translate; testing
        # for None above (rather than falsiness) keeps this branch reachable.
        if not texts:
            return jsonify({"translated_texts": []}), 200

        # Drop non-string and blank entries
        valid_texts = [
            text.strip()
            for text in texts
            if isinstance(text, str) and text.strip()
        ]
        if not valid_texts:
            return jsonify({"translated_texts": []}), 200

        print(f"开始批量翻译 {len(valid_texts)} 个文本 ({from_lang} -> {to_lang})...")
        translated_texts = translate_texts(valid_texts, from_lang, to_lang)
        print("批量翻译完成")
        return jsonify({
            "translated_texts": translated_texts,
            "from_lang": from_lang,
            "to_lang": to_lang,
            "count": len(translated_texts),
        })
    except Exception as e:
        # Top-level boundary of the endpoint: report any unexpected failure
        print(f"批量翻译接口错误: {e}")
        return jsonify({"error": f"服务器内部错误: {str(e)}"}), 500