Skip to content

Commit ce37d10

Browse files
committed
add api
1 parent 1cc86c1 commit ce37d10

File tree

5 files changed

+73
-65
lines changed

5 files changed

+73
-65
lines changed

OCR.py

Lines changed: 73 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,107 @@
1+
import atexit
12
import wcocr
23
import os
3-
from docx import Document
4-
from docx.shared import Pt
5-
from docx.oxml.ns import qn
6-
from colorama import init, Fore, Style
4+
from flask import Flask, request, jsonify
5+
from werkzeug.datastructures.file_storage import FileStorage
6+
import uuid
7+
8+
# Create the Flask application
9+
app = Flask(__name__)
10+
11+
# Directory where uploaded images are saved: an "img" folder next to this script
12+
UPLOAD_FOLDER = os.path.dirname(os.path.abspath(__file__)) + "/img"
13+
14+
# File extensions (without the leading dot) accepted for upload
15+
ALLOWED_EXTENSIONS = ("jpg", "jpeg", "png", "bmp", "tif")
16+
717

818
def find_wechat_path():
    """Locate the ``path`` folder that sits next to this script.

    Returns:
        The absolute path of the ``path`` folder, or ``None`` (after printing
        a diagnostic message) when no such folder exists.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    common_paths = os.path.join(script_dir, "path")
    # Require an actual directory: a regular file that happens to be named
    # "path" is not the folder the message below talks about.
    if os.path.isdir(common_paths):
        return common_paths
    print(f"The path folder does not exist at {common_paths}.")
    return None
1626

27+
1728
def find_wechatocr_exe():
    """Locate ``path/WeChatOCR/WeChatOCR.exe`` relative to this script.

    Returns:
        The absolute path of the executable, or ``None`` (after printing a
        diagnostic message) when the file does not exist.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    wechatocr_path = os.path.join(script_dir, "path", "WeChatOCR", "WeChatOCR.exe")
    if os.path.isfile(wechatocr_path):
        return wechatocr_path
    print(f"The WeChatOCR.exe does not exist at {wechatocr_path}.")
    return None
2536

26-
def wechat_ocr(image_path):
37+
38+
def wechat_ocr_init():
    """Initialise ``wcocr`` with the bundled WeChatOCR executable and folder.

    Raises:
        Exception: if either the ``path`` folder or ``WeChatOCR.exe`` could
            not be located.
    """
    runtime_dir = find_wechat_path()
    exe_file = find_wechatocr_exe()
    # Both lookups return None on failure; either one missing is fatal.
    if not (runtime_dir and exe_file):
        raise Exception("WeChatOCR.exe not found.")

    wcocr.init(exe_file, runtime_dir)
45+
46+
47+
def wechat_ocr(image_path):
    """Run ``wcocr.ocr`` on an image and collect the recognised text entries.

    Args:
        image_path: Path of the image file handed to ``wcocr.ocr``.

    Returns:
        A list with the ``"text"`` value of every entry in the result's
        ``"ocr_response"``; ``bytes`` values are decoded as UTF-8 with
        undecodable bytes dropped.
    """
    response = wcocr.ocr(image_path)
    return [
        raw.decode("utf-8", errors="ignore") if isinstance(raw, bytes) else raw
        for raw in (entry["text"] for entry in response["ocr_response"])
    ]
4359

44-
def save_to_docx(texts, output_path):
45-
doc = Document()
4660

47-
for text in texts:
48-
# 添加段落并设置宋体字体
49-
paragraph = doc.add_paragraph()
50-
run = paragraph.add_run(text)
51-
run.font.name = '宋体'
61+
def save_file(file: FileStorage) -> str:
    """Save an uploaded image under ``UPLOAD_FOLDER`` with a unique name.

    Args:
        file: The uploaded file; the text after the last dot of its
            ``filename`` is treated as the extension.

    Returns:
        The path the file was written to, or ``""`` when the extension is
        not listed in ``ALLOWED_EXTENSIONS``.
    """
    # Compare case-insensitively so that e.g. "photo.JPG" is accepted, and
    # tolerate a missing filename instead of crashing on None.
    extension = (file.filename or "").rsplit(".", 1)[-1].lower()
    if extension not in ALLOWED_EXTENSIONS:
        return ""

    # A random hex name avoids collisions between uploads.
    new_filename = f"{uuid.uuid4().hex}.{extension}"

    # exist_ok avoids the check-then-create race when two requests arrive
    # before the folder exists.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    file_path = os.path.join(UPLOAD_FOLDER, new_filename)
    file.save(file_path)

    return file_path
78+
79+
80+
# Route that accepts an image upload and returns its OCR text
@app.route("/upload_ocr", methods=["POST"])
def upload_image():
    """Handle ``POST /upload_ocr``: save the uploaded image and OCR it.

    The image is read from the ``file`` field of the request.

    Returns:
        A JSON body with ``code`` 400 and an explanatory ``msg`` when the
        field is missing, the filename is empty, or the file type is not
        accepted; otherwise ``code`` 200 with the recognised texts in
        ``data``.
    """

    def reject(message):
        # Every rejection shares the same JSON shape.
        return jsonify({"code": 400, "msg": message})

    uploaded = request.files.get("file")
    if uploaded is None:
        return reject("没有上传文件")

    if uploaded.filename == "":
        return reject("没有选择文件")

    saved_path = save_file(uploaded)
    if saved_path == "":
        return reject("不支持的文件类型")

    recognised = wechat_ocr(saved_path)
    return jsonify({"code": 200, "msg": "上传成功", "data": recognised})
100+
101+
# Register wcocr.destroy to run when the interpreter exits
102+
atexit.register(wcocr.destroy)
103+
104+
if __name__ == "__main__":
105+
wechat_ocr_init()
106+
# Serve on all interfaces, port 5001
107+
app.run(host="0.0.0.0", port=5001)

src/bmp/.md

Whitespace-only changes.

src/jpg/.md

Whitespace-only changes.

src/png/.md

Whitespace-only changes.

src/tif/.md

Whitespace-only changes.

0 commit comments

Comments
 (0)