code-jar
diff --git a/‎OCR.py‎
Lines changed: 99 additions & 0 deletions b/‎OCR.py‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 10 additions & 0 deletions b/‎README.md‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎path/WeChatOCR/Model/FPOCRRecog.xnet‎
9.13 MB b/‎path/WeChatOCR/Model/FPOCRRecog.xnet‎
9.13 MB
diff --git a/‎path/WeChatOCR/Model/OCRDetFP32.xnet.nas‎
1.53 MB b/‎path/WeChatOCR/Model/OCRDetFP32.xnet.nas‎
1.53 MB
diff --git a/‎path/WeChatOCR/Model/OCRParaDetV1.1.0.26.xnet‎
2.7 MB b/‎path/WeChatOCR/Model/OCRParaDetV1.1.0.26.xnet‎
2.7 MB
diff --git a/‎path/WeChatOCR/Model/OCRRecogFP32V1.1.0.26.xnet‎
7.29 MB b/‎path/WeChatOCR/Model/OCRRecogFP32V1.1.0.26.xnet‎
7.29 MB
@@ -0,0 +1,99 @@
+import wcocr
+import os
+from docx import Document
+from docx.shared import Pt
+from docx.oxml.ns import qn
+from colorama import init, Fore, Style
+
+def find_wechat_path():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    common_paths = os.path.join(script_dir, 'path')
+    if os.path.exists(common_paths):
+        return common_paths
+    else:
+        print(f"The path folder does not exist at {common_paths}.")
+        return None
+
+def find_wechatocr_exe():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    wechatocr_path = os.path.join(script_dir, 'path', 'WeChatOCR', 'WeChatOCR.exe')
+    if os.path.isfile(wechatocr_path):
+        return wechatocr_path
+    else:
+        print(f"The WeChatOCR.exe does not exist at {wechatocr_path}.")
+        return None
+
+def wechat_ocr(image_path):
+    wechat_path = find_wechat_path()
+    wechatocr_path = find_wechatocr_exe()
+    if not wechat_path or not wechatocr_path:
+        return []  # 返回空结果
+    
+    wcocr.init(wechatocr_path, wechat_path)
+    result = wcocr.ocr(image_path)
+    texts = []
+
+    for temp in result['ocr_response']:
+        text = temp['text']
+        if isinstance(text, bytes):
+            text = text.decode('utf-8', errors='ignore')
+        texts.append(text)
+    
+    return texts
+
+def save_to_docx(texts, output_path):
+    doc = Document()
+
+    for text in texts:
+        # 添加段落并设置宋体字体
+        paragraph = doc.add_paragraph()
+        run = paragraph.add_run(text)
+        run.font.name = '宋体'
+
+        # 设置字体为宋体 (兼容中文设置)
+        r = run._element
+        r.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
+
+        # 设置字体大小为五号字体 (10.5 磅)
+        run.font.size = Pt(10.5)
+    
+    doc.save(output_path)
+
+def process_all_images():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    src_folder = os.path.join(script_dir, 'src')
+    docx_folder = os.path.join(script_dir, 'docx')
+
+    if not os.path.exists(docx_folder):
+        os.makedirs(docx_folder)
+
+    # 支持的图像格式
+    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif')
+
+    # 遍历 src 文件夹及其所有子文件夹
+    for root, dirs, files in os.walk(src_folder):
+        for file in files:
+            if file.lower().endswith(image_extensions):
+                image_path = os.path.join(root, file)
+                relative_path = os.path.relpath(root, src_folder)
+                docx_folder_path = os.path.join(docx_folder, relative_path)
+
+                # 确保 docx 文件夹路径存在
+                if not os.path.exists(docx_folder_path):
+                    os.makedirs(docx_folder_path)
+
+                # 处理图片文件
+                print(Fore.GREEN + f"正在处理: {os.path.relpath(image_path, script_dir)}" + Style.RESET_ALL)
+                texts = wechat_ocr(image_path)
+                image_name = os.path.splitext(file)[0]
+                output_docx = os.path.join(docx_folder_path, f'{image_name}_OCR.docx')
+                save_to_docx(texts, output_docx)
+                # 显示相对路径
+                relative_docx_path = os.path.relpath(output_docx, script_dir)
+                print(f"OCR 结果已保存到： {relative_docx_path}\n")
+
+if __name__ == '__main__':
+    init(autoreset=True)  # 初始化 colorama
+    process_all_images()
+    print(Fore.RED + "全部文件处理完成，请按 Enter 键退出……" + Style.RESET_ALL)
+    input()
@@ -1,2 +1,12 @@
 # WeChatOCR
 这是一个采用Python调用微信OCR功能，进行批处理图片OCR的代码。
+
+首先非常感谢swigger，52PJ的FeiyuYip，nulptr以及其他对此做出贡献的朋友。
+
+基于他们的工作，改动如下：
+1. 将WeChatOCR.exe做了本地化，不再依赖微信的安装路径。
+2. 将图片处理的格式多样化，增加了jpg，jpeg，bmp，tif格式的处理，只需要将文件放入src文件夹中即可。
+3. OCR的处理结果将以docx格式保存到docx文件夹中。
+
+关于源文件的问题：
+我感觉WeChatOCR对png格式的处理能力比较好，所以建议将图片格式转换为png以后再做OCR处理。