4 files changed: +8 -6 lines changed
Original file line number Diff line number Diff line change
@@ -50,9 +50,9 @@ KittyDoc 是一个轻量级、专注于文档解析的开源框架,支持 **OC
5050
5151## 🛠️ 安装KittyDoc
5252
53- #### 使用pip安装KittyDoc (暂未发布)
53+ #### 使用pip安装KittyDoc
5454``` bash
55- pip install kitty_doc -i https://mirrors.aliyun.com/pypi/simple
55+ pip install kitty-doc -i https://mirrors.aliyun.com/pypi/simple
5656```
5757
5858#### 通过源码安装KittyDoc
@@ -101,9 +101,10 @@ os.environ['MINERU_DEVICE_MODE'] = "cuda:1"
101101
102102## 📌 TODO
103103
104- - [x] 复选框识别,包括勾选/未勾选(默认关闭、存在误检)
105104- [x] 表格非OCR文本提取
106105- [x] 跨页表格合并
106+ - [x] 复选框识别,使用 OpenCV(默认关闭、OpenCV 识别存在误检)
107+ - [ ] 复选框识别,使用模型
107108- [ ] 四方向分类旋转表格解析 rapid_orientation
108109- [ ] 表格内公式提取
109110- [ ] 表格内图片提取
Original file line number Diff line number Diff line change @@ -102,7 +102,7 @@ def do_parse(
102102 }
103103
104104 checkbox_config = {
105- "checkbox_enable" : True , # 是否识别复选框,默认不识别,基于opencv,有可能会误检
105+ # "checkbox_enable": False , # 是否识别复选框,默认不识别,基于opencv,有可能会误检
106106 }
107107
108108
Original file line number Diff line number Diff line change @@ -97,7 +97,7 @@ def do_parse(
9797 # "wireless_table.model_dir_or_path": "", # 无线表结构模型地址,配置SLANEXT时使用,
9898 }
9999 checkbox_config = {
100- "checkbox_enable" : True , # 是否识别复选框,默认不识别,基于opencv,有可能会误检
100+ # "checkbox_enable": False , # 是否识别复选框,默认不识别,基于opencv,有可能会误检
101101 }
102102 for idx , pdf_bytes in enumerate (pdf_bytes_list ):
103103 new_pdf_bytes = convert_pdf_bytes_to_bytes_by_pypdfium2 (pdf_bytes , start_page_id , end_page_id )
Original file line number Diff line number Diff line change @@ -31,7 +31,8 @@ def parse_requirements(filename):
3131 author_email = '' , # 作者邮箱
3232 packages = find_packages () + ["kitty_doc.resources" ], # 包含所有的包
3333 package_data = {
34- "kitty_doc.resources" : ["**" ], # 包含magic_pdf.resources目录下的所有文件
34+ "kitty_doc.resources" : ["**" ], # 包含 kitty_doc/resources 目录下的所有文件
35+ "" : ["*.yaml" ], # 包含所有包里的 .yaml 文件
3536 },
3637 install_requires = parse_requirements ('requirements.txt' ), # 项目依赖的第三方库
3738 # extras_require={
You can’t perform that action at this time.
0 commit comments