From 5293d2fdd32a8bd5dd678b7b07842ea5a816641b Mon Sep 17 00:00:00 2001 From: Deepankar Sharma Date: Tue, 23 Sep 2025 21:34:29 +0530 Subject: [PATCH 1/2] fix(pyproject): correct invalid dependency array syntax Replaced invalid `opencv-python = "^4.6.0"` entry with `"opencv-python>=4.6.0"` inside the dependencies array. This resolves TOML parsing errors during `uv venv` and `uv sync`. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e99d062..ce00128 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "pyyaml", "frontend", "pymupdf", - opencv-python = "^4.6.0" + "opencv-python>=4.6.0", # Add other common dependencies ] From 78aad9f455263bff62db3cce704cd8ff2072b889 Mon Sep 17 00:00:00 2001 From: Deepankar Sharma Date: Tue, 23 Sep 2025 22:40:19 +0530 Subject: [PATCH 2/2] fix(encoding): ensure UTF-8 when writing markdown output - Updated pdf2markdown.py to open output files with encoding="utf-8" - Prevents UnicodeEncodeError on Windows (cp1252 default) - Ensures Chinese characters, math symbols, and other non-ASCII text are written correctly to markdown files --- project/pdf2markdown/scripts/pdf2markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/pdf2markdown/scripts/pdf2markdown.py b/project/pdf2markdown/scripts/pdf2markdown.py index a8bdd98..5182542 100644 --- a/project/pdf2markdown/scripts/pdf2markdown.py +++ b/project/pdf2markdown/scripts/pdf2markdown.py @@ -343,7 +343,7 @@ def process(self, input_path, save_dir=None, visualize=False, merge2markdown=Fal for extract_res in pdf_extract_res: md_text = self.convert2md(extract_res) md_content.append(md_text) - with open(os.path.join(save_dir, f"{basename}.md"), "w") as f: + with open(os.path.join(save_dir, f"{basename}.md"), "w", encoding="utf-8") as f: f.write("\n\n".join(md_content)) if visualize: