Add Python 3.12 support and modern package management

csajedi · csajedi · commit 0190322fe683 · 2026-02-25T18:59:31.000-05:00
This commit adds support for Python 3.12 and provides an alternative
installation method using UV, while maintaining full backward
compatibility with existing pip-based workflows.

Changes:
- Add pyproject.toml with PEP 621 compliant project metadata
- Fix numpy compatibility: constrain to <2.0 for Python 3.12 support
- Relax torch/torchvision constraints to allow Python 3.12 compatible versions
- Fix OpenCV version constraints to avoid known compatibility issues
- Add UV as an optional, faster installation method in README
- Update .gitignore for modern Python tooling

All changes maintain backward compatibility with pip and requirements.txt.
Tested with Python 3.12.9 on macOS with the Dolphin-1.5 model.
diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,7 @@ Desktop.ini
 
 fusion_result.json
 kernel_meta/
+
+# UV package manager
+uv.lock
+.python-version
diff --git a/README.md b/README.md
@@ -121,7 +121,7 @@ Try our demo on [Demo-Dolphin](https://huggingface.co/spaces/ByteDance/Dolphin).
 3. Download the pre-trained models of *Dolphin-v2*:
 
    Visit our Huggingface [model card](https://huggingface.co/ByteDance/Dolphin-v2), or download model by:
-   
+
    ```bash
    # Download the model from Hugging Face Hub
    git lfs install
@@ -131,27 +131,51 @@ Try our demo on [Demo-Dolphin](https://huggingface.co/spaces/ByteDance/Dolphin).
    huggingface-cli download ByteDance/Dolphin-v2 --local-dir ./hf_model
    ```
 
+### Alternative: Using UV
+
+For faster dependency resolution, you can use [UV](https://docs.astral.sh/uv/) as an alternative to pip:
+
+1. Install UV:
+   ```bash
+   # On macOS and Linux
+   curl -LsSf https://astral.sh/uv/install.sh | sh
+
+   # On Windows
+   powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
+   ```
+
+2. Install dependencies:
+   ```bash
+   uv sync
+   ```
+
+3. Download the model:
+   ```bash
+   uv run huggingface-cli download ByteDance/Dolphin-v2 --local-dir ./hf_model
+   ```
+
 ## ⚡ Inference
 
 Dolphin provides two inference frameworks with support for two parsing granularities:
 - **Page-level Parsing**: Parse the entire document page into a structured JSON and Markdown format
 - **Element-level Parsing**: Parse individual document elements (text, table, formula)
 
+**Note:** If you installed the dependencies with UV, prefix every `python` command below with `uv run`, e.g., `uv run python demo_page.py ...`.
 
 ### 📄 Page-level Parsing
 
 ```bash
 # Process a single document image
 python demo_page.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs/page_1.png 
+    --input_path ./demo/page_imgs/page_1.png
 
 # Process a single document pdf
 python demo_page.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs/page_6.pdf 
+    --input_path ./demo/page_imgs/page_6.pdf
 
 # Process all documents in a directory
 python demo_page.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs 
+    --input_path ./demo/page_imgs
 
 # Process with custom batch size for parallel element decoding
 python demo_page.py --model_path ./hf_model --save_dir ./results \
@@ -173,14 +197,14 @@ python demo_element.py --model_path ./hf_model --save_dir ./results \
 # Process a single document image
 python demo_layout.py --model_path ./hf_model --save_dir ./results \
     --input_path ./demo/page_imgs/page_1.png \
-    
+
 # Process a single PDF document
 python demo_layout.py --model_path ./hf_model --save_dir ./results \
     --input_path ./demo/page_imgs/page_6.pdf \
 
 # Process all documents in a directory
 python demo_layout.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs 
+    --input_path ./demo/page_imgs
 ````
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,3 +1,27 @@
+[project]
+name = "dolphin"
+version = "1.5.0"
+description = "Document Image Parsing via Heterogeneous Anchor Prompting"
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "MIT" }
+dependencies = [
+    "accelerate==1.6.0",
+    # Capped below numpy 2.0 as a compatibility constraint.
+    "numpy>=1.26.4,<2.0",
+    "omegaconf==2.3.0",
+    # Only one OpenCV distribution is listed: opencv-python and
+    # opencv-python-headless both install the same `cv2` package, so
+    # requiring both lets one overwrite the other's files.
+    "opencv-python>=4.8.0,<4.11",
+    "pillow>=9.3.0",
+    "pymupdf==1.26",
+    "timm==0.5.4",
+    "torch>=2.1.0",
+    "torchvision>=0.16.0",
+    "transformers==4.47.0",
+]
+
 [tool.black]
 line-length = 120
 include = '\.pyi?$'