bytedance · csajedi · Oct 31, 2025 · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,7 @@ Desktop.ini
 
 fusion_result.json
 kernel_meta/
+
+# UV package manager: generated lockfile and local Python version pin
+uv.lock
+.python-version
diff --git a/README.md b/README.md
@@ -121,7 +121,7 @@ Try our demo on [Demo-Dolphin](https://huggingface.co/spaces/ByteDance/Dolphin).
 3. Download the pre-trained models of *Dolphin-v2*:
 
    Visit our Huggingface [model card](https://huggingface.co/ByteDance/Dolphin-v2), or download model by:
-   
+
    ```bash
    # Download the model from Hugging Face Hub
    git lfs install
@@ -131,27 +131,51 @@ Try our demo on [Demo-Dolphin](https://huggingface.co/spaces/ByteDance/Dolphin).
    huggingface-cli download ByteDance/Dolphin-v2 --local-dir ./hf_model
    ```
 
+### Alternative: Using UV
+
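+For faster dependency resolution, you can use [UV](https://docs.astral.sh/uv/) as an alternative to pip. The bundled `pyproject.toml` is configured for Linux and pulls `torch`/`torchvision` from the PyTorch ROCm 6.3 wheel index; adjust its `[tool.uv]` settings for other platforms or accelerators before following these steps: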
+
+1. Install UV:
+   ```bash
+   # On macOS and Linux
+   curl -LsSf https://astral.sh/uv/install.sh | sh
+
+   # On Windows
+   powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
+   ```
+
+2. Install dependencies:
+   ```bash
+   uv sync
+   ```
+
+3. Download the model:
+   ```bash
+   uv run huggingface-cli download ByteDance/Dolphin-v2 --local-dir ./hf_model
+   ```
+
 ## ⚡ Inference
 
 Dolphin provides two inference frameworks with support for two parsing granularities:
 - **Page-level Parsing**: Parse the entire document page into a structured JSON and Markdown format
 - **Element-level Parsing**: Parse individual document elements (text, table, formula)
 
+**Note:** If you installed the dependencies with UV, prefix each `python` command below with `uv run`, e.g., `uv run python demo_page.py ...`
 
 ### 📄 Page-level Parsing
 
 ```bash
 # Process a single document image
 python demo_page.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs/page_1.png 
+    --input_path ./demo/page_imgs/page_1.png
 
 # Process a single document pdf
 python demo_page.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs/page_6.pdf 
+    --input_path ./demo/page_imgs/page_6.pdf
 
 # Process all documents in a directory
 python demo_page.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs 
+    --input_path ./demo/page_imgs
 
 # Process with custom batch size for parallel element decoding
 python demo_page.py --model_path ./hf_model --save_dir ./results \
@@ -173,14 +197,14 @@ python demo_element.py --model_path ./hf_model --save_dir ./results \
 # Process a single document image
 python demo_layout.py --model_path ./hf_model --save_dir ./results \
     --input_path ./demo/page_imgs/page_1.png \
-    
+
 # Process a single PDF document
 python demo_layout.py --model_path ./hf_model --save_dir ./results \
     --input_path ./demo/page_imgs/page_6.pdf \
 
 # Process all documents in a directory
 python demo_layout.py --model_path ./hf_model --save_dir ./results \
-    --input_path ./demo/page_imgs 
+    --input_path ./demo/page_imgs
 ````
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,16 +1,43 @@
-[tool.black]
-line-length = 120
-include = '\.pyi?$'
-exclude = '''
-/(
-    \.git
-  | \.hg
-  | \.mypy_cache
-  | \.tox
-  | \.venv
-  | _build
-  | buck-out
-  | build
-  | dist
-)/
-'''
+[project]
+name = "dolphin"
+version = "2.0.0"
+description = "Document Image Parsing via Heterogeneous Anchor Prompting"
+readme = "README.md"
+requires-python = ">=3.10,<3.13"
+license = { text = "MIT" }
+dependencies = [
+    "datasets>=3.6.0",
+    "torch>=2.6.0",
+    "torchvision>=0.21.0",
+    # ROCm builds of torch depend on pytorch-triton-rocm; declaring it as a direct
+    # dependency lets [tool.uv.sources] pin it to the same PyTorch index as torch.
+    "pytorch-triton-rocm",
+    "transformers>=4.51.0",
+    "accelerate>=1.4.0",
+    "decord>=0.6.0",
+    "Levenshtein>=0.27.1",
+    "qwen_vl_utils",
+    "matplotlib",
+    "jieba",
+    "opencv-python",
+    "beautifulsoup4",
+    "albumentations>=1.4.0",
+    "pymupdf>=1.26",
+    "huggingface-hub",
+]
+
+[tool.uv]
+# Resolve and lock for Linux only; torch and torchvision are pinned to the ROCm index below.
+environments = ["sys_platform == 'linux'"]
+
+# PyTorch ROCm 6.3 wheel index. `explicit = true` limits it to the packages pinned in
+# [tool.uv.sources]; every other dependency resolves from the default index (PyPI).
+[[tool.uv.index]]
+name = "pytorch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.3"
+explicit = true
+
+[tool.uv.sources]
+torch = { index = "pytorch-rocm" }
+torchvision = { index = "pytorch-rocm" }
+pytorch-triton-rocm = { index = "pytorch-rocm" }