Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,7 @@ Desktop.ini

fusion_result.json
kernel_meta/

# UV package manager
uv.lock
.python-version
36 changes: 30 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ Try our demo on [Demo-Dolphin](https://huggingface.co/spaces/ByteDance/Dolphin).
3. Download the pre-trained models of *Dolphin-v2*:

Visit our Hugging Face [model card](https://huggingface.co/ByteDance/Dolphin-v2), or download the model with:

```bash
# Download the model from Hugging Face Hub
git lfs install
Expand All @@ -131,27 +131,51 @@ Try our demo on [Demo-Dolphin](https://huggingface.co/spaces/ByteDance/Dolphin).
huggingface-cli download ByteDance/Dolphin-v2 --local-dir ./hf_model
```

### Alternative: Using UV

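For faster dependency resolution, you can use [UV](https://docs.astral.sh/uv/) as an alternative to pip. Note that the bundled `pyproject.toml` resolves dependencies for Linux only and pulls `torch`/`torchvision` from the PyTorch ROCm 6.3 index; adjust the `[tool.uv]` settings if you are on another platform or accelerator.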

1. Install UV:
```bash
# On macOS and Linux
curl -LsSf https://astral.sh/uv/install.sh | sh

# On Windows
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
```

2. Install dependencies:
```bash
uv sync
```

3. Download the model:
```bash
uv run huggingface-cli download ByteDance/Dolphin-v2 --local-dir ./hf_model
```

## ⚡ Inference

Dolphin provides two inference frameworks with support for two parsing granularities:
- **Page-level Parsing**: Parse the entire document page into a structured JSON and Markdown format
- **Element-level Parsing**: Parse individual document elements (text, table, formula)

**Note:** If you installed using UV, prefix all python commands with `uv run`, e.g., `uv run python demo_page.py ...`

### 📄 Page-level Parsing

```bash
# Process a single document image
python demo_page.py --model_path ./hf_model --save_dir ./results \
--input_path ./demo/page_imgs/page_1.png
--input_path ./demo/page_imgs/page_1.png

# Process a single document pdf
python demo_page.py --model_path ./hf_model --save_dir ./results \
--input_path ./demo/page_imgs/page_6.pdf
--input_path ./demo/page_imgs/page_6.pdf

# Process all documents in a directory
python demo_page.py --model_path ./hf_model --save_dir ./results \
--input_path ./demo/page_imgs
--input_path ./demo/page_imgs

# Process with custom batch size for parallel element decoding
python demo_page.py --model_path ./hf_model --save_dir ./results \
Expand All @@ -173,14 +197,14 @@ python demo_element.py --model_path ./hf_model --save_dir ./results \
# Process a single document image
python demo_layout.py --model_path ./hf_model --save_dir ./results \
--input_path ./demo/page_imgs/page_1.png

# Process a single PDF document
python demo_layout.py --model_path ./hf_model --save_dir ./results \
--input_path ./demo/page_imgs/page_6.pdf

# Process all documents in a directory
python demo_layout.py --model_path ./hf_model --save_dir ./results \
--input_path ./demo/page_imgs
--input_path ./demo/page_imgs
````


Expand Down
52 changes: 36 additions & 16 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,36 @@
[tool.black]
line-length = 120
include = '\.pyi?$'
exclude = '''
/(
\.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''
[project]
name = "dolphin"
version = "2.0.0"
description = "Document Image Parsing via Heterogeneous Anchor Prompting"
readme = "README.md"
requires-python = ">=3.10,<3.13"
license = { text = "MIT" }
dependencies = [
"datasets>=3.6.0",
"torch>=2.6.0",
"torchvision>=0.21.0",
"transformers>=4.51.0",
"accelerate>=1.4.0",
"decord>=0.6.0",
"Levenshtein>=0.27.1",
"qwen_vl_utils",
"matplotlib",
"jieba",
"opencv-python",
"beautifulsoup4",
"albumentations>=1.4.0",
"pymupdf>=1.26",
"huggingface-hub",
]

[tool.uv]
index-strategy = "unsafe-best-match"
environments = ["sys_platform == 'linux'"]

[[tool.uv.index]]
name = "pytorch-rocm"
url = "https://download.pytorch.org/whl/rocm6.3"

[tool.uv.sources]
torch = { index = "pytorch-rocm" }
torchvision = { index = "pytorch-rocm" }