Skip to content

Commit 7b57a43

Browse files
ywang96 and yinz-aizip authored
[Model] Support Dots OCR (#24645)
Signed-off-by: Roger Wang <[email protected]>
Co-authored-by: yinz-aizip <[email protected]>
1 parent 5aeb925 commit 7b57a43

File tree

7 files changed

+917
-0
lines changed

7 files changed

+917
-0
lines changed

docs/models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ th {
352352
| `DeepseekV2ForCausalLM` | DeepSeek-V2 | `deepseek-ai/DeepSeek-V2`, `deepseek-ai/DeepSeek-V2-Chat`, etc. | ✅︎ | ✅︎ | ✅︎ |
353353
| `DeepseekV3ForCausalLM` | DeepSeek-V3 | `deepseek-ai/DeepSeek-V3`, `deepseek-ai/DeepSeek-R1`, `deepseek-ai/DeepSeek-V3.1`, etc. | ✅︎ | ✅︎ | ✅︎ |
354354
| `Dots1ForCausalLM` | dots.llm1 | `rednote-hilab/dots.llm1.base`, `rednote-hilab/dots.llm1.inst`, etc. | | ✅︎ | ✅︎ |
355+
| `DotsOCRForCausalLM` | dots_ocr | `rednote-hilab/dots.ocr` | | ✅︎ | ✅︎ |
355356
| `Ernie4_5ForCausalLM` | Ernie4.5 | `baidu/ERNIE-4.5-0.3B-PT`, etc. | ✅︎ | ✅︎ | ✅︎ |
356357
| `Ernie4_5_MoeForCausalLM` | Ernie4.5MoE | `baidu/ERNIE-4.5-21B-A3B-PT`, `baidu/ERNIE-4.5-300B-A47B-PT`, etc. |✅︎| ✅︎ | ✅︎ |
357358
| `ExaoneForCausalLM` | EXAONE-3 | `LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ |

examples/offline_inference/vision_language.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,23 @@ def run_chameleon(questions: list[str], modality: str) -> ModelRequestData:
126126
)
127127

128128

129+
# Dots-OCR
130+
def run_dots_ocr(questions: list[str], modality: str) -> ModelRequestData:
    """Build the request data for the rednote-hilab/dots.ocr example.

    Every question is prefixed with the image placeholder tokens
    ``<|img|><|imgpad|><|endofimg|>``. The engine arguments limit each
    prompt to a single item of ``modality`` and enable
    ``trust_remote_code``.

    Only the ``"image"`` modality is accepted.
    """
    assert modality == "image"

    # Placeholder token sequence that precedes the question text.
    image_tokens = "<|img|><|imgpad|><|endofimg|>"
    prompts = [f"{image_tokens}{q}" for q in questions]

    return ModelRequestData(
        engine_args=EngineArgs(
            model="rednote-hilab/dots.ocr",
            limit_mm_per_prompt={modality: 1},
            trust_remote_code=True,
        ),
        prompts=prompts,
    )
144+
145+
129146
def run_command_a_vision(questions: list[str], modality: str) -> ModelRequestData:
130147
assert modality == "image"
131148

@@ -1676,6 +1693,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
16761693
"aya_vision": run_aya_vision,
16771694
"blip-2": run_blip2,
16781695
"chameleon": run_chameleon,
1696+
"dots_ocr": run_dots_ocr,
16791697
"command_a_vision": run_command_a_vision,
16801698
"deepseek_vl_v2": run_deepseek_vl2,
16811699
"ernie45_vl": run_ernie45_vl,

tests/models/registry.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,8 @@ def check_available_online(
448448
max_transformers_version="4.48", # noqa: E501
449449
transformers_version_reason="HF model is not compatible.", # noqa: E501
450450
hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}), # noqa: E501
451+
"DotsOCRForCausalLM": _HfExamplesInfo("rednote-hilab/dots.ocr",
452+
trust_remote_code=True),
451453
"Emu3ForConditionalGeneration": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
452454
"Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo("baidu/ERNIE-4.5-VL-28B-A3B-PT", # noqa: E501
453455
trust_remote_code=True),

0 commit comments

Comments
 (0)