[Model] Support Dots OCR (#24645)

Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: yinz-aizip <yinz@aizip.ai>
This commit is contained in:
Roger Wang
2025-09-21 19:24:40 -07:00
committed by GitHub
parent 5aeb925452
commit 7b57a433da
7 changed files with 917 additions and 0 deletions

View File

@ -126,6 +126,23 @@ def run_chameleon(questions: list[str], modality: str) -> ModelRequestData:
)
# Dots-OCR
def run_dots_ocr(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image"
prompts = [f"<|img|><|imgpad|><|endofimg|>{question}" for question in questions]
engine_args = EngineArgs(
model="rednote-hilab/dots.ocr",
limit_mm_per_prompt={modality: 1},
trust_remote_code=True,
)
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
def run_command_a_vision(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image"
@ -1676,6 +1693,7 @@ model_example_map = {
"aya_vision": run_aya_vision,
"blip-2": run_blip2,
"chameleon": run_chameleon,
"dots_ocr": run_dots_ocr,
"command_a_vision": run_command_a_vision,
"deepseek_vl_v2": run_deepseek_vl2,
"ernie45_vl": run_ernie45_vl,