Skip to content

Commit 23710cf

Browse files
Added Qwen3-VL-235B-A22B-Instruct to transformers model options
1 parent 4f0555b commit 23710cf

File tree

2 files changed

+32
-6
lines changed

2 files changed

+32
-6
lines changed

tools/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ def sanitize_markdown_text(text: str) -> str:
710710

711711
SELECTED_MODEL = get_or_create_env_var(
712712
"SELECTED_MODEL", "Qwen3-VL-4B-Instruct"
713-
) # Selected vision model. Choose from: "Nanonets-OCR2-3B", "Dots.OCR", "Qwen3-VL-2B-Instruct", "Qwen3-VL-4B-Instruct", "Qwen3-VL-8B-Instruct", "PaddleOCR-VL"
713+
) # Selected vision model. Choose from: "Nanonets-OCR2-3B", "Dots.OCR", "Qwen3-VL-2B-Instruct", "Qwen3-VL-4B-Instruct", "Qwen3-VL-8B-Instruct", "Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL-235B-A22B-Instruct", "PaddleOCR-VL"
714714

715715
if SHOW_VLM_MODEL_OPTIONS:
716716
VLM_MODEL_OPTIONS = [

tools/run_vlm.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -400,11 +400,37 @@
400400
"trust_remote_code": True,
401401
}
402402

403-
# budget for image processor, since the compression ratio is 32 for Qwen3-VL, we can set the number of visual tokens of a single image to 256-1280
404-
# processor.image_processor.size = {
405-
# "longest_edge": VLM_MAX_IMAGE_SIZE,
406-
# "shortest_edge": VLM_MIN_IMAGE_SIZE,
407-
# }
403+
if quantization_config is not None:
404+
load_kwargs["quantization_config"] = quantization_config
405+
else:
406+
load_kwargs["dtype"] = "auto"
407+
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
408+
MODEL_ID, **load_kwargs
409+
).eval()
410+
411+
model_default_prompt = """Read all the text in the image."""
412+
model_default_do_sample = False
413+
model_default_top_p = 0.8
414+
model_default_min_p = 0.0
415+
model_default_top_k = 20
416+
model_default_temperature = 0.7
417+
model_default_repetition_penalty = 1.0
418+
model_default_presence_penalty = 1.5
419+
model_default_max_new_tokens = MAX_NEW_TOKENS
420+
model_supports_presence_penalty = (
421+
False # I found that this doesn't work when using transformers
422+
)
423+
424+
elif SELECTED_MODEL == "Qwen3-VL-235B-A22B-Instruct":
425+
MODEL_ID = "Qwen/Qwen3-VL-235B-A22B-Instruct"
426+
from transformers import Qwen3VLMoeForConditionalGeneration
427+
428+
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
429+
load_kwargs = {
430+
"attn_implementation": attn_implementation,
431+
"device_map": "auto",
432+
"trust_remote_code": True,
433+
}
408434

409435
if quantization_config is not None:
410436
load_kwargs["quantization_config"] = quantization_config

0 commit comments

Comments (0)