Commit bb20abd

[fix] add reminder for interleave_visuals for Qwen2.5-VL, update version control. (#678)
* Update project configuration and scripts
  - Updated Python version requirement in pyproject.toml to >=3.12.
  - Removed specific version constraint for protobuf in dependencies.
  - Added 'uv.lock' to .gitignore.
  - Modified example script to change model task from 'mmmu_pro' to 'mme' and updated comments for clarity.
* Update example script comments for clarity on visual token positioning
1 parent 5a1a31c commit bb20abd

3 files changed: 12 additions & 18 deletions


.gitignore

Lines changed: 2 additions & 1 deletion
@@ -45,4 +45,5 @@ lmms_eval/tasks/mlvu/__pycache__/utils.cpython-310.pyc
 scripts/
 .env
 outputs/
-span.log
+span.log
+uv.lock

examples/models/qwen25vl.sh

Lines changed: 8 additions & 8 deletions
@@ -3,16 +3,16 @@
 export HF_HOME="~/.cache/huggingface"
 # pip install git+https://github.com/EvolvingLMMs-Lab/lmms-eval.git
 # pip3 install qwen_vl_utils
-# use `interleave_visuals=True` to control the visual token position
+# use `interleave_visuals=True` to control the visual token position, currently only for mmmu_val and mmmu_pro (and potentially for other interleaved image-text tasks), please do not use it unless you are sure about the operation details.
 
-accelerate launch --num_processes=8 --main_process_port=12346 -m lmms_eval \
-    --model qwen2_vl \
-    --model_args=pretrained=Qwen/Qwen2-VL-7B-Instruct,max_pixels=12845056,use_flash_attention_2=True,interleave_visuals=True \
-    --tasks mmmu_pro \
-    --batch_size 1
+# accelerate launch --num_processes=8 --main_process_port=12346 -m lmms_eval \
+#     --model qwen2_vl \
+#     --model_args=pretrained=Qwen/Qwen2-VL-7B-Instruct,max_pixels=12845056,use_flash_attention_2=True,interleave_visuals=True \
+#     --tasks mmmu_pro \
+#     --batch_size 1
 
 accelerate launch --num_processes=8 --main_process_port=12346 -m lmms_eval \
     --model qwen2_5_vl \
-    --model_args=pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_pixels=12845056,use_flash_attention_2=True,interleave_visuals=True \
-    --tasks mmmu_pro \
+    --model_args=pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_pixels=12845056,use_flash_attention_2=True,interleave_visuals=False \
+    --tasks mme \
     --batch_size 1
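
For reference, a minimal sketch of how the interleaved-visuals mode added above would be invoked on one of the supported tasks (mmmu_val, per the new comment); it reuses the model and launch settings from the example script, and is not part of this commit:

# Sketch only: interleave_visuals=True is intended for mmmu_val / mmmu_pro per the reminder above.
accelerate launch --num_processes=8 --main_process_port=12346 -m lmms_eval \
    --model qwen2_5_vl \
    --model_args=pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_pixels=12845056,use_flash_attention_2=True,interleave_visuals=True \
    --tasks mmmu_val \
    --batch_size 1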

pyproject.toml

Lines changed: 2 additions & 9 deletions
@@ -18,7 +18,7 @@ classifiers = [
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
 ]
-requires-python = ">=3.8"
+requires-python = ">=3.12"
 license = { text = "MIT" }
 dependencies = [
     "accelerate>=0.29.1",
@@ -71,7 +71,7 @@ dependencies = [
     "decord; platform_system != 'Darwin'",
     "eva-decord; platform_system == 'Darwin'",
     "zss",
-    "protobuf==3.20",
+    "protobuf",
     "sentence_transformers",
     "python-dotenv",
 ]
@@ -114,13 +114,6 @@ mmsearch = [
     "FlagEmbedding",
     "rouge",
 ]
-all = [
-    "gemini",
-    "reka",
-    "metrics",
-    "qwen",
-    "mmsearch"
-]
 
 [tool.setuptools.packages.find]
 include = ["lmms_eval*"]
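
A minimal environment-setup sketch tying these changes together, assuming a uv-managed local workflow (inferred from the new uv.lock entry in .gitignore; the commit itself does not prescribe any tooling):

# Python >= 3.12 is required after this change.
uv python install 3.12   # assumption: uv is used locally; any Python 3.12+ interpreter works
uv sync                  # resolves dependencies (protobuf now unpinned) and writes uv.lock, which is now gitignored
# Plain pip also works:
# pip install -e .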
