Skip to content

Commit 2410ac3

Browse files
update README
1 parent ca9a644 commit 2410ac3

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+12370
-48
lines changed

0.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# CUDA_VISIBLE_DEVICES=0 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29503 run.py --data MME --model bagel_gpt --verbose --batch-size 16
2+
# CUDA_VISIBLE_DEVICES=0 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29503 run.py --data HallusionBench --model bagel_gpt --verbose --batch-size 12
3+
# CUDA_VISIBLE_DEVICES=1 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29502 run.py --data MMVet --model bagel_gpt --verbose --batch-size 12
4+
# CUDA_VISIBLE_DEVICES=1 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29502 run.py --data MMStar --model bagel_gpt --verbose --batch-size 12
5+
# CUDA_VISIBLE_DEVICES=0 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29503 run.py --data MMBench_DEV_EN --model bagel_gpt --verbose --batch-size 16
6+
7+
8+
9+
CUDA_VISIBLE_DEVICES=0 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29500 run.py --data AI2D_TEST --model bagel_zoomin --verbose --batch-size 6
10+
11+
# CUDA_VISIBLE_DEVICES=0 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29500 run.py --data MathVista_MINI --model bagel_zoomin --verbose --batch-size 12
12+
13+
CUDA_VISIBLE_DEVICES=2 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29502 run.py --data MMBench --model bagel_zoomin --verbose --batch-size 8
14+

1.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
# CUDA_VISIBLE_DEVICES=1 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29508 run.py --data MME --model bagel_zoomin --verbose --batch-size 12
3+
4+
CUDA_VISIBLE_DEVICES=1 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29508 run.py --data MMVet --model bagel_zoomin --verbose --batch-size 8 --reuse
5+
6+
# CUDA_VISIBLE_DEVICES=1 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29508 run.py --data MMStar --model bagel_zoomin --verbose --batch-size 12
7+
8+
CUDA_VISIBLE_DEVICES=1 torchrun --nproc-per-node=1 --rdzv_endpoint=localhost:29508 run.py --data HallusionBench --model bagel_zoomin --verbose --batch-size 8 --reuse

run.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def parse_args():
178178
# Infer + Eval or Infer Only
179179
parser.add_argument('--mode', type=str, default='all', choices=['all', 'infer', 'eval'])
180180
# API Kwargs, Apply to API VLMs and Judge API LLMs
181-
parser.add_argument('--api-nproc', type=int, default=4, help='Parallel API calling')
181+
parser.add_argument('--api-nproc', type=int, default=15, help='Parallel API calling')
182182
parser.add_argument('--retry', type=int, default=None, help='retry numbers for API VLMs')
183183
parser.add_argument('--judge-args', type=str, default=None, help='Judge arguments in JSON format')
184184
# Explicitly Set the Judge Model
@@ -195,7 +195,9 @@ def parse_args():
195195
parser.add_argument(
196196
'--use-vllm', action='store_true', help='use vllm to generate, the flag is only supported in Llama4 for now')
197197
parser.add_argument('--use-verifier', action='store_true', help='use verifier to evaluate')
198-
198+
199+
#batch size for inference
200+
parser.add_argument('--batch-size', type=int, default=16, help='batch size for inference')
199201
args = parser.parse_args()
200202
return args
201203

@@ -345,7 +347,8 @@ def main():
345347
verbose=args.verbose,
346348
api_nproc=args.api_nproc,
347349
ignore_failed=args.ignore,
348-
use_vllm=args.use_vllm)
350+
use_vllm=args.use_vllm,
351+
batch_size=args.batch_size)
349352

350353
# Set the judge kwargs first before evaluation or dumping
351354

vlmeval/api/qwen_vl_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def __init__(self,
132132
proxy: str = None,
133133
**kwargs):
134134

135-
assert model in ['qwen-vl-plus', 'qwen-vl-max']
135+
# assert model in ['qwen-vl-plus', 'qwen-vl-max']
136136
self.model = model
137137
import dashscope
138138
self.fail_msg = 'Failed to obtain answer via API. '

vlmeval/config.py

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,57 @@
7979
),
8080
"Pixtral-12B": partial(Pixtral, model_path="mistralai/Pixtral-12B-2409"),
8181
"Falcon2-VLM-11B": partial(Falcon2VLM, model_path="tiiuae/falcon-11B-vlm"),
82+
# "ming": partial(MingUniVisionInfer, model_name_or_path="/root/autodl-tmp/home/tongyujun/models/Ming-UniVision-16B-A3B"),
83+
# "ming_replace_refine_psnr": partial(MingUniVisionInfer_REPLACE, model_name_or_path="/root/autodl-tmp/home/tongyujun/models/Ming-UniVision-16B-A3B"),
84+
# "ming_concat": partial(MingUniVisionInfer_CONCAT, model_name_or_path="/root/autodl-tmp/home/tongyujun/models/Ming-UniVision-16B-A3B"),
85+
"bagel": partial(BagelInfer, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
86+
"bagel_ab": partial(BagelInfer, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
87+
"bagel1": partial(BagelInfer, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
88+
"bagel2": partial(BagelInfer, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
89+
90+
"bagel_prompt": partial(BagelInfer_concat, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
91+
"bagel_prompt1": partial(BagelInfer_concat, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
92+
93+
"bagel_concat": partial(BagelInfer_concat, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
94+
"bagel_zoomin": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
95+
"bagel_zoomin_think": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
96+
"bagel_zoomin_thinkv1": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
97+
"bagel_zoomin_thinkv2": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
98+
"bagel_zoomin_thinkv3": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
99+
"bagel_zoomin_thinkv4": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
100+
"bagel_zoomin_thinkv5": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
101+
"bagel_zoomin_thinkv6": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
102+
"bagel_zoomin_thinkv7": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
103+
"bagel_zoomin_thinkv8": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
104+
"bagel_zoomin_thinkv9": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
105+
"bagel_think": partial(BagelInfer_think, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
106+
107+
108+
109+
"bagel_zoomin_thinkbaseline": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
110+
111+
112+
"bagel_zoomin_think_onlyimage": partial(BagelInfer_zoomin, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
113+
114+
"bagel_gpt": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
115+
"bagel_gptv1": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
116+
"bagel_gptv2": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
117+
"bagel_gptv3": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
118+
"bagel_gptv4": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
119+
"bagel_gptv5": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
120+
"bagel_gptv6": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
121+
"bagel_gptv7": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
122+
"bagel_gptv8": partial(BagelInfer_GPT, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
123+
124+
"bagel_seperate_ab_30": partial(BagelInfer_seperate, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
125+
"bagel_prompt_gen": partial(BagelInfer_prompt_gen, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
126+
"gpt_prompt_gen": partial(BagelInfer_prompt_gen, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
127+
"bagel_prompt_gen1": partial(BagelInfer_prompt_gen, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
128+
"gemini_prompt_gen": partial(BagelInfer_prompt_gen, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
129+
130+
131+
# "bagel_replace_psnr": partial(BagelInfer_replace, model_name_or_path="/data/tongyujun/uni-tts/models/BAGEL-7B-MoT"),
132+
82133
}
83134

84135
o1_key = os.environ.get('O1_API_KEY', None)
@@ -127,7 +178,7 @@
127178
# GPT
128179
"GPT4V": partial(
129180
GPT4V,
130-
model="gpt-4-1106-vision-preview",
181+
model="gpt-4-1106-preview",
131182
temperature=0,
132183
img_size=512,
133184
img_detail="low",
@@ -279,6 +330,35 @@
279330
max_tokens=2**14,
280331
timeout=300,
281332
),
333+
"gpt-5.1": partial(
334+
GPT4V,
335+
model="gpt-5.1",
336+
img_detail="high",
337+
retry=3,
338+
verbose=False,
339+
max_tokens=2**14,
340+
timeout=300,
341+
),
342+
"gpt-4o": partial(
343+
GPT4V,
344+
model="gpt-4o",
345+
img_detail="high",
346+
retry=3,
347+
verbose=False,
348+
max_tokens=2**14,
349+
timeout=300,
350+
),
351+
# "Gemini-3-pro": partial(
352+
# GPT4V,
353+
# model="gemini-3-pro-preview",
354+
# img_detail="high",
355+
# retry=3,
356+
# verbose=False,
357+
# max_tokens=2**14,
358+
# timeout=300,
359+
# key='sk-ZmMmKKNPeShHLRKwHjXA68GXN2AmjHvnYEgnYDYqtb9gcQbo'
360+
# ),
361+
282362
# Gemini
283363
"GeminiPro1-0": partial(
284364
Gemini, model="gemini-1.0-pro", temperature=0, retry=10
@@ -295,6 +375,12 @@
295375
"GeminiFlash1-5-002": partial(
296376
GPT4V, model="gemini-1.5-flash-002", temperature=0, retry=10
297377
), # Internal Use Only
378+
"Gemini-3-pro": partial(
379+
GPT4V, model="gemini-3-pro-all", temperature=0, retry=10
380+
),
381+
"Gemini-2.5-pro": partial(
382+
GPT4V, model="gemini-2.5-pro-nothinking", temperature=0, retry=10
383+
),
298384
"GeminiFlash2-0": partial(
299385
Gemini, model="gemini-2.0-flash", temperature=0, retry=10
300386
),
@@ -307,11 +393,22 @@
307393
"GeminiPro2-5": partial(
308394
Gemini, model="gemini-2.5-pro", temperature=0, retry=10
309395
),
396+
397+
#DS
398+
"DS-vl": partial(
399+
GPT4V, model="deepseek-vl2", temperature=0, retry=10
400+
),
310401

311402
# Qwen-VL
312403
"QwenVLPlus": partial(QwenVLAPI, model="qwen-vl-plus", temperature=0, retry=10),
313404
"QwenVLMax": partial(QwenVLAPI, model="qwen-vl-max", temperature=0, retry=10),
314405
"QwenVLMax-250408": partial(QwenVLAPI, model="qwen-vl-max-2025-04-08", temperature=0, retry=10),
406+
"Qwen3VLPlus": partial(QwenVLAPI, model="qwen3-vl-plus", temperature=0, retry=10, key='sk-3a658ecf56284de185b5960e9b059745'),
407+
"Qwen3VL-A3B-30B": partial(QwenVLAPI, model="qwen3-vl-30b-a3b-instruct", temperature=0, retry=10, key='sk-3a658ecf56284de185b5960e9b059745'),
408+
"Qwen3vl-235b-a22b": partial(QwenVLAPI, model="qwen3-vl-235b-a22b-instruct", temperature=0, retry=10, key='sk-3a658ecf56284de185b5960e9b059745'),
409+
"Qwen3vl-8b": partial(QwenVLAPI, model="qwen3-vl-8b-instruct", temperature=0, retry=10, key='sk-3a658ecf56284de185b5960e9b059745'),
410+
"Qwen2.5vl-7b": partial(QwenVLAPI, model="qwen2.5-vl-7b-instruct", temperature=0, retry=10, key='sk-3a658ecf56284de185b5960e9b059745'),
411+
"Qwen2.5vl-72b": partial(QwenVLAPI, model="qwen2.5-vl-72b-instruct", temperature=0, retry=10, key='sk-3a658ecf56284de185b5960e9b059745'),
315412

316413
# Reka
317414
"RekaEdge": partial(Reka, model="reka-edge-20240208"),
@@ -1341,7 +1438,7 @@
13411438
),
13421439
"Qwen2.5-VL-7B-Instruct": partial(
13431440
Qwen2VLChat,
1344-
model_path="Qwen/Qwen2.5-VL-7B-Instruct",
1441+
model_path="/root/autodl-tmp/home/tongyujun/models/Qwen2.5-VL-7B-Instruct",
13451442
min_pixels=1280 * 28 * 28,
13461443
max_pixels=16384 * 28 * 28,
13471444
use_custom_prompt=False,

vlmeval/dataset/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@
8383
from .medqbench_mcq import MedqbenchMCQDataset
8484
from .medqbench_caption import MedqbenchCaptionDataset
8585
from .medqbench_paired_description import MedqbenchPairedDescriptionDataset
86-
86+
from .myvqabench import MyVQABench
87+
from .visthink import VisThinkBench
8788

8889
class ConcatDataset(ImageBaseDataset):
8990
# This dataset takes multiple dataset names as input and aggregate them into a single dataset.
@@ -231,7 +232,7 @@ def evaluate(self, eval_file, **judge_kwargs):
231232
]
232233

233234
CUSTOM_DATASET = [
234-
CustomMCQDataset, CustomVQADataset, CustomTextMCQDataset
235+
CustomMCQDataset, CustomVQADataset, CustomTextMCQDataset, MyVQABench, VisThinkBench
235236
]
236237

237238
DATASET_COLLECTION = [ConcatDataset, ConcatVideoDataset]

vlmeval/dataset/image_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ def prepare_tsv(self, url, file_md5=None):
9797
pass
9898
else:
9999
warnings.warn(f'The tsv file is in {data_root}, but the md5 does not match, will re-download')
100-
download_file(url, data_path)
101-
update_flag = True
100+
# download_file(url, data_path)
101+
# update_flag = True
102102
else:
103103
if osp.exists(data_path_legacy) and (file_md5 is None or md5(data_path_legacy) == file_md5):
104104
warnings.warn(

vlmeval/dataset/image_vqa.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2095,7 +2095,7 @@ class QSpatial(ImageBaseDataset):
20952095
# NOTE: To evaluate Q-Spatial-ScanNet, you need to get the permission from ScanNet website
20962096
# Once you get the permission, you can use the helper code here to download and extract necessary images:
20972097
# https://github.com/andrewliao11/Q-Spatial-Bench-code?tab=readme-ov-file#for-qspatial_scannet
2098-
qspatial_root = "TO_BE_REPLACED_WITH_THE_PATH_TO_QSPATIAL_DATASET"
2098+
qspatial_root = "/root/autodl-tmp/home/tongyujun/LMUData/images/QSpatial"
20992099
url = "https://raw.githubusercontent.com/andrewliao11/Q-Spatial-Bench-code/refs/heads/main/prompt_templates/"
21002100

21012101
def post_build(self, dataset):
@@ -2152,7 +2152,7 @@ def load_data(self, dataset):
21522152
from datasets import load_dataset
21532153

21542154
hf_dataset = load_dataset("andrewliao11/Q-Spatial-Bench",
2155-
split=dataset)
2155+
split=dataset,cache_dir=self.qspatial_root)
21562156
df = hf_dataset.to_pandas()
21572157

21582158
df.reset_index(drop=True, inplace=True)

0 commit comments

Comments (0)