File tree Expand file tree Collapse file tree 1 file changed +75
-0
lines changed
Expand file tree Collapse file tree 1 file changed +75
-0
lines changed Original file line number Diff line number Diff line change 1+ import argparse
2+ import glob
3+ import json
4+ import os
5+ import random
6+ import subprocess
7+ import sys
8+ import unittest
9+ from types import SimpleNamespace
10+
11+ """
12+ git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
13+ pip install -e lmms-eval/
14+ """
15+
# Vision-language models exercised by this script. Each entry pairs a model
# identifier with an `mmmu_accuracy` value (presumably the expected MMMU
# score threshold -- it is not read anywhere in the visible code; confirm).
MODELS = [
    SimpleNamespace(model="Qwen/Qwen2.5-VL-7B-Instruct", mmmu_accuracy=0.4),
]

# Unconditionally set the OpenAI client environment variables so that the
# evaluation talks to the server listening on localhost:8000.
os.environ.update(
    {
        "OPENAI_API_KEY": "lightllm123",
        "OPENAI_API_BASE": "http://localhost:8000/v1",
    }
)
25+
26+
def run_mmmu_eval(
    model_version: str,
    output_path: str,
):
    """
    Evaluate a VLM on the MMMU validation set with lmms-eval.

    Runs ``lmms_eval`` as a subprocess using the ``openai_compatible`` model
    backend. Only ``model_version`` (the checkpoint name) and ``output_path``
    vary between runs; every other setting is fixed below. Only the
    validation split (``mmmu_val``) is evaluated due to resource constraints.

    Args:
        model_version: Model/checkpoint identifier passed to lmms-eval via
            ``--model_args model_version=...``.
        output_path: Directory handed to ``--output_path``; it is created if
            it does not exist yet.

    Raises:
        subprocess.CalledProcessError: If lmms-eval exits with a non-zero
            status.
        subprocess.TimeoutExpired: If the evaluation runs longer than one
            hour.
    """
    # -------- fixed settings --------
    model = "openai_compatible"
    tp = 1
    tasks = "mmmu_val"
    batch_size = 16
    log_suffix = "openai_compatible"
    os.makedirs(output_path, exist_ok=True)

    # -------- compose --model_args --------
    # Comma-separated key=value pairs. No whitespace around the separator, so
    # the values are not polluted with stray spaces.
    model_args = ",".join(
        (
            f"model_version={model_version}",
            f"tp={tp}",
        )
    )
    print(model_args)

    # -------- build command list --------
    # Use the interpreter running this script so that lmms_eval is looked up
    # in the same environment; fall back to "python3" on PATH only when the
    # interpreter path is unavailable.
    python = sys.executable or "python3"
    cmd = [
        python,
        "-m",
        "lmms_eval",
        "--model",
        model,
        "--model_args",
        model_args,
        "--tasks",
        tasks,
        "--batch_size",
        str(batch_size),
        "--log_samples",
        "--log_samples_suffix",
        log_suffix,
        "--output_path",
        str(output_path),
    ]

    # check=True turns a failed evaluation into an exception; the timeout
    # bounds a hung run to one hour.
    subprocess.run(
        cmd,
        check=True,
        timeout=3600,
    )
73+
74+
if __name__ == "__main__":
    # Only launch the evaluation when this file is executed as a script, so
    # that importing the module does not start a subprocess as a side effect.
    run_mmmu_eval("Qwen/Qwen2.5-VL-7B-Instruct", "./logs")
You can’t perform that action at this time.
0 commit comments