22import os
33import pandas as pd
44from argparse import ArgumentParser
5- from typing import Dict , List , Optional
5+ from typing import List , Optional
66from loguru import logger
77import eval_mm
88import eval_mm .metrics
3535}
3636
3737
38- def main (result_dir : str , model_list : List [str ], output_path : Optional [str ] = None ):
38+ def main (
39+ result_dir : str ,
40+ model_list : List [str ],
41+ output_path : Optional [str ] = None ,
42+ output_format : str = "markdown" ,
43+ ):
3944 task_dirs = [d for d in os .listdir (result_dir ) if not d .startswith ("." )]
4045
4146 df = pd .DataFrame ()
@@ -67,6 +72,8 @@ def main(result_dir: str, model_list: List[str], output_path: Optional[str] = No
6772 df = df ._append (model_results , ignore_index = True )
6873
6974 df = df .set_index ("Model" )
75+ # round to 2 decimal places
76+ df = df .round (2 )
7077 df = df .rename (
7178 columns = {
7279 k : f"{ TASK_ALIAS [k .split ('/' )[0 ]]} /{ METRIC_ALIAS [k .split ('/' )[1 ]]} "
@@ -76,16 +83,31 @@ def main(result_dir: str, model_list: List[str], output_path: Optional[str] = No
7683 # sort columns
7784 df = df .reindex (sorted (df .columns ), axis = 1 )
7885
79- print (df .to_markdown (mode = "github" ))
86+ # textbf top1 score for each column
87+ for col in df .columns :
88+ top1_model = df [col ].idxmax ()
89+ if output_format == "latex" :
90+ df .loc [top1_model , col ] = f"\\ textbf{{{ df .loc [top1_model , col ]} }}"
91+ else :
92+ df .loc [top1_model , col ] = f"**{ df .loc [top1_model , col ]} **"
93+
94+ if output_format == "markdown" :
95+ table = df .to_markdown (mode = "github" , floatfmt = ".2f" )
96+ elif output_format == "latex" :
97+ table = df .to_latex (float_format = "%.2f" )
98+ print (table )
8099
81100 with open (output_path , "w" ) as f :
82- f .write (df . to_markdown ( mode = "github" ) )
101+ f .write (table )
83102
84103
def parse_args():
    """Parse command-line options for the leaderboard generator.

    Returns:
        argparse.Namespace with:
            result_dir: directory holding per-task evaluation results.
            output_path: file path the rendered table is written to.
            output_format: table style, either "markdown" or "latex".
    """
    parser = ArgumentParser()
    # Declarative spec keeps flag names and their settings side by side.
    option_specs = [
        ("--result_dir", {"type": str, "default": "result"}),
        ("--output_path", {"type": str, "default": "leaderboard.md"}),
        (
            "--output_format",
            {"type": str, "default": "markdown", "choices": ["markdown", "latex"]},
        ),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
90112
91113
@@ -94,11 +116,29 @@ def parse_args():
94116
95117 # NOTE: the model list could also be accepted as a runtime argument
96118 model_list = [
97- "Qwen/Qwen2.5-VL-7B-Instruct" ,
119+ "stabilityai/japanese-instructblip-alpha" ,
120+ "stabilityai/japanese-stable-vlm" ,
121+ "SakanaAI/Llama-3-EvoVLM-JP-v2" ,
122+ "cyberagent/llava-calm2-siglip" ,
123+ "llm-jp/llm-jp-3-vila-14b" ,
98124 "sbintuitions/sarashina2-vision-8b" ,
99125 "sbintuitions/sarashina2-vision-14b" ,
100- "google/gemma-3-12b-it " ,
126+ "MIL-UT/Asagi-14B " ,
101127 "llava-hf/llava-1.5-7b-hf" ,
128+ "llava-hf/llava-v1.6-mistral-7b-hf" ,
129+ "neulab/Pangea-7B-hf" ,
130+ "mistralai/Pixtral-12B-2409" ,
131+ "meta-llama/Llama-3.2-11B-Vision-Instruct" ,
132+ "Efficient-Large-Model/VILA1.5-13b" ,
133+ "OpenGVLab/InternVL2-8B" ,
134+ "OpenGVLab/InternVL2-26B" ,
135+ "Qwen/Qwen2.5-VL-7B-Instruct" ,
136+ "Qwen/Qwen2.5-VL-72B-Instruct" ,
137+ "google/gemma-3-4b-it" ,
138+ "google/gemma-3-12b-it" ,
139+ "google/gemma-3-27b-it" ,
140+ "microsoft/Phi-4-multimodal-instruct" ,
141+ "gpt-4o-2024-11-20" ,
102142 ]
103143
104- main (args .result_dir , model_list , args .output_path )
144+ main (args .result_dir , model_list , args .output_path , args . output_format )
0 commit comments