Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
repos:
- repo: https://github.com/psf/black
rev: 23.12.1
rev: 25.1.0
hooks:
- id: black
language_version: python3
args: ["--line-length=240"]
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
rev: 6.0.1
hooks:
- id: isort
language_version: python3
Expand Down
7 changes: 7 additions & 0 deletions docs/current_tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@
- WildVision 0617 (wildvision_0617)
- WildVision 0630 (wildvision_0630)
- [SeedBench 2 Plus](https://huggingface.co/datasets/AILab-CVC/SEED-Bench-2-plus) (seedbench_2_plus)
- [SalBench](https://salbench.github.io/)
  - p3
  - p3_box
  - p3_box_img
  - o3
  - o3_box
  - o3_box_img

## 2. Multi-image tasks:

Expand Down
2 changes: 1 addition & 1 deletion examples/models/qwen25vl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ accelerate launch --num_processes=8 --main_process_port=12346 -m lmms_eval \
--model qwen2_5_vl \
--model_args=pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_pixels=12845056,attn_implementation=flash_attention_2,interleave_visuals=False \
--tasks mme \
--batch_size 1
--batch_size 1
4 changes: 1 addition & 3 deletions lmms_eval/api/samplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ def get_context(self, doc, num_fewshot):
+ (
str(self.doc_to_target(doc)[0])
if type(self.doc_to_target(doc)) is list
else self.doc_to_target(doc)
if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str)
else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
else self.doc_to_target(doc) if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str) else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
)
for doc in selected_docs
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" MplugOwl model configuration """
"""MplugOwl model configuration"""
import copy
import os
from typing import Union
Expand Down
2 changes: 1 addition & 1 deletion lmms_eval/models/mplug_owl_video/modeling_mplug_owl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch MplugOwl model. """
"""PyTorch MplugOwl model."""

import math
from typing import Any, Optional, Tuple, Union
Expand Down
7 changes: 6 additions & 1 deletion lmms_eval/tasks/librispeech/cn_tn.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@

FILLER_CHARS = ["呃", "啊"]

ER_WHITELIST = "(儿女|儿子|儿孙|女儿|儿媳|妻儿|" "胎儿|婴儿|新生儿|婴幼儿|幼儿|少儿|小儿|儿歌|儿童|儿科|托儿所|孤儿|" "儿戏|儿化|台儿庄|鹿儿岛|正儿八经|吊儿郎当|生儿育女|托儿带女|养儿防老|痴儿呆女|" "佳儿佳妇|儿怜兽扰|儿无常父|儿不嫌母丑|儿行千里母担忧|儿大不由爷|苏乞儿)"
ER_WHITELIST = (
"(儿女|儿子|儿孙|女儿|儿媳|妻儿|"
"胎儿|婴儿|新生儿|婴幼儿|幼儿|少儿|小儿|儿歌|儿童|儿科|托儿所|孤儿|"
"儿戏|儿化|台儿庄|鹿儿岛|正儿八经|吊儿郎当|生儿育女|托儿带女|养儿防老|痴儿呆女|"
"佳儿佳妇|儿怜兽扰|儿无常父|儿不嫌母丑|儿行千里母担忧|儿大不由爷|苏乞儿)"
)
ER_WHITELIST_PATTERN = re.compile(ER_WHITELIST)

# 中文数字系统类型
Expand Down
73 changes: 73 additions & 0 deletions lmms_eval/tasks/salbench/_o3_default
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Shared defaults for the SalBench O3 tasks. The o3, o3_box and o3_box_img
# task files pull these settings in through `include: _o3_default` and only
# add their own `dataset_name` and `task` values.

# Dataset identifier and the keyword arguments forwarded to the loader.
# NOTE(review): `token: True` presumably enables authenticated dataset
# access - confirm against the loader in use.
dataset_path: salbench-vlm/salbench
dataset_kwargs:
token: True

# Evaluation runs on the `test` split with the `generate_until` output type.
test_split: test
output_type: generate_until
# Visual and text inputs are built by helpers in utils; the `p3o3_` helpers
# are the same ones referenced by the P3 defaults. The target is "answer".
doc_to_visual: !function utils.p3o3_doc_to_visual
doc_to_text: !function utils.p3o3_doc_to_text
doc_to_target: "answer"
# Generation is capped at 128 new tokens. The remaining decoding options are
# left commented out, so they are not set by this config.
generation_kwargs:
max_new_tokens: 128
# temperature: 0
# top_p: 0
# num_beams: 1
# do_sample: false

# Per-document results are produced by the O3-specific handler in utils.
process_results: !function utils.o3_process_results
# Every metric below names an aggregation function from utils and is marked
# higher_is_better.
# Per-sample metrics, all aggregated by the same per-sample function.
metric_list:
- metric: exact_match
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true
- metric: sample_precision
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true
- metric: sample_recall
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true
- metric: sample_f1
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true

# Metrics over all categories.
# NOTE(review): these aggregators carry a `p3_` prefix although this is the
# O3 config - confirm they are meant to be shared between P3 and O3.
- metric: all_cat_precision
aggregation: !function utils.p3_aggregate_all_category_precision
higher_is_better: true
- metric: all_cat_recall
aggregation: !function utils.p3_aggregate_all_category_recall
higher_is_better: true
- metric: all_cat_f1
aggregation: !function utils.p3_aggregate_all_category_f1
higher_is_better: true

# Per-category metrics for the "orientation" category.
# NOTE(review): the category list here (orientation, color, size) is the same
# as in the P3 defaults - confirm O3 has no additional categories to report.
- metric: orientation_precision
aggregation: !function utils.aggregate_per_category_precision
higher_is_better: true
- metric: orientation_recall
aggregation: !function utils.aggregate_per_category_recall
higher_is_better: true
- metric: orientation_f1
aggregation: !function utils.aggregate_per_category_f1
higher_is_better: true

# Per-category metrics for the "color" category.
- metric: color_precision
aggregation: !function utils.aggregate_per_category_precision
higher_is_better: true
- metric: color_recall
aggregation: !function utils.aggregate_per_category_recall
higher_is_better: true
- metric: color_f1
aggregation: !function utils.aggregate_per_category_f1
higher_is_better: true

# Per-category metrics for the "size" category.
- metric: size_precision
aggregation: !function utils.aggregate_per_category_precision
higher_is_better: true
- metric: size_recall
aggregation: !function utils.aggregate_per_category_recall
higher_is_better: true
- metric: size_f1
aggregation: !function utils.aggregate_per_category_f1
higher_is_better: true

# Version tag for this task configuration.
metadata:
- version: 0.0
73 changes: 73 additions & 0 deletions lmms_eval/tasks/salbench/_p3_default
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Shared defaults for the SalBench P3 tasks. The p3, p3_box and p3_box_img
# task files pull these settings in through `include: _p3_default` and only
# add their own `dataset_name` and `task` values.

# Dataset identifier and the keyword arguments forwarded to the loader.
# NOTE(review): `token: True` presumably enables authenticated dataset
# access - confirm against the loader in use.
dataset_path: salbench-vlm/salbench
dataset_kwargs:
token: True

# Evaluation runs on the `test` split with the `generate_until` output type.
test_split: test
output_type: generate_until
# Visual and text inputs are built by helpers in utils; the `p3o3_` helpers
# are the same ones referenced by the O3 defaults. The target is "answer".
doc_to_visual: !function utils.p3o3_doc_to_visual
doc_to_text: !function utils.p3o3_doc_to_text
doc_to_target: "answer"
# Generation is capped at 128 new tokens. The remaining decoding options are
# left commented out, so they are not set by this config.
generation_kwargs:
max_new_tokens: 128
# temperature: 0
# top_p: 0
# num_beams: 1
# do_sample: false

# Per-document results are produced by the P3-specific handler in utils.
process_results: !function utils.p3_process_results
# Every metric below names an aggregation function from utils and is marked
# higher_is_better.
# Per-sample metrics, all aggregated by the same per-sample function.
metric_list:
- metric: exact_match
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true
- metric: sample_precision
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true
- metric: sample_recall
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true
- metric: sample_f1
aggregation: !function utils.aggregate_per_sample_score
higher_is_better: true

# Metrics over all categories, using the P3 all-category aggregators.
- metric: all_cat_precision
aggregation: !function utils.p3_aggregate_all_category_precision
higher_is_better: true
- metric: all_cat_recall
aggregation: !function utils.p3_aggregate_all_category_recall
higher_is_better: true
- metric: all_cat_f1
aggregation: !function utils.p3_aggregate_all_category_f1
higher_is_better: true

# Per-category metrics for the "orientation" category.
- metric: orientation_precision
aggregation: !function utils.aggregate_per_category_precision
higher_is_better: true
- metric: orientation_recall
aggregation: !function utils.aggregate_per_category_recall
higher_is_better: true
- metric: orientation_f1
aggregation: !function utils.aggregate_per_category_f1
higher_is_better: true

# Per-category metrics for the "color" category.
- metric: color_precision
aggregation: !function utils.aggregate_per_category_precision
higher_is_better: true
- metric: color_recall
aggregation: !function utils.aggregate_per_category_recall
higher_is_better: true
- metric: color_f1
aggregation: !function utils.aggregate_per_category_f1
higher_is_better: true

# Per-category metrics for the "size" category.
- metric: size_precision
aggregation: !function utils.aggregate_per_category_precision
higher_is_better: true
- metric: size_recall
aggregation: !function utils.aggregate_per_category_recall
higher_is_better: true
- metric: size_f1
aggregation: !function utils.aggregate_per_category_f1
higher_is_better: true

# Version tag for this task configuration.
metadata:
- version: 0.0
3 changes: 3 additions & 0 deletions lmms_eval/tasks/salbench/o3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SalBench task "o3": uses the O3 dataset configuration; all other settings
# come from the included _o3_default file.
dataset_name: O3
task: "o3"
include: _o3_default
3 changes: 3 additions & 0 deletions lmms_eval/tasks/salbench/o3_box.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SalBench task "o3_box": uses the O3_box dataset configuration; all other
# settings come from the included _o3_default file.
dataset_name: O3_box
task: "o3_box"
include: _o3_default
3 changes: 3 additions & 0 deletions lmms_eval/tasks/salbench/o3_box_img.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SalBench task "o3_box_img": uses the O3_box_img dataset configuration; all
# other settings come from the included _o3_default file.
dataset_name: O3_box_img
task: "o3_box_img"
include: _o3_default
3 changes: 3 additions & 0 deletions lmms_eval/tasks/salbench/p3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SalBench task "p3": uses the P3 dataset configuration; all other settings
# come from the included _p3_default file.
dataset_name: P3
task: "p3"
include: _p3_default
3 changes: 3 additions & 0 deletions lmms_eval/tasks/salbench/p3_box.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SalBench task "p3_box": uses the P3_box dataset configuration; all other
# settings come from the included _p3_default file.
dataset_name: P3_box
task: "p3_box"
include: _p3_default
3 changes: 3 additions & 0 deletions lmms_eval/tasks/salbench/p3_box_img.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SalBench task "p3_box_img": uses the P3_box_img dataset configuration; all
# other settings come from the included _p3_default file.
dataset_name: P3_box_img
task: "p3_box_img"
include: _p3_default
Loading