Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions lmms_eval/tasks/vsibench/_default_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
dataset_path: nyu-visionx/VSI-Bench

output_type: generate_until
process_docs: !function utils.process_docs
doc_to_visual: !function utils.vsibench_doc_to_visual
doc_to_text: !function utils.vsibench_doc_to_text
doc_to_target: "ground_truth"
generation_kwargs:
max_new_tokens: 16
temperature: 0
top_p: 1.0
num_beams: 1
do_sample: false
# The return value of process_results will be used by metrics
process_results: !function utils.vsibench_process_results
# Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
metric_list:
- metric: vsibench_score
aggregation: !function utils.vsibench_aggregate_results
higher_is_better: true
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
mca_post_prompt: "Answer with the option's letter from the given choices directly."
na_post_prompt: "Please answer the question using a single word or phrase."
gemini_api:
pre_prompt: ""
mca_post_prompt: "Answer with the option's letter from the given choices directly."
na_post_prompt: "Do not response anything other than a single number!"
gpt4v:
pre_prompt: ""
mca_post_prompt: "Answer with the option's letter from the given choices directly."
na_post_prompt: "Do not response anything other than a single number!"
metadata:
- version: 0.0
41 changes: 5 additions & 36 deletions lmms_eval/tasks/vsibench/vsibench.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,9 @@
dataset_path: nyu-visionx/VSI-Bench
dataset_name: full
test_split: test
task: "vsibench"
dataset_kwargs:
token: true
cache_dir: vsibench
video: true
task: vsibench
test_split: test
output_type: generate_until
process_docs: !function utils.process_docs
doc_to_visual: !function utils.vsibench_doc_to_visual
doc_to_text: !function utils.vsibench_doc_to_text
doc_to_target: "ground_truth"
generation_kwargs:
max_new_tokens: 16
temperature: 0
top_p: 1.0
num_beams: 1
do_sample: false
# The return value of process_results will be used by metrics
process_results: !function utils.vsibench_process_results
# Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results
metric_list:
- metric: vsibench_score
aggregation: !function utils.vsibench_aggregate_results
higher_is_better: true
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
mca_post_prompt: "Answer with the option's letter from the given choices directly."
na_post_prompt: "Please answer the question using a single word or phrase."
gemini_api:
pre_prompt: ""
mca_post_prompt: "Answer with the option's letter from the given choices directly."
na_post_prompt: "Do not response anything other than a single number!"
gpt4v:
pre_prompt: ""
mca_post_prompt: "Answer with the option's letter from the given choices directly."
na_post_prompt: "Do not response anything other than a single number!"
metadata:
- version: 0.0
include: _default_template_yaml

8 changes: 8 additions & 0 deletions lmms_eval/tasks/vsibench/vsibench_debiased.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
dataset_name: debiased
test_split: test
task: "vsibench_debiased"
dataset_kwargs:
token: true
cache_dir: vsibench
video: true
include: _default_template_yaml
8 changes: 8 additions & 0 deletions lmms_eval/tasks/vsibench/vsibench_pruned.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
dataset_name: full
test_split: test
task: "vsibench_pruned"
dataset_kwargs:
token: true
cache_dir: vsibench
video: true
include: _default_template_yaml