# See the License for the specific language governing permissions and
# limitations under the License.

+ #!/bin/bash
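+ #
+ # Usage (assuming this file is saved as, e.g., run_mteb_eval.sh):
+ #   bash run_mteb_eval.sh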
- for task in "ArguAna" "ClimateFEVER" "DBPedia" "FEVER" "FiQA2018" "HotpotQA" "MSMARCO" "NFCorpus" "NQ" "QuoraRetrieval" "SCIDOCS" "SciFact" "Touche2020" "TRECCOVID" "CQADupstackAndroidRetrieval" "CQADupstackEnglishRetrieval" "CQADupstackGamingRetrieval" "CQADupstackGisRetrieval" "CQADupstackMathematicaRetrieval" "CQADupstackPhysicsRetrieval" "CQADupstackProgrammersRetrieval" "CQADupstackStatsRetrieval" "CQADupstackTexRetrieval" "CQADupstackUnixRetrieval" "CQADupstackWebmastersRetrieval" "CQADupstackWordpressRetrieval" "MSMARCOTITLE"
- do
+ # --- Script Configuration ---
+ # Exit immediately if a command exits with a non-zero status.
+ set -e

-     # 1. RocketQA V1
-     python3.10 -u eval_mteb.py \
-             --corpus_model_name_or_path rocketqa-en-base-v1/passage_model \
-             --query_model_name_or_path rocketqa-en-base-v1/query_model \
+ # Define the list of all tasks (datasets) to be evaluated.
+ # TASKS=(
+ #     "ArguAna" "ClimateFEVER" "DBPedia" "FEVER" "FiQA2018" "HotpotQA" "MSMARCO" "NFCorpus" "NQ" "QuoraRetrieval"
+ #     "SCIDOCS" "SciFact" "Touche2020" "TRECCOVID" "CQADupstackAndroidRetrieval" "CQADupstackEnglishRetrieval"
+ #     "CQADupstackGamingRetrieval" "CQADupstackGisRetrieval" "CQADupstackMathematicaRetrieval" "CQADupstackPhysicsRetrieval"
+ #     "CQADupstackProgrammersRetrieval" "CQADupstackStatsRetrieval" "CQADupstackTexRetrieval" "CQADupstackUnixRetrieval"
+ #     "CQADupstackWebmastersRetrieval" "CQADupstackWordpressRetrieval" "MSMARCOTITLE"
+ # )
+
+ TASKS=("ArguAna" "SCIDOCS" "FEVER")
+
+ # Select the models to evaluate (uncomment the full list to run them all).
+ # MODELS_TO_RUN=("RocketQA-V1" "RocketQA-V2" "BGE" "RepLLaMA" "NV-Embed-v1" "BGE-EN-ICL" "LLARA-passage")
+ MODELS_TO_RUN=("BGE")
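+
+ # Each model section below is gated by a whitespace-padded substring match,
+ # e.g. [[ " ${MODELS_TO_RUN[*]} " =~ " BGE " ]], so that "BGE" does not also
+ # match longer names such as "BGE-EN-ICL".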
+
+ # ===================================================================================
+ # 🚀 1. RocketQA V1
+ # ===================================================================================
+ if [[ " ${MODELS_TO_RUN[*]} " =~ " RocketQA-V1 " ]]; then
+     echo "===== Running Evaluation for Model: RocketQA V1 ====="
+     for task in "${TASKS[@]}"; do
+         echo "--- Task: $task ---"
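+         # Tasks whose name contains "MSMARCO" are scored on their dev split;
+         # every other task uses its test split (see --task_split below).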
+         python3.10 -u evaluation/eval_mteb.py \
+             --corpus_model_name_or_path rocketqa-v1-marco-para-encoder \
+             --query_model_name_or_path rocketqa-v1-marco-query-encoder \
            --model_flag RocketQA-V1 \
            --output_folder en_results/rocketqa-en-base-v1 \
            --task_name "$task" \
-             --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \
+             --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \
            --query_instruction "" \
            --document_instruction "" \
            --max_seq_length 512 \
            --eval_batch_size 32 \
            --dtype "float32" \
            --padding_side right \
            --pooling_method "cls"
+     done
+ fi
+

-     # 2. RocketQA V2
-     python3.10 -u eval_mteb.py \
-             --corpus_model_name_or_path rocketqa-en-base-v2/passage_model \
-             --query_model_name_or_path rocketqa-en-base-v2/query_model \
+ # ===================================================================================
+ # 🚀 2. RocketQA V2
+ # ===================================================================================
+ if [[ " ${MODELS_TO_RUN[*]} " =~ " RocketQA-V2 " ]]; then
+     echo "===== Running Evaluation for Model: RocketQA V2 ====="
+     for task in "${TASKS[@]}"; do
+         echo "--- Task: $task ---"
+         python3.10 -u evaluation/eval_mteb.py \
+             --corpus_model_name_or_path rocketqav2-en-marco-para-encoder \
+             --query_model_name_or_path rocketqav2-en-marco-query-encoder \
            --model_flag RocketQA-V2 \
            --output_folder en_results/rocketqa-en-base-v2 \
            --task_name "$task" \
-             --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \
+             --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \
            --query_instruction "" \
            --document_instruction "" \
            --max_seq_length 512 \
            --eval_batch_size 128 \
            --dtype "float32" \
            --padding_side right \
            --pooling_method "cls"
+     done
+ fi
+

-     # 3. BGE
-     python3.10 eval_mteb.py \
+ # ===================================================================================
+ # 🎯 3. BGE (BAAI/bge-large-en-v1.5)
+ # ===================================================================================
+ if [[ " ${MODELS_TO_RUN[*]} " =~ " BGE " ]]; then
+     echo "===== Running Evaluation for Model: BGE (bge-large-en-v1.5) ====="
+     for task in "${TASKS[@]}"; do
+         echo "--- Task: $task ---"
+         python3.10 evaluation/eval_mteb.py \
            --base_model_name_or_path BAAI/bge-large-en-v1.5 \
-             --output_folder en_results/bge-large-en-v1.5 \
+             --output_folder en_results/bge-large-en-v1.5_2 \
            --task_name "$task" \
-             --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \
+             --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \
            --document_instruction 'Represent this sentence for searching relevant passages: ' \
            --pooling_method mean \
            --max_seq_length 512 \
            --eval_batch_size 32 \
            --padding_side right \
            --add_bos_token 0 \
-             --add_eos_token 0
+             --add_eos_token 0
+     done
+ fi

-     # 4. RepLLaMA
-     python3.10 eval_mteb.py \
+
+ # ===================================================================================
+ # 🦙 4. RepLLaMA
+ # ===================================================================================
+ if [[ " ${MODELS_TO_RUN[*]} " =~ " RepLLaMA " ]]; then
+     echo "===== Running Evaluation for Model: RepLLaMA ====="
+     for task in "${TASKS[@]}"; do
+         echo "--- Task: $task ---"
+         python3.10 evaluation/eval_mteb.py \
            --base_model_name_or_path castorini/repllama-v1-7b-lora-passage \
            --output_folder en_results/repllama-v1-7b-lora-passage \
            --task_name "$task" \
-             --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \
+             --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \
            --query_instruction 'query: ' \
            --document_instruction 'passage: ' \
            --pooling_method last \
            --padding_side right \
            --add_bos_token 0 \
            --add_eos_token 1
+     done
+ fi
+

-     # 5. NV-Embed-v1
-     python3.10 eval_mteb.py \
+ # ===================================================================================
+ # Nvidia 5. NV-Embed-v1
+ # ===================================================================================
+ if [[ " ${MODELS_TO_RUN[*]} " =~ " NV-Embed-v1 " ]]; then
+     echo "===== Running Evaluation for Model: NV-Embed-v1 ====="
+     for task in "${TASKS[@]}"; do
+         echo "--- Task: $task ---"
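+         # Note: this hard-coded query instruction targets claim-refutation tasks
+         # (e.g. ClimateFEVER/FEVER); other tasks would normally get their own instruction.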
+         python3.10 evaluation/eval_mteb.py \
            --base_model_name_or_path nvidia/NV-Embed-v1 \
            --output_folder en_results/nv-embed-v1 \
            --query_instruction "Given a claim, find documents that refute the claim" \
            --task_name "$task" \
-             --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \
+             --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \
            --eval_batch_size 8
+     done
+ fi
+

-     # 6. BGE-EN-ICL
-     python3.10 eval_mteb.py \
+ # ===================================================================================
+ # 🎯 6. BGE-EN-ICL
+ # ===================================================================================
+ if [[ " ${MODELS_TO_RUN[*]} " =~ " BGE-EN-ICL " ]]; then
+     echo "===== Running Evaluation for Model: BGE-EN-ICL ====="
+     for task in "${TASKS[@]}"; do
+         echo "--- Task: $task ---"
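+         # BGE-EN-ICL expects the $'<instruct>{instruction}\n<query>' prompt template;
+         # the instruction below is phrased for SciFact-style claim verification.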
+         python3.10 evaluation/eval_mteb.py \
            --base_model_name_or_path BAAI/bge-en-icl \
            --output_folder en_results/bge-en-icl \
            --task_name "$task" \
-             --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \
+             --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \
            --query_instruction $'<instruct>Given a scientific claim, retrieve documents that support or refute the claim.\n<query>' \
            --max_seq_length 512 \
            --eval_batch_size 32 \
            --dtype "float32" \
            --padding_side left \
            --add_bos_token 1 \
            --add_eos_token 1
+     done
+ fi

-     # 7. LLARA-passage
-     python3.10 eval_mteb.py \
+
+ # ===================================================================================
+ # 🦙 7. LLARA-passage
+ # ===================================================================================
+ if [[ " ${MODELS_TO_RUN[*]} " =~ " LLARA-passage " ]]; then
+     echo "===== Running Evaluation for Model: LLARA-passage ====="
+     for task in "${TASKS[@]}"; do
+         echo "--- Task: $task ---"
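+         # pooling_method last_8 pools over the final 8 token embeddings; the
+         # max_seq_length of 532 (vs. 512) presumably leaves room for LLARA's prompt tokens.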
+         python3.10 evaluation/eval_mteb.py \
            --base_model_name_or_path BAAI/LLARA-passage \
            --output_folder en_results/llara-passage \
            --task_name "$task" \
-             --task_split $(if [[ "$task" == *"MSMARCO"* ]]; then echo "dev"; else echo "test"; fi) \
+             --task_split $([[ "$task" == *"MSMARCO"* ]] && echo "dev" || echo "test") \
            --eval_batch_size 8 \
            --pooling_method last_8 \
            --model_flag llara \
            --add_bos_token 1 \
            --add_eos_token 0 \
            --max_seq_length 532
+     done
+ fi
+

- done
+ echo "All specified evaluations are complete."