|
39 | 39 |     wikibench_datasets  # noqa: F401, E501
40 | 40 | from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
41 | 41 |     winogrande_datasets  # noqa: F401, E501
42 | | -from opencompass.configs.models.baichuan.hf_baichuan_7b import models as hf_baichuan_7b  # noqa: F401, E501
43 | | -from opencompass.configs.models.gemma.hf_gemma_7b import models as hf_gemma_7b  # noqa: F401, E501
44 | | -from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import models as hf_internlm2_5_7b  # noqa: F401, E501
45 | | -from opencompass.configs.models.hf_internlm.hf_internlm2_7b import models as hf_internlm2_7b  # noqa: F401, E501
46 | | -from opencompass.configs.models.hf_internlm.hf_internlm2_20b import models as hf_internlm2_20b  # noqa: F401, E501
47 | | -from opencompass.configs.models.hf_internlm.hf_internlm_7b import models as hf_internlm_7b  # noqa: F401, E501
48 | | -from opencompass.configs.models.hf_internlm.hf_internlm_20b import models as hf_internlm_20b  # noqa: F401, E501
49 | | -from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b import \
50 | | -    models as lmdeploy_internlm2_5_7b  # noqa: F401, E501
51 | | -from opencompass.configs.models.hf_llama.hf_llama2_7b import models as hf_llama2_7b  # noqa: F401, E501
52 | | -from opencompass.configs.models.hf_llama.hf_llama3_8b import models as hf_llama3_8b  # noqa: F401, E501
53 | | -from opencompass.configs.models.mistral.hf_mistral_7b_v0_1 import models as hf_mistral_7b_v0_1  # noqa: F401, E501
54 | | -from opencompass.configs.models.mistral.hf_mixtral_8x7b_v0_1 import \
55 | | -    models as hf_mixtral_8x7b_v0_1  # noqa: F401, E501
56 | | -from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b import models as lmdeploy_qwen2_5_7b  # noqa: F401, E501
57 | | -from opencompass.configs.models.qwen.hf_qwen1_5_7b import models as hf_qwen1_5_7b  # noqa: F401, E501
58 | | -from opencompass.configs.models.qwen.hf_qwen2_7b import models as hf_qwen2_7b  # noqa: F401, E501
59 | | -from opencompass.configs.models.qwen.hf_qwen_7b import models as hf_qwen_7b  # noqa: F401, E501
60 | | -from opencompass.configs.models.qwen.lmdeploy_qwen1_5_7b import models as lmdeploy_qwen1_5_7b  # noqa: F401, E501
61 | | -from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import models as lmdeploy_qwen2_7b  # noqa: F401, E501
62 | 42 | # Summary Groups |
63 | 43 | from opencompass.configs.summarizers.groups.cmmlu import cmmlu_summary_groups  # noqa: F401, E501
64 | 44 | from opencompass.configs.summarizers.groups.GaokaoBench import GaokaoBench_summary_groups  # noqa: F401, E501
|
69 | 49 |
|
70 | 50 | # read models |
71 | 51 | race_datasets = [race_datasets[1]] |
| 52 | +mmlu_datasets = [
| 53 | +    x for x in mmlu_datasets if x['abbr'].replace('lukaemon_mmlu_', '') in [
| 54 | +        'business_ethics', 'clinical_knowledge', 'college_medicine', 'global_facts', 'human_aging', 'management',
| 55 | +        'marketing', 'medical_genetics', 'miscellaneous', 'nutrition', 'professional_accounting',
| 56 | +        'professional_medicine', 'virology'
| 57 | +    ]
| 58 | +]
| 59 | + |
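
The added filter keeps only the listed MMLU subsets, matching on each dataset's `abbr` with the `lukaemon_mmlu_` prefix stripped. A minimal, self-contained sketch of the same pattern (the two-entry dataset list here is illustrative; real OpenCompass dataset configs carry more keys per dict):

mmlu_datasets = [
    {'abbr': 'lukaemon_mmlu_college_medicine'},
    {'abbr': 'lukaemon_mmlu_philosophy'},
]
# Strip the loader prefix and keep only the whitelisted subsets.
kept = [
    x for x in mmlu_datasets
    if x['abbr'].replace('lukaemon_mmlu_', '') in {'college_medicine', 'virology'}
]
assert [x['abbr'] for x in kept] == ['lukaemon_mmlu_college_medicine']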
72 | 60 | summarizer = dict( |
73 | 61 |     dataset_abbrs=[
74 | 62 |         ['race-high', 'accuracy'],
|
138 | 126 |     summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
139 | 127 | ) |
140 | 128 |
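
The `summary_groups` line collects every `*_summary_groups` list imported above and flattens them into one list; `sum(..., [])` concatenates lists because the reduction starts from an empty list. A minimal sketch with illustrative group contents:

cmmlu_summary_groups = [{'name': 'cmmlu'}]
GaokaoBench_summary_groups = [{'name': 'GaokaoBench'}]

# Gather by naming convention, then concatenate the lists.
summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
assert [g['name'] for g in summary_groups] == ['cmmlu', 'GaokaoBench']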
|
141 | | -turbomind_qwen1_5_7b = deepcopy(*lmdeploy_qwen1_5_7b) |
142 | | -turbomind_qwen2_7b = deepcopy(*lmdeploy_qwen2_7b) |
143 | | -turbomind_qwen2_5_7b = deepcopy(*lmdeploy_qwen2_5_7b) |
144 | | -turbomind_qwen2_5_14b = deepcopy(*lmdeploy_qwen2_5_7b) |
145 | | -turbomind_qwen2_5_14b['path'] = 'Qwen/Qwen2.5-14B' |
146 | | -turbomind_internlm2_5_7b = deepcopy(*lmdeploy_internlm2_5_7b) |
147 | | -turbomind_internlm2_5_7b_4bits = deepcopy(*lmdeploy_internlm2_5_7b) |
148 | | -turbomind_internlm2_5_7b_batch1 = deepcopy(*lmdeploy_internlm2_5_7b) |
149 | | -turbomind_internlm2_5_7b_batch1_4bits = deepcopy(*lmdeploy_internlm2_5_7b) |
150 | | - |
151 | 129 | base_model = dict( |
152 | 130 |     type=TurboMindModel,
153 | | -    engine_config=dict(session_len=7168, max_batch_size=128, tp=1),
| 131 | +    engine_config=dict(session_len=7168, tp=1),
154 | 132 |     gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
155 | 133 |     max_seq_len=7168,
156 | 134 |     max_out_len=1024,
157 | | -    batch_size=128,
| 135 | +    batch_size=32,
158 | 136 |     run_cfg=dict(num_gpus=1),
159 | 137 | ) |
160 | 138 |
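
Every block below clones `base_model` and then overrides the same handful of fields. A sketch of a helper that would capture the pattern; the `make_model` name is illustrative and not part of this diff:

from copy import deepcopy

def make_model(abbr, path, tp=1):
    # Clone the shared template, then apply the per-model overrides
    # (abbr, HF path, and tensor parallelism / GPU count).
    m = deepcopy(base_model)
    m['abbr'] = abbr
    m['path'] = path
    m['run_cfg']['num_gpus'] = tp
    m['engine_config']['tp'] = tp
    return m

# e.g. turbomind_qwen2_5_32b = make_model('turbomind_qwen2_5_32b', 'Qwen/Qwen2.5-32B', tp=2)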
|
| 139 | +turbomind_qwen2_5_1_5b = deepcopy(base_model) |
| 140 | +turbomind_qwen2_5_1_5b['path'] = 'Qwen/Qwen2.5-1.5B' |
| 141 | +turbomind_qwen2_5_1_5b['abbr'] = 'turbomind_qwen2_5_1_5b' |
| 142 | +turbomind_qwen2_5_7b = deepcopy(base_model) |
| 143 | +turbomind_qwen2_5_7b['path'] = 'Qwen/Qwen2.5-7B' |
| 144 | +turbomind_qwen2_5_7b['abbr'] = 'turbomind_qwen2_5_7b' |
| 145 | +turbomind_qwen2_5_32b = deepcopy(base_model) |
| 146 | +turbomind_qwen2_5_32b['path'] = 'Qwen/Qwen2.5-32B' |
| 147 | +turbomind_qwen2_5_32b['abbr'] = 'turbomind_qwen2_5_32b' |
| 148 | +turbomind_qwen2_5_32b['run_cfg']['num_gpus'] = 2 |
| 149 | +turbomind_qwen2_5_32b['engine_config']['tp'] = 2 |
| 150 | +turbomind_internlm2_5_7b = deepcopy(base_model) |
| 151 | +turbomind_internlm2_5_7b['path'] = 'internlm/internlm2_5-7b-chat' |
| 152 | +turbomind_internlm2_5_7b['abbr'] = 'turbomind_internlm2_5_7b' |
| 153 | +turbomind_glm_4_9b = deepcopy(base_model) |
| 154 | +turbomind_glm_4_9b['path'] = 'THUDM/glm-4-9b' |
| 155 | +turbomind_glm_4_9b['abbr'] = 'turbomind_glm_4_9b' |
| 156 | +turbomind_llama_3_70b = deepcopy(base_model) |
| 157 | +turbomind_llama_3_70b['path'] = 'meta-llama/Meta-Llama-3-70B' |
| 158 | +turbomind_llama_3_70b['abbr'] = 'turbomind_llama_3_70b' |
| 159 | +turbomind_llama_3_70b['run_cfg']['num_gpus'] = 4 |
| 160 | +turbomind_llama_3_70b['engine_config']['tp'] = 4 |
| 161 | +turbomind_llama_3_1_8b = deepcopy(base_model) |
| 162 | +turbomind_llama_3_1_8b['path'] = 'meta-llama/Llama-3.1-8B' |
| 163 | +turbomind_llama_3_1_8b['abbr'] = 'turbomind_llama_3_1_8b' |
| 164 | +turbomind_qwen3_0_6b_base = deepcopy(base_model) |
| 165 | +turbomind_qwen3_0_6b_base['path'] = 'Qwen/Qwen3-0.6B-Base' |
| 166 | +turbomind_qwen3_0_6b_base['abbr'] = 'turbomind_qwen3_0_6b_base' |
161 | 167 | turbomind_qwen3_8b_base = deepcopy(base_model) |
162 | | -pytorch_qwen3_8b_base = deepcopy(base_model) |
163 | | -turbomind_qwen3_8b_base_4bits = deepcopy(base_model) |
164 | | -turbomind_qwen3_8b_base_kvint8 = deepcopy(base_model) |
165 | | -for model in [
166 | | -    v for k, v in locals().items()
167 | | -    if k.startswith('turbomind_qwen3_8b_base') or k.startswith('pytorch_qwen3_8b_base')
168 | | -]:
169 | | -    model['abbr'] = 'qwen3_8b_base_turbomind'
170 | | -    model['path'] = 'Qwen/Qwen3-8B-Base'
171 | | -    model['run_cfg']['num_gpus'] = 1
172 | | -    model['engine_config']['tp'] = 1
| 168 | +turbomind_qwen3_8b_base['path'] = 'Qwen/Qwen3-8B-Base' |
| 169 | +turbomind_qwen3_8b_base['abbr'] = 'turbomind_qwen3_8b_base' |
| 170 | +turbomind_qwen3_30b_A3B_base = deepcopy(base_model) |
| 171 | +turbomind_qwen3_30b_A3B_base['path'] = 'Qwen/Qwen3-30B-A3B-Base' |
| 172 | +turbomind_qwen3_30b_A3B_base['abbr'] = 'turbomind_qwen3_30b_A3B_base' |
| 173 | +turbomind_qwen3_30b_A3B_base['run_cfg']['num_gpus'] = 2 |
| 174 | +turbomind_qwen3_30b_A3B_base['engine_config']['tp'] = 2 |
173 | 175 |
|
174 | | -for model in [v for k, v in locals().items() if k.endswith('_4bits')]:
175 | | -    model['engine_config']['model_format'] = 'awq'
176 | | -    model['abbr'] = model['abbr'] + '_4bits'
177 | | -    model['path'] = model['path'] + '-inner-4bits'
178 | | -
179 | | -for model in [v for k, v in locals().items() if '_batch1' in k]:
180 | | -    model['abbr'] = model['abbr'] + '_batch1'
181 | | -    model['engine_config']['max_batch_size'] = 1
182 | | -    model['batch_size'] = 1
| 176 | +pytorch_qwen2_5_1_5b = deepcopy(base_model) |
| 177 | +pytorch_qwen2_5_1_5b['path'] = 'Qwen/Qwen2.5-1.5B' |
| 178 | +pytorch_qwen2_5_1_5b['abbr'] = 'pytorch_qwen2_5_1_5b' |
| 179 | +pytorch_qwen2_5_7b = deepcopy(base_model) |
| 180 | +pytorch_qwen2_5_7b['path'] = 'Qwen/Qwen2.5-7B' |
| 181 | +pytorch_qwen2_5_7b['abbr'] = 'pytorch_qwen2_5_7b' |
| 182 | +pytorch_qwen2_5_32b = deepcopy(base_model) |
| 183 | +pytorch_qwen2_5_32b['path'] = 'Qwen/Qwen2.5-32B' |
| 184 | +pytorch_qwen2_5_32b['abbr'] = 'pytorch_qwen2_5_32b' |
| 185 | +pytorch_qwen2_5_32b['run_cfg']['num_gpus'] = 2 |
| 186 | +pytorch_qwen2_5_32b['engine_config']['tp'] = 2 |
| 187 | +pytorch_internlm2_5_7b = deepcopy(base_model) |
| 188 | +pytorch_internlm2_5_7b['path'] = 'internlm/internlm2_5-7b-chat' |
| 189 | +pytorch_internlm2_5_7b['abbr'] = 'pytorch_internlm2_5_7b' |
| 190 | +pytorch_gemma_2_9b = deepcopy(base_model) |
| 191 | +pytorch_gemma_2_9b['path'] = 'google/gemma-2-9b' |
| 192 | +pytorch_gemma_2_9b['abbr'] = 'pytorch_gemma_2_9b' |
| 193 | +pytorch_llama_3_70b = deepcopy(base_model) |
| 194 | +pytorch_llama_3_70b['path'] = 'meta-llama/Meta-Llama-3-70B' |
| 195 | +pytorch_llama_3_70b['abbr'] = 'pytorch_llama_3_70b' |
| 196 | +pytorch_llama_3_70b['run_cfg']['num_gpus'] = 4 |
| 197 | +pytorch_llama_3_70b['engine_config']['tp'] = 4 |
| 198 | +pytorch_llama_3_1_8b = deepcopy(base_model) |
| 199 | +pytorch_llama_3_1_8b['path'] = 'meta-llama/Llama-3.1-8B' |
| 200 | +pytorch_llama_3_1_8b['abbr'] = 'pytorch_llama_3_1_8b' |
| 201 | +pytorch_qwen3_0_6b_base = deepcopy(base_model) |
| 202 | +pytorch_qwen3_0_6b_base['path'] = 'Qwen/Qwen3-0.6B-Base' |
| 203 | +pytorch_qwen3_0_6b_base['abbr'] = 'pytorch_qwen3_0_6b_base' |
| 204 | +pytorch_qwen3_8b_base = deepcopy(base_model) |
| 205 | +pytorch_qwen3_8b_base['path'] = 'Qwen/Qwen3-8B-Base' |
| 206 | +pytorch_qwen3_8b_base['abbr'] = 'pytorch_qwen3_8b_base' |
| 207 | +pytorch_qwen3_30b_A3B_base = deepcopy(base_model) |
| 208 | +pytorch_qwen3_30b_A3B_base['path'] = 'Qwen/Qwen3-30B-A3B-Base' |
| 209 | +pytorch_qwen3_30b_A3B_base['abbr'] = 'pytorch_qwen3_30b_A3B_base' |
| 210 | +pytorch_qwen3_30b_A3B_base['run_cfg']['num_gpus'] = 2 |
| 211 | +pytorch_qwen3_30b_A3B_base['engine_config']['tp'] = 2 |
183 | 212 |
|
184 | 213 | for model in [v for k, v in locals().items() if k.startswith('pytorch_')]: |
185 | | -    model['abbr'] = model['abbr'].replace('turbomind', 'pytorch')
186 | 214 |     model['backend'] = 'pytorch'
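
The closing loop tags every config whose variable name starts with `pytorch_`, so the same template drives both backends. A minimal sketch of the convention (the two demo dicts are illustrative):

pytorch_demo_a = {'abbr': 'pytorch_demo_a'}
pytorch_demo_b = {'abbr': 'pytorch_demo_b'}

# At module level locals() doubles as globals(), so any top-level
# dict named pytorch_* is picked up and routed to the PyTorch engine.
for model in [v for k, v in locals().items() if k.startswith('pytorch_')]:
    model['backend'] = 'pytorch'

assert pytorch_demo_a['backend'] == pytorch_demo_b['backend'] == 'pytorch'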