|
| 1 | +# noqa |
| 2 | +""" |
| 3 | +Take in a YAML, and output all "other" splits with this YAML |
| 4 | +""" |
| 5 | + |
| 6 | +import argparse |
| 7 | +import logging |
| 8 | +import os |
| 9 | + |
| 10 | +import yaml |
| 11 | +from tqdm import tqdm |
| 12 | + |
| 13 | + |
| 14 | +eval_logger = logging.getLogger("lm-eval") |
| 15 | + |
| 16 | + |
# Maps each subject (used below as the dataset config name and as the suffix
# of the generated task name) to the category its task is tagged with.
#
# Insertion order matters: the generator iterates this dict in order, and the
# order in which categories first appear here is the order in which they are
# listed in the group YAML.
#
# Entries that are commented out are skipped by the generator.
# NOTE(review): presumably those subjects are unavailable in the underlying
# dataset -- confirm before re-enabling them.
SUBJECTS = {
    "abstract_algebra": "stem",
    "anatomy": "stem",
    "astronomy": "stem",
    "business_ethics": "other",
    "clinical_knowledge": "other",
    "college_biology": "stem",
    "college_chemistry": "stem",
    "college_computer_science": "stem",
    "college_mathematics": "stem",
    "college_medicine": "other",
    "college_physics": "stem",
    "computer_security": "stem",
    "conceptual_physics": "stem",
    "econometrics": "social_sciences",
    "electrical_engineering": "stem",
    "elementary_mathematics": "stem",
    "formal_logic": "humanities",
    # "global_facts": "other",
    "high_school_biology": "stem",
    "high_school_chemistry": "stem",
    "high_school_computer_science": "stem",
    "high_school_european_history": "humanities",
    "high_school_geography": "social_sciences",
    "high_school_government_and_politics": "social_sciences",
    "high_school_macroeconomics": "social_sciences",
    "high_school_mathematics": "stem",
    "high_school_microeconomics": "social_sciences",
    "high_school_physics": "stem",
    "high_school_psychology": "social_sciences",
    "high_school_statistics": "stem",
    "high_school_us_history": "humanities",
    "high_school_world_history": "humanities",
    "human_aging": "other",
    "human_sexuality": "social_sciences",
    "international_law": "humanities",
    "jurisprudence": "humanities",
    "logical_fallacies": "humanities",
    "machine_learning": "stem",
    "management": "other",
    "marketing": "other",
    "medical_genetics": "other",
    "miscellaneous": "other",
    # "moral_disputes": "humanities",
    # "moral_scenarios": "humanities",
    "nutrition": "other",
    "philosophy": "humanities",
    "prehistory": "humanities",
    "professional_accounting": "other",
    # "professional_law": "humanities",
    "professional_medicine": "other",
    "professional_psychology": "social_sciences",
    "public_relations": "social_sciences",
    # "security_studies": "social_sciences",
    "sociology": "social_sciences",
    "us_foreign_policy": "social_sciences",
    "virology": "other",
    "world_religions": "humanities",
}
| 76 | + |
| 77 | + |
def parse_args():
    """Parse the generator's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace with attributes ``base_yaml_path`` (required),
        ``save_prefix_path`` (default ``"noor"``), ``cot_prompt_path``
        (default ``None``), ``task_prefix`` (default ``""``) and
        ``group_prefix`` (default ``""``).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--base_yaml_path", required=True)

    # Optional flags and their fallbacks, registered in declaration order so
    # the generated --help output lists them in the same sequence.
    optional_flags = (
        ("--save_prefix_path", "noor"),
        ("--cot_prompt_path", None),
        ("--task_prefix", ""),
        ("--group_prefix", ""),
    )
    for flag, fallback in optional_flags:
        cli.add_argument(flag, default=fallback)

    return cli.parse_args()
| 86 | + |
| 87 | + |
def _noor_name(task_prefix, suffix=None):
    """Build a ``noor``-namespaced identifier.

    Joins ``"noor"``, the optional *task_prefix* and the optional *suffix*
    with underscores, skipping parts that are empty, e.g. ``noor``,
    ``noor_stem``, ``noor_cot`` or ``noor_cot_stem``.
    """
    parts = ["noor"]
    if task_prefix != "":
        parts.append(task_prefix)
    if suffix is not None:
        parts.append(suffix)
    return "_".join(parts)


def _load_cot_prompts(cot_prompt_path):
    """Return the parsed JSON at *cot_prompt_path*, or ``None`` if no path was given."""
    if cot_prompt_path is None:
        return None

    # Imported lazily: json is only needed when a CoT prompt file is supplied.
    import json

    with open(cot_prompt_path, encoding="utf-8") as f:
        return json.load(f)


def _write_subject_yaml(args, base_yaml_name, subject, category, cot_prompts):
    """Write the task YAML for one subject to ``<save_prefix_path>_<subject>.yaml``.

    Raises:
        KeyError: if a CoT prompt file was given but has no entry for *subject*.
    """
    readable_subject = subject.replace("_", " ")

    if args.cot_prompt_path is not None:
        try:
            description = cot_prompts[subject]
        except KeyError:
            raise KeyError(
                f"subject '{subject}' has no entry in CoT prompt file "
                f"{args.cot_prompt_path}"
            ) from None
    else:
        description = (
            "The following are multiple choice questions (with answers) "
            f"about {readable_subject}.\n\n"
        )

    yaml_dict = {
        "include": base_yaml_name,
        "tag": _noor_name(args.task_prefix, category),
        "task": _noor_name(args.task_prefix, subject),
        "task_alias": readable_subject,
        "dataset_name": subject,
        "description": description,
    }

    file_save_path = args.save_prefix_path + f"_{subject}.yaml"
    eval_logger.info("Saving yaml for subset %s to %s", subject, file_save_path)
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            yaml_dict,
            yaml_file,
            allow_unicode=True,
            default_style='"',
        )


def _write_group_yaml(args, categories):
    """Write the group YAML that lists one ``noor`` tag per category.

    The file is ``<group_prefix>.yaml`` when ``--group_prefix`` is given,
    otherwise ``<save_prefix_path>.yaml``.
    """
    if args.group_prefix != "":
        file_save_path = args.group_prefix + ".yaml"
    else:
        file_save_path = args.save_prefix_path + ".yaml"

    eval_logger.info("Saving benchmark config to %s", file_save_path)
    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(
            {
                "group": _noor_name(args.task_prefix),
                "task": [_noor_name(args.task_prefix, c) for c in categories],
            },
            yaml_file,
            indent=4,
            default_flow_style=False,
        )


def main():
    """Generate one task YAML per entry of ``SUBJECTS`` plus one group YAML.

    Every per-subject YAML ``include``s the base YAML by file name, so the
    generated files are expected to live next to it.

    Raises:
        FileNotFoundError: if ``--base_yaml_path`` does not point to a file.
        KeyError: if a CoT prompt file is given but lacks a subject.
    """
    args = parse_args()

    # The base YAML is only referenced by name from the generated files; its
    # content is never needed here. It used to be parsed with yaml.full_load
    # and the result discarded, which was dead work and could abort the run on
    # tags the loader does not know. Just fail early if it does not exist.
    if not os.path.isfile(args.base_yaml_path):
        raise FileNotFoundError(f"base YAML not found: {args.base_yaml_path}")
    base_yaml_name = os.path.basename(args.base_yaml_path)

    cot_prompts = _load_cot_prompts(args.cot_prompt_path)

    for subject, category in tqdm(SUBJECTS.items()):
        _write_subject_yaml(args, base_yaml_name, subject, category, cot_prompts)

    # Unique categories in first-seen order (dicts preserve insertion order).
    categories = list(dict.fromkeys(SUBJECTS.values()))
    _write_group_yaml(args, categories)


if __name__ == "__main__":
    main()
0 commit comments