|
18 | 18 | TaskConfigLogger, |
19 | 19 | VersionsLogger, |
20 | 20 | ) |
21 | | -from lighteval.utils import is_nanotron_available |
| 21 | +from lighteval.utils import is_nanotron_available, obj_to_markdown |
22 | 22 |
|
23 | 23 |
|
24 | 24 | if is_nanotron_available(): |
25 | | - from brrr.config import BrrrConfig |
26 | | - from brrr.experiment_loggers import obj_to_markdown |
27 | | - from nanotron.config import get_config_from_dict |
| 25 | + from nanotron.config import Config, get_config_from_dict |
28 | 26 |
|
29 | 27 |
|
30 | 28 | class EnhancedJSONEncoder(json.JSONEncoder): |
@@ -104,81 +102,81 @@ def save( |
104 | 102 |
|
105 | 103 | """ |
106 | 104 | hlog("Saving experiment tracker") |
107 | | - try: |
108 | | - date_id = datetime.now().isoformat().replace(":", "-") |
109 | | - |
110 | | - output_dir_results = Path(output_dir) / "results" / self.general_config_logger.model_name |
111 | | - output_dir_details = Path(output_dir) / "details" / self.general_config_logger.model_name |
112 | | - output_dir_details_sub_folder = output_dir_details / date_id |
113 | | - output_dir_results.mkdir(parents=True, exist_ok=True) |
114 | | - output_dir_details_sub_folder.mkdir(parents=True, exist_ok=True) |
115 | | - |
116 | | - output_results_file = output_dir_results / f"results_{date_id}.json" |
117 | | - output_results_in_details_file = output_dir_details / f"results_{date_id}.json" |
118 | | - |
119 | | - hlog(f"Saving results to {output_results_file} and {output_results_in_details_file}") |
120 | | - |
121 | | - to_dump = { |
122 | | - "config_general": asdict(self.general_config_logger), |
123 | | - "results": self.metrics_logger.metric_aggregated, |
124 | | - "versions": self.versions_logger.versions, |
125 | | - "config_tasks": self.task_config_logger.tasks_configs, |
126 | | - "summary_tasks": self.details_logger.compiled_details, |
127 | | - "summary_general": asdict(self.details_logger.compiled_details_over_all_tasks), |
128 | | - } |
129 | | - dumped = json.dumps(to_dump, cls=EnhancedJSONEncoder, indent=2) |
130 | | - |
131 | | - with open(output_results_file, "w") as f: |
132 | | - f.write(dumped) |
133 | | - |
134 | | - with open(output_results_in_details_file, "w") as f: |
135 | | - f.write(dumped) |
136 | | - |
137 | | - for task_name, task_details in self.details_logger.details.items(): |
138 | | - output_file_details = output_dir_details_sub_folder / f"details_{task_name}_{date_id}.parquet" |
139 | | - # Create a dataset from the dictionary |
140 | | - try: |
141 | | - dataset = Dataset.from_list([asdict(detail) for detail in task_details]) |
142 | | - except Exception: |
143 | | - # We force cast to str to avoid formatting problems for nested objects |
144 | | - dataset = Dataset.from_list( |
145 | | - [{k: str(v) for k, v in asdict(detail).items()} for detail in task_details] |
146 | | - ) |
| 105 | + # try: |
| 106 | + date_id = datetime.now().isoformat().replace(":", "-") |
147 | 107 |
|
148 | | - # We don't keep 'id' around if it's there |
149 | | - column_names = dataset.column_names |
150 | | - if "id" in dataset.column_names: |
151 | | - column_names = [t for t in dataset.column_names if t != "id"] |
152 | | - |
153 | | - # Sort column names to make it easier later |
154 | | - dataset = dataset.select_columns(sorted(column_names)) |
155 | | - # Save the dataset to a Parquet file |
156 | | - dataset.to_parquet(output_file_details.as_posix()) |
157 | | - |
158 | | - if push_results_to_hub: |
159 | | - self.api.upload_folder( |
160 | | - repo_id=self.hub_results_repo if public else self.hub_private_results_repo, |
161 | | - folder_path=output_dir_results, |
162 | | - path_in_repo=self.general_config_logger.model_name, |
163 | | - repo_type="dataset", |
164 | | - commit_message=f"Updating model {self.general_config_logger.model_name}", |
165 | | - ) |
| 108 | + output_dir_results = Path(output_dir) / "results" / self.general_config_logger.model_name |
| 109 | + output_dir_details = Path(output_dir) / "details" / self.general_config_logger.model_name |
| 110 | + output_dir_details_sub_folder = output_dir_details / date_id |
| 111 | + output_dir_results.mkdir(parents=True, exist_ok=True) |
| 112 | + output_dir_details_sub_folder.mkdir(parents=True, exist_ok=True) |
166 | 113 |
|
167 | | - if push_details_to_hub: |
168 | | - self.details_to_hub( |
169 | | - model_name=self.general_config_logger.model_name, |
170 | | - results_file_path=output_results_in_details_file, |
171 | | - details_folder_path=output_dir_details_sub_folder, |
172 | | - push_as_public=public, |
173 | | - ) |
| 114 | + output_results_file = output_dir_results / f"results_{date_id}.json" |
| 115 | + output_results_in_details_file = output_dir_details / f"results_{date_id}.json" |
| 116 | + |
| 117 | + hlog(f"Saving results to {output_results_file} and {output_results_in_details_file}") |
174 | 118 |
|
175 | | - if push_results_to_tensorboard: |
176 | | - self.push_results_to_tensorboard( |
177 | | - results=self.metrics_logger.metric_aggregated, details=self.details_logger.details |
| 119 | + to_dump = { |
| 120 | + "config_general": asdict(self.general_config_logger), |
| 121 | + "results": self.metrics_logger.metric_aggregated, |
| 122 | + "versions": self.versions_logger.versions, |
| 123 | + "config_tasks": self.task_config_logger.tasks_configs, |
| 124 | + "summary_tasks": self.details_logger.compiled_details, |
| 125 | + "summary_general": asdict(self.details_logger.compiled_details_over_all_tasks), |
| 126 | + } |
| 127 | + dumped = json.dumps(to_dump, cls=EnhancedJSONEncoder, indent=2) |
| 128 | + |
| 129 | + with open(output_results_file, "w") as f: |
| 130 | + f.write(dumped) |
| 131 | + |
| 132 | + with open(output_results_in_details_file, "w") as f: |
| 133 | + f.write(dumped) |
| 134 | + |
| 135 | + for task_name, task_details in self.details_logger.details.items(): |
| 136 | + output_file_details = output_dir_details_sub_folder / f"details_{task_name}_{date_id}.parquet" |
| 137 | + # Create a dataset from the dictionary |
| 138 | + try: |
| 139 | + dataset = Dataset.from_list([asdict(detail) for detail in task_details]) |
| 140 | + except Exception: |
| 141 | + # We force cast to str to avoid formatting problems for nested objects |
| 142 | + dataset = Dataset.from_list( |
| 143 | + [{k: str(v) for k, v in asdict(detail).items()} for detail in task_details] |
178 | 144 | ) |
179 | | - except Exception as e: |
180 | | - hlog("WARNING: Could not save results") |
181 | | - hlog(repr(e)) |
| 145 | + |
| 146 | + # We don't keep 'id' around if it's there |
| 147 | + column_names = dataset.column_names |
| 148 | + if "id" in dataset.column_names: |
| 149 | + column_names = [t for t in dataset.column_names if t != "id"] |
| 150 | + |
| 151 | + # Sort column names to make it easier later |
| 152 | + dataset = dataset.select_columns(sorted(column_names)) |
| 153 | + # Save the dataset to a Parquet file |
| 154 | + dataset.to_parquet(output_file_details.as_posix()) |
| 155 | + |
| 156 | + if push_results_to_hub: |
| 157 | + self.api.upload_folder( |
| 158 | + repo_id=self.hub_results_repo if public else self.hub_private_results_repo, |
| 159 | + folder_path=output_dir_results, |
| 160 | + path_in_repo=self.general_config_logger.model_name, |
| 161 | + repo_type="dataset", |
| 162 | + commit_message=f"Updating model {self.general_config_logger.model_name}", |
| 163 | + ) |
| 164 | + |
| 165 | + if push_details_to_hub: |
| 166 | + self.details_to_hub( |
| 167 | + model_name=self.general_config_logger.model_name, |
| 168 | + results_file_path=output_results_in_details_file, |
| 169 | + details_folder_path=output_dir_details_sub_folder, |
| 170 | + push_as_public=public, |
| 171 | + ) |
| 172 | + |
| 173 | + if push_results_to_tensorboard: |
| 174 | + self.push_results_to_tensorboard( |
| 175 | + results=self.metrics_logger.metric_aggregated, details=self.details_logger.details |
| 176 | + ) |
| 177 | + # except Exception as e: |
| 178 | + # hlog("WARNING: Could not save results") |
| 179 | + # hlog(repr(e)) |
182 | 180 |
|
183 | 181 | def generate_final_dict(self) -> dict: |
184 | 182 | """Aggregates and returns all the logger's experiment information in a dictionary. |
@@ -487,7 +485,7 @@ def push_results_to_tensorboard( # noqa: C901 |
487 | 485 | if not is_nanotron_available(): |
488 | 486 | hlog_warn("You cannot push results to tensorboard without nanotron installed. Skipping") |
489 | 487 | return |
490 | | - config: BrrrConfig = get_config_from_dict(self.general_config_logger.config, config_class=BrrrConfig) |
| 488 | + config: Config = get_config_from_dict(self.general_config_logger.config, config_class=Config) |
491 | 489 | lighteval_config = config.lighteval |
492 | 490 | try: |
493 | 491 | global_step = config.general.step |
|
0 commit comments