|
1 | 1 | from pathlib import PosixPath |
2 | | -from typing import Optional, Tuple |
| 2 | +from typing import Optional |
3 | 3 |
|
4 | 4 | from loguru import logger |
5 | 5 | from torch.utils.data import DataLoader |
6 | | -from transformers import HfArgumentParser, PreTrainedModel |
| 6 | +from transformers import PreTrainedModel |
7 | 7 |
|
8 | | -from llmcompressor.args import DatasetArguments, ModelArguments, RecipeArguments |
| 8 | +from llmcompressor.args import parse_args |
9 | 9 | from llmcompressor.core.session_functions import active_session |
10 | 10 | from llmcompressor.transformers.finetune.data.data_helpers import ( |
11 | 11 | get_calibration_dataloader, |
|
18 | 18 | modify_save_pretrained, |
19 | 19 | patch_tied_tensors_bug, |
20 | 20 | ) |
21 | | -from llmcompressor.transformers.utils.helpers import resolve_processor_from_model_args |
22 | 21 |
|
23 | | -__all__ = ["Oneshot", "oneshot", "parse_oneshot_args"] |
| 22 | +__all__ = ["Oneshot", "oneshot"] |
24 | 23 |
|
25 | 24 |
|
26 | 25 | class Oneshot: |
@@ -122,11 +121,10 @@ def __init__( |
122 | 121 | :param output_dir: Path to save the output model after carrying out oneshot |
123 | 122 |
|
124 | 123 | """ |
125 | | - |
126 | | - model_args, dataset_args, recipe_args, output_dir = parse_oneshot_args(**kwargs) |
| 124 | + model_args, dataset_args, recipe_args, _, output_dir = parse_args(**kwargs) |
127 | 125 |
|
128 | 126 | self.model_args = model_args |
129 | | - self.dataset_args = dataset_args |
| 127 | + self.data_args = dataset_args |
130 | 128 | self.recipe_args = recipe_args |
131 | 129 | self.output_dir = output_dir |
132 | 130 |
|
@@ -315,64 +313,3 @@ def oneshot(**kwargs) -> PreTrainedModel: |
315 | 313 | one_shot() |
316 | 314 |
|
317 | 315 | return one_shot.model |
318 | | - |
319 | | - |
320 | | -def parse_oneshot_args( |
321 | | - **kwargs, |
322 | | -) -> Tuple[ModelArguments, DatasetArguments, RecipeArguments, str]: |
323 | | - """ |
324 | | - Parses kwargs by grouping into model, data or training arg groups: |
325 | | - * model_args in |
326 | | - src/llmcompressor/transformers/utils/arg_parser/model_args.py |
327 | | - * dataset_args in |
328 | | - src/llmcompressor/transformers/utils/arg_parser/dataset_args.py |
329 | | - * recipe_args in |
330 | | - src/llmcompressor/transformers/utils/arg_parser/recipe_args.py |
331 | | - * training_args in |
332 | | - src/llmcompressor/transformers/utils/arg_parser/training_args.py |
333 | | - """ |
334 | | - output_dir = kwargs.pop("output_dir", None) |
335 | | - |
336 | | - parser = HfArgumentParser((ModelArguments, DatasetArguments, RecipeArguments)) |
337 | | - |
338 | | - if not kwargs: |
339 | | - |
340 | | - def _get_output_dir_from_argv() -> Optional[str]: |
341 | | - import sys |
342 | | - |
343 | | - output_dir = None |
344 | | - if "--output_dir" in sys.argv: |
345 | | - index = sys.argv.index("--output_dir") |
346 | | - sys.argv.pop(index) |
347 | | -            if index < len(sys.argv):  # Check if value exists after the flag |
348 | | - output_dir = sys.argv.pop(index) |
349 | | - |
350 | | - return output_dir |
351 | | - |
352 | | - output_dir = _get_output_dir_from_argv() or output_dir |
353 | | - parsed_args = parser.parse_args_into_dataclasses() |
354 | | - else: |
355 | | - parsed_args = parser.parse_dict(kwargs) |
356 | | - |
357 | | - model_args, dataset_args, recipe_args = parsed_args |
358 | | - |
359 | | - if recipe_args.recipe_args is not None: |
360 | | - if not isinstance(recipe_args.recipe_args, dict): |
361 | | - arg_dict = {} |
362 | | - for recipe_arg in recipe_args.recipe_args: |
363 | | - key, value = recipe_arg.split("=") |
364 | | - arg_dict[key] = value |
365 | | - recipe_args.recipe_args = arg_dict |
366 | | - |
367 | | -    # raise deprecation warnings |
368 | | - if dataset_args.remove_columns is not None: |
369 | | - logger.warning( |
370 | | -            "`remove_columns` argument is deprecated. When tokenizing datasets, all " |
371 | | -            "columns which are invalid inputs to the tokenizer will be removed", |
372 | | - DeprecationWarning, |
373 | | - ) |
374 | | - |
375 | | - # silently assign tokenizer to processor |
376 | | - resolve_processor_from_model_args(model_args) |
377 | | - |
378 | | - return model_args, dataset_args, recipe_args, output_dir |
0 commit comments