|
| 1 | +import importlib |
| 2 | +from pathlib import Path |
| 3 | + |
def load_module_from_py_file(py_file: str) -> object:
    """Load and execute a Python source file that is not on the import path.

    The module is built directly from ``py_file`` with a
    ``SourceFileLoader`` and executed once. This function does not add the
    module to ``sys.modules``.

    Args:
        py_file: Path to the ``.py`` source file to load.

    Returns:
        The executed module object.

    Raises:
        OSError: If the source file cannot be read.
        Exception: Whatever the file's top-level code raises while executing.
    """
    # Import the submodules explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.machinery`` and ``importlib.util`` are
    # available as attributes of the package.
    import importlib.machinery
    import importlib.util

    # Use the stem so a file "foo.py" yields a module named "foo"; a name of
    # "foo.py" would look like submodule "py" of a package "foo".
    module_name = Path(py_file).stem
    loader = importlib.machinery.SourceFileLoader(module_name, py_file)
    spec = importlib.util.spec_from_loader(module_name, loader)
    module = importlib.util.module_from_spec(spec)

    loader.exec_module(module)

    return module
| 16 | + |
| 17 | + |
def get_custom_dataset(dataset_config, tokenizer, split: str):
    """Build a dataset by calling a user-supplied function in a ``.py`` file.

    ``dataset_config.file`` is either ``"path/to/file.py"``, in which case
    the function named ``get_custom_dataset`` in that file is used, or
    ``"path/to/file.py:func_name"`` to select another function.

    Args:
        dataset_config: Object exposing a ``file`` attribute; it is also
            passed through to the dataset function.
        tokenizer: Passed through to the dataset function unchanged.
        split: Passed through to the dataset function unchanged.

    Returns:
        Whatever the selected dataset function returns.

    Raises:
        ValueError: If the module path does not end with ``.py``.
        FileNotFoundError: If the module path is not an existing file.
        AttributeError: If the loaded module has no attribute with the
            requested function name.
    """
    file_spec = dataset_config.file

    # Split on the LAST colon only, and only when what follows is not itself
    # a ".py" path. This keeps paths that contain a colon (for example a
    # drive-letter path such as C:\data\ds.py) from being cut in the middle.
    head, sep, tail = file_spec.rpartition(":")
    if sep and not tail.endswith(".py"):
        module_path, func_name = head, tail
    else:
        module_path, func_name = file_spec, "get_custom_dataset"

    if not module_path.endswith(".py"):
        raise ValueError(f"Dataset file {module_path} is not a .py file.")

    module_path = Path(module_path)
    if not module_path.is_file():
        raise FileNotFoundError(f"Dataset py file {module_path.as_posix()} does not exist or is not a file.")

    module = load_module_from_py_file(module_path.as_posix())

    # Guard only the lookup: an AttributeError raised inside the dataset
    # function itself must not be reported as a missing function.
    try:
        dataset_fn = getattr(module, func_name)
    except AttributeError:
        print(f"It seems like the given method name ({func_name}) is not present in the dataset .py file ({module_path.as_posix()}).")
        raise

    return dataset_fn(dataset_config, tokenizer, split)
| 37 | + |
0 commit comments