@@ -13,7 +13,7 @@
 from pydantic import BaseModel, Field

 from .errors import InvalidConfigError, InvalidFileFormatError, InvalidFilePathError
-from .utils.io_helpers import VALID_DATASET_FILE_EXTENSIONS
+from .utils.io_helpers import VALID_DATASET_FILE_EXTENSIONS, validate_path_contains_files_of_type

 if TYPE_CHECKING:
     from .seed import SeedDatasetReference
@@ -38,7 +38,7 @@ def get_file_column_names(file_path: Union[str, Path], file_type: str) -> list[str]:
     matching_files = sorted(file_path.parent.glob(file_path.name))
     if not matching_files:
         raise InvalidFilePathError(f"🛑 No files found matching pattern: {str(file_path)!r}")
-    logger.info(f"0️⃣ Using the first matching file in {str(file_path)!r} to determine column names in seed dataset")
+    logger.debug(f"0️⃣ Using the first matching file in {str(file_path)!r} to determine column names in seed dataset")
     file_path = matching_files[0]

     if file_type == "parquet":
@@ -137,10 +137,7 @@ def _fetch_seed_dataset_column_names_from_local_file(dataset_path: str | Path) -> list[str]:

 def _validate_dataset_path(dataset_path: Union[str, Path], allow_glob_pattern: bool = False) -> Path:
     if allow_glob_pattern and "*" in str(dataset_path):
-        valid_wild_card_versions = {f"*{ext}" for ext in VALID_DATASET_FILE_EXTENSIONS}
-        if not any(dataset_path.endswith(wildcard) for wildcard in valid_wild_card_versions):
-            file_extension = dataset_path.split("*.")[-1]
-            raise InvalidFilePathError(f"🛑 Path {dataset_path!r} does not contain files of type {file_extension!r}.")
+        validate_path_contains_files_of_type(dataset_path, str(dataset_path).split(".")[-1])
         return Path(dataset_path)
     if not Path(dataset_path).is_file():
         raise InvalidFilePathError("🛑 To upload a dataset to the datastore, you must provide a valid file path.")
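
For reference, the `validate_path_contains_files_of_type` helper that this diff starts calling lives in `.utils.io_helpers` and is not shown here. A minimal sketch of what it plausibly looks like, inferred from the call site — the signature, the contents of `VALID_DATASET_FILE_EXTENSIONS`, and the error message are assumptions:

```python
from pathlib import Path
from typing import Union

# Assumed contents — the real set is defined in .utils.io_helpers.
VALID_DATASET_FILE_EXTENSIONS = {".csv", ".json", ".parquet"}


class InvalidFilePathError(ValueError):
    """Stand-in for the project's exception from .errors."""


def validate_path_contains_files_of_type(dataset_path: Union[str, Path], file_extension: str) -> None:
    """Raise InvalidFilePathError when `file_extension` is not a supported dataset type."""
    if f".{file_extension}" not in VALID_DATASET_FILE_EXTENSIONS:
        raise InvalidFilePathError(
            f"🛑 Path {str(dataset_path)!r} does not contain files of type {file_extension!r}."
        )
```

Centralizing the extension check this way is what lets the glob branch in `_validate_dataset_path` shrink to a single call, and the same validation can be reused by other call sites that accept dataset paths.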