Skip to content

Commit e8796d7

Browse files
committed
Minimal refactoring of Dataset.load() so that its arguments are in line with those of cd.download()
1 parent 413a6a3 commit e8796d7

File tree

1 file changed

+24
-13
lines changed

1 file changed

+24
-13
lines changed

coderdata/dataset/dataset.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ def save(self, path: Path) -> None:
386386

387387
def load(
388388
name: str,
389-
directory: str|Path=Path.cwd(),
389+
local_path: str|Path=Path.cwd(),
390390
from_pickle:bool=False
391391
) -> Dataset:
392392
"""
@@ -411,50 +411,62 @@ def load(
411411
TypeError
412412
_description_
413413
"""
414-
print("Processing Data...", file=sys.stderr)
415414

416-
if type(directory) is not Path:
415+
if type(local_path) is not Path:
417416
try:
418-
directory = Path(directory)
419-
if not directory.exists():
417+
local_path = Path(local_path)
418+
if not local_path.exists():
420419
raise OSError(
421-
f"Given path / directory does not exist: '{directory}'"
420+
f"Given path / directory does not exist: '{local_path}'"
422421
)
423422
except TypeError:
424423
raise TypeError(
425-
f"Invalid path / directory defined: '{directory}'"
424+
f"Invalid path / directory defined: '{local_path}'"
426425
)
427426

428427

429428
if not from_pickle:
430429
dataset = Dataset(name)
431430
accepted_file_endings = ('.csv', '.tsv', '.csv.gz', '.tsv.gz')
432-
for child in directory.iterdir():
431+
print(f"Importing raw data ...", file=sys.stderr)
432+
for child in local_path.iterdir():
433433
if child.name in ["genes.csv", "genes.csv.gz"]:
434+
print(
435+
f"Importing 'genes' from {child} ...",
436+
end=' ',
437+
file=sys.stderr
438+
)
434439
dataset.genes = _load_file(child)
435-
print("Loaded genes dataset.", file=sys.stderr)
440+
print("DONE", file=sys.stderr)
436441

437442
if (
438443
child.name.startswith(name)
439444
and child.name.endswith(accepted_file_endings)
440445
):
441446

442447
dataset_type = child.name[len(name)+1:].split('.')[0]
443-
print(dataset_type)
448+
print(
449+
f"Importing '{dataset_type}' from {child} ...",
450+
end=' ',
451+
file=sys.stderr
452+
)
444453
if hasattr(dataset, dataset_type):
445454
setattr(dataset, dataset_type, _load_file(child))
446-
455+
print("DONE", file=sys.stderr)
456+
print(f"Importing raw data ... DONE", file=sys.stderr)
447457
return dataset
448458

449459
else:
450460
accepted_file_endings = ('.pkl', '.pickle')
451-
for child in directory.iterdir():
461+
for child in local_path.iterdir():
452462
if (
453463
child.name.startswith(name)
454464
and child.name.endswith(accepted_file_endings)
455465
):
466+
print(f"Importing pickled data ...", end=' ', file=sys.stderr)
456467
with open(child, 'rb') as file:
457468
dataset = pickle.load(file=file)
469+
print("DONE", file=sys.stderr)
458470
return dataset
459471

460472

@@ -1004,7 +1016,6 @@ def _load_file(file_path: Path) -> pd.DataFrame:
10041016

10051017

10061018
def _determine_delimiter(file_path):
1007-
print(file_path.suffixes)
10081019
if '.tsv' in file_path.suffixes:
10091020
return '\t'
10101021
else:

0 commit comments

Comments
 (0)