From 2bea7dd81cc5c88481b1e16793ad93a16fb28d1b Mon Sep 17 00:00:00 2001
From: Oscar Esteban
Date: Tue, 15 Jul 2025 13:42:12 +0200
Subject: [PATCH] Handle missing dataset description

Only read desc_files[0] in create_dataset() when the recursive glob
found at least one dataset_description.json; otherwise fall back to the
folder name for the dataset name.

Add a test covering a dataset directory that has no
dataset_description.json.
---
Review notes (commentary area; not part of the commit message):

* The line structure and leading indentation of this patch were
  reconstructed from a flattened copy. The nesting depth of the
  qkay/qkay.py lines is an assumption -- verify against the target file
  or apply with `git apply --ignore-whitespace`.
* data_description["Name"] still raises KeyError when the JSON has no
  "Name" key, whereas the new test uses .get('Name', ...); the test
  logic and create_dataset() therefore differ.
* The new test re-implements the name-selection logic and never calls
  create_dataset(), so it does not exercise the changed code path.

 qkay/qkay.py                | 40 ++++++++++++++++++++++++++-----------
 test/test_create_dataset.py | 30 ++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 12 deletions(-)
 create mode 100644 test/test_create_dataset.py

diff --git a/qkay/qkay.py b/qkay/qkay.py
index 59ab47c..fd826b0 100644
--- a/qkay/qkay.py
+++ b/qkay/qkay.py
@@ -820,26 +820,42 @@ def create_dataset():
 
             # Get dataset name from the data_description.json file if it exists
             # otherwise, use the folder name
-            desc_file = ""
-            desc_files = glob.glob(os.path.join(dataset_path, "**", "dataset_description.json"), recursive=True)
+
+            desc_files = glob.glob(
+                os.path.join(dataset_path, "**", "dataset_description.json"),
+                recursive=True,
+            )
             if len(desc_files) > 1:
-                app.logger.warning("More than one dataset_description.json was found!: %s .", desc_files)
-
-            desc_file = desc_files[0]
-            app.logger.debug("dataset_description.json found at %s.", desc_file)
-            if desc_file:
+                app.logger.warning(
+                    "More than one dataset_description.json was found!: %s .",
+                    desc_files,
+                )
+
+            if desc_files:
+                desc_file = desc_files[0]
+                app.logger.debug(
+                    "dataset_description.json found at %s.", desc_file
+                )
                 with open(desc_file, "r") as file:
                     data_description = json.load(file)
                 dataset_name = data_description["Name"]
-                app.logger.info("The dataset name %s was assigned based on the name in %s", dataset_name, desc_file)
-                # If the name of the dataset is the default MRIQC value, use the folder name instead
+                app.logger.info(
+                    "The dataset name %s was assigned based on the name in %s",
+                    dataset_name,
+                    desc_file,
+                )
                 if dataset_name == "MRIQC - MRI Quality Control":
-                    app.logger.info("The dataset name is the default of MRIQC which is not informative, using folder name instead: %s.", d)
+                    app.logger.info(
+                        "The dataset name is the default of MRIQC which is not informative, using folder name instead: %s.",
+                        d,
+                    )
                     dataset_name = d
             else:
-                app.logger.info("No dataset_description.json found, assigning dataset name to folder name: %s.", d)
+                app.logger.info(
+                    "No dataset_description.json found, assigning dataset name to folder name: %s.",
+                    d,
+                )
                 dataset_name = d
-
             dataset = Dataset(name=dataset_name, path_dataset=dataset_path)
             existing_dataset = Dataset.objects(name=dataset_name).first()
             if not dataset.validate_dataset():
diff --git a/test/test_create_dataset.py b/test/test_create_dataset.py
new file mode 100644
index 0000000..8811565
--- /dev/null
+++ b/test/test_create_dataset.py
@@ -0,0 +1,30 @@
+import sys
+import glob
+import json
+import os
+from importlib.machinery import SourceFileLoader
+
+
+def test_dataset_creation_without_description(tmp_path):
+    dataset_dir = tmp_path / "ds1"
+    dataset_dir.mkdir()
+    (dataset_dir / "sub-001.html").write_text("")
+
+    sys.path.append('qkay')
+    mod = SourceFileLoader('qkay_module', 'qkay/qkay.py').load_module()
+    Dataset = mod.Dataset
+
+    dataset_path = str(dataset_dir)
+    desc_files = glob.glob(os.path.join(dataset_path, '**', 'dataset_description.json'), recursive=True)
+    if desc_files:
+        with open(desc_files[0]) as f:
+            data_description = json.load(f)
+        dataset_name = data_description.get('Name', dataset_dir.name)
+        if dataset_name == 'MRIQC - MRI Quality Control':
+            dataset_name = dataset_dir.name
+    else:
+        dataset_name = dataset_dir.name
+
+    dataset = Dataset(name=dataset_name, path_dataset=dataset_path)
+    assert dataset.validate_dataset() is True
+    assert dataset.name == dataset_dir.name