@@ -69,13 +69,37 @@ def populate_bids_templates(path, defaults={}):
         "TODO: Provide description for the dataset -- basic details about the "
         "study, possibly pointing to pre-registration (if public or embargoed)")
 
+    populate_aggregated_jsons(path)
+
+
+def populate_aggregated_jsons(path):
+    """Aggregate across the entire BIDS dataset .json's into top level .json's
+
+    Top level .json files would contain only the fields which are
+    common to all subject[/session]/type/*_modality.json's.
+
+    ATM aggregating only for *_task*_bold.json files. Only the task- and
+    OPTIONAL _acq- field is retained within the aggregated filename. The other
+    BIDS _key-value pairs are "aggregated over".
+
+    Parameters
+    ----------
+    path: str
+      Path to the top of the BIDS dataset
+    """
     # TODO: collect all task- .json files for func files to
     tasks = {}
     # way too many -- let's just collect all which are the same!
     # FIELDS_TO_TRACK = {'RepetitionTime', 'FlipAngle', 'EchoTime',
     #                    'Manufacturer', 'SliceTiming', ''}
     for fpath in find_files('.*_task-.*\_bold\.json', topdir=path,
-                            exclude_vcs=True, exclude="/\.(datalad|heudiconv)/"):
+                            exclude_vcs=True,
+                            exclude="/\.(datalad|heudiconv)/"):
+        #
+        # According to BIDS spec I think both _task AND _acq (may be more?
+        # _rec, _dir, ...?) should be retained?
+        # TODO: if we are to fix it, then old ones (without _acq) should be
+        # removed first
         task = re.sub('.*_(task-[^_\.]*(_acq-[^_\.]*)?)_.*', r'\1', fpath)
         json_ = load_json(fpath)
         if task not in tasks:
@@ -94,18 +118,36 @@ def populate_bids_templates(path, defaults={}):
             if not op.lexists(events_file):
                 lgr.debug("Generating %s", events_file)
                 with open(events_file, 'w') as f:
-                    f.write("onset\tduration\ttrial_type\tresponse_time\tstim_file\tTODO -- fill in rows and add more tab-separated columns if desired")
+                    f.write(
+                        "onset\tduration\ttrial_type\tresponse_time\tstim_file"
+                        "\tTODO -- fill in rows and add more tab-separated "
+                        "columns if desired")
     # extract tasks files stubs
     for task_acq, fields in tasks.items():
         task_file = op.join(path, task_acq + '_bold.json')
-        # do not touch any existing thing, it may be precious
-        if not op.lexists(task_file):
-            lgr.debug("Generating %s", task_file)
-            fields["TaskName"] = ("TODO: full task name for %s" %
-                                  task_acq.split('_')[0].split('-')[1])
-            fields["CogAtlasID"] = "TODO"
-            with open(task_file, 'w') as f:
-                f.write(json_dumps_pretty(fields, indent=2, sort_keys=True))
+        # Since we are pulling all unique fields we have to possibly
+        # rewrite this file to guarantee consistency.
+        # See https://github.com/nipy/heudiconv/issues/277 for a usecase/bug
+        # when we didn't touch existing one.
+        # But the fields we enter (TaskName and CogAtlasID) might need
+        # to be populated from the file if it already exists
+        placeholders = {
+            "TaskName": ("TODO: full task name for %s" %
+                         task_acq.split('_')[0].split('-')[1]),
+            "CogAtlasID": "TODO",
+        }
+        if op.lexists(task_file):
+            j = load_json(task_file)
+            # Retain possibly modified placeholder fields
+            for f in placeholders:
+                if f in j:
+                    placeholders[f] = j[f]
+            act = "Regenerating"
+        else:
+            act = "Generating"
+        lgr.debug("%s %s", act, task_file)
+        fields.update(placeholders)
+        save_json(task_file, fields, indent=2, sort_keys=True, pretty=True)
 
 
 def tuneup_bids_json_files(json_files):
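
For reference, the re.sub() call in the first hunk keeps only the task- and, when present, the _acq- entity of each *_bold.json filename, and that label then names the aggregated top-level JSON. A minimal standalone sketch of that extraction (the example path below is hypothetical, not taken from the diff):

    import re

    # hypothetical per-run sidecar path
    fpath = "sub-01/func/sub-01_task-rest_acq-highres_bold.json"
    # same pattern as in the diff: keep task- plus the optional _acq- entity
    task = re.sub(r'.*_(task-[^_\.]*(_acq-[^_\.]*)?)_.*', r'\1', fpath)
    print(task)  # -> task-rest_acq-highres (just task-rest if _acq- is absent)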
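The functional change in the second hunk is that the aggregated *_bold.json stubs are now rewritten on every run so they stay consistent with the aggregated fields, while any already filled-in TaskName/CogAtlasID values are preserved rather than reset to TODO placeholders. A minimal standalone sketch of that merge step, assuming plain stdlib json in place of heudiconv's load_json/save_json helpers (the function name below is made up for illustration):

    import json
    import os.path as op

    def write_task_stub(task_file, aggregated_fields, placeholders):
        # If the stub already exists, keep user-edited placeholder values
        # (TaskName, CogAtlasID) instead of clobbering them with TODOs.
        if op.lexists(task_file):
            with open(task_file) as f:
                existing = json.load(f)
            for key in placeholders:
                if key in existing:
                    placeholders[key] = existing[key]
        # Placeholder keys override the aggregated values for those keys;
        # all other aggregated fields are written out as-is.
        aggregated_fields.update(placeholders)
        with open(task_file, 'w') as f:
            json.dump(aggregated_fields, f, indent=2, sort_keys=True)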