minor changes to get pipeline running

josephmje · josephmje · commit 5d3d096068fe · 2019-09-13T13:39:04.000-04:00
diff --git a/dmriprep/interfaces/reports.py b/dmriprep/interfaces/reports.py
@@ -5,21 +5,20 @@
 import os
 import time
 
-from collections import Counter
+# from collections import Counter
 from nipype.interfaces.base import (
     traits, TraitedSpec, BaseInterfaceInputSpec,
     File, Directory, InputMultiObject, Str, isdefined,
     SimpleInterface)
 from nipype.interfaces import freesurfer as fs
-from niworkflows.utils.bids import BIDS_NAME
+# from niworkflows.utils.bids import BIDS_NAME
 
 
 SUBJECT_TEMPLATE = """\
 \t<ul class="elem-desc">
 \t\t<li>Subject ID: {subject_id}</li>
 \t\t<li>Structural images: {n_t1s:d} T1-weighted {t2w}</li>
 \t\t<li>Diffusion Weighted Images: {n_dwi:d}</li>
-{tasks}
 \t\t<li>Standard output spaces: {std_spaces}</li>
 \t\t<li>Non-standard output spaces: {nstd_spaces}</li>
 \t\t<li>FreeSurfer reconstruction: {freesurfer_status}</li>
@@ -109,24 +108,24 @@ def _generate_segment(self):
         dwi_files = self.inputs.dwi if isdefined(self.inputs.dwi) else []
         dwi_files = [s[0] if isinstance(s, list) else s for s in dwi_files]
 
-        counts = Counter(BIDS_NAME.search(series).groupdict()['task_id'][5:]
-                         for series in dwi_files)
+        # counts = Counter(BIDS_NAME.search(series).groupdict()['task_id'][5:]
+        #                  for series in dwi_files)
 
-        tasks = ''
-        if counts:
-            header = '\t\t<ul class="elem-desc">'
-            footer = '\t\t</ul>'
-            lines = ['\t\t\t<li>Task: {task_id} ({n_runs:d} run{s})</li>'.format(
-                     task_id=task_id, n_runs=n_runs, s='' if n_runs == 1 else 's')
-                     for task_id, n_runs in sorted(counts.items())]
-            tasks = '\n'.join([header] + lines + [footer])
+        # tasks = ''
+        # if counts:
+        #     header = '\t\t<ul class="elem-desc">'
+        #     footer = '\t\t</ul>'
+        #     lines = ['\t\t\t<li>Task: {task_id} ({n_runs:d} run{s})</li>'.format(
+        #              task_id=task_id, n_runs=n_runs, s='' if n_runs == 1 else 's')
+        #              for task_id, n_runs in sorted(counts.items())]
+        #     tasks = '\n'.join([header] + lines + [footer])
 
         return SUBJECT_TEMPLATE.format(
             subject_id=self.inputs.subject_id,
             n_t1s=len(self.inputs.t1w),
             t2w=t2w_seg,
             n_dwi=len(dwi_files),
-            tasks=tasks,
+            # tasks=tasks,
             std_spaces=', '.join(self.inputs.std_spaces),
             nstd_spaces=', '.join(self.inputs.nstd_spaces),
             freesurfer_status=freesurfer_status)
diff --git a/dmriprep/utils/bids.py b/dmriprep/utils/bids.py
@@ -8,8 +8,7 @@
 from bids import BIDSLayout
 
 
-def collect_data(bids_dir, participant_label, task=None, echo=None,
-                 bids_validate=True):
+def collect_data(bids_dir, participant_label, bids_validate=True):
     """Replacement for niworkflows' version."""
     if isinstance(bids_dir, BIDSLayout):
         layout = bids_dir
@@ -19,20 +18,12 @@ def collect_data(bids_dir, participant_label, task=None, echo=None,
     queries = {
         'fmap': {'datatype': 'fmap'},
         'dwi': {'datatype': 'dwi', 'suffix': 'dwi'},
-        'bold': {'datatype': 'func', 'suffix': 'bold'},
-        'sbref': {'datatype': 'func', 'suffix': 'sbref'},
         'flair': {'datatype': 'anat', 'suffix': 'FLAIR'},
         't2w': {'datatype': 'anat', 'suffix': 'T2w'},
         't1w': {'datatype': 'anat', 'suffix': 'T1w'},
         'roi': {'datatype': 'anat', 'suffix': 'roi'},
     }
 
-    if task:
-        queries['bold']['task'] = task
-
-    if echo:
-        queries['bold']['echo'] = echo
-
     subj_data = {
         dtype: sorted(layout.get(return_type='file', subject=participant_label,
                                  extension=['nii', 'nii.gz'], **query))
@@ -98,25 +89,20 @@ def validate_input_dir(exec_env, bids_dir, participant_label):
             "TSV_EQUAL_ROWS",
             "TSV_EMPTY_CELL",
             "TSV_IMPROPER_NA",
-            "VOLUME_COUNT_MISMATCH",
-            "BVAL_MULTIPLE_ROWS",
-            "BVEC_NUMBER_ROWS",
-            "DWI_MISSING_BVAL",
             "INCONSISTENT_SUBJECTS",
             "INCONSISTENT_PARAMETERS",
-            "BVEC_ROW_LENGTH",
-            "B_FILE",
             "PARTICIPANT_ID_COLUMN",
             "PARTICIPANT_ID_MISMATCH",
             "TASK_NAME_MUST_DEFINE",
             "PHENOTYPE_SUBJECTS_MISSING",
             "STIMULUS_FILE_MISSING",
-            "DWI_MISSING_BVEC",
+            "BOLD_NOT_4D",
             "EVENTS_TSV_MISSING",
             "TSV_IMPROPER_NA",
             "ACQTIME_FMT",
             "Participants age 89 or higher",
             "DATASET_DESCRIPTION_JSON_MISSING",
+            "TASK_NAME_CONTAIN_ILLEGAL_CHARACTER",
             "FILENAME_COLUMN",
             "WRONG_NEW_LINE",
             "MISSING_TSV_COLUMN_CHANNELS",
@@ -131,8 +117,6 @@ def validate_input_dir(exec_env, bids_dir, participant_label):
             "ACQTIME_FMT",
             "SUSPICIOUSLY_LONG_EVENT_DESIGN",
             "SUSPICIOUSLY_SHORT_EVENT_DESIGN",
-            "MALFORMED_BVEC",
-            "MALFORMED_BVAL",
             "MISSING_TSV_COLUMN_EEG_ELECTRODES",
             "MISSING_SESSION"
         ],
diff --git a/dmriprep/utils/sentry.py b/dmriprep/utils/sentry.py
@@ -0,0 +1,261 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+"""Stripped out routines for Sentry."""
+import os
+from pathlib import Path
+import re
+from niworkflows.utils.misc import read_crashfile
+import sentry_sdk
+
+# Default length of the pieces produced by ``_chunks``
+CHUNK_SIZE = 16384
+# Character budget for the messages built by ``process_crashfile``
+MAX_MESSAGE_SIZE = 8192
+# Group common events with pre specified fingerprints
+KNOWN_ERRORS = {
+    'permission-denied': [
+        "PermissionError: [Errno 13] Permission denied"
+    ],
+    'memory-error': [
+        "MemoryError",
+        "Cannot allocate memory",
+        "Return code: 134",
+    ],
+    'reconall-already-running': [
+        "ERROR: it appears that recon-all is already running"
+    ],
+    'no-disk-space': [
+        "[Errno 28] No space left on device",
+        "[Errno 122] Disk quota exceeded"
+    ],
+    'segfault': [
+        "Segmentation Fault",
+        "Segfault",
+        "Return code: 139",
+    ],
+    'potential-race-condition': [
+        "[Errno 39] Directory not empty",
+        "_unfinished.json",
+    ],
+    'keyboard-interrupt': [
+        "KeyboardInterrupt",
+    ],
+}
+
+
+def start_ping(run_uuid, npart):
+    """
+    Tag the Sentry scope for this run and send a "dMRIPrep started" message.
+
+    Parameters
+    ----------
+    run_uuid
+        Value for the ``run_uuid`` tag; the tag is skipped when this is falsy.
+    npart
+        Value for the ``npart`` tag (presumably the number of participants).
+
+    """
+    with sentry_sdk.configure_scope() as scope:
+        if run_uuid:
+            scope.set_tag('run_uuid', run_uuid)
+        scope.set_tag('npart', npart)
+    sentry_sdk.add_breadcrumb(message='dMRIPrep started', level='info')
+    sentry_sdk.capture_message('dMRIPrep started', level='info')
+
+
+def sentry_setup(opts, exec_env):
+    """
+    Initialize the Sentry SDK and tag the scope with details of this run.
+
+    Parameters
+    ----------
+    opts
+        Run options. ``opts.bids_dir`` is used as a path object and every
+        attribute found by ``vars(opts)`` is sent as a tag.
+    exec_env
+        Value for the ``exec_env`` tag; ``'dmriprep-docker'`` additionally
+        records the ``DOCKER_VERSION_8395080871`` environment variable.
+
+    """
+    import psutil
+    import hashlib
+    from ..__about__ import __version__
+
+    environment = "prod"
+    release = __version__
+    if not __version__:
+        environment = "dev"
+        release = "dev"
+    elif int(os.getenv('DMRIPREP_DEV', '0')) or ('+' in __version__):
+        environment = "dev"
+
+    # NOTE(review): confirm this DSN belongs to the dMRIPrep Sentry project
+    sentry_sdk.init("https://d5a16b0c38d84d1584dfc93b9fb1ade6@sentry.io/1137693",
+                    release=release,
+                    environment=environment,
+                    before_send=before_send)
+    with sentry_sdk.configure_scope() as scope:
+        scope.set_tag('exec_env', exec_env)
+
+        if exec_env == 'dmriprep-docker':
+            scope.set_tag('docker_version', os.getenv('DOCKER_VERSION_8395080871'))
+
+        dset_desc_path = opts.bids_dir / 'dataset_description.json'
+        if dset_desc_path.exists():
+            desc_content = dset_desc_path.read_bytes()
+            scope.set_tag('dset_desc_sha256', hashlib.sha256(desc_content).hexdigest())
+
+        free_mem_at_start = round(psutil.virtual_memory().free / 1024**3, 1)
+        scope.set_tag('free_mem_at_start', free_mem_at_start)
+        scope.set_tag('cpu_count', os.cpu_count())
+
+        # Memory policy may have a large effect on types of errors experienced
+        overcommit_memory = Path('/proc/sys/vm/overcommit_memory')
+        if overcommit_memory.exists():
+            policy = {'0': 'heuristic',
+                      '1': 'always',
+                      '2': 'never'}.get(overcommit_memory.read_text().strip(), 'unknown')
+            scope.set_tag('overcommit_memory', policy)
+            if policy == 'never':
+                # Report the absolute limit (overcommit_kbytes) when it is set,
+                # and the percentage (overcommit_ratio) otherwise. A missing
+                # overcommit_kbytes file is treated the same as an unset one.
+                overcommit_kbytes = Path('/proc/sys/vm/overcommit_kbytes')
+                kb = '0'
+                if overcommit_kbytes.exists():
+                    kb = overcommit_kbytes.read_text().strip()
+                if kb != '0':
+                    limit = '{}kB'.format(kb)
+                else:
+                    overcommit_ratio = Path('/proc/sys/vm/overcommit_ratio')
+                    limit = '{}%'.format(overcommit_ratio.read_text().strip())
+                scope.set_tag('overcommit_limit', limit)
+            else:
+                scope.set_tag('overcommit_limit', 'n/a')
+        else:
+            scope.set_tag('overcommit_memory', 'n/a')
+            scope.set_tag('overcommit_limit', 'n/a')
+
+        for k, v in vars(opts).items():
+            scope.set_tag(k, v)
+
+
+def process_crashfile(crashfile):
+    """
+    Parse the contents of a crashfile and submit sentry messages.
+
+    Parameters
+    ----------
+    crashfile
+        Location of the crash file; it is converted with ``str`` before
+        being handed to ``read_crashfile``.
+
+    """
+    crash_info = read_crashfile(str(crashfile))
+    with sentry_sdk.push_scope() as scope:
+        scope.level = 'fatal'
+
+        # Extract node name
+        node_name = crash_info.pop('node').split('.')[-1]
+        scope.set_tag("node_name", node_name)
+
+        # Massage the traceback, extract the gist
+        traceback = crash_info.pop('traceback')
+        tb_lines = traceback.splitlines()
+        # last line is probably most informative summary
+        gist = tb_lines[-1] if tb_lines else ''
+        # Skip the first line and the indented lines that follow it; the
+        # exception text starts at the first non-indented line after those.
+        # Blank lines are skipped too, rather than indexed into.
+        exception_text_start = 1
+        for line in tb_lines[1:]:
+            if line and not line[0].isspace():
+                break
+            exception_text_start += 1
+
+        exception_text = '\n'.join(tb_lines[exception_text_start:])
+
+        # Extract inputs, if present
+        inputs = crash_info.pop('inputs', None)
+        if inputs:
+            scope.set_extra('inputs', dict(inputs))
+
+        # Extract any other possible metadata in the crash file
+        for k, v in crash_info.items():
+            strv = list(_chunks(str(v)))
+            if len(strv) == 1:
+                scope.set_extra(k, strv[0])
+            else:
+                for i, chunk in enumerate(strv):
+                    scope.set_extra('%s_%02d' % (k, i), chunk)
+
+        fingerprint = ''
+        issue_title = '{}: {}'.format(node_name, gist)
+        for new_fingerprint, error_snippets in KNOWN_ERRORS.items():
+            if any(snippet in traceback for snippet in error_snippets):
+                fingerprint = new_fingerprint
+                issue_title = new_fingerprint
+                break
+
+        message = issue_title + '\n\n'
+        # Append only the tail of the exception text that still fits in the
+        # budget. A non-positive budget must not reach the slice, because
+        # ``text[-0:]`` would return the whole text.
+        remaining = MAX_MESSAGE_SIZE - len(message)
+        if remaining > 0:
+            message += exception_text[-remaining:]
+        if fingerprint:
+            sentry_sdk.add_breadcrumb(message=fingerprint, level='fatal')
+        else:
+            # remove file paths
+            fingerprint = re.sub(r"(/[^/ ]*)+/?", '', message)
+            # remove words containing numbers
+            fingerprint = re.sub(r"([a-zA-Z]*[0-9]+[a-zA-Z]*)+", '', fingerprint)
+            # adding the return code if it exists
+            for line in message.splitlines():
+                if line.startswith("Return code"):
+                    fingerprint += line
+                    break
+
+        scope.fingerprint = [fingerprint]
+        sentry_sdk.capture_message(message, 'fatal')
+
+
+def before_send(event, hints):
+    """
+    Drop redundant crash log events and propagate selected fingerprints.
+
+    Returns ``None`` for log entries about crashed nodes (matched by message
+    prefix or pattern); otherwise returns ``event``, with its fingerprint
+    replaced when a breadcrumb message names one of the propagated ones.
+    """
+    # Filtering log messages about crashed nodes
+    if 'logentry' in event and 'message' in event['logentry']:
+        msg = event['logentry']['message']
+        if msg.startswith(("could not run node:", "Saving crash info to ")):
+            return None
+        if re.match(r"Node .+ failed to run on host .+", msg):
+            return None
+
+    if 'breadcrumbs' in event and isinstance(event['breadcrumbs'], list):
+        fingerprints_to_propagate = ['no-disk-space', 'memory-error', 'permission-denied',
+                                     'keyboard-interrupt']
+        for bc in event['breadcrumbs']:
+            msg = bc.get('message', 'empty-msg')
+            if msg in fingerprints_to_propagate:
+                event['fingerprint'] = [msg]
+                break
+
+    return event
+
+
+def _chunks(string, length=CHUNK_SIZE):
+    """
+    Split a string into pieces of at most ``length`` characters.
+
+    >>> list(_chunks('some longer string.', length=3))
+    ['som', 'e l', 'ong', 'er ', 'str', 'ing', '.']
+
+    """
+    return (string[i:i + length]
+            for i in range(0, len(string), length))
diff --git a/dmriprep/workflows/base.py b/dmriprep/workflows/base.py
@@ -314,7 +314,7 @@ def init_single_subject_wf(
         # for documentation purposes
         subject_data = {
             't1w': ['/completely/made/up/path/sub-01_T1w.nii.gz'],
-            'dwi': ['/completely/made/up/path/sub-01_task-nback_bold.nii.gz']
+            'dwi': ['/completely/made/up/path/sub-01_dwi.nii.gz']
         }
     else:
         subject_data = collect_data(layout, subject_id)[0]
diff --git a/setup.cfg b/setup.cfg
@@ -7,7 +7,7 @@ maintainer_email = code@oscaresteban.es
 description = dMRIPrep is a robust and easy-to-use pipeline for preprocessing of diverse dMRI data.
 long_description = file:README.rst
 long_description_content_type = text/x-rst; charset=UTF-8
-license = 3-clause BSD
+license = Apache License, Version 2.0
 classifiers =
     Development Status :: 3 - Alpha
     Intended Audience :: Science/Research
@@ -23,15 +23,16 @@ install_requires =
     indexed_gzip >=0.8.8
     nibabel >=2.2.1
     nilearn !=0.5.0, !=0.5.1
-    nipype >=1.2.0
-    niworkflows ~= 0.10.1
+    nipype >=1.2.2
+    niworkflows ~= 0.10.3
     numpy
     pandas
     psutil >=5.4
-    pybids ~= 0.9.2
+    pybids ~= 0.9.3
     pyyaml
     scikit-image
-    smriprep ~= 0.3.0
+    sdcflows ~= 0.1.0
+    smriprep ~= 0.3.2
     statsmodels
     templateflow ~= 0.4.1
 test_requires =

Original file line number	Diff line number	Diff line change
`@@ -314,7 +314,7 @@ def init_single_subject_wf(`
`314`	`314`	`# for documentation purposes`
`315`	`315`	`subject_data = {`
`316`	`316`	`'t1w': ['/completely/made/up/path/sub-01_T1w.nii.gz'],`
`317`		`- 'dwi': ['/completely/made/up/path/sub-01_task-nback_bold.nii.gz']`
	`317`	`+ 'dwi': ['/completely/made/up/path/sub-01_dwi.nii.gz']`
`318`	`318`	`}`
`319`	`319`	`else:`
`320`	`320`	`subject_data = collect_data(layout, subject_id)[0]`