Merge remote-tracking branch 'upstream/master' into rel/0.7.1

effigies · effigies · commit 3bb243769f91 · 2019-02-01T15:14:20.000-05:00
diff --git a/bids/analysis/tests/test_analysis.py b/bids/analysis/tests/test_analysis.py
@@ -31,25 +31,28 @@ def test_get_design_matrix_arguments(analysis):
     kwargs = dict(run=1, subject='01', sparse=True)
     result = analysis['run'].get_design_matrix(**kwargs)
     result = result[0]
-    assert result.sparse.shape == (172, 7)
+    assert result.sparse.shape == (172, 9)
     assert result.dense is None
 
     kwargs = dict(run=1, subject='01', mode='dense', force=False)
     result = analysis['run'].get_design_matrix(**kwargs)[0]
     assert result.sparse is None
     assert result.dense is None
 
-    kwargs = dict(run=1, subject='01', mode='dense', force=True, sampling_rate='highest')
+    kwargs = dict(run=1, subject='01', mode='dense', force=True,
+                  sampling_rate='highest')
     result = analysis['run'].get_design_matrix(**kwargs)[0]
     assert result.sparse is None
     assert result.dense.shape == (4800, 6)
 
-    kwargs = dict(run=1, subject='01', mode='dense', force=True, sampling_rate='TR')
+    kwargs = dict(run=1, subject='01', mode='dense', force=True,
+                  sampling_rate='TR')
     result = analysis['run'].get_design_matrix(**kwargs)[0]
     assert result.sparse is None
     assert result.dense.shape == (240, 6)
 
-    kwargs = dict(run=1, subject='01', mode='dense', force=True, sampling_rate=0.5)
+    kwargs = dict(run=1, subject='01', mode='dense', force=True,
+                  sampling_rate=0.5)
     result = analysis['run'].get_design_matrix(**kwargs)[0]
     assert result.sparse is None
     assert result.dense.shape == (240, 6)
@@ -72,11 +75,11 @@ def test_first_level_sparse_design_matrix(analysis):
     result = analysis['run'].get_design_matrix(subject=['01'])
     assert len(result) == 3
     df = result[0].sparse
-    assert df.shape == (172, 7)
+    assert df.shape == (172, 9)
     assert df['condition'].nunique() == 2
     assert set(result[0][0].columns) == {'amplitude', 'onset', 'duration',
                                          'condition', 'subject', 'run',
-                                         'task'}
+                                         'task', 'datatype', 'suffix'}
 
 
 def test_post_first_level_sparse_design_matrix(analysis):
@@ -87,7 +90,9 @@ def test_post_first_level_sparse_design_matrix(analysis):
     assert result[0].sparse.shape == (9, 2)
     assert result[0].entities == {
         'subject': '01',
-        'task': 'mixedgamblestask'}
+        'task': 'mixedgamblestask',
+        'datatype': 'func',
+        'suffix': 'bold'}
 
     # Participant level and also check integer-based indexing
     result = analysis['participant'].get_design_matrix()
diff --git a/bids/layout/layout.py b/bids/layout/layout.py
@@ -305,17 +305,14 @@ def _validate_file(self, f):
         if not self.validate:
             return True
 
-        # For derivatives, we need to cheat a bit and construct a fake
-        # derivatives path--prepend 'derivatives' and the pipeline name
-        to_check = os.path.relpath(f, self.root)
+        # Derivatives are currently not validated.
         if 'derivatives' in self.domains:
-            to_check = os.path.join(
-                'derivatives', self.description['PipelineDescription']['Name'],
-                to_check)
+            return True
 
-        sep = os.path.sep
-        if to_check[:len(sep)] != sep:
-            to_check = sep + to_check
+        # The BIDS validator expects paths with a leading separator, but it
+        # interprets them relative to the BIDS project root.
+        to_check = os.path.relpath(f, self.root)
+        to_check = os.path.join(os.path.sep, to_check)
 
         return self.validator.is_bids(to_check)
 
diff --git a/bids/variables/io.py b/bids/variables/io.py
@@ -201,7 +201,7 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                         # Add in all of the run's entities as new columns for
                         # index
                         for entity, value in entities.items():
-                            if entity in BASE_ENTITIES:
+                            if entity in ALL_ENTITIES:
                                 df[entity] = value
 
                         if drop_na:
@@ -327,14 +327,20 @@ def _load_tsv_variables(layout, suffix, dataset=None, columns=None,
         # file (for entities that vary by row), or from the full file path
         # (for entities constant over all rows in the file). We extract both
         # and store them in the main DataFrame alongside other variables (as
-        # they'll be extracted when the Column is initialized anyway).
+        # they'll be extracted when the BIDSVariable is initialized anyway).
         for ent_name, ent_val in f.entities.items():
-            if ent_name in BASE_ENTITIES:
+            if ent_name in ALL_ENTITIES:
                 _data[ent_name] = ent_val
 
         # Handling is a bit more convoluted for scans.tsv, because the first
         # column contains the run filename, which we also need to parse.
         if suffix == 'scans':
+
+            # Suffix is guaranteed to be present in each filename, so drop the
+            # constant column with value 'scans' to make way for it and prevent
+            # two 'suffix' columns.
+            _data.drop(columns='suffix', inplace=True)
+
             image = _data['filename']
             _data = _data.drop('filename', axis=1)
             dn = f.dirname
@@ -369,12 +375,11 @@ def make_patt(x, regex_search=False):
         # Filter rows on all selectors
         comm_cols = list(set(_data.columns) & set(selectors.keys()))
         for col in comm_cols:
-            for val in listify(selectors.get(col)):
-                ent_patts = [make_patt(x, regex_search=layout.regex_search)
-                             for x in listify(selectors.get(col))]
-                patt = '|'.join(ent_patts)
+            ent_patts = [make_patt(x, regex_search=layout.regex_search)
+                            for x in listify(selectors.get(col))]
+            patt = '|'.join(ent_patts)
 
-                _data = _data[_data[col].str.contains(patt)]
+            _data = _data[_data[col].str.contains(patt)]
 
         level = {'scans': 'session', 'sessions': 'subject',
                  'participants': 'dataset'}[suffix]
diff --git a/bids/variables/tests/test_collections.py b/bids/variables/tests/test_collections.py
@@ -57,35 +57,27 @@ def test_run_variable_collection_to_df(run_coll):
 
     # All variables sparse, wide format
     df = run_coll.to_df()
-    assert df.shape == (4096, 13)
+    assert df.shape == (4096, 15)
     wide_cols = {'onset', 'duration', 'subject', 'run', 'task',
                  'PTval', 'RT', 'gain', 'loss', 'parametric gain', 'respcat',
-                 'respnum', 'trial_type'}
+                 'respnum', 'trial_type', 'suffix', 'datatype'}
     assert set(df.columns) == wide_cols
 
     # All variables sparse, wide format
     df = run_coll.to_df(format='long')
-    assert df.shape == (32768, 7)
+    assert df.shape == (32768, 9)
     long_cols = {'amplitude', 'duration', 'onset', 'condition', 'run',
-                 'task', 'subject'}
+                 'task', 'subject', 'suffix', 'datatype'}
     assert set(df.columns) == long_cols
 
     # All variables dense, wide format
     df = run_coll.to_df(sparse=False)
     assert df.shape == (230400, 14)
-    # The inclusion of 'modality' and 'type' here is a minor bug that should
-    # be fixed at some point. There is no reason why to_df() should return
-    # more columns for a DenseRunVariable than a SparseRunVariable, but this
-    # is happening because these columns are not included in the original
-    # SparseRunVariable data, and are being rebuilt from the entity list in
-    # the DenseRunVariable init.
-    wide_cols |= {'datatype', 'suffix'}
     assert set(df.columns) == wide_cols - {'trial_type'}
 
     # All variables dense, wide format
     df = run_coll.to_df(sparse=False, format='long')
     assert df.shape == (1612800, 9)
-    long_cols |= {'datatype', 'suffix'}
     assert set(df.columns) == long_cols
 
 
@@ -100,14 +92,14 @@ def test_merge_collections(run_coll, run_coll_list):
 def test_get_collection_entities(run_coll_list):
     coll = run_coll_list[0]
     ents = coll.entities
-    assert {'run', 'task', 'subject'} == set(ents.keys())
+    assert {'run', 'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())
 
     merged = merge_collections(run_coll_list[:3])
     ents = merged.entities
-    assert {'task', 'subject'} == set(ents.keys())
+    assert {'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())
     assert ents['subject'] == '01'
 
     merged = merge_collections(run_coll_list[3:6])
     ents = merged.entities
-    assert {'task', 'subject'} == set(ents.keys())
+    assert {'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())
     assert ents['subject'] == '02'
diff --git a/bids/variables/tests/test_entities.py b/bids/variables/tests/test_entities.py
@@ -69,7 +69,7 @@ def test_get_collections_merged(layout1):
     vals = collection.variables['RT'].values
     ents = collection.variables['RT'].index
     assert len(ents) == len(vals) == 4096
-    assert set(ents.columns) == {'task', 'run', 'subject'}
+    assert set(ents.columns) == {'task', 'run', 'subject', 'suffix', 'datatype'}
 
 
 def test_get_collections_unmerged(layout2):
diff --git a/bids/variables/tests/test_io.py b/bids/variables/tests/test_io.py
@@ -38,7 +38,7 @@ def test_load_events(layout1):
     targ_cols = {'parametric gain', 'PTval', 'trial_type', 'respnum'}
     assert not (targ_cols - set(variables.keys()))
     assert isinstance(variables['parametric gain'], SparseRunVariable)
-    assert variables['parametric gain'].index.shape == (86, 3)
+    assert variables['parametric gain'].index.shape == (86, 5)
     assert variables['parametric gain'].source == 'events'
 
 
@@ -51,12 +51,12 @@ def test_load_participants(layout1):
     assert {'age', 'sex'} == set(dataset.variables.keys())
     age = dataset.variables['age']
     assert isinstance(age, SimpleVariable)
-    assert age.index.shape == (16, 1)
+    assert age.index.shape == (16, 2)
     assert age.values.shape == (16,)
 
     index = load_variables(layout1, types='participants', subject=['^1.*'])
     age = index.get_nodes(level='dataset')[0].variables['age']
-    assert age.index.shape == (7, 1)
+    assert age.index.shape == (7, 2)
     assert age.values.shape == (7,)
 
 
diff --git a/bids/variables/tests/test_variables.py b/bids/variables/tests/test_variables.py
@@ -1,13 +1,16 @@
 from bids.layout import BIDSLayout
 import pytest
+import os
 from os.path import join
 from bids.tests import get_test_data_path
 from bids.variables import (merge_variables, DenseRunVariable, SimpleVariable,
                             load_variables)
 from bids.variables.entities import RunInfo
 import numpy as np
 import pandas as pd
+import nibabel as nb
 import uuid
+import json
 
 
 def generate_DEV(name='test', sr=20, duration=480):
@@ -174,3 +177,28 @@ def test_filter_simple_variable(layout2):
     assert merged.filter({'nonexistent': 2}, strict=True) is None
     merged.filter({'acquisition': 'fullbrain'}, inplace=True)
     assert merged.to_df().shape == (40, 9)
+
+
+@pytest.mark.parametrize(
+    "TR, nvols",
+    [(2.00000, 251),
+     (2.000001, 251)])  # an exact TR and a slightly perturbed one
+def test_resampling_edge_case(tmpdir, TR, nvols):
+    tmpdir.chdir()  # relative paths below are created inside tmpdir
+    os.makedirs('sub-01/func')
+    with open('sub-01/func/sub-01_task-task_events.tsv', 'w') as fobj:
+        fobj.write('onset\tduration\tval\n1\t0.1\t1\n')  # header + one event
+    with open('sub-01/func/sub-01_task-task_bold.json', 'w') as fobj:
+        json.dump({'RepetitionTime': TR}, fobj)
+
+    dataobj = np.zeros((5, 5, 5, nvols), dtype=np.int16)  # last axis: nvols
+    affine = np.diag((2.5, 2.5, 2.5, 1))
+    img = nb.Nifti1Image(dataobj, affine)
+    img.header.set_zooms((2.5, 2.5, 2.5, TR))  # fourth zoom is set to TR
+    img.to_filename('sub-01/func/sub-01_task-task_bold.nii.gz')
+
+    layout = BIDSLayout('.', validate=False)
+    coll = load_variables(layout).get_collections('run')[0]
+    dense_var = coll.variables['val'].to_dense(coll.sampling_rate)
+    regressor = dense_var.resample(1.0 / TR).values  # resample to 1/TR Hz
+    assert regressor.shape == (nvols, 1)  # expect one row per volume
diff --git a/bids/variables/variables.py b/bids/variables/variables.py
@@ -448,12 +448,13 @@ def resample(self, sampling_rate, inplace=False, kind='linear'):
         self.index = self._build_entity_index(self.run_info, sampling_rate)
 
         x = np.arange(n)
-        num = int(np.ceil(n * sampling_rate / old_sr))
+        num = len(self.index)
 
         from scipy.interpolate import interp1d
         f = interp1d(x, self.values.values.ravel(), kind=kind)
         x_new = np.linspace(0, n - 1, num=num)
         self.values = pd.DataFrame(f(x_new))
+        assert len(self.values) == len(self.index)
 
         self.sampling_rate = sampling_rate