kipoi
diff --git a/kipoiseq/dataloaders/sequence.py b/kipoiseq/dataloaders/sequence.py
Lines changed: 4 additions & 4 deletions
diff --git a/kipoiseq/transforms/transforms.py b/kipoiseq/transforms/transforms.py
Lines changed: 7 additions & 6 deletions
diff --git a/kipoiseq/utils.py b/kipoiseq/utils.py
Lines changed: 5 additions & 14 deletions
diff --git a/notebooks/getting-started.ipynb b/notebooks/getting-started.ipynb
Lines changed: 158 additions & 303 deletions
diff --git a/tests/datasets/test_BedDataset.py b/tests/dataloaders/test_BedDataset.py
tests/datasets/test_BedDataset.py renamed to tests/dataloaders/test_BedDataset.py
diff --git a/tests/datasets/test_sequence.py b/tests/dataloaders/test_sequence.py
tests/datasets/test_sequence.py renamed to tests/dataloaders/test_sequence.py
Lines changed: 40 additions & 11 deletions
diff --git a/tests/datasets/test_splicing.py b/tests/dataloaders/test_splicing.py
tests/datasets/test_splicing.py renamed to tests/dataloaders/test_splicing.py
diff --git a/tests/test_0_transforms.py b/tests/test_0_transforms_class.py
tests/test_0_transforms.py renamed to tests/test_0_transforms_class.py
Lines changed: 11 additions & 35 deletions
diff --git a/tests/test_0_transforms_functional.py b/tests/test_0_transforms_functional.py
Lines changed: 60 additions & 1 deletion
diff --git a/tests/test_2_datasets.py b/tests/test_2_datasets.py
Lines changed: 0 additions & 40 deletions
@@ -40,7 +40,7 @@ class BedDataset(object):
       bed_columns: number of columns corresponding to the bed file. All the columns
         after that will be parsed as targets
       num_chr: if specified, 'chr' in the chromosome name will be dropped
-      label_dtype: specific data type for labels
+      label_dtype: specific data type for labels, Example: `float` or `np.float32`
       ambiguous_mask: if specified, rows containing only ambiguous_mask values will be skipped
       incl_chromosomes: exclusive list of chromosome names to include in the final dataset.
         if not None, only these will be present in the dataset
@@ -153,7 +153,7 @@ class IntervalSeqStringDl(Dataset):
         num_chr_fasta:
             doc: True, the dataloader will make sure that the chromosomes don't start with chr.
         label_dtype:
-            doc: None, datatype of the task labels taken from the intervals_file. Allowed - string', 'int', 'float', 'bool'
+            doc: None, datatype of the task labels taken from the intervals_file. Example - str, int, float, np.float32
         auto_resize_len:
             doc: None, required sequence length.
         # max_seq_len:
@@ -280,7 +280,7 @@ class IntervalSeqDl(Dataset):
         num_chr_fasta:
             doc: True, the dataloader will make sure that the chromosomes don't start with chr.
         label_dtype:
-            doc: None, datatype of the task labels taken from the intervals_file. Allowed - string', 'int', 'float', 'bool'
+            doc: 'None, datatype of the task labels taken from the intervals_file. Example: str, int, float, np.float32'
         auto_resize_len:
             doc: None, required sequence length.
         # use_strand:
@@ -294,7 +294,7 @@ class IntervalSeqDl(Dataset):
                 alphabet to use for the one-hot encoding. This defines the order of the one-hot encoding.
                 Can either be a list or a string: 'ACGT' or ['A', 'C', 'G', 'T']. Default: 'ACGT'
         dtype:
-            doc: defines the numpy dtype of the returned array.
+            doc: 'defines the numpy dtype of the returned array. Example: int, np.int32, np.float32, float'
         ignore_targets:
             doc: if True, don't return any target variables
 
 
@@ -4,7 +4,7 @@
 
 import numpy as np
 from kipoiseq.transforms import functional as F
-from kipoiseq.utils import DNA, parse_alphabet, parse_type
+from kipoiseq.utils import DNA, parse_alphabet, parse_dtype
 
 
 class Compose(object):
@@ -146,19 +146,20 @@ def __init__(self,
                  alphabet_axis=1,
                  dummy_axis=None):
         # make sure the alphabet axis and the dummy axis are valid:
+        if dummy_axis is not None:
+            if alphabet_axis == dummy_axis:
+                raise ValueError("dummy_axis can't be the same as dummy_axis")
+            if not (dummy_axis >= 0 and dummy_axis <= 2):
+                raise ValueError("dummy_axis can be either 0,1 or 2")
         assert alphabet_axis >= 0 and (alphabet_axis < 2 or (alphabet_axis <= 2 and dummy_axis is not None))
-        assert dummy_axis is None or (dummy_axis >= 0 and dummy_axis <= 2 and alphabet_axis != dummy_axis)
 
         self.alphabet_axis = alphabet_axis
         self.dummy_axis = dummy_axis
         self.alphabet = parse_alphabet(alphabet)
-        self.dtype = parse_type(dtype)
+        self.dtype = parse_dtype(dtype)
         self.neutral_alphabet = neutral_alphabet
         self.neutral_value = neutral_value
 
-        if dummy_axis is not None and alphabet_axis == dummy_axis:
-            raise ValueError("dummy_axis can't be the same as dummy_axis")
-
         # set the transform parameters correctly
         if dummy_axis is not None and dummy_axis < 2:
             # dummy axis is added somewhere in the middle, so the alphabet axis is at the end now
 
@@ -22,27 +22,18 @@ def to_scalar(obj):
         return obj
 
 
-def parse_dtype(dtype):
-    dtypes = {'int': int, 'string': str, 'float': float, 'bool': bool}
-    if dtype is None:
-        return None
-    if dtype in list(dtypes.values()):
-        return dtype
-    if dtype not in dtypes:
-        raise Exception("Datatype '{0}' not recognized. Allowed are: {1}".format(dtype, str(list(dtypes.keys()))))
-    return dtypes[dtype]
-
-
 def parse_alphabet(alphabet):
     if isinstance(alphabet, str):
         return list(alphabet)
     else:
         return alphabet
 
 
-def parse_type(dtype):
+def parse_dtype(dtype):
     if isinstance(dtype, string_types):
-        if dtype in dir(np):
-            return getattr(np, dtype)
+        try:
+            return eval(dtype)
+        except Exception as e:
+            raise ValueError("Unable to parse dtype: {}. \nException: {}".format(dtype, e))
     else:
         return dtype
@@ -5,7 +5,7 @@
 from pybedtools import Interval
 from kipoi.utils import override_default_kwargs
 from kipoiseq.transforms.functional import one_hot_dna
-from kipoiseq.dataloaders.sequence import IntervalSeqStringDl, IntervalSeqDl, parse_dtype, BedDataset
+from kipoiseq.dataloaders.sequence import IntervalSeqStringDl, IntervalSeqDl, BedDataset
 
 
 @pytest.fixture
@@ -32,15 +32,6 @@ def test_min_props():
         assert all([el in props for el in min_set_props])
 
 
-def test_parse_dtype():
-    dtypes = {'int': int, 'string': str, 'float': float, 'bool': bool}
-    assert all([parse_dtype(dt) == dtypes[dt] for dt in dtypes.keys()])
-    assert all([parse_dtype(dt) == dt for dt in dtypes.values()])
-    with pytest.raises(Exception):
-        parse_dtype("int8")
-    assert parse_dtype(None) is None
-
-
 def test_fasta_based_dataset(intervals_file, fasta_file):
     # just test the functionality
     dl = IntervalSeqStringDl(intervals_file, fasta_file)
@@ -52,7 +43,7 @@ def test_fasta_based_dataset(intervals_file, fasta_file):
     # with pytest.raises(Exception):
     #     dl[0]
 
-    dl = IntervalSeqStringDl(intervals_file, fasta_file, label_dtype="string")
+    dl = IntervalSeqStringDl(intervals_file, fasta_file, label_dtype="str")
     ret_val = dl[0]
     assert isinstance(ret_val['targets'][0], np.str_)
     dl = IntervalSeqStringDl(intervals_file, fasta_file, label_dtype="int")
@@ -74,6 +65,44 @@ def test_seq_dataset(intervals_file, fasta_file):
     assert ret_val["inputs"].shape == (2, 4)
 
 
+@pytest.fixture
+def example_kwargs():
+    return IntervalSeqDl.example_kwargs
+
+
+@pytest.mark.parametrize("alphabet_axis", list(range(0, 4)))
+@pytest.mark.parametrize("dummy_axis", [None] + list(range(0, 4)))
+def test_seq_dataset_reshape(alphabet_axis, dummy_axis, example_kwargs):
+    seq_len, alphabet_len = 3, 4
+
+    kwargs = example_kwargs
+    kwargs['auto_resize_len'] = seq_len
+    kwargs['alphabet_axis'] = alphabet_axis
+    kwargs['dummy_axis'] = dummy_axis
+
+    dummy_axis_int = dummy_axis
+    if dummy_axis is None:
+        dummy_axis_int = -2
+
+    if (alphabet_axis == dummy_axis_int) or (alphabet_axis == -1) or (dummy_axis_int == -1) or \
+            (alphabet_axis >= 3) or (dummy_axis_int >= 3) or ((alphabet_axis >= 2) and (dummy_axis is None)):
+        with pytest.raises(Exception):
+            seq_dataset = IntervalSeqDl(**kwargs)
+        return None
+
+    seq_dataset = IntervalSeqDl(**kwargs)
+
+    # test the single sample works
+    reshaped = seq_dataset[0]['inputs']
+    for i in range(len(reshaped.shape)):
+        if i == dummy_axis:
+            assert reshaped.shape[i] == 1
+        elif i == alphabet_axis:
+            assert reshaped.shape[i] == alphabet_len
+        else:
+            assert reshaped.shape[i] == seq_len
+
+
 # download example files
 @pytest.mark.parametrize("cls", [IntervalSeqStringDl, IntervalSeqDl])
 def test_examples_exist(cls):
 
@@ -1,44 +1,16 @@
 import pytest
 import numpy as np
 import copy
-from kipoiseq.transforms.functional import resize_interval
-from kipoiseq.transforms.transforms import SplitSplicingSeq, ReorderedOneHot
+from kipoiseq.transforms.transforms import Compose, OneHot, SplitSplicingSeq, ReorderedOneHot
 from kipoiseq.utils import DNA
 from pybedtools import Interval
 
 
 # --------------------------------------------
-
-@pytest.mark.parametrize("anchor", ['start', 'end', 'center'])
-@pytest.mark.parametrize("ilen", [3, 4])
-def test_resize_interval(anchor, ilen):
-    import pybedtools
-    dummy_start, dummy_end = 10, 20
-    dummy_centre = int((dummy_start + dummy_end) / 2)
-
-    dummy_inter = pybedtools.create_interval_from_list(['chr2', dummy_start, dummy_end, 'intname'])
-    ret_inter = resize_interval(dummy_inter, ilen, anchor)
-
-    # the original interval was left intact
-    assert dummy_inter.chrom == 'chr2'
-    assert dummy_inter.start == dummy_start
-    assert dummy_inter.end == dummy_end
-    assert dummy_inter.name == 'intname'
-
-    # metadata kept
-    assert ret_inter.chrom == dummy_inter.chrom
-    assert ret_inter.name == 'intname'
-
-    # desired output width
-    assert ret_inter.length == ilen
-
-    # correct anchor point
-    if anchor == "start":
-        assert ret_inter.start == dummy_start
-    elif anchor == "end":
-        assert ret_inter.end == dummy_end
-    elif anchor == "centre":
-        assert int((ret_inter.start + ret_inter.end) / 2) == dummy_centre
+def test_compose():
+    c = Compose([OneHot()])
+    print(str(c))
+    assert c("ACGT").shape == (4, 4)
 
 
 def test_ReorderedOneHot():
@@ -60,10 +32,10 @@ def test_ReorderedOneHot():
         assert out.shape == tr.get_output_shape(seqlen)
         assert out.shape == result
 
-    with pytest.raises(Exception):
+    with pytest.raises(ValueError):
         ReorderedOneHot(alphabet_axis=1, dummy_axis=1)
 
-    with pytest.raises(Exception):
+    with pytest.raises(ValueError):
         ReorderedOneHot(dummy_axis=1)
 
 
@@ -86,3 +58,7 @@ def test_SplitSplicingSeq():
     assert splited['exon'] == 'GTAGTAGA'
     assert splited['donor'] == 'AGAGT'
     assert splited['intron3prime'] == 'CC'
+
+
+def test_ResizeInterval():
+    pass
@@ -1,5 +1,6 @@
 import pytest
-from kipoiseq.transforms.functional import tokenize, token2one_hot, one_hot, one_hot_dna, pad, trim, fixed_len
+from kipoiseq.transforms.functional import resize_interval, tokenize, token2one_hot, one_hot, one_hot_dna, pad, trim, fixed_len
+from kipoiseq.transforms.transforms import ResizeInterval
 from kipoiseq.utils import DNA
 import numpy as np
 
@@ -81,3 +82,61 @@ def test_pad_sequences():
 
     assert fixed_len(seq, 10, anchor="start", value="N") == seq
     assert fixed_len(seq, 10, anchor="end", value="N") == 'CTTACTCAGA'
+
+
+@pytest.mark.parametrize("anchor", ['start', 'end', 'center'])
+@pytest.mark.parametrize("ilen", [3, 4])
+def test_resize_interval(anchor, ilen):
+    import pybedtools
+    dummy_start, dummy_end = 10, 20
+    dummy_center = int((dummy_start + dummy_end) / 2)
+
+    dummy_inter = pybedtools.create_interval_from_list(['chr2', dummy_start, dummy_end, 'intname'])
+    ret_inter = resize_interval(dummy_inter, ilen, anchor)
+
+    # the original interval was left intact
+    assert dummy_inter.chrom == 'chr2'
+    assert dummy_inter.start == dummy_start
+    assert dummy_inter.end == dummy_end
+    assert dummy_inter.name == 'intname'
+
+    # metadata kept
+    assert ret_inter.chrom == dummy_inter.chrom
+    assert ret_inter.name == 'intname'
+
+    # desired output width
+    assert ret_inter.length == ilen
+
+    # correct anchor point
+    if anchor == "start":
+        assert ret_inter.start == dummy_start
+    elif anchor == "end":
+        assert ret_inter.end == dummy_end
+    elif anchor == "center":
+        assert int((ret_inter.start + ret_inter.end) / 2) == dummy_center
+
+
+def test_ResizeInterval():
+    """Same checks as test_resize_interval (center anchor, ilen=4), applied through the ResizeInterval transform class
+    """
+    import pybedtools
+    dummy_start, dummy_end = 10, 20
+    dummy_center = int((dummy_start + dummy_end) / 2)
+    ilen = 4
+    dummy_inter = pybedtools.create_interval_from_list(['chr2', dummy_start, dummy_end, 'intname'])
+    ri = ResizeInterval(ilen, 'center')
+    ret_inter = ri(dummy_inter)
+    assert int((ret_inter.start + ret_inter.end) / 2) == dummy_center
+
+    # the original interval was left intact
+    assert dummy_inter.chrom == 'chr2'
+    assert dummy_inter.start == dummy_start
+    assert dummy_inter.end == dummy_end
+    assert dummy_inter.name == 'intname'
+
+    # metadata kept
+    assert ret_inter.chrom == dummy_inter.chrom
+    assert ret_inter.name == 'intname'
+
+    # desired output width
+    assert ret_inter.length == ilen