Skip to content

Commit 3e3841a

Browse files
authored
Merge pull request #18 from kipoi/interval_seq_rename
rename IntervalSeqDl -> SeqIntervalDl
2 parents 82912ae + a88f9be commit 3e3841a

File tree

5 files changed

+32
-32
lines changed

5 files changed

+32
-32
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88

99
Standard set of data-loaders for training and making predictions for DNA sequence-based models.
1010

11-
All dataloaders in `kipoiseq.dataloaders` decorated with `@kipoi_dataloader` (IntervalSeqDl and IntervalSeqStringDl) are compatible Kipoi models and can be directly used when specifying a new model in `model.yaml`:
11+
All dataloaders in `kipoiseq.dataloaders` decorated with `@kipoi_dataloader` (SeqIntervalDl and StringSeqIntervalDl) are compatible with Kipoi models and can be directly used when specifying a new model in `model.yaml`:
1212
```yaml
1313
...
1414
default_dataloader:
15-
defined_as: kipoiseq.dataloaders.IntervalSeqDl
15+
defined_as: kipoiseq.dataloaders.SeqIntervalDl
1616
default_args:
17-
auto_resize_len: 1000 # override default args in IntervalSeqDl
17+
auto_resize_len: 1000 # override default args in SeqIntervalDl
1818

1919
dependencies:
2020
pip:
@@ -31,11 +31,11 @@ pip install kipoiseq
3131
## Getting started
3232

3333
```python
34-
from kipoiseq.dataloaders import IntervalSeqDl
34+
from kipoiseq.dataloaders import SeqIntervalDl
3535

36-
dl = IntervalSeqDl.init_example() # use the provided example files
36+
dl = SeqIntervalDl.init_example() # use the provided example files
3737
# your own files
38-
dl = IntervalSeqDl("intervals.bed", "genome.fa")
38+
dl = SeqIntervalDl("intervals.bed", "genome.fa")
3939

4040
len(dl) # length of the dataset
4141

@@ -60,6 +60,6 @@ More info:
6060

6161
## How to write your own data-loaders
6262
- Read the PyTorch [Data Loading and Processing Tutorial](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html) to become more familiar with transforms and dataloaders
63-
- Read the code for `IntervalSeqDl` in [kipoiseq/dataloaders/sequence.py](https://github.com/kipoi/kipoiseq/blob/master/kipoiseq/dataloaders/sequence.py)
63+
- Read the code for `SeqIntervalDl` in [kipoiseq/dataloaders/sequence.py](https://github.com/kipoi/kipoiseq/blob/master/kipoiseq/dataloaders/sequence.py)
6464
- you can skip the `@kipoi_dataloader` and the long yaml doc-string. These are only required if you want to use dataloaders in Kipoi's model.yaml files.
6565
- Explore the available transforms ([functional](http://kipoi.org/kipoiseq/transforms/functional/), [class-based](http://kipoi.org/kipoiseq/transforms/transforms/)) or extractors ([kipoiseq](https://github.com/kipoi/kipoiseq/blob/master/kipoiseq/extractors.py), [genomelake](https://github.com/kundajelab/genomelake/blob/master/genomelake/extractors.py))

kipoiseq/dataloaders/sequence.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
Author(name='Roman Kreuzhuber', github='krrome')]
2727

2828
# Object exported on import *
29-
__all__ = ['IntervalSeqDl', 'IntervalSeqStringDl', 'BedDataset']
29+
__all__ = ['SeqIntervalDl', 'StringSeqIntervalDl', 'BedDataset']
3030

3131

3232
class BedDataset(object):
@@ -132,7 +132,7 @@ def get_targets(self):
132132

133133

134134
@kipoi_dataloader(override={"dependencies": deps, 'info.authors': package_authors})
135-
class IntervalSeqStringDl(Dataset):
135+
class StringSeqIntervalDl(Dataset):
136136
"""
137137
info:
138138
doc: >
@@ -258,7 +258,7 @@ def get_output_schema(cls):
258258

259259

260260
@kipoi_dataloader(override={"dependencies": deps, 'info.authors': package_authors})
261-
class IntervalSeqDl(Dataset):
261+
class SeqIntervalDl(Dataset):
262262
"""
263263
info:
264264
doc: >
@@ -332,7 +332,7 @@ def __init__(self,
332332
ignore_targets=False,
333333
dtype=None):
334334
# core dataset, not using the one-hot encoding params
335-
self.seq_dl = IntervalSeqStringDl(intervals_file, fasta_file, num_chr_fasta=num_chr_fasta,
335+
self.seq_dl = StringSeqIntervalDl(intervals_file, fasta_file, num_chr_fasta=num_chr_fasta,
336336
label_dtype=label_dtype, auto_resize_len=auto_resize_len,
337337
# use_strand=use_strand,
338338
ignore_targets=ignore_targets)

notebooks/getting-started.ipynb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
"metadata": {},
2424
"outputs": [],
2525
"source": [
26-
"from kipoiseq.dataloaders import IntervalSeqDl"
26+
"from kipoiseq.dataloaders import SeqIntervalDl"
2727
]
2828
},
2929
{
@@ -58,7 +58,7 @@
5858
}
5959
],
6060
"source": [
61-
"kwargs = IntervalSeqDl.example_kwargs\n",
61+
"kwargs = SeqIntervalDl.example_kwargs\n",
6262
"kwargs"
6363
]
6464
},
@@ -127,7 +127,7 @@
127127
"outputs": [],
128128
"source": [
129129
"# setup the dataset\n",
130-
"dl = IntervalSeqDl(**kwargs)"
130+
"dl = SeqIntervalDl(**kwargs)"
131131
]
132132
},
133133
{
@@ -252,7 +252,7 @@
252252
"metadata": {},
253253
"outputs": [],
254254
"source": [
255-
"dl = IntervalSeqDl(auto_resize_len=10, **kwargs)"
255+
"dl = SeqIntervalDl(auto_resize_len=10, **kwargs)"
256256
]
257257
},
258258
{
@@ -708,7 +708,7 @@
708708
"source": [
709709
"### Final remarks\n",
710710
"\n",
711-
"- See the available arguments of `IntervalSeqDl`: http://kipoi.org/kipoiseq/dataloaders/sequence/#seqdataset\n",
711+
"- See the available arguments of `SeqIntervalDl`: http://kipoi.org/kipoiseq/dataloaders/sequence/#seqdataset\n",
712712
"- Both the `intervals_file` and the `fasta_file` may be gzipped.\n",
713713
"- You may have multiple additional columns in the `intervals_file` to train a multi-task model.\n",
714714
"- If you are training on large datasets, find the appropriate encoding for the labels (say `bool` for binary-only labels)."

tests/dataloaders/test_sequence.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from pybedtools import Interval
66
from kipoi.utils import override_default_kwargs
77
from kipoiseq.transforms.functional import one_hot_dna
8-
from kipoiseq.dataloaders.sequence import IntervalSeqStringDl, IntervalSeqDl, BedDataset
8+
from kipoiseq.dataloaders.sequence import StringSeqIntervalDl, SeqIntervalDl, BedDataset
99

1010

1111
@pytest.fixture
@@ -27,37 +27,37 @@ def test_min_props():
2727
min_set_props = ["output_schema", "type", "defined_as", "info", "args", "dependencies", "postprocessing",
2828
"source", "source_dir"]
2929

30-
for Dl in [IntervalSeqStringDl, IntervalSeqDl]:
30+
for Dl in [StringSeqIntervalDl, SeqIntervalDl]:
3131
props = dir(Dl)
3232
assert all([el in props for el in min_set_props])
3333

3434

3535
def test_fasta_based_dataset(intervals_file, fasta_file):
3636
# just test the functionality
37-
dl = IntervalSeqStringDl(intervals_file, fasta_file)
37+
dl = StringSeqIntervalDl(intervals_file, fasta_file)
3838
ret_val = dl[0]
3939
assert isinstance(ret_val["inputs"], np.ndarray)
4040
assert ret_val["inputs"].shape == ()
4141
# # test with set wrong seqlen:
42-
# dl = IntervalSeqStringDl(intervals_file, fasta_file, required_seq_len=3)
42+
# dl = StringSeqIntervalDl(intervals_file, fasta_file, required_seq_len=3)
4343
# with pytest.raises(Exception):
4444
# dl[0]
4545

46-
dl = IntervalSeqStringDl(intervals_file, fasta_file, label_dtype="str")
46+
dl = StringSeqIntervalDl(intervals_file, fasta_file, label_dtype="str")
4747
ret_val = dl[0]
4848
assert isinstance(ret_val['targets'][0], np.str_)
49-
dl = IntervalSeqStringDl(intervals_file, fasta_file, label_dtype="int")
49+
dl = StringSeqIntervalDl(intervals_file, fasta_file, label_dtype="int")
5050
ret_val = dl[0]
5151
assert isinstance(ret_val['targets'][0], np.int_)
52-
dl = IntervalSeqStringDl(intervals_file, fasta_file, label_dtype="bool")
52+
dl = StringSeqIntervalDl(intervals_file, fasta_file, label_dtype="bool")
5353
ret_val = dl[0]
5454
assert isinstance(ret_val['targets'][0], np.bool_)
5555
vals = dl.load_all()
5656
assert vals['inputs'][0] == 'GT'
5757

5858

5959
def test_seq_dataset(intervals_file, fasta_file):
60-
dl = IntervalSeqDl(intervals_file, fasta_file)
60+
dl = SeqIntervalDl(intervals_file, fasta_file)
6161
ret_val = dl[0]
6262

6363
assert np.all(ret_val['inputs'] == one_hot_dna("GT"))
@@ -67,7 +67,7 @@ def test_seq_dataset(intervals_file, fasta_file):
6767

6868
@pytest.fixture
6969
def example_kwargs():
70-
return IntervalSeqDl.example_kwargs
70+
return SeqIntervalDl.example_kwargs
7171

7272

7373
@pytest.mark.parametrize("alphabet_axis", list(range(0, 4)))
@@ -87,10 +87,10 @@ def test_seq_dataset_reshape(alphabet_axis, dummy_axis, example_kwargs):
8787
if (alphabet_axis == dummy_axis_int) or (alphabet_axis == -1) or (dummy_axis_int == -1) or \
8888
(alphabet_axis >= 3) or (dummy_axis_int >= 3) or ((alphabet_axis >= 2) and (dummy_axis is None)):
8989
with pytest.raises(Exception):
90-
seq_dataset = IntervalSeqDl(**kwargs)
90+
seq_dataset = SeqIntervalDl(**kwargs)
9191
return None
9292

93-
seq_dataset = IntervalSeqDl(**kwargs)
93+
seq_dataset = SeqIntervalDl(**kwargs)
9494

9595
# test the single sample works
9696
reshaped = seq_dataset[0]['inputs']
@@ -104,7 +104,7 @@ def test_seq_dataset_reshape(alphabet_axis, dummy_axis, example_kwargs):
104104

105105

106106
# download example files
107-
@pytest.mark.parametrize("cls", [IntervalSeqStringDl, IntervalSeqDl])
107+
@pytest.mark.parametrize("cls", [StringSeqIntervalDl, SeqIntervalDl])
108108
def test_examples_exist(cls):
109109
ex = cls.init_example()
110110
example_kwargs = cls.example_kwargs
@@ -120,7 +120,7 @@ def test_examples_exist(cls):
120120

121121

122122
def test_output_schape():
123-
Dl = deepcopy(IntervalSeqDl)
123+
Dl = deepcopy(SeqIntervalDl)
124124
assert Dl.get_output_schema().inputs.shape == (None, 4)
125125
override_default_kwargs(Dl, {"auto_resize_len": 100})
126126
assert Dl.get_output_schema().inputs.shape == (100, 4)

tests/dont_test_4_integration.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import kipoi_veff.snv_predict as sp
44
import pytest
55
from kipoi_veff.utils.generic import ModelInfoExtractor
6-
from kipoiseq.dataloaders.sequence import IntervalSeqDl
6+
from kipoiseq.dataloaders.sequence import SeqIntervalDl
77
import os
88
from kipoi.pipeline import install_model_requirements
99

@@ -12,7 +12,7 @@
1212

1313
def test_deepsea():
1414
model = kipoi.get_model("DeepSEA/variantEffects")
15-
mie = ModelInfoExtractor(model, IntervalSeqDl)
15+
mie = ModelInfoExtractor(model, SeqIntervalDl)
1616

1717

1818
def test_var_eff_pred_varseq(tmpdir):
@@ -22,7 +22,7 @@ def test_var_eff_pred_varseq(tmpdir):
2222
#
2323
model = kipoi.get_model(model_name, source="kipoi")
2424
# The preprocessor
25-
Dataloader = IntervalSeqDl
25+
Dataloader = SeqIntervalDl
2626
#
2727
dataloader_arguments = {"intervals_file": "example_files/intervals.bed",
2828
"fasta_file": "example_files/hg38_chr22.fa",

0 commit comments

Comments
 (0)