|
16 | 16 | from torch.utils.data import Dataset |
17 | 17 |
|
18 | 18 | from grelu.data.augment import Augmenter, _split_overall_idx |
| 19 | +from grelu.data.preprocess import filter_chrom_ends |
19 | 20 | from grelu.data.utils import _check_multiclass, _create_task_data |
20 | 21 | from grelu.sequence.format import ( |
21 | 22 | INDEX_TO_BASE_HASH, |
@@ -152,6 +153,7 @@ def _load_seqs(self, seqs: Union[str, Sequence, pd.DataFrame, np.ndarray]) -> No |
152 | 153 | seqs = resize(seqs, seq_len=self.padded_seq_len, end=self.end) |
153 | 154 |
|
154 | 155 | if get_input_type(seqs) == "intervals": |
| 156 | + seqs = filter_chrom_ends(seqs, genome=self.genome) |
155 | 157 | self.intervals = seqs |
156 | 158 | self.chroms = list(set(self.intervals.chrom)) |
157 | 159 | else: |
@@ -603,7 +605,8 @@ def _load_seqs(self, variants: pd.DataFrame) -> None: |
603 | 605 | from grelu.variant import variants_to_intervals |
604 | 606 |
|
605 | 607 | self.padded_seq_len = self.seq_len + (2 * self.max_seq_shift) |
606 | | - self.intervals = variants_to_intervals(variants, seq_len=self.padded_seq_len) |
| 608 | + intervals = variants_to_intervals(variants, seq_len=self.padded_seq_len) |
| 609 | + self.intervals = filter_chrom_ends(intervals, genome=self.genome) |
607 | 610 | self.seqs = convert_input_type(self.intervals, "indices", genome=self.genome) |
608 | 611 |
|
609 | 612 | def __len__(self) -> int: |
@@ -710,7 +713,8 @@ def _load_seqs(self, variants: pd.DataFrame) -> None: |
710 | 713 | from grelu.variant import variants_to_intervals |
711 | 714 |
|
712 | 715 | self.padded_seq_len = self.seq_len + (2 * self.max_seq_shift) |
713 | | - self.intervals = variants_to_intervals(variants, seq_len=self.padded_seq_len) |
| 716 | + intervals = variants_to_intervals(variants, seq_len=self.padded_seq_len) |
| 717 | + self.intervals = filter_chrom_ends(intervals, genome=self.genome) |
714 | 718 | self.seqs = convert_input_type(self.intervals, "indices", genome=self.genome) |
715 | 719 | self.n_seqs = self.seqs.shape[0] |
716 | 720 |
|
|
0 commit comments