Skip to content

Commit 4ba473a

Browse files
committed
Fix tests
1 parent 0a2bec0 commit 4ba473a

File tree

2 files changed

+26
-11
lines changed

2 files changed

+26
-11
lines changed

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ pip install genomicranges[optional]
2424

2525
### From Bioinformatic file formats
2626

27+
> [!NOTE]
28+
> When reading genomic formats, `ends` are expected to be inclusive, consistent with Bioconductor representations (and GFF). If they are not, we recommend subtracting 1 from the `ends`.
29+
2730
#### From `biobear`
2831

2932
Although the parsing capabilities in this package are limited, the [biobear](https://github.com/wheretrue/biobear) library is designed for reading and searching various bioinformatics file formats, including FASTA, FASTQ, VCF, BAM, and GFF, or from an object store like S3. Users can easily convert these representations to `GenomicRanges` (or [read more here](https://www.wheretrue.dev/docs/exon/biobear/genomicranges-integration)):
@@ -44,8 +47,8 @@ print(len(gg), len(df))
4447
```
4548

4649
## output
47-
## 77 77
48-
50+
## 77 77

> [!NOTE]
51+
> `ends` are expected to be inclusive to be consistent with Bioconductor representations. If they are not, we recommend subtracting 1 from the `ends`.
4952
5053
#### UCSC or GTF file
5154

src/genomicranges/GenomicRanges.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,7 @@ def from_pandas(cls, input) -> "GenomicRanges":
10561056
Args:
10571057
input:
10581058
Input data. Must contain columns 'seqnames', 'starts' and 'widths' or "ends".
1059+
'ends' are expected to be inclusive.
10591060
10601061
Returns:
10611062
A ``GenomicRanges`` object.
@@ -1136,6 +1137,7 @@ def from_polars(cls, input) -> "GenomicRanges":
11361137
Args:
11371138
input:
11381139
Input polars DataFrame. Must contain columns 'seqnames', 'starts' and 'widths' or "ends".
1140+
'ends' are expected to be inclusive.
11391141
11401142
Returns:
11411143
A ``GenomicRanges`` object.
@@ -2080,30 +2082,40 @@ def intersect(self, other: "GenomicRanges", ignore_strand: bool = False) -> "Gen
20802082
diff = x.setdiff(y_gaps)
20812083
return diff
20822084

2083-
def intersect_ncls(self, other: "GenomicRanges") -> "GenomicRanges":
2085+
def intersect_ncls(
2086+
self, other: "GenomicRanges", delete_index: bool = True, num_threads: int = 1
2087+
) -> "GenomicRanges":
20842088
"""Find intersecting genomic intervals with `other` (uses NCLS index).
20852089
20862090
Args:
20872091
other:
20882092
The other ``GenomicRanges`` object.
20892093
2094+
delete_index:
2095+
Defaults to True, to delete the cached ncls index.
2096+
Set to False, to reuse the index across multiple queries.
2097+
2098+
num_threads:
2099+
Number of threads to use.
2100+
Defaults to 1.
2101+
20902102
Returns:
20912103
A new ``GenomicRanges`` object with intersecting ranges.
20922104
"""
20932105
if not isinstance(other, GenomicRanges):
20942106
raise TypeError("'other' is not a `GenomicRanges` object.")
20952107

2096-
if not ut.package_utils.is_package_installed("ncls"):
2097-
raise ImportError("package: 'ncls' is not installed.")
2098-
2099-
from ncls import NCLS
2108+
self_end = self.get_end()
2109+
other_end = other.get_end()
21002110

2101-
self_end = self.end
2102-
other_end = other.end
2111+
other._ranges._build_ncls_index()
2112+
res = other._ranges.find_overlaps(self._ranges, num_threads=num_threads)
21032113

2104-
other_ncls = NCLS(other.start, other_end, np.arange(len(other)))
2105-
_self_indexes, _other_indexes = other_ncls.all_overlaps_both(self.start, self_end, np.arange(len(self)))
2114+
if delete_index:
2115+
other._ranges._delete_ncls_index()
21062116

2117+
_other_indexes = res["self_hits"]
2118+
_self_indexes = res["query_hits"]
21072119
other_chrms = np.array([other._seqinfo._seqnames[other._seqnames[i]] for i in _other_indexes])
21082120
self_chrms = np.array([self._seqinfo._seqnames[self._seqnames[i]] for i in _self_indexes])
21092121

0 commit comments

Comments
 (0)