66import os
77from typing import List , Optional , Union
88
9- import genomepy
109import pandas as pd
10+ import pyfaidx
11+ import genomepy
12+
13+
class CustomGenome:
    """
    A custom genome object that can be used to load a genome from a file.

    The FASTA file is opened with ``pyfaidx.Fasta``; sequence lookups are
    delegated to that object, and the chromosome sizes file is expected at
    ``<genome>.sizes``.

    Args:
        genome: Path to the genome FASTA file.
        **kwargs: Additional keyword arguments forwarded to ``pyfaidx.Fasta``.
            ``rebuild`` defaults to ``False`` unless given explicitly.

    Raises:
        FileNotFoundError: If ``<genome>.fai`` does not exist once the FASTA
            file has been opened.
    """

    def __init__(self, genome: str, **kwargs):
        # Accept path-like objects too: the string concatenations below
        # would otherwise fail on anything that is not a plain str.
        genome = os.fspath(genome)
        self.genome = genome

        # Keep "never rebuild the index" as the default while still letting a
        # caller override it; setdefault avoids passing the keyword twice.
        kwargs.setdefault("rebuild", False)
        self._genome = pyfaidx.Fasta(genome, **kwargs)

        fai_file = genome + ".fai"
        # NOTE(review): pyfaidx appears to create a missing index when the
        # FASTA is opened, in which case this check only fires if that did
        # not happen -- confirm whether the check should run before opening.
        if not os.path.isfile(fai_file):
            raise FileNotFoundError(
                f"Genome index file {fai_file} not found. "
                f"Generate one with: `samtools faidx {genome}`."
            )
        # Only the expected location is recorded here; existence is checked
        # lazily by the `sizes_file` property.
        self._sizes_file = genome + ".sizes"

    def get_seq(self, chrom: str, start: int, end: int, rc: bool = False) -> str:
        """
        Get the sequence for a given chromosome and interval.

        Args:
            chrom: Name of the chromosome (FASTA record).
            start: Start coordinate, passed unchanged to pyfaidx.
            end: End coordinate, passed unchanged to pyfaidx.
            rc: If True, request the reverse complement.

        Returns:
            The value returned by ``pyfaidx.Fasta.get_seq``.
            NOTE(review): pyfaidx documents these coordinates as 1-based and
            end-inclusive, and returns a ``Sequence`` object rather than a
            plain ``str`` -- confirm against callers.
        """
        return self._genome.get_seq(chrom, start, end, rc=rc)

    @property
    def sizes_file(self) -> str:
        """
        Path to the chromosome sizes file (``<genome>.sizes``).

        Raises:
            FileNotFoundError: If the sizes file does not exist.
        """
        if not os.path.isfile(self._sizes_file):
            raise FileNotFoundError(
                f"Chromosome sizes file {self._sizes_file} not found. "
                "Please provide a genome name or a path to a chromosome sizes file. "
                f"Or generate one with: `faidx -i chromsizes {self.genome} > {self._sizes_file}`."
            )
        return self._sizes_file
1148
1249
def read_sizes(genome: str = "hg38") -> pd.DataFrame:
    """
    Load the chromosome sizes of a genome as a table.

    Args:
        genome: Genome name or path to a genome file. It is resolved with
            `get_genome`, and the `sizes_file` of the resulting object is read.

    Returns:
        A dataframe with the columns "chrom" (chromosome name)
        and "size" (chromosome size).
    """
    # Resolve the genome to the location of its headerless sizes table
    sizes_path = get_genome(genome).sizes_file

    # One mapping supplies both the column names and their dtypes
    column_types = {"chrom": str, "size": int}
    return pd.read_table(
        sizes_path,
        header=None,
        names=list(column_types),
        dtype=column_types,
    )
3468
3569
def get_genome(genome: str, **kwargs) -> Union[CustomGenome, genomepy.Genome]:
    """
    Load a genome from a local file, or from genomepy (installing it if needed).

    Args:
        genome: Path to an existing genome file, or the name of a genomepy genome.
        **kwargs: Additional keyword arguments, forwarded unchanged to
            whichever of `CustomGenome`, `genomepy.Genome` or
            `genomepy.install_genome` ends up being called.

    Returns:
        Genome object
    """
    # An existing file on disk takes precedence over genomepy names
    if os.path.isfile(genome):
        return CustomGenome(genome, **kwargs)

    # Already installed through genomepy: just load it
    if genome in genomepy.list_installed_genomes():
        return genomepy.Genome(genome, **kwargs)

    # Otherwise install it (without annotation) and return the result
    return genomepy.install_genome(genome, annotation=False, **kwargs)
5288
5389def read_gtf (
5490 genome : str , features : Optional [Union [str , List [str ]]] = None
0 commit comments