Skip to content

Commit ab1373e

Browse files
committed
fix promoter quantification #38
1 parent 61b65b6 commit ab1373e

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

workflow/scripts/get_promoter_regions.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pybedtools as bedtools
55

66
# extract promoter regions
7-
def get_promoter(feature, upstream, downstream):
7+
def get_promoter(feature, upstream, downstream, chrom_sizes):
88
if feature.strand == '+':
99
start = feature.start - upstream
1010
end = feature.start + downstream
@@ -14,6 +14,8 @@ def get_promoter(feature, upstream, downstream):
1414

1515
# Ensure start is not negative
1616
start = max(start, 0)
17+
# Ensure end is not longer than chromosome
18+
end = min(end, chrom_sizes.get(feature.chrom, end))
1719

1820
# Extract gene_id and remove version number if present
1921
gene_id = feature.attrs['gene_id'].split('.')[0]
@@ -46,15 +48,19 @@ def get_promoter(feature, upstream, downstream):
4648

4749
# load the genome annotation file using pybedtools
4850
gtf = bedtools.BedTool(gtf_file)
49-
# load and get list of valid chromosomes
51+
52+
# load and get list of valid chromosomes and sizes
53+
chrom_sizes = {}
5054
with open(chrom_file, 'r') as f:
51-
valid_chromosomes = {line.split('\t')[0] for line in f}
55+
for line in f:
56+
chrom, size = line.strip().split('\t')
57+
chrom_sizes[chrom] = int(size)
5258

5359
# filter for features that are genes and create promoters
54-
promoters = gtf.filter(lambda x: x[2] == 'gene').each(get_promoter, TSS_up, TSS_dn)
60+
promoters = gtf.filter(lambda x: x[2] == 'gene').each(get_promoter, TSS_up, TSS_dn, chrom_sizes)
5561

5662
# filter for valid chromosomes
57-
promoters = promoters.filter(lambda x: x.chrom in valid_chromosomes)
63+
promoters = promoters.filter(lambda x: x.chrom in chrom_sizes)
5864

5965
# sort promoter regions
6066
promoters = promoters.sort(faidx=chrom_file)

0 commit comments

Comments
 (0)