@@ -6,30 +6,30 @@ from joblib import Parallel, delayed
66def binned_gc (fasta_path ,contig ,bin_size ,n_cutoff ):
77 fasta= pysam.FastaFile(fasta_path)
88 contig_length= fasta.get_reference_length(contig)
9- elements = int (math.ceil(contig_length/ bin_size))
9+ number_of_bins = int (math.ceil(contig_length/ bin_size))
1010
11- contig_gc= numpy.zeros(elements ,dtype = numpy.int8)
11+ contig_gc= numpy.zeros(number_of_bins ,dtype = numpy.int8)
1212
13- start = 0
14- for i in range (0 ,elements ):
15- slice = fasta.fetch(contig, start, start + bin_size)
13+ next_start = 0
14+ for bin in range (0 ,number_of_bins ):
15+ slice = fasta.fetch(contig, next_start, next_start + bin_size)
1616 n= 0
1717 gc= 0
18+ number_of_chars= 0
1819
19- for charachter in slice :
20- if charachter == " N" or charachter == " n" :
20+ for character in slice :
21+ number_of_chars += 1
22+ if character == " N" or character == " n" :
2123 n+= 1
22- elif charachter == " C" or charachter == " c" or charachter == " G" or charachter == " g" :
24+ elif character == " C" or character == " c" or character == " G" or character == " g" :
2325 gc+= 1
2426
2527 if n/ bin_size > n_cutoff:
26- contig_gc[i]= - 1
27-
28+ contig_gc[bin]= - 1
2829 else :
29- contig_gc[i]= round (100 * gc/ elements)
30-
31- start+= bin_size
30+ contig_gc[bin] = round (100 * gc/ number_of_chars)
3231
32+ next_start+= bin_size
3333 return ([contig,contig_gc])
3434
3535def main (reference ,contigs ,threads ,bin_size ,n_cutoff ):
0 commit comments