@@ -6,30 +6,31 @@ from joblib import Parallel, delayed
66def binned_gc (fasta_path ,contig ,bin_size ,n_cutoff ):
77 fasta= pysam.FastaFile(fasta_path)
88 contig_length= fasta.get_reference_length(contig)
9- elements = int (math.ceil(contig_length/ bin_size))
9+ number_of_bins = int (math.ceil(contig_length/ bin_size))
1010
11- contig_gc= numpy.zeros(elements ,dtype = numpy.int8)
11+ contig_gc= numpy.zeros(number_of_bins ,dtype = numpy.int8)
1212
13- start = 0
14- for i in range (0 ,elements ):
15- slice = fasta.fetch(contig, start, start + bin_size)
13+ next_start = 0
14+ for bin in range (0 ,number_of_bins ):
15+ slice = fasta.fetch(contig, next_start, next_start + bin_size)
1616 n= 0
1717 gc= 0
18+ number_of_chars= 0
1819
19- for charachter in slice :
20- if charachter == " N" or charachter == " n" :
20+ for character in slice :
21+ number_of_chars += 1
22+ if character == " N" or character == " n" :
2123 n+= 1
22- elif charachter == " C" or charachter == " c" or charachter == " G" or charachter == " g" :
24+ elif character == " C" or character == " c" or character == " G" or character == " g" :
2325 gc+= 1
2426
2527 if n/ bin_size > n_cutoff:
26- contig_gc[i]= - 1
27-
28+ contig_gc[bin]= - 1
2829 else :
29- contig_gc[i] = round (100 * gc/ elements )
30-
31- start += bin_size
32-
30+ result = round (100 * gc/ number_of_chars )
31+ contig_gc[bin] = result
32+
33+ next_start += bin_size
3334 return ([contig,contig_gc])
3435
3536def main (reference ,contigs ,threads ,bin_size ,n_cutoff ):
0 commit comments