|
8 | 8 | import os, sys, json, csv, mysql.connector,datetime,math |
9 | 9 |
|
10 | 10 | """ |
11 | | -The purpose of this file is to determine the codon actually adaptation indices (CAAI) for individual species.CAAI is Codon Adaptation Index (1987 version) contolled for Amino Acid composition and potential mutation bias within codon usuage. |
| 11 | +The purpose of this file is to determine the codon adaptation index of species (CAIS) for individual species. CAAI is Codon Adaptation Index (1987 version) controlled for Amino Acid composition and potential mutation bias within codon usage. |
12 | 12 | This is a metric that describes codon usage patterns. |
13 | | -CAAI is calculated in the following way: |
| 13 | +CAIS is calculated in the following way: |
14 | 14 | |
15 | 15 | I) |
16 | 16 | |
|
37 | 37 | |
38 | 38 | II) Calculate the Amino Acid controlled codon frequency |
39 | 39 | |
40 | | - 7) |
| 40 | + III) Calculate unweighted CAI (CAAI) |
| 41 | +
|
| 42 | + IV) Calculate amino acid frequency controlled CAIS (Weighted_fi_CAAI) |
| 43 | + |
41 | 44 | |
42 | 45 | """ |
43 | 46 |
|
|
528 | 531 | # the RSCU_i value to one for all codons corresponding to that amino acid. We do this because later we'll |
529 | 532 | # take the geometric mean of all relative adaptedness values, and multiplying by one will not affect this value. |
530 | 533 | # In essence, setting them to one "silences" these. |
| 534 | + |
531 | 535 | if Sum[AA] != 0: |
532 | 536 | if Codon in ['TTA','TAT','ATT','AAT','ATA','TAA','AAA','TTT']: |
533 | 537 | Prob = (notGC_total_prob*notGC_total_prob*notGC_total_prob)*0.125 |
|
622 | 626 | print(RelativeAdaptednessTable) |
623 | 627 | sys.stdout.flush() |
624 | 628 |
|
625 | | - #CAI |
| 629 | + #CAAI |
626 | 630 | LogOfCAAI = 0 |
627 | 631 | for AA in RawCount: |
628 | 632 | for Codon in RawCount[AA]: |
|
636 | 640 | CAAI = math.exp(LogOfCAAI) |
637 | 641 |
|
638 | 642 |
|
639 | | - #Weighted wi CAI |
| 643 | + #Weighted wi CAAI |
640 | 644 | Weighted_wi_Log_ofCAAI = 0 |
641 | 645 | for AA in RawCount: |
642 | 646 | for Codon in RawCount[AA]: |
|
659 | 663 | print(Weighted_wi_Log_ofCAAI) |
660 | 664 | sys.stdout.flush() |
661 | 665 |
|
662 | | - # and we invert the log to get the CAI |
| 666 | + # and we invert the log to get the CAAI |
663 | 667 | Weighted_wi_CAAI = math.exp(Weighted_wi_Log_ofCAAI) |
664 | 668 | if Verbose == True: |
665 | 669 | print(Weighted_wi_Log_ofCAAI) |
|
668 | 672 |
|
669 | 673 |
|
670 | 674 |
|
671 | | - #Weighted codon frequency CAI |
| 675 | + #Weighted codon frequency CAIS |
672 | 676 | Weighted_fi_Log_ofCAAI = 0 |
673 | 677 | for AA in RawCount: |
674 | 678 | for Codon in RawCount[AA]: |
|
691 | 695 | print(Weighted_fi_Log_ofCAAI) |
692 | 696 | sys.stdout.flush() |
693 | 697 |
|
694 | | - # and we invert the log to get the CAI |
| 698 | + # and we invert the log to get the CAIS |
695 | 699 | Weighted_fi_CAAI = math.exp(Weighted_fi_Log_ofCAAI) |
696 | 700 | if Verbose == True: |
697 | 701 | print(Weighted_fi_Log_ofCAAI) |
698 | 702 | sys.stdout.flush() |
699 | 703 |
|
| 704 | + # Weighted_fi_CAAI is the amino-acid-frequency-weighted, total-GC-content-weighted CAIS |
| 705 | + print("%s,%s,%s"%(i,CAAI,Weighted_fi_CAAI)) |
700 | 706 | #print("%s,%s,%s,%s"%(i,CAAI,Weighted_wi_CAAI,Weighted_fi_CAAI)) |
701 | | - print("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"%(i,Weighted_fi_CAAI,Codon_Summed_freqTable['F'],Codon_Summed_freqTable['L'],Codon_Summed_freqTable['S'],Codon_Summed_freqTable['Y'],Codon_Summed_freqTable['*'],Codon_Summed_freqTable['C'],Codon_Summed_freqTable['W'],Codon_Summed_freqTable['P'],Codon_Summed_freqTable['H'],Codon_Summed_freqTable['Q'],Codon_Summed_freqTable['R'],Codon_Summed_freqTable['I'],Codon_Summed_freqTable['M'],Codon_Summed_freqTable['T'],Codon_Summed_freqTable['N'],Codon_Summed_freqTable['K'],Codon_Summed_freqTable['V'],Codon_Summed_freqTable['A'],Codon_Summed_freqTable['D'],Codon_Summed_freqTable['E'],Codon_Summed_freqTable['G'])) |
| 707 | + #print("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"%(i,Weighted_fi_CAAI,Codon_Summed_freqTable['F'],Codon_Summed_freqTable['L'],Codon_Summed_freqTable['S'],Codon_Summed_freqTable['Y'],Codon_Summed_freqTable['*'],Codon_Summed_freqTable['C'],Codon_Summed_freqTable['W'],Codon_Summed_freqTable['P'],Codon_Summed_freqTable['H'],Codon_Summed_freqTable['Q'],Codon_Summed_freqTable['R'],Codon_Summed_freqTable['I'],Codon_Summed_freqTable['M'],Codon_Summed_freqTable['T'],Codon_Summed_freqTable['N'],Codon_Summed_freqTable['K'],Codon_Summed_freqTable['V'],Codon_Summed_freqTable['A'],Codon_Summed_freqTable['D'],Codon_Summed_freqTable['E'],Codon_Summed_freqTable['G'])) |
702 | 708 |
|
703 | 709 | cnx.close() |
704 | 710 |
|
|
0 commit comments