AstrobioMike
diff --git a/‎bin/GToTree‎
Lines changed: 268 additions & 74 deletions b/‎bin/GToTree‎
Lines changed: 268 additions & 74 deletions
diff --git a/‎bin/GToTree-gen-iToL-map‎
Lines changed: 38 additions & 0 deletions b/‎bin/GToTree-gen-iToL-map‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎bin/gtt-amino-acid-parallel.sh‎
Lines changed: 42 additions & 2 deletions b/‎bin/gtt-amino-acid-parallel.sh‎
Lines changed: 42 additions & 2 deletions
diff --git a/‎bin/gtt-amino-acid-serial.sh‎
Lines changed: 41 additions & 2 deletions b/‎bin/gtt-amino-acid-serial.sh‎
Lines changed: 41 additions & 2 deletions
diff --git a/‎bin/gtt-fasta-parallel.sh‎
Lines changed: 41 additions & 2 deletions b/‎bin/gtt-fasta-parallel.sh‎
Lines changed: 41 additions & 2 deletions
diff --git a/‎bin/gtt-fasta-serial.sh‎
Lines changed: 42 additions & 2 deletions b/‎bin/gtt-fasta-serial.sh‎
Lines changed: 42 additions & 2 deletions
diff --git a/‎bin/gtt-genbank-parallel.sh‎
Lines changed: 42 additions & 3 deletions b/‎bin/gtt-genbank-parallel.sh‎
Lines changed: 42 additions & 3 deletions
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+"""Create an iToL DATASET_STYLE file that colors the branches of the specified genomes."""
+from __future__ import print_function
+
+import sys
+import argparse
+
+parser = argparse.ArgumentParser(description='This script is for creating a standard iToL color file given the genomes of interest and the "All_genomes_summary_info.tsv" output file from a normal GToTree run.')
+
+required = parser.add_argument_group('required arguments')
+
+required.add_argument("-s", "--all_genomes_summary", help="All_genomes_summary_info.tsv file from a typical GToTree run", action="store", dest="summary", required=True)
+
+required.add_argument("-g", "--target_genomes", help="Single-column file with the genomes to color (should match their initial IDs when given to GToTree, e.g. input file with no extension, or NCBI accessions)", action="store", dest="target_genomes", required=True)
+parser.add_argument("-o", "--output_file", help='Output file for iToL (default: "iToL-colors.txt")', action="store", dest="output_file", default="iToL-colors.txt")
+
+# with no arguments at all, print the full help rather than argparse's short usage error
+if len(sys.argv)==1:
+    parser.print_help(sys.stderr)
+    sys.exit(1)
+
+args = parser.parse_args()
+
+# a set gives constant-time membership checks; blank lines in the genomes file are skipped
+with open(args.target_genomes, "r") as target_genomes:
+    target_set = {genome.strip() for genome in target_genomes if genome.strip()}
+
+# both files are managed by "with" so the output is flushed and closed even if an error occurs
+with open(args.summary) as summary, open(args.output_file, "w") as out_file:
+
+    out_file.write("DATASET_STYLE\nSEPARATOR TAB\nDATASET_LABEL\tGToTree\nCOLOR\t#0000ff\nDATA\n")
+
+    for line in summary:
+        fields = line.rstrip("\r\n").split("\t")
+
+        # column 1 is matched against the targets, column 2 is written as the entry ID; each entry needs its own line
+        if len(fields) > 1 and fields[0] in target_set:
+            out_file.write(str(fields[1]) + "\tbranch\tnode\t#0000ff\t3\tnormal\n")
@@ -11,6 +11,7 @@ num_cpus=$4
 hmm_target_genes_total=$5
 output_dir=$6
 best_hit_mode=$7
+additional_pfam_targets=$8
 
 
 ### kill backstop
@@ -71,7 +72,7 @@ awk -F "\t" ' $2 == 1 ' ${tmp_dir}/${assembly}_conservative_filtering_counts_tab
 uniq_SCG_hits=$(wc -l ${tmp_dir}/${assembly}_conservative_target_unique_hmm_names.tmp | sed 's/^ *//' | cut -f 1 -d " ")
 
 ## adding SCG-hit counts to table
-paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/All_genomes_SCG_hit_counts.tsv
+paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/SCG_hit_counts.tsv
 
 num_SCG_hits=$(awk ' $1 > 0 ' ${tmp_dir}/${assembly}_uniq_counts.tmp | wc -l | tr -s " " | cut -f2 -d " ")
 
@@ -99,7 +100,7 @@ if [ ${mult_perc_redund_rnd} -ge 1000 ]; then
     printf "   going over 10%% is getting into the questionable range. You may want to\n"
     printf "   consider taking a closer look and/or removing it from the input genomes.\n\n"
 
-    printf "   Reported in \"${output_dir}/Genomes_with_questionable_redund_estimates.tsv\".\n"
+    printf "   Reported in \"${output_dir}/run_files/Genomes_with_questionable_redund_estimates.tsv\".\n"
     printf "  ${RED}****************************************************************************${NC}  \n\n"
 
     # writing to table of genomes with questionable redundancy estimates
@@ -137,4 +138,43 @@ else
 
 fi
 
+## searching for additional targets if provided
+if [ "${additional_pfam_targets}" == "true" ]; then
+    # (flag is quoted so an empty or unset value just skips this section instead of causing a `[` syntax error)
+    ### counting how many genes in this genome
+    gene_count=$(grep -c ">" ${tmp_dir}/${assembly}_genes.tmp)
+    ### searching this genome's genes against all_targets.hmm, thresholding with each model's GA (gathering) cutoffs
+    hmmsearch --cut_ga --cpu $num_cpus --tblout ${tmp_dir}/${assembly}_curr_hmm_hits.tmp ${tmp_dir}/all_targets.hmm ${tmp_dir}/${assembly}_genes.tmp > /dev/null
+
+    ### getting counts of each target in this genome
+    for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+    do
+        grep -w ${target} ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | wc -l | sed 's/^ *//' >> ${tmp_dir}/${assembly}_hit_counts.tmp
+    done
+
+    ### writing results to main output file
+    paste <( printf "${assembly}\tNA\t${gene_count}" ) <(printf %s "$(cat ${tmp_dir}/${assembly}_hit_counts.tmp | tr "\n" "\t") " )  >> ${output_dir}/additional_pfam_search_results/Additional_Pfam_hit_counts.tsv
+
+    ### Pulling out hits to additional pfam targets for this genome ###
+    for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+    do
+        if grep -w -q "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp; then
+            # first space-delimited field of each matching hit line is taken as the gene ID
+            grep -w "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | cut -f 1 -d " " >> ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp
+            # fetching each of those genes' sequences from this genome's gene file
+            for gene in $(cat ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp)
+            do
+                echo $gene | esl-sfetch -f ${tmp_dir}/${assembly}_genes.tmp -
+            done >> ${tmp_dir}/${assembly}_${target}_genes1.tmp
+
+            gtt-append-fasta-headers -i ${tmp_dir}/${assembly}_${target}_genes1.tmp -w ${assembly}_${target} -o ${tmp_dir}/${assembly}_${target}_genes.tmp
+
+            # adding to fasta of that target holding all genomes
+            cat ${tmp_dir}/${assembly}_${target}_genes.tmp >> ${output_dir}/additional_pfam_search_results/${target}_hits.faa
+        fi
+
+    done
+fi
+
+
 rm -rf ${tmp_dir}/${assembly}_*.tmp ${tmp_dir}/${assembly}_genes.tmp.ssi
@@ -12,6 +12,7 @@ num_cpus=$5
 hmm_target_genes_total=$6
 output_dir=$7
 best_hit_mode=$8
+additional_pfam_targets=$9
 
 # looping through the lines of the provided [-f] file (this loop operates on one genome at a time)
 while IFS=$'\t' read -r -a file
@@ -77,7 +78,7 @@ do
     uniq_SCG_hits=$(wc -l ${tmp_dir}/${assembly}_conservative_target_unique_hmm_names.tmp | sed 's/^ *//' | cut -f 1 -d " ")
 
     ## adding SCG-hit counts to table
-    paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/All_genomes_SCG_hit_counts.tsv
+    paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/SCG_hit_counts.tsv
 
     num_SCG_hits=$(awk ' $1 > 0 ' ${tmp_dir}/${assembly}_uniq_counts.tmp | wc -l | tr -s " " | cut -f2 -d " ")
     num_SCG_redund=$(awk '{ if ($1 == 0) { print $1 } else { print $1 - 1 } }' ${tmp_dir}/${assembly}_uniq_counts.tmp | awk '{ sum += $1 } END { print sum }')
@@ -104,7 +105,7 @@ do
         printf "   going over 10%% is getting into the questionable range. You may want to\n"
         printf "   consider taking a closer look and/or removing it from the input genomes.\n\n"
 
-        printf "   Reported in \"${output_dir}/Genomes_with_questionable_redund_estimates.tsv\".\n"
+        printf "   Reported in \"${output_dir}/run_files/Genomes_with_questionable_redund_estimates.tsv\".\n"
         printf "  ${RED}****************************************************************************${NC}  \n\n"
 
         # writing to table of genomes with questionable redundancy estimates
@@ -144,6 +145,44 @@ do
 
     fi
 
+    ## searching for additional targets if provided
+    if [ "${additional_pfam_targets}" == "true" ]; then
+        # (flag is quoted so an empty or unset value just skips this section instead of causing a `[` syntax error)
+        ### counting how many genes in this genome
+        gene_count=$(grep -c ">" ${tmp_dir}/${assembly}_genes.tmp)
+        ### searching this genome's genes against all_targets.hmm, thresholding with each model's GA (gathering) cutoffs
+        hmmsearch --cut_ga --cpu $num_cpus --tblout ${tmp_dir}/${assembly}_curr_hmm_hits.tmp ${tmp_dir}/all_targets.hmm ${tmp_dir}/${assembly}_genes.tmp > /dev/null
+
+        ### getting counts of each target in this genome
+        for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+        do
+            grep -w ${target} ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | wc -l | sed 's/^ *//' >> ${tmp_dir}/${assembly}_hit_counts.tmp
+        done
+
+        ### writing results to main output file
+        paste <( printf "${assembly}\tNA\t${gene_count}" ) <(printf %s "$(cat ${tmp_dir}/${assembly}_hit_counts.tmp | tr "\n" "\t") " )  >> ${output_dir}/additional_pfam_search_results/Additional_Pfam_hit_counts.tsv
+
+        ### Pulling out hits to additional pfam targets for this genome ###
+        for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+        do
+            if grep -w -q "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp; then
+                # first space-delimited field of each matching hit line is taken as the gene ID
+                grep -w "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | cut -f 1 -d " " >> ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp
+                # fetching each of those genes' sequences from this genome's gene file
+                for gene in $(cat ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp)
+                do
+                    echo $gene | esl-sfetch -f ${tmp_dir}/${assembly}_genes.tmp -
+                done >> ${tmp_dir}/${assembly}_${target}_genes1.tmp
+
+                gtt-append-fasta-headers -i ${tmp_dir}/${assembly}_${target}_genes1.tmp -w ${assembly}_${target} -o ${tmp_dir}/${assembly}_${target}_genes.tmp
+
+                # adding to fasta of that target holding all genomes
+                cat ${tmp_dir}/${assembly}_${target}_genes.tmp >> ${output_dir}/additional_pfam_search_results/${target}_hits.faa
+            fi
+
+        done
+    fi
+
     rm -rf ${tmp_dir}/${assembly}_*.tmp ${tmp_dir}/${assembly}_genes.tmp.ssi
 
 done < $1
@@ -11,6 +11,7 @@ num_cpus=$4
 hmm_target_genes_total=$5
 output_dir=$6
 best_hit_mode=$7
+additional_pfam_targets=$8
 
 
 ### kill backstop
@@ -77,7 +78,7 @@ awk -F "\t" ' $2 == 1 ' ${tmp_dir}/${assembly}_conservative_filtering_counts_tab
 uniq_SCG_hits=$(wc -l ${tmp_dir}/${assembly}_conservative_target_unique_hmm_names.tmp | sed 's/^ *//' | cut -f 1 -d " ")
 
 ## adding SCG-hit counts to table
-paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/All_genomes_SCG_hit_counts.tsv
+paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/SCG_hit_counts.tsv
 
 num_SCG_hits=$(awk ' $1 > 0 ' ${tmp_dir}/${assembly}_uniq_counts.tmp | wc -l | tr -s " " | cut -f2 -d " ")
 
@@ -105,7 +106,7 @@ if [ ${mult_perc_redund_rnd} -ge 1000 ]; then
     printf "   going over 10%% is getting into the questionable range. You may want to\n"
     printf "   consider taking a closer look and/or removing it from the input genomes.\n\n"
 
-    printf "   Reported in \"${output_dir}/Genomes_with_questionable_redund_estimates.tsv\".\n"
+    printf "   Reported in \"${output_dir}/run_files/Genomes_with_questionable_redund_estimates.tsv\".\n"
     printf "  ${RED}****************************************************************************${NC}  \n\n"
 
     # writing to table of genomes with questionable redundancy estimates
@@ -143,4 +144,42 @@ else
 
 fi
 
+## searching for additional targets if provided
+if [ "${additional_pfam_targets}" == "true" ]; then
+    # (flag is quoted so an empty or unset value just skips this section instead of causing a `[` syntax error)
+    ### counting how many genes in this genome
+    gene_count=$(grep -c ">" ${tmp_dir}/${assembly}_genes.tmp)
+    ### searching this genome's genes against all_targets.hmm, thresholding with each model's GA (gathering) cutoffs
+    hmmsearch --cut_ga --cpu $num_cpus --tblout ${tmp_dir}/${assembly}_curr_hmm_hits.tmp ${tmp_dir}/all_targets.hmm ${tmp_dir}/${assembly}_genes.tmp > /dev/null
+
+    ### getting counts of each target in this genome
+    for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+    do
+        grep -w ${target} ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | wc -l | sed 's/^ *//' >> ${tmp_dir}/${assembly}_hit_counts.tmp
+    done
+
+    ### writing results to main output file
+    paste <( printf "${assembly}\tNA\t${gene_count}" ) <(printf %s "$(cat ${tmp_dir}/${assembly}_hit_counts.tmp | tr "\n" "\t") " )  >> ${output_dir}/additional_pfam_search_results/Additional_Pfam_hit_counts.tsv
+
+    ### Pulling out hits to additional pfam targets for this genome ###
+    for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+    do
+        if grep -w -q "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp; then
+            # first space-delimited field of each matching hit line is taken as the gene ID
+            grep -w "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | cut -f 1 -d " " >> ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp
+            # fetching each of those genes' sequences from this genome's gene file
+            for gene in $(cat ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp)
+            do
+                echo $gene | esl-sfetch -f ${tmp_dir}/${assembly}_genes.tmp -
+            done >> ${tmp_dir}/${assembly}_${target}_genes1.tmp
+
+            gtt-append-fasta-headers -i ${tmp_dir}/${assembly}_${target}_genes1.tmp -w ${assembly}_${target} -o ${tmp_dir}/${assembly}_${target}_genes.tmp
+
+            # adding to fasta of that target holding all genomes
+            cat ${tmp_dir}/${assembly}_${target}_genes.tmp >> ${output_dir}/additional_pfam_search_results/${target}_hits.faa
+        fi
+
+    done
+fi
+
 rm -rf ${tmp_dir}/${assembly}_*.tmp ${tmp_dir}/${assembly}_genes.tmp.ssi
@@ -12,6 +12,7 @@ num_cpus=$5
 hmm_target_genes_total=$6
 output_dir=$7
 best_hit_mode=$8
+additional_pfam_targets=$9
 
 # looping through the lines of the provided [-f] file (this loop operates on one genome at a time)
 while IFS=$'\t' read -r -a file
@@ -86,7 +87,7 @@ do
     uniq_SCG_hits=$(wc -l ${tmp_dir}/${assembly}_conservative_target_unique_hmm_names.tmp | sed 's/^ *//' | cut -f 1 -d " ")
 
     ## adding SCG-hit counts to table
-    paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/All_genomes_SCG_hit_counts.tsv
+    paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/SCG_hit_counts.tsv
 
     num_SCG_hits=$(awk ' $1 > 0 ' ${tmp_dir}/${assembly}_uniq_counts.tmp | wc -l | tr -s " " | cut -f2 -d " ")
     num_SCG_redund=$(awk '{ if ($1 == 0) { print $1 } else { print $1 - 1 } }' ${tmp_dir}/${assembly}_uniq_counts.tmp | awk '{ sum += $1 } END { print sum }')
@@ -113,7 +114,7 @@ do
         printf "   going over 10%% is getting into the questionable range. You may want to\n"
         printf "   consider taking a closer look and/or removing it from the input genomes.\n\n"
 
-        printf "   Reported in \"${output_dir}/Genomes_with_questionable_redund_estimates.tsv\".\n"
+        printf "   Reported in \"${output_dir}/run_files/Genomes_with_questionable_redund_estimates.tsv\".\n"
         printf "  ${RED}****************************************************************************${NC}  \n\n"
 
         # writing to table of genomes with questionable redundancy estimates
@@ -153,6 +154,45 @@ do
 
     fi
 
+
+    ## searching for additional targets if provided
+    if [ "${additional_pfam_targets}" == "true" ]; then
+        # (flag is quoted so an empty or unset value just skips this section instead of causing a `[` syntax error)
+        ### counting how many genes in this genome
+        gene_count=$(grep -c ">" ${tmp_dir}/${assembly}_genes.tmp)
+        ### searching this genome's genes against all_targets.hmm, thresholding with each model's GA (gathering) cutoffs
+        hmmsearch --cut_ga --cpu $num_cpus --tblout ${tmp_dir}/${assembly}_curr_hmm_hits.tmp ${tmp_dir}/all_targets.hmm ${tmp_dir}/${assembly}_genes.tmp > /dev/null
+
+        ### getting counts of each target in this genome
+        for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+        do
+            grep -w ${target} ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | wc -l | sed 's/^ *//' >> ${tmp_dir}/${assembly}_hit_counts.tmp
+        done
+
+        ### writing results to main output file
+        paste <( printf "${assembly}\tNA\t${gene_count}" ) <(printf %s "$(cat ${tmp_dir}/${assembly}_hit_counts.tmp | tr "\n" "\t") " )  >> ${output_dir}/additional_pfam_search_results/Additional_Pfam_hit_counts.tsv
+
+        ### Pulling out hits to additional pfam targets for this genome ###
+        for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+        do
+            if grep -w -q "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp; then
+                # first space-delimited field of each matching hit line is taken as the gene ID
+                grep -w "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | cut -f 1 -d " " >> ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp
+                # fetching each of those genes' sequences from this genome's gene file
+                for gene in $(cat ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp)
+                do
+                    echo $gene | esl-sfetch -f ${tmp_dir}/${assembly}_genes.tmp -
+                done >> ${tmp_dir}/${assembly}_${target}_genes1.tmp
+
+                gtt-append-fasta-headers -i ${tmp_dir}/${assembly}_${target}_genes1.tmp -w ${assembly}_${target} -o ${tmp_dir}/${assembly}_${target}_genes.tmp
+
+                # adding to fasta of that target holding all genomes
+                cat ${tmp_dir}/${assembly}_${target}_genes.tmp >> ${output_dir}/additional_pfam_search_results/${target}_hits.faa
+            fi
+
+        done
+    fi
+
     rm -rf ${tmp_dir}/${assembly}_*.tmp ${tmp_dir}/${assembly}_genes.tmp.ssi
 
 done < $1
@@ -11,6 +11,7 @@ num_cpus=$4
 hmm_target_genes_total=$5
 output_dir=$6
 best_hit_mode=$7
+additional_pfam_targets=$8
 
 
 ### kill backstop
@@ -69,7 +70,7 @@ if [ ! -s ${tmp_dir}/${assembly}_genes2.tmp ]; then
     printf "   This genbank file doesn't appear to have CDS annotations,\n"
     printf "   so we are identifying coding sequences with prodigal.\n\n"
 
-    printf "   Reported in \"${output_dir}/Genbank_files_with_no_CDSs.txt\".\n"
+    printf "   Reported in \"${output_dir}/run_files/Genbank_files_with_no_CDSs.txt\".\n"
     printf "  ${RED}****************************************************************************${NC}  \n\n"
 
     echo "$1" >> ${output_dir}/Genbank_files_with_no_CDSs.txt
@@ -118,7 +119,7 @@ awk -F "\t" ' $2 == 1 ' ${tmp_dir}/${assembly}_conservative_filtering_counts_tab
 uniq_SCG_hits=$(wc -l ${tmp_dir}/${assembly}_conservative_target_unique_hmm_names.tmp | sed 's/^ *//' | cut -f 1 -d " ")
 
 ## adding SCG-hit counts to table
-paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/All_genomes_SCG_hit_counts.tsv
+paste <(printf $assembly) <(printf %s "$(cat ${tmp_dir}/${assembly}_uniq_counts.tmp | tr "\n" "\t")") >> ${output_dir}/SCG_hit_counts.tsv
 
 num_SCG_hits=$(awk ' $1 > 0 ' ${tmp_dir}/${assembly}_uniq_counts.tmp | wc -l | tr -s " " | cut -f2 -d " ")
 num_SCG_redund=$(awk '{ if ($1 == 0) { print $1 } else { print $1 - 1 } }' ${tmp_dir}/${assembly}_uniq_counts.tmp | awk '{ sum += $1 } END { print sum }')
@@ -145,7 +146,7 @@ if [ ${mult_perc_redund_rnd} -ge 1000 ]; then
     printf "   going over 10%% is getting into the questionable range. You may want to\n"
     printf "   consider taking a closer look and/or removing it from the input genomes.\n\n"
 
-    printf "   Reported in \"${output_dir}/Genomes_with_questionable_redund_estimates.tsv\".\n"
+    printf "   Reported in \"${output_dir}/run_files/Genomes_with_questionable_redund_estimates.tsv\".\n"
     printf "  ${RED}****************************************************************************${NC}  \n\n"
 
     # writing to table of genomes with questionable redundancy estimates
@@ -180,4 +181,42 @@ else
 
 fi
 
+## searching for additional targets if provided
+if [ "${additional_pfam_targets}" == "true" ]; then
+    # (flag is quoted so an empty or unset value just skips this section instead of causing a `[` syntax error)
+    ### counting how many genes in this genome
+    gene_count=$(grep -c ">" ${tmp_dir}/${assembly}_genes.tmp)
+    ### searching this genome's genes against all_targets.hmm, thresholding with each model's GA (gathering) cutoffs
+    hmmsearch --cut_ga --cpu $num_cpus --tblout ${tmp_dir}/${assembly}_curr_hmm_hits.tmp ${tmp_dir}/all_targets.hmm ${tmp_dir}/${assembly}_genes.tmp > /dev/null
+
+    ### getting counts of each target in this genome
+    for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+    do
+        grep -w ${target} ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | wc -l | sed 's/^ *//' >> ${tmp_dir}/${assembly}_hit_counts.tmp
+    done
+
+    ### writing results to main output file
+    paste <( printf "${assembly}\tNA\t${gene_count}" ) <(printf %s "$(cat ${tmp_dir}/${assembly}_hit_counts.tmp | tr "\n" "\t") " )  >> ${output_dir}/additional_pfam_search_results/Additional_Pfam_hit_counts.tsv
+
+    ### Pulling out hits to additional pfam targets for this genome ###
+    for target in $(cat ${tmp_dir}/actual_pfam_targets.tmp)
+    do
+        if grep -w -q "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp; then
+            # first space-delimited field of each matching hit line is taken as the gene ID
+            grep -w "$target" ${tmp_dir}/${assembly}_curr_hmm_hits.tmp | cut -f 1 -d " " >> ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp
+            # fetching each of those genes' sequences from this genome's gene file
+            for gene in $(cat ${tmp_dir}/${assembly}_${target}_genes_of_int.tmp)
+            do
+                echo $gene | esl-sfetch -f ${tmp_dir}/${assembly}_genes.tmp -
+            done >> ${tmp_dir}/${assembly}_${target}_genes1.tmp
+
+            gtt-append-fasta-headers -i ${tmp_dir}/${assembly}_${target}_genes1.tmp -w ${assembly}_${target} -o ${tmp_dir}/${assembly}_${target}_genes.tmp
+
+            # adding to fasta of that target holding all genomes
+            cat ${tmp_dir}/${assembly}_${target}_genes.tmp >> ${output_dir}/additional_pfam_search_results/${target}_hits.faa
+        fi
+
+    done
+fi
+
 rm -rf ${tmp_dir}/${assembly}_*.tmp ${tmp_dir}/${assembly}_genes.tmp.ssi