-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path1.CodingFraction_dist.sh
More file actions
executable file
·46 lines (35 loc) · 1.44 KB
/
1.CodingFraction_dist.sh
File metadata and controls
executable file
·46 lines (35 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/bin/bash
#####################################
#### Filter putative mtDNA contigs (works also on scaffolds) based on the coding fraction.
#### Takes as input a directory containing all the files generated by the MITOS webserver.
#### From the MITOS webserver, download the raw data folder.
#### For each contig in each sample, there is a different raw data folder.
####
#### Usage: bash 1.CodingFraction_dist.sh Directory Prefix Output
#### - Directory = directory with all MITOS raw data folders
#### - Prefix = ID of the sample being processed
#### - Output = Complete path to the output folder of your choice
####
#### Francesco Cicconardi (2016)
#####################################
### Getting script arguments
MITODIR=$1
PREFIX=$2
OUTPUT=$3
### Changing directory, remove old tmp
cd $MITODIR
rm tmp.coding.frac
### Looping over alls contigs to calculate the coding fraction
ls -1 | while read contig
do
cd $contig
export SCF_len=`grep -v '^>' sequence.fas | awk '{sum+=(length$1)} END {print sum}'`
export ANN_len=`grep gene result | cut -f 5,6 | awk '{sum+=($2-$1)} END {print sum+0.00000000001}'`
echo $contig $ANN_len $SCF_len | awk '{frac=$2/$3} END {print $1, frac}' >> $OUTPUT/tmp.coding.frac
cd ..
done
cd ..
### Generating output
cat $OUTPUT/tmp.coding.frac | sort -k2,2rg > $OUTPUT/$PREFIX.Coding_len_frac.dat
### Removing mess
rm $OUTPUT/tmp.coding.frac