-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDNM_extract_groups.sh
More file actions
22 lines (19 loc) · 1.74 KB
/
DNM_extract_groups.sh
File metadata and controls
22 lines (19 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# DNM_extract_groups.sh
#
# Count the rows of a DNM table by substitution group and CpG status.
#
# Usage: DNM_extract_groups.sh [input_file [output_dir]]
#   input_file  defaults to ./DNM/DNM data - new/DNMsByStudy.txt
#   output_dir  defaults to ./outdata (created if missing)
#
# Each input row is classified by the pair of letters found in columns 5 and 6
# (presumably reference and alternate base -- confirm against the data file):
#   AT    A<->T
#   CG    C<->G
#   AGCT  A<->G or C<->T
#   ACGT  A<->C or G<->T
# and by column 7, which must be exactly "CpG" or "Non-CpG". Rows that match
# no group or neither status (a header line, for instance) are ignored.
#
# Three two-column, tab-separated files (group, count) are written:
#   DNM_groups_not.in.CpG.tsv     rows whose status is "Non-CpG"
#   DNM_groups_in.CpG.tsv         rows whose status is "CpG"
#   DNM_groups_no.CpG.status.tsv  both statuses combined
#
# NOTE(review): columns are split on runs of whitespace (the awk default), as
# in the original script. If the input is tab-delimited and any of the first
# six columns can contain spaces, columns 5-7 will shift -- confirm, and add
# -F '\t' to the awk call if so.

#######################################
# Tally substitution groups from one input table.
# Arguments: $1 - path of the input table
#            $2 - directory receiving the three .tsv files
# Outputs:   the three .tsv files; diagnostics on stderr
# Returns:   0 on success, non-zero if the input is unreadable, the output
#            directory cannot be created, or awk fails
#######################################
extract_dnm_groups() {
  local input=$1
  local outdir=$2

  # Fail before touching the filesystem if there is nothing to read.
  if [ ! -r "$input" ]; then
    printf 'DNM_extract_groups: cannot read input file: %s\n' "$input" >&2
    return 1
  fi
  # awk cannot create missing directories for its output redirections.
  mkdir -p -- "$outdir" || return 1

  # The input is fed on stdin so that a path containing "=" is never
  # mistaken by awk for a variable assignment operand.
  awk -v outdir="$outdir" '
    # Register every two-letter entry of "pairs" (e.g. "AG") as belonging
    # to the group "label", keyed by (column 5, column 6).
    function add_group(label, pairs,    parts, n, i) {
      n = split(pairs, parts, " ")
      for (i = 1; i <= n; i++)
        group[substr(parts[i], 1, 1), substr(parts[i], 2, 1)] = label
    }

    BEGIN {
      add_group("AT",   "AT TA")
      add_group("CG",   "CG GC")
      add_group("AGCT", "AG GA CT TC")
      add_group("ACGT", "AC CA GT TG")
      # Fixed row order of the report files.
      ngroups = split("AT CG AGCT ACGT", order, " ")
    }

    # Only rows with a recognised CpG status contribute, including to the
    # combined totals.
    $7 == "CpG" || $7 == "Non-CpG" {
      if (($5, $6) in group) {
        g = group[$5, $6]
        count[$7, g]++
        total[g]++
      }
    }

    END {
      f_non = outdir "/DNM_groups_not.in.CpG.tsv"
      f_cpg = outdir "/DNM_groups_in.CpG.tsv"
      f_all = outdir "/DNM_groups_no.CpG.status.tsv"
      for (i = 1; i <= ngroups; i++) {
        g = order[i]
        # printf with %d keeps the separator a bare tab (print with commas
        # would insert OFS spaces) and renders never-seen groups as 0
        # rather than as an empty field.
        printf("%s\t%d\n", g, count["Non-CpG", g]) > f_non
        printf("%s\t%d\n", g, count["CpG", g]) > f_cpg
        printf("%s\t%d\n", g, total[g]) > f_all
      }
    }
  ' < "$input"
}

extract_dnm_groups "${1:-./DNM/DNM data - new/DNMsByStudy.txt}" "${2:-./outdata}"