forked from jmeneghin/perl-for-reysenbach-lab
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathblast_found.pl
More file actions
161 lines (159 loc) · 5.95 KB
/
blast_found.pl
File metadata and controls
161 lines (159 loc) · 5.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/perl -w
####################################################################################################
### BLAST FOUND 1.0 ###
### Usage: blast_found.pl <blast output file> <query file> ###
### This program takes a blast output file in short format as it's first parameter, ###
### and the query file (in fasta format) as it's second parameter. ###
### It returns two fasta files, one with all records found by blast, and one with all not found. ###
### ###
### Jennifer Meneghin ###
### January 28, 2009 ###
### ###
### BLAST FOUND 2.0 ###
### Usage: blast_found.pl -i blast_output_file -f fasta_query_file -n n ###
### Updates: ###
### Changed the how the parameters work to be more consistent with other scripts ###
### Added a new optional flag which should be either: -n y -n n ###
### -n y means returns the two fasta files as before (default) ###
### -n n means return only the records found, but not the records not found (one file only) ###
### ###
### Jennifer Meneghin ###
### August 4, 2010 ###
####################################################################################################
#--------------------------------------------------------------------------------------------------------------------------
#Deal with passed parameters
#--------------------------------------------------------------------------------------------------------------------------
#If no arguments are passed, show usage message and exit program.
if ($#ARGV == -1) {
usage();
exit;
}
$blast_file = "";
$query_file = "";
$flag = 0;
%my_args = @ARGV;
for $i (sort keys %my_args) {
if ($i eq "-i") {
$blast_file = $my_args{$i};
}
elsif ($i eq "-f") {
$query_file = $my_args{$i};
}
elsif ($i eq "-n") {
if ($my_args{$i} eq "n") {
$flag = 1;
}
elsif ($my_args{$i} eq "y") {
$flag = 0;
}
else {
print "\nThe -n parameter must be 'y' (print both files) or 'n' (print one file). You entered: $my_args{$i}\n\n";
&usage;
}
}
else {
print "\nUnrecognized argument: $i\n\n";
&usage;
}
}
#Open files. If unsuccessful, print an error message and exit program.
unless ( open(BLAST, "$blast_file") ) {
print "Got a bad blast file: $blast_file\n\n";
exit;
}
unless ( open(QUERY, "$query_file") ) {
print "Got a bad fasta query file: $query_file\n";
exit;
}
$matches_file = "matches.fasta";
if (-e $matches_file) {
print "\nCouldn't create output file $matches_file because it already exists.\n\n";
&usage;
}
unless ( open(MATCHES, ">$matches_file") ) {
print "\nCouldn't create output file: $matches_file\n\n";
&usage;
}
if ($flag == 0) {
$notmatches_file = "notmatches.fasta";
$notmatches_file =~ s/(.+)\.(.+)$/$1_notmatches\.$2/g;
if (-e $notmatches_file) {
print "\nCouldn't create output file $notmatches_file because it already exists.\n\n";
&usage;
}
unless ( open(NOTMATCHES, ">$notmatches_file") ) {
print "\nCouldn't create output file: $notmatches_file\n\n";
&usage;
}
}
#Everything looks good. Print the parameters we've found.
print "Parameters:\nblast file = $blast_file\nquery file = $query_file\nmatches file = $matches_file\n";
if ($flag == 0) {
print "not-matches file = $notmatches_file\n\n";
}
else {
print "\n";
}
#---------------------------------------------------------------------------------------------------------------------------
#The main event
#---------------------------------------------------------------------------------------------------------------------------
%ids = ();
while (<BLAST>) {
@fields = split(/\t/);
$id = $fields[0];
print "ID = $id\n";
if (!$ids{$id}) {
$ids{$id} = $id;
}
}
close(BLAST);
$match_flag = 0;
while (<QUERY>) {
$line = $_;
if ($line =~ /^>/) {
$id = $line;
$id =~ s/^>(.+?)\s.+$/$1/g;
chomp($id);
if ($ids{$id}) {
$match_flag = 1;
print "Found $id\n";
print MATCHES "$line";
}
else {
$match_flag = 0;
if ($flag == 0) {
print "Not Found $id\n";
print NOTMATCHES "$line";
}
}
}
else {
if ($match_flag == 1) {
print MATCHES "$line";
}
else {
if ($flag == 0) {
print NOTMATCHES "$line";
}
}
}
}
close(QUERY);
close(MATCHES);
if ($flag == 0) {
close(NOTMATCHES);
}
sub usage {
print "BLAST FOUND 2.0\n";
print "Usage: blast_found.pl -i blast_output_file -f fasta_query_file -n n\n\n";
print "This program takes (-i) a blast output file in short format (or any tabbed delimited file with fasta IDs\n";
print "in the first column), and (-f) a query file (in fasta format).\n\n";
print "If -n y, it returns two fasta files, one with all records found by blast (matches.fasta), and one with all not found (notmatches.fasta).\n";
print "If -n n, it returns one fasta file, with all records found in the blast file (matches.fasta).\n";
print "-n is optional; -n y is the default.\n\n";
print "Jennifer Meneghin\n";
print "January 28, 2009\n\n";
print "Updated:\n";
print "Jennifer Meneghin\n";
print "August 4, 2010\n\n"
}