forked from jmeneghin/perl-for-reysenbach-lab
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_selected_sequences.pl
More file actions
97 lines (97 loc) · 2.74 KB
/
get_selected_sequences.pl
File metadata and controls
97 lines (97 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/perl -w
#############################
### Jennifer Meneghin ###
### April 12, 2010 ###
#############################
#---------------------------------------------------------------------------------------------------------------------------
#Deal with passed parameters
#---------------------------------------------------------------------------------------------------------------------------
# Read the command line, which is a list of "-flag value" pairs:
#   -i  sequence ID list file (one ID per line)
#   -f  FASTA file to select sequences from
#   -o  output FASTA file (defaults to SelectedSequences.fa)
# and open the three files on the IN, FASTA and OUT handles that the rest of
# the script reads from and writes to. Every problem prints a short message
# followed by the usage text; usage() ends the program.
if (@ARGV == 0) {
    usage();
}

my $in_file    = "";
my $fasta_file = "";
my $out_file   = "SelectedSequences.fa";

# The arguments are loaded into a hash, so a trailing flag without a value is
# padded with undef here and reported explicitly below instead of surfacing as
# a hash-assignment warning and an undefined file name.
my @arg_list = @ARGV;
push @arg_list, undef if @arg_list % 2;
my %my_args = @arg_list;

for my $flag (sort keys %my_args) {
    my $value = $my_args{$flag};
    if ($flag ne "-i" && $flag ne "-f" && $flag ne "-o") {
        print "\nUnrecognized argument: $flag\n\n";
        usage();
    }
    if (!defined $value) {
        print "\nMissing value for argument: $flag\n\n";
        usage();
    }
    if ($flag eq "-i") {
        $in_file = $value;
    }
    elsif ($flag eq "-f") {
        $fasta_file = $value;
    }
    else {
        $out_file = $value;
    }
}

# Three-argument open keeps the mode separate from the file name, so a name
# containing characters such as '>' or '|' (or surrounding spaces) is always
# treated as a plain file name. $! is included so the user can see why the
# open failed.
unless ( open(IN, '<', $in_file) ) {
    print "\nGot a bad sequence ID list file: $in_file ($!)\n\n";
    usage();
}
unless ( open(FASTA, '<', $fasta_file) ) {
    print "\nGot a bad FASTA file: $fasta_file ($!)\n\n";
    usage();
}
unless ( open(OUT, '>', $out_file) ) {
    print "\nGot a bad output file: $out_file ($!)\n\n";
    usage();
}
print "Parameters:\nsequence ID list file = $in_file\nFASTA file = $fasta_file\noutput file = $out_file\n\n";
#---------------------------------------------------------------------------------------------------------------------------
#The main event
#---------------------------------------------------------------------------------------------------------------------------
# Build the lookup table of requested sequence IDs from the IN handle, one ID
# per line. Carriage returns are removed so lists saved with Windows line
# endings still match. Only the keys are used.
my %records = ();
while (my $id = <IN>) {
    chomp $id;
    $id =~ s/\r//g;
    next if length($id) == 0;    # a blank line can never name a sequence
    $records{$id} = 1;
}

# Writes one record to OUT, and echoes its header line to STDOUT, when its ID
# was requested. The test uses exists() so that an ID such as "0", which is a
# false value in Perl, is still selected.
my $write_if_selected = sub {
    my ($id, $sequence) = @_;
    return unless length($id) > 0 && exists $records{$id};
    print ">$id\n";
    print OUT ">$id\n";
    print OUT $sequence;
    return;
};

# Walk the FASTA file. Each line starting with '>' begins a new record; the
# record's ID is the text after the '>' up to the first whitespace. All other
# lines are appended unchanged to the current record's sequence.
my $seq    = "";
my $header = "";
while (my $line = <FASTA>) {
    if ($line =~ /^>/) {
        $write_if_selected->($header, $seq);
        # Always start the new record with an empty sequence, so lines seen
        # before the first header or under a header with no ID cannot leak
        # into this record's output.
        $seq    = "";
        $header = $line;
        $header =~ s/^>+//;               # drop only the leading marker, keep any '>' inside the ID
        $header =~ s/^(.+?)\s.*$/$1/;     # keep the first whitespace-delimited token
        chomp($header);
    }
    else {
        $seq .= $line;
    }
}
# The last record has no following header line to trigger its output.
$write_if_selected->($header, $seq);

close(IN);
close(FASTA);
# Buffered write errors (for example a full disk) are only reported at close.
close(OUT) or die "\nCould not finish writing the output file: $!\n\n";
#-----------------------------------------------------------------------
# Prints the help text (invocation, the -i / -f / -o parameters, a one-line
# description and the author credit) to STDOUT and then ends the program.
sub usage {
    my @help_lines = (
        "\nUsage: ./get_selected_sequences.pl\n\n",
        "Parameters:\n",
        "-i input file\tA Sequence ID List file that contains one sequence ID per line.\n",
        "-f input file\tA FASTA file.\n",
        "-o output file\tReturns a fasta file with only the selected sequences from the original FASTA sequence file.\n\n",
        "This script selects the sequences (in the sequence id list file) from the original fasta file.\n\n",
        "Jennifer Meneghin\n",
        "April 12, 2010\n\n",
    );
    print join("", @help_lines);
    exit 0;
}
#-----------------------------------------------------------------------