forked from jmeneghin/perl-for-reysenbach-lab
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmap_back_uniques.pl
More file actions
110 lines (110 loc) · 3.37 KB
/
map_back_uniques.pl
File metadata and controls
110 lines (110 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/perl -w
#############################
### Jennifer Meneghin ###
### February 9, 2011 ###
#############################
#---------------------------------------------------------------------------------------------------------------------------
#Deal with passed parameters
#---------------------------------------------------------------------------------------------------------------------------
# Parse the command line as flag/value pairs (-i, -f, -o) and open the three
# file handles used by the rest of the script:
#   IN    - the mapping file (read)
#   FASTA - the fasta file of uniques (read)
#   OUT   - the new fasta file (write; must not already exist)
# Any problem prints a message followed by the usage text; usage() exits.
if (@ARGV == 0) {
    usage();
}

# Flags and values are paired through a hash assignment, so an odd number of
# arguments would leave one flag without a value. Report that up front.
if (@ARGV % 2 != 0) {
    print "\nEvery parameter needs a value (got an odd number of arguments).\n\n";
    usage();
}

$in_file = "";
$fasta_file = "";
$out_file = "updated.fasta";
%my_args = @ARGV;
for my $flag (sort keys %my_args) {
    if ($flag eq "-i") {
        $in_file = $my_args{$flag};
    }
    elsif ($flag eq "-f") {
        $fasta_file = $my_args{$flag};
    }
    elsif ($flag eq "-o") {
        $out_file = $my_args{$flag};
    }
    else {
        print "\nUnrecognized argument: $flag\n\n";
        usage();
    }
}

# Three-argument open keeps the file name from being interpreted as a mode
# or a command (names starting with '>' or ending with '|', for example).
unless ( open(IN, '<', $in_file) ) {
    print "\nGot a bad mapping file: $in_file\n\n";
    usage();
}
unless ( open(FASTA, '<', $fasta_file) ) {
    print "\nGot a bad fasta file: $fasta_file\n\n";
    usage();
}

# Refuse to overwrite an existing output file.
if (-e $out_file) {
    print "\nOutput file $out_file already exists. Please delete it or choose a new output file.\n\n";
    usage();
}
unless ( open(OUT, '>', $out_file) ) {
    print "\nGot a bad output file: $out_file\n\n";
    usage();
}
print "Parameters:\ninput file = $in_file\nfasta file = $fasta_file\noutput file = $out_file\n\n";
#---------------------------------------------------------------------------------------------------------------------------
#The main event
#---------------------------------------------------------------------------------------------------------------------------
# Read the mapping file. The second whitespace-separated field of each line
# is kept; it is later split on commas into the original labels. Line N of
# the mapping file is paired with the Nth non-empty record of the fasta file.
my @label_lists;
while (my $map_line = <IN>) {
    chomp $map_line;
    my @fields = split(/\s/, $map_line);
    push(@label_lists, $fields[1]);
}

# Walk the fasta file. A record is written out when the NEXT header line is
# reached (or at end of file), once per label on its mapping line.
my $record_index = 0;   # index into @label_lists for the record being built
my $seq = "";           # sequence accumulated for the current record
my $total_num = 0;      # sum of the _<count> suffixes found in the headers
my $total_recs = 0;     # records actually written to OUT
while (my $fasta_line = <FASTA>) {
    chomp $fasta_line;
    if ($fasta_line =~ /^>/) {
        if (length($seq) > 0) {
            $total_recs += write_expanded_record($label_lists[$record_index], $seq);
            $record_index++;
        }
        $seq = "";

        # The header is expected to end in _<count>; that count is how many
        # original reads this unique stands for. Headers without such a
        # suffix contribute nothing to the expected total.
        (my $header = $fasta_line) =~ s/^>//;
        if ($header =~ /^.+_(\d+)/) {
            $total_num += $1;
        }
    }
    else {
        $seq .= $fasta_line;
    }
}

# Flush the final record, in the same ">label" format as all the others.
if (length($seq) > 0) {
    $total_recs += write_expanded_record($label_lists[$record_index], $seq);
}

print "Total expected number of records in new fasta file = $total_num\n";
print "Total number of records written to the new fasta file = $total_recs\n";
print "(These numbers should match... if they don't there is a problem with your files.)\n";
close(IN);
# Buffered write errors only surface when the handle is closed.
close(OUT) or warn "Could not close the output file cleanly: $!\n";
close(FASTA);

# Write one fasta record to OUT for every label in a comma-separated list.
#   $label_list - comma-separated labels from the mapping file (may be undef
#                 when the mapping file has fewer lines than the fasta file)
#   $sequence   - the sequence to emit under each label
# Returns the number of records written.
sub write_expanded_record {
    my ($label_list, $sequence) = @_;
    unless (defined $label_list) {
        warn "No mapping line for a fasta record; nothing written for it.\n";
        return 0;
    }
    my @labels = split(/,/, $label_list);
    for my $label (@labels) {
        print OUT ">$label\n";
        print OUT "$sequence\n";
    }
    return scalar @labels;
}
#-----------------------------------------------------------------------
# Print the help text (invocation, parameters, description, author) to
# standard output and terminate the script.
sub usage {
    my @help_text = (
        "\nUSAGE: ./map_back_uniques.pl\n\n",
        "Parameters:\n",
        "-i <input file>\t\tA mapping file output from AmpliconNoise\n",
        "-f <fasta file>\t\tA fasta file output from AmpliconNoise\n",
        "-o <output file>\tThe new fasta file to create (optional. If not provided,\n",
        "\t\t\ta file called updated.fasta will be created.)\n\n",
        "This script creates a new \"de-uniqued\" fasta file from the fasta file of uniques and the file that maps the original names to the uniques. It also updates the headers so that they start with the original labels (found in the mapping file.)\n\n",
        "Jennifer Meneghin\n",
        "February 9, 2011\n\n",
    );
    foreach my $piece (@help_text) {
        print $piece;
    }
    exit;
}