Skip to content

Commit 8dadd4f

Browse files
author
wqiu
committed
bioseq: add --gff-pos; bioaln: change default input/output format to fasta while testing for clustalw
1 parent 1dec648 commit 8dadd4f

File tree

2 files changed

+117
-27
lines changed

2 files changed

+117
-27
lines changed

lib/Bio/BPWrapper/AlnManipulations.pm

Lines changed: 62 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -124,38 +124,53 @@ sub initialize {
124124
%opts = %{$opts_ref};
125125

126126
# This is the format that aln-manipulations expects by default
127-
my $default_format = "clustalw";
127+
my $default_format = "fasta";
128128

129+
=begin
129130
# assume we're getting input from standard input
130-
131-
# my $in_format = $opts{"input"} || $default_format;
132-
# my $in_format;
133-
# use IO::Scalar;
134-
# my $s;
135-
# my ($guesser);
136-
# if ($file eq "STDIN") {
137-
# my $line_ct = 0;
138-
# my $lines;
139-
# while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
140-
# $guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
141-
# } else {
142-
# open $ifh, "<", $file or die $!;
143-
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file );
144-
# }
145-
# $in_format = $guesser->guess();
146-
# die "unknown file format. Try specify with -i flag.\n" unless $in_format;
131+
my $in_format = $opts{"input"} || $default_format;
132+
my $in_format;
133+
use IO::Scalar; this dependency breaks Vonda environment
134+
my $s;
135+
my ($guesser);
136+
$file = shift @ARGV;
137+
if ($file eq "STDIN") {
138+
my $line_ct = 0;
139+
my $lines;
140+
while(<>) { $lines .= $_; $line_ct++; last if $line_ct >= 100 } # read the first 100 lines
141+
$guesser = Bio::Tools::GuessSeqFormat->new( -text => $lines );
142+
} else {
143+
open my $ifh, "<", $file or die $!;
144+
$guesser = Bio::Tools::GuessSeqFormat->new( -fh => $file );
145+
}
146+
$in_format = $guesser->guess();
147+
die "unknown file format. Try specify with -i flag.\n" unless $in_format;
147148
# seek (STDIN, 0, 0);
148-
# warn "$in_format\n";
149+
warn "$in_format\n";
150+
=cut
149151

150-
my $in_format = $opts{'input'} || 'clustalw';
152+
my $in_format = $opts{'input'} || $default_format;
151153
if ($opts{"concat"}) {
152154
# foreach my $file (glob @ARGV) {
153155
while ($file = shift @ARGV) {
154156
# warn "reading $file\n";
155157
# $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file);
156-
# $in_format = $guesser->guess;
157-
$in = Bio::AlignIO->new(-file => $file, -format => $in_format);
158-
while ($aln=$in->next_aln()) { push @alns, $aln }
158+
# $in_format = $guesser->guess;
159+
160+
open IN, "<", $file;
161+
my $ct_line = 1;
162+
my $first_line;
163+
while(<>){
164+
chomp;
165+
if($ct_line == 1) {
166+
$first_line = $_;
167+
last;
168+
}
169+
}
170+
close IN;
171+
$in_format = 'clustalw' if $first_line =~ /CLUSTAL/;
172+
$in = Bio::AlignIO->new(-file => $file, -format => $in_format);
173+
while ($aln=$in->next_aln()) { push @alns, $aln }
159174
}
160175
} else {
161176
$file = shift @ARGV || "STDIN"; # If no more arguments were given on the command line
@@ -171,7 +186,30 @@ sub initialize {
171186
}
172187
} else { # would throw error if format guessed wrong
173188
# $in = Bio::AlignIO->new(-format => $in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file));
174-
# $in = Bio::AlignIO->new(-format => $in_format, -fh => $ifh);
189+
# $in = Bio::AlignIO->new(-format => $in_format, -fh => $ifh);
190+
my $ct_line = 1;
191+
my $first_line;
192+
if ($file eq 'STDIN') {
193+
while(<>){
194+
chomp;
195+
if($ct_line == 1) {
196+
$first_line = $_;
197+
last;
198+
}
199+
}
200+
} else {
201+
open IN, "<", $file;
202+
while(<IN>){
203+
chomp;
204+
if($ct_line == 1) {
205+
$first_line = $_;
206+
last;
207+
}
208+
}
209+
close IN;
210+
}
211+
$in_format = 'clustalw' if $first_line =~ /^CLUSTAL/;
212+
175213
$in = Bio::AlignIO->new(-format=>$in_format, ($file eq "STDIN")? (-fh => \*STDIN) : (-file => $file) );
176214
$aln = $in->next_aln()
177215
}

lib/Bio/BPWrapper/SeqManipulations.pm

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,20 +198,72 @@ sub gff_pos {
198198
my $pos = $opts{"gff-pos"};
199199
die "$filename: Not a GenBank file. Quit\n" unless $in_format eq 'genbank';
200200
$seq = $in->next_seq();
201+
die "$0 --gff-pos\n" unless $pos >= 1 && $pos <= $seq->length();
201202
my $gene_count = 0;
203+
my @orfs;
204+
push @orfs, {
205+
id => 'query',
206+
start => $pos,
207+
end => $pos,
208+
strand => undef
209+
};
210+
211+
my $last_end;
202212
foreach my $feat ($seq->get_SeqFeatures()) {
203213
if ($feat->primary_tag eq 'CDS') {
214+
if ($gene_count == 0) { # add 5-UTR
215+
push @orfs, {
216+
id => '5UTR',
217+
start => 1,
218+
end => $feat->start() - 1,
219+
strand => undef
220+
};
221+
}
222+
204223
my $location = $feat->location();
205224
next if $location->isa('Bio::Location::Split');
206225
my $gene_tag = "gene_" . $gene_count++;
207-
my $gene_symbol = 'na';
208-
my $product = 'na';
209226
foreach my $tag ($feat->get_all_tags()) {
210227
($gene_tag) = $feat->get_tag_values($tag) if $tag eq 'locus_tag';
211228
}
212-
print $gene_tag, "\n";
229+
push @orfs, {
230+
id => $gene_tag,
231+
start => $feat->start,
232+
end => $feat->end,
233+
strand => $feat->strand
234+
};
213235
}
236+
$last_end = $feat->end();
237+
}
238+
239+
push @orfs, { # append 3-UTR
240+
id => '3UTR',
241+
start => $last_end + 1,
242+
end => $seq->length(),
243+
strand => undef
244+
};
245+
246+
@orfs = sort{$a->{start} <=> $b->{start}} @orfs;
247+
# print Dumper(\@orfs); exit;
248+
my $order_query = 0;
249+
for(my $i = 0; $i < @orfs; $i++) {
250+
$order_query = $i if $orfs[$i]->{id} eq 'query';
251+
}
252+
253+
my $before = $orfs[$order_query - 1];
254+
my $after = $orfs[$order_query + 1];
255+
my ($tag_before, $tag_after);
256+
if ($pos <= $before->{end}) {
257+
$tag_before = $before->{id};
258+
$tag_after = $before->{id};
259+
} elsif ($pos > $before->{end} && $pos < $after->{start}) {
260+
$tag_before = $before->{id};
261+
$tag_after = $after->{id};
262+
} else {
263+
$tag_before = $after->{id};
264+
$tag_after = $after->{id};
214265
}
266+
print join "\t", ($pos, $tag_before, $tag_after), "\n"
215267
}
216268

217269

0 commit comments

Comments
 (0)