11package PlantCompUtils ;
22require Exporter;
33
4- # Copyright [2019-2023 ] EMBL-European Bioinformatics Institute
4+ # Copyright [2019-2025 ] EMBL-European Bioinformatics Institute
55
66@ISA = qw( Exporter) ;
77@EXPORT_OK = qw(
@@ -23,8 +23,7 @@ use Time::HiRes;
2323use HTTP::Tiny;
2424use DBI;
2525
26- # Fungi Protists Metazoa have collections and one all-vs-all TSV file
27- # This code won't work there
26+ # Only tested in Plants; Fungi, Protists and Metazoa have collections, so the code will need tweaking for them
2827our @DIVISIONS = qw( Plants ) ;
2928our $FTPURL = ' ftp.ensemblgenomes.org' ;
3029our $COMPARADIR = ' /pub/xxx/current/tsv/ensembl-compara/homologies' ;
@@ -180,11 +179,11 @@ sub get_gene_coords_GTF_file {
180179 || die " # ERROR(get_gene_coords_GTF_file): cannot open $GTF_filename \n " ;
181180 while ( my $line = <GTF> ) {
182181
183- # 1 araport11 gene 3631 5899 . + . gene_id "AT1G01010";...
182+ # 1 araport11 gene 3631 5899 . + . gene_id "AT1G01010";...
183+ # C3 brad gene 4809 5027 . - . gene_id "Bo3g025160";...
184184 if ( $line =~
185- m / ^([^#])\t [^\t ] +\t gene\t (\d +)\t (\d +)\t [^\t ] \t (\S +)\t [^\t ] \t gene_id "([^";]+)/
186- )
187- {
185+ m / ^([^#]+)\t [^\t ] +\t gene\t (\d +)\t (\d +)\t [^\t ] \t (\S +)\t [^\t ] \t gene_id "([^";]+)/ ) {
186+
188187 ( $chr , $start , $end , $strand , $geneid ) = ( $1 , $2 , $3 , $4 , $5 );
189188 push ( @chr_sorted_gene_ids ,
190189 [ $geneid , $chr , $start , $end , $strand ] );
@@ -258,7 +257,7 @@ sub download_GTF_file {
258257
259258# downloads compressed TSV file from FTP site, renames it
260259# and saves it in $targetdir; uses FTP globals defined above
261- # NOTE: if species file is not found it tries the bulky all-vs-all file
260+ # NOTE: only tries the bulky all-vs-all file (GBs in size)
262261sub download_compara_TSV_file {
263262
264263 my ( $dir , $ref_genome , $targetdir ) = @_ ;
@@ -274,32 +273,18 @@ sub download_compara_TSV_file {
274273 || die " # ERROR(download_compara_TSV_file): cannot change working directory to $dir "
275274 . $ftp -> message();
276275
277- # find out which file is to be downloaded
278- if ( $ftp -> cwd($ref_genome ) ) {
279- foreach my $file ( $ftp -> ls() ) {
280- if ( $file =~ m / protein_default.homologies.tsv.gz/ ) {
281- $compara_file = $file ;
282- $stored_compara_file = " $targetdir /$compara_file " ;
283- $stored_compara_file =~ s / tsv.gz/ $ref_genome .tsv.gz/ ;
284- last ;
285- }
286- }
287- }
288- else { # try all-vs-all file instead (Fungi, Protists, Metazoa)
289-
290- print " # WARNING(download_compara_TSV_file): cannot find " .
291- " $ref_genome in $dir , try all-vs-all\n " ;
292-
293- foreach my $file ( $ftp -> ls() ) {
294- if ( $file =~ m / protein_default.homologies.tsv.gz/ ) {
295- $compara_file = $file ;
296- $stored_compara_file = " $targetdir /$compara_file " ;
297- foreach my $div (@DIVISIONS ) {
298- if ( $dir =~ m / ($div )/ i ) {
299- $div = $1 ;
300- $stored_compara_file =~ s / tsv.gz/ $div .tsv.gz/ ;
301- last ;
302- }
276+ # find file to be downloaded
277+ print " # WARNING(download_compara_TSV_file): try all-vs-all\n " ;
278+
279+ foreach my $file ( $ftp -> ls() ) {
280+ if ( $file =~ m / protein_default.homologies.tsv.gz/ ) {
281+ $compara_file = $file ;
282+ $stored_compara_file = " $targetdir /$compara_file " ;
283+ foreach my $div (@DIVISIONS ) {
284+ if ( $dir =~ m / ($div )/ i ) {
285+ $div = $1 ;
286+ $stored_compara_file =~ s / tsv.gz/ $div .tsv.gz/ ;
287+ last ;
303288 }
304289 }
305290 }
0 commit comments