@@ -53,9 +53,9 @@ typedef struct
5353 uint32_t beg ;
5454 uint32_t end ;
5555 uint32_t trid ;
56- uint32_t strand :1 ; // STRAND_REV,STRAND_FWD
56+ uint32_t strand :2 ; // STRAND_{REV,FWD,UNK}
5757 uint32_t phase :2 ; // 0, 1, 2, or 3 for unknown
58- uint32_t iseq :29 ;
58+ uint32_t iseq :28 ;
5959}
6060ftr_t ;
6161
@@ -474,13 +474,13 @@ static void gff_parse_exon(gff_t *gff, const char *line, ftr_t *ftr)
474474 // associate with transcript id
475475 gff_id_register (& gff -> tscript_ids , aux -> parent , aux -> parent_end , & ftr -> trid );
476476
477- if ( ftr -> strand == -1 && gff -> verbosity > 0 )
477+ if ( ftr -> strand == STRAND_UNK && gff -> verbosity > 0 )
478478 {
479479 if ( !gff -> warned .unknown_strand || gff -> verbosity > 1 )
480480 fprintf (stderr ,"Warning: Ignoring GFF feature with unknown strand .. %s\n" ,line );
481481 gff -> warned .unknown_strand ++ ;
482482 }
483- if ( ftr -> phase == -1 && gff -> verbosity > 0 )
483+ if ( ftr -> phase == CDS_PHASE_UNKN && gff -> verbosity > 0 )
484484 {
485485 if ( !gff -> warned .unknown_phase || gff -> verbosity > 1 )
486486 fprintf (stderr ,"Warning: Ignoring GFF feature with unknown phase .. %s\n" ,line );
@@ -568,10 +568,11 @@ static int gff_parse_line(gff_t *gff, char *line, ftr_t *ftr)
568568 ftr -> strand = -1 ;
569569 if ( * ss == '+' ) ftr -> strand = STRAND_FWD ;
570570 else if ( * ss == '-' ) ftr -> strand = STRAND_REV ;
571+ else ftr -> strand = STRAND_UNK ;
571572 ss += 2 ;
572573
573574 // 8th column: phase (codon offset)
574- ftr -> phase = -1 ;
575+ ftr -> phase = CDS_PHASE_UNKN ;
575576 if ( * ss == '0' ) ftr -> phase = 0 ;
576577 else if ( * ss == '1' ) ftr -> phase = 1 ;
577578 else if ( * ss == '2' ) ftr -> phase = 2 ;
@@ -757,7 +758,7 @@ static void tscript_init_cds(gff_t *gff)
757758 }
758759 if ( !tscript_ok ) continue ; // skip this transcript
759760 }
760- else
761+ else if ( tr -> strand == STRAND_REV )
761762 {
762763 if ( tr -> cds [tr -> ncds - 1 ]-> phase != CDS_PHASE_UNKN )
763764 {
@@ -820,6 +821,8 @@ static void tscript_init_cds(gff_t *gff)
820821 }
821822 if ( !tscript_ok ) continue ; // skip this transcript
822823 }
824+ else
825+ continue ; // unknown strand
823826
824827 // set len. At the same check that CDS within a transcript do not overlap
825828 len = 0 ;
@@ -868,7 +871,7 @@ static void tscript_init_cds(gff_t *gff)
868871 i -- ;
869872 }
870873 }
871- else
874+ else if ( tr -> strand == STRAND_REV )
872875 {
873876 i = 0 ;
874877 while ( i < tr -> ncds && len %3 )
@@ -910,7 +913,7 @@ static int gff_dump(gff_t *gff, const char *fname)
910913 gf_gene_t * gene = (gf_gene_t * ) kh_val (gff -> init .gid2gene , k );
911914 char * gene_id = gff -> init .gene_ids .str [gene -> id ];
912915 str .l = 0 ;
913- ksprintf (& str ,"%s\t.\tgene\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tID=%s;Name=%s;used=%d\n" ,gff -> init .seq [gene -> iseq ],gene -> beg + 1 ,gene -> end + 1 ,gene -> strand == STRAND_FWD ?'+' :'-' ,gene_id ,gene -> name ,gene -> used );
916+ ksprintf (& str ,"%s\t.\tgene\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tID=%s;Name=%s;used=%d\n" ,gff -> init .seq [gene -> iseq ],gene -> beg + 1 ,gene -> end + 1 ,gene -> strand == STRAND_FWD ?'+' :( gene -> strand == STRAND_REV ? '-' : '.' ) ,gene_id ,gene -> name ,gene -> used );
914917 if ( bgzf_write (out , str .s , str .l ) != str .l ) error ("Error writing %s: %s\n" , fname , strerror (errno ));
915918 }
916919
@@ -921,7 +924,7 @@ static int gff_dump(gff_t *gff, const char *fname)
921924 char * gene_id = gff -> init .gene_ids .str [tr -> gene -> id ];
922925 const char * type = tr -> type == GF_PROTEIN_CODING ? "mRNA" : gf_type2gff_string (tr -> type );
923926 str .l = 0 ;
924- ksprintf (& str ,"%s\t.\t%s\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tID=%s;Parent=%s;biotype=%s;used=%d\n" ,itr -> seq ,type ,itr -> beg + 1 ,itr -> end + 1 ,tr -> strand == STRAND_FWD ?'+' :'-' ,gff -> tscript_ids .str [tr -> id ],gene_id ,gf_type2gff_string (tr -> type ),tr -> used );
927+ ksprintf (& str ,"%s\t.\t%s\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tID=%s;Parent=%s;biotype=%s;used=%d\n" ,itr -> seq ,type ,itr -> beg + 1 ,itr -> end + 1 ,tr -> strand == STRAND_FWD ?'+' :( tr -> strand == STRAND_REV ? '-' : '.' ) ,gff -> tscript_ids .str [tr -> id ],gene_id ,gf_type2gff_string (tr -> type ),tr -> used );
925928 if ( bgzf_write (out , str .s , str .l ) != str .l ) error ("Error writing %s: %s\n" , fname , strerror (errno ));
926929 }
927930 regitr_destroy (itr );
@@ -932,7 +935,7 @@ static int gff_dump(gff_t *gff, const char *fname)
932935 gf_cds_t * cds = regitr_payload (itr ,gf_cds_t * );
933936 gf_tscript_t * tr = cds -> tr ;
934937 str .l = 0 ;
935- ksprintf (& str ,"%s\t.\tCDS\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t%c\tParent=%s\n" ,itr -> seq ,cds -> beg + 1 ,cds -> beg + cds -> len ,tr -> strand == STRAND_FWD ?'+' :'-' ,cds -> phase == 3 ?'.' :cds -> phase + (int )'0' ,gff -> tscript_ids .str [tr -> id ]);
938+ ksprintf (& str ,"%s\t.\tCDS\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t%c\tParent=%s\n" ,itr -> seq ,cds -> beg + 1 ,cds -> beg + cds -> len ,tr -> strand == STRAND_FWD ?'+' :( tr -> strand == STRAND_REV ? '-' : '.' ) ,cds -> phase == 3 ?'.' :cds -> phase + (int )'0' ,gff -> tscript_ids .str [tr -> id ]);
936939 if ( bgzf_write (out , str .s , str .l ) != str .l ) error ("Error writing %s: %s\n" , fname , strerror (errno ));
937940 }
938941 regitr_destroy (itr );
@@ -943,7 +946,7 @@ static int gff_dump(gff_t *gff, const char *fname)
943946 gf_utr_t * utr = regitr_payload (itr ,gf_utr_t * );
944947 gf_tscript_t * tr = utr -> tr ;
945948 str .l = 0 ;
946- ksprintf (& str ,"%s\t.\t%s_prime_UTR\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tParent=%s\n" ,itr -> seq ,utr -> which == prime3 ?"three" :"five" ,utr -> beg + 1 ,utr -> end + 1 ,tr -> strand == STRAND_FWD ?'+' :'-' ,gff -> tscript_ids .str [tr -> id ]);
949+ ksprintf (& str ,"%s\t.\t%s_prime_UTR\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tParent=%s\n" ,itr -> seq ,utr -> which == prime3 ?"three" :"five" ,utr -> beg + 1 ,utr -> end + 1 ,tr -> strand == STRAND_FWD ?'+' :( tr -> strand == STRAND_REV ? '-' : '.' ) ,gff -> tscript_ids .str [tr -> id ]);
947950 if ( bgzf_write (out , str .s , str .l ) != str .l ) error ("Error writing %s: %s\n" , fname , strerror (errno ));
948951 }
949952 regitr_destroy (itr );
@@ -954,7 +957,7 @@ static int gff_dump(gff_t *gff, const char *fname)
954957 gf_exon_t * exon = regitr_payload (itr ,gf_exon_t * );
955958 gf_tscript_t * tr = exon -> tr ;
956959 str .l = 0 ;
957- ksprintf (& str ,"%s\t.\texon\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tParent=%s\n" ,itr -> seq ,exon -> beg + 1 ,exon -> end + 1 ,tr -> strand == STRAND_FWD ?'+' :'-' ,gff -> tscript_ids .str [tr -> id ]);
960+ ksprintf (& str ,"%s\t.\texon\t%" PRIu32 "\t%" PRIu32 "\t.\t%c\t.\tParent=%s\n" ,itr -> seq ,exon -> beg + 1 ,exon -> end + 1 ,tr -> strand == STRAND_FWD ?'+' :( tr -> strand == STRAND_REV ? '-' : '.' ) ,gff -> tscript_ids .str [tr -> id ]);
958961 if ( bgzf_write (out , str .s , str .l ) != str .l ) error ("Error writing %s: %s\n" , fname , strerror (errno ));
959962 }
960963 regitr_destroy (itr );
0 commit comments