@@ -34,39 +34,52 @@ prepareAnnotations <- function(txdb) {
3434}
3535
3636
37- # ' Prepare annotations from gtf
38- # ' @title prepare annotations from gtf file
39- # ' @param gtf. file A string variable indicates the path to a gtf file.
40- # ' @param organism as described in \code{ \link{makeTxDbFromGFF}}.
41- # ' @param dataSource as described in \code{\link{makeTxDbFromGFF}}.
42- # ' @param taxonomyId as described in \code{\link{makeTxDbFromGFF}}.
43- # ' @param chrominfo as described in \code{\link{makeTxDbFromGFF}}.
44- # ' @param miRBaseBuild as described in \code{\link{makeTxDbFromGFF}}.
45- # ' @param metadata as described in \code{\link{makeTxDbFromGFF}}.
46- # ' @param dbxrefTag as described in \code{\link{makeTxDbFromGFF}}.
47- # ' @param ... see \code{\link{makeTxDbFromGFF}}.
48- # ' @return A \code{\link{GrangesList}} object
37+ # ' Prepare annotation granges object from GTF file
38+ # ' @title Prepare annotations from a GTF file as a GRangesList object
39+ # ' @param file path to a GTF file
40+ # ' @return grlist a \code{\link{GRangesList}} object, unlike \code{\link{readFromGTF}},
41+ # ' this function finds out the equivalence classes between the transcripts,
42+ # ' with \code{\link{mcols}} data having three columns:
43+ # ' \itemize{
44+ # ' \item TXNAME the transcript identifier, parsed from the transcript_id attribute of the GTF file
45+ # ' \item GENEID the gene identifier, parsed from the gene_id attribute of the GTF file
46+ # ' \item eqClass the eqClass value reported for the transcript by getMinimumEqClassByTx
47+ # ' }
48+ # '
4949# ' @export
50- prepareAnnotationsFromGTF <- function (gtf.file , dataSource = NA ,
51- organism = " Homo sapiens" ,
52- taxonomyId = NA ,
53- chrominfo = NULL ,
54- miRBaseBuild = NA ,
55- metadata = NULL ,
56- dbxrefTag ,... ){
57- return (prepareAnnotations(GenomicFeatures :: makeTxDbFromGFF(gtf.file , format = " gtf" ,
58- organism = organism ,
59- dataSource = dataSource ,
60- taxonomyId = taxonomyId ,
61- chrominfo = chrominfo ,
62- miRBaseBuild = miRBaseBuild ,
63- metadata = metadata ,
64- dbxrefTag = dbxrefTag
65- )))
50+ prepareAnnotationsFromGTF <- function (file ){ # builds a per-transcript GRangesList of exons from the GTF given in `file`
51+ if (missing(file )){ # `file` has no default, so fail early with an explicit message
52+ stop(' A GTF file is required.' )
53+ }else {
54+ data <- read.delim(file ,header = FALSE ,comment.char = ' #' ) # read the GTF as a table without a header row, skipping comment lines
55+ colnames(data ) <- c(" seqname" ," source" ," type" ," start" ," end" ," score" ," strand" ," frame" ," attribute" ) # name the nine columns read from the file
56+ data <- data [data $ type == ' exon' ,] # keep only the exon records
57+ data $ strand [data $ strand == ' .' ] <- ' *' # recode the '.' strand value as '*'
58+ data $ GENEID = gsub(' gene_id (.*?);.*' ,' \\ 1' ,data $ attribute ) # gene id captured from the attribute column
59+ data $ TXNAME = gsub(' .*transcript_id (.*?);.*' , ' \\ 1' ,data $ attribute ) # transcript id captured from the attribute column
60+ data $ exon_rank = as.integer(gsub(' .*exon_number (.*?);.*' , ' \\ 1' ,data $ attribute )) # exon_number captured from the attribute column, as integer
61+ geneData = unique(data [,c(' TXNAME' , ' GENEID' )]) # one row per distinct transcript/gene pair, used below for the list-level metadata
62+ grlist <- makeGRangesListFromDataFrame( # one list element per TXNAME; exon_rank is carried along as an extra column
63+ data [,c(' seqname' , ' start' ,' end' ,' strand' ,' exon_rank' ,' TXNAME' )],split.field = ' TXNAME' ,keep.extra.columns = TRUE )
64+
65+ unlistedExons <- unlist(grlist , use.names = FALSE ) # flatten to a single set of exon ranges so they can be reordered in one go
66+ partitioning <- PartitioningByEnd(cumsum(elementNROWS(grlist )), names = NULL ) # remember the per-transcript boundaries for the relist() below
67+ txIdForReorder <- togroup(PartitioningByWidth(grlist )) # for every exon, the index of the list element (transcript) it belongs to
68+ unlistedExons <- unlistedExons [order(txIdForReorder , unlistedExons $ exon_rank )] # 'exonsByTx' is always sorted by exon rank, not by strand; make sure that this is the case here
69+ unlistedExons $ exon_endRank <- unlist(sapply(elementNROWS(grlist ),seq ,to = 1 ), use.names = FALSE ) # per transcript, count down from its number of exons to 1
70+ unlistedExons <- unlistedExons [order(txIdForReorder , start(unlistedExons ))] # final order: by transcript, then by exon start
71+ # mcols(unlistedExons) <- mcols(unlistedExons)[,c('exon_rank','exon_endRank')]
72+ grlist <- relist(unlistedExons , partitioning ) # rebuild the per-transcript list from the reordered exons
73+ minEqClasses <- getMinimumEqClassByTx(grlist ) # helper defined elsewhere; its eqClass and queryTxId fields are used below
74+ mcols(grlist ) <- DataFrame(geneData [(match(names(grlist ), geneData $ TXNAME )),]) # attach TXNAME/GENEID rows aligned to the list names
75+ mcols(grlist )$ eqClass <- minEqClasses $ eqClass [match(names(grlist ),minEqClasses $ queryTxId )] # add eqClass, matched to the list names through queryTxId
76+ }
77+ return (grlist ) # the annotated GRangesList
6678}
6779
6880
6981
82+
7083# ' Get minimum equivalent class by Transcript
7184# ' @param exonsByTranscripts exonsByTranscripts
7285# ' @noRd
0 commit comments