-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgeneAnnotationGRanges.R
More file actions
118 lines (89 loc) · 2.94 KB
/
geneAnnotationGRanges.R
File metadata and controls
118 lines (89 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#'
#' Extract annotation from GRanges
#'
library(AnnotationDbi)
library(GenomicFeatures)
#' Extracts sequence info annotation and scaffold filter from a GRanges object
#'
#' @param gr
#' @param genes
#'
#' @return
#' @export
#'
#' @examples
GRanges.extractSequenceInfoAnnotation <- function(gr, genes, imported_data) {
gff <- mcols(gr)
meta.indices <- match(genes, gff$gene_id) # F: Index of gene in readcounts -> Index of gene in gff annotation
if(any(is.na(meta.indices))) {
# Remove genes that are NA. They will not appear in the annotation
na.genes <- genes[is.na(meta.indices)]
genes <- genes[!is.na(meta.indices)]
meta.indices <- na.omit(meta.indices)
}
if(length(meta.indices) == 0) {
return(GeneAnnotation())
}
# Extract available info directly from GRanges
sequence.info <- data.frame(row.names = genes,
start_position = start(gr)[meta.indices],
end_position = end(gr)[meta.indices],
length = width(gr)[meta.indices],
exon_length = rep(NA, length(genes)),
stringsAsFactors = F)
if("exon_length" %in% imported_data) {
# For normalization we need the exonic length
# https://www.biostars.org/p/83901/
txdb <- suppressWarnings(makeTxDbFromGRanges(gr))
exons.list.per.gene <- exonsBy(txdb,by="gene")
exonic.gene.sizes <- sum(width(reduce(exons.list.per.gene)))
#found.genes <- mcols(gr)[match(names(exonic.gene.sizes), gr$Name), "gene_id"] # Txdb uses name instead of gene_id
found.genes <- intersect(names(exonic.gene.sizes), genes)
if(length(found.genes) > 0) {
sequence.info[found.genes, "exon_length"] <- exonic.gene.sizes[found.genes]
}
}
return(GeneAnnotation(sequence.info = sequence.info))
}
#' Extracts biotype annotation from a GRanges object
#'
#' @param gr
#' @param genes
#'
#' @return
#' @export
#'
#' @examples
GRanges.extractBiotypeAnnotation <- function(gr, genes) {
gff <- mcols(gr)
features <- list()
for(feature_type in unique(gff$biotype)) {
gene_ids <- unlist(gff[gff$biotype == feature_type,"gene_id"])
gene_ids <- unique(intersect(gene_ids, genes))
if(length(gene_ids) > 0) {
features[[feature_type]] <- gene_ids
}
}
return(GeneAnnotation(gene.biotype = GeneFilter$new(data = features)))
}
#' Extracts scaffold annotation from a GRanges object
#'
#' @param gr
#' @param genes
#'
#' @return
#' @export
#'
#' @examples
GRanges.extractScaffoldAnnotation <- function(gr, genes) {
gff <- mcols(gr)
scaffold <- list()
for(type in unique(seqnames(gr))) {
gene_ids <- unlist(gff[seqnames(gr) == type,"gene_id"])
gene_ids <- unique(intersect(gene_ids, genes))
if(length(gene_ids) > 0) {
scaffold[[type]] <- gene_ids
}
}
return(GeneAnnotation(gene.scaffold = GeneFilter$new(data = scaffold)))
}