@@ -45,9 +45,33 @@ threads <- opt$threads
4545
4646
4747
48- # # ------------------------------------------------------------------------------
48+ # # -----------------------------------------------------------------------------
49+ # # Functions
50+ # # -----------------------------------------------------------------------------
51+ parse_gff_attributes <- function (
52+ attributes_string , attribute_name , start_pos , end_pos , default_val = NULL ) {
53+ attr_pairs_raw <- stringr :: str_split(attributes_string , " ;" )[[1 ]]
54+ attr_pairs <- stringr :: str_trim(
55+ stringr :: str_replace_all(attr_pairs_raw , " [\\ s\\ p{Zs}\\ p{C}]" , " " ))
56+ search_pattern_regex <- paste0(" ^" , attribute_name , " =" )
57+ target_attr <- attr_pairs [grepl(search_pattern_regex , attr_pairs )]
58+ if (length(target_attr ) > 0 ) {
59+ value <- stringr :: str_remove(target_attr [1 ], search_pattern_regex )
60+ return (value )
61+ } else {
62+ if (! is.null(default_val )) {
63+ return (default_val )
64+ } else {
65+ default_value_generated <- paste0(" unknown_" , start_pos , " _" , end_pos )
66+ return (default_value_generated )
67+ }
68+ }
69+ }
70+
71+
72+ # # -----------------------------------------------------------------------------
4973# # Read data
50- # # ------------------------------------------------------------------------------
74+ # # -----------------------------------------------------------------------------
5175meta_tab <- read_tsv(meta_f )
5276
5377total_counts_list <- map(meta_tab $ sample , function (x ) {
@@ -59,25 +83,36 @@ merged_total_counts <- bind_rows(total_counts_list)
5983
6084
6185
62- # # ------------------------------------------------------------------------------
86+ # # -----------------------------------------------------------------------------
6387# # Read genome annotation
64- # # ------------------------------------------------------------------------------
88+ # # -----------------------------------------------------------------------------
6589ref_annot <- ape :: read.gff(gff_f , na.strings = c(" ." , " ?" ), GFF3 = TRUE )
6690
6791ref_annot <- subset(ref_annot , type == " gene" )
6892
6993gene_attr <- stringr :: str_split(ref_annot $ attributes , " ;" )
70- locus_tags <- unlist(lapply(gene_attr , function (x ) {
71- x [grepl(" ^locus_tag" , x )]
72- }))
73- gene_biotypes <- unlist(lapply(gene_attr , function (x ) {
74- x [grepl(" ^gene_biotype" , x )]
75- }))
76- common_gene_names <- unlist(lapply(gene_attr , function (x ) {
77- x <- x [grepl(" ^gene=" , x )]
78- x [identical(x , character (0 ))] <- " "
79- x
80- }))
94+
95+ locus_tags <- unname(mapply(parse_gff_attributes ,
96+ ref_annot $ attributes ,
97+ attribute_name = " locus_tag" ,
98+ start_pos = ref_annot $ start ,
99+ end_pos = ref_annot $ end ))
100+
101+ gene_biotypes <- unname(mapply(parse_gff_attributes ,
102+ ref_annot $ attributes ,
103+ attribute_name = " gene_biotype" ,
104+ start_pos = ref_annot $ start ,
105+ end_pos = ref_annot $ end ,
106+ " unknown" ))
107+
108+ common_gene_names <- unname(mapply(parse_gff_attributes ,
109+ ref_annot $ attributes ,
110+ attribute_name = " gene" ,
111+ start_pos = ref_annot $ start ,
112+ end_pos = ref_annot $ end ,
113+ " unknown" ))
114+
115+
81116gene_lengths <- (ref_annot $ end - ref_annot $ start ) + 1
82117
83118ref_gene_df <- data.frame (
@@ -93,9 +128,9 @@ ref_gene_df$gene_name <- sub("^.*=", "", ref_gene_df$gene_name)
93128write_tsv(ref_gene_df , " ref_gene_df.tsv" )
94129
95130
96- # # ------------------------------------------------------------------------------
131+ # # -----------------------------------------------------------------------------
97132# # Count reads mapping to genes
98- # # ------------------------------------------------------------------------------
133+ # # -----------------------------------------------------------------------------
99134bamfilesCount <- paste0(meta_tab $ sample , " .bam" )
100135
101136# 0 (unstranded), 1 (stranded) and 2 (reversely stranded)
@@ -132,9 +167,9 @@ gene_counts_pc <- counts_mat[ref_gene_df$biotype == "protein_coding", ]
132167write_tsv(gene_counts_pc , " gene_counts_pc.tsv" )
133168
134169
135- # # ------------------------------------------------------------------------------
170+ # # -----------------------------------------------------------------------------
136171# # Plot library composition
137- # # ------------------------------------------------------------------------------
172+ # # -----------------------------------------------------------------------------
138173# # set up plots
139174brewer_pallette1 <- brewer.pal(9 , " Set1" )
140175brewer_pallette3 <- brewer.pal(8 , " Dark2" )
0 commit comments