Skip to content

Commit 1adffb0

Browse files
committed
Internal function get_activity() for calcualtion of reaction activity scores with gene expression data
1 parent 87acc6e commit 1adffb0

File tree

6 files changed

+476
-31
lines changed

6 files changed

+476
-31
lines changed

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(get_activity,data.frame)
4+
S3method(get_activity,matrix)
35
export(as_reactDB)
46
export(extract_genes)
57
export(geneSBML)
8+
export(get_activity)
9+
export(get_regulation)
610
export(is_geneSBML)
711
export(is_memoSaver)
812
export(is_reactDB)

R/evaluation_funs.R

Lines changed: 264 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,10 @@
109109
#' BCA ("bca", default) or percentile ("perc").
110110
#' @param seed a seed for the random number generator.
111111
#' @param .parallel logical, should th computation be run in parallel?
112+
#' @param ... additional argument, currently none.
112113
#'
113114
#' @md
115+
#' @export
114116

115117
get_regulation <- function(x,
116118
err = NULL,
@@ -126,7 +128,7 @@
126128
burn_in = 0,
127129
ci_method = c("bca", "perc"),
128130
seed = NULL,
129-
.parallel = TRUE) {
131+
.parallel = TRUE, ...) {
130132

131133
## benchmarking and exit handlers --------
132134

@@ -219,6 +221,14 @@
219221
stopifnot(is.logical(return_mc))
220222
return_mc <- return_mc[1]
221223

224+
if(!is.na(x_default)) {
225+
226+
if(!is.numeric(x_default)) stop("`x_default` has to be a number or NA.", call. = FALSE)
227+
228+
x_default <- x_default[1]
229+
230+
}
231+
222232
## building a parent evaluation environment -------
223233

224234
message("Building an evaluation environment")
@@ -422,4 +432,257 @@
422432

423433
# Reaction activity scores with bare expression data --------
424434

435+
#' Calculate reaction activity scores based on gene expression data.
436+
#'
437+
#' @description
438+
#' Function `get_activity()` calculates activity scores of metabolic reactions
439+
#' by evaluation of gene - reaction association rules in a reaction annotation
440+
#' database given gene expression data.
441+
#'
442+
#' @details
443+
#' Function `get_activity()` takes a numeric matrix or a numeric data frame `x`
444+
#' with gene expression metrics to calculate activity scores of metabolic reactions.
445+
#' The scores are calculated by simple evaluation of gene - reaction association
446+
#' rules stored in a reaction annotation database data frame of class
447+
#' \code{\link{reactDB}} (created with \code{\link{as_reactDB}} or
448+
#' \code{\link{extract_genes}}).
449+
#'
450+
#' Of note, the gene expression metrics in `x` are not pre-processed prior to
451+
#' calculation of the reaction activity scores; the pre-processing steps are
452+
#' intended to be done by the user.
453+
#' Because evaluation of the gene - reaction association rules uses simple
454+
#' arithmetic with means, medians, minima or maxima of gene expression values,
455+
#' it's highly recommended to bring the expression values in `x` approximate
456+
#' equal scales, e.g. by computing Z-scores with \code{\link[microViz]{zScores}}.
457+
#'
458+
#' @return a numeric matrix or a data frame (if `as_data_frame = TRUE`) with
459+
#' reaction activity scores.
460+
#' Metabolic reactions named with their identifiers are in columns,
461+
#' samples are in rows.
462+
#' In the data frame output, sample identifiers are stored in the first column
463+
#' named `sample_id`.
464+
#'
465+
#' @inheritParams get_regulation
466+
#' @param x a numeric matrix or a data frame with gene expression metrics; genes
467+
#' in columns, samples in rows.
468+
#' @param as_data_frame logical, should the matrix of reaction activity scores
469+
#' be coerced to a data frame?
470+
#' @param id_col `NULL` or a character string specifying a variable serving as
471+
#' a sample identifier. If `id_col = NULL`, the sample identifiers will be
472+
#' extracted from row names of `x`.
473+
#' @param variables `NULL` or a character vector specifying gene expression
474+
#' variables used for calculation of reaction activity scores. If `NULL`, all
475+
#' variables in `x` except of the sample identifier will be used.
476+
#' @param ... additional arguments, currently none.
477+
#'
478+
#' @export
479+
480+
get_activity <- function(x, ...) UseMethod("get_activity")
481+
482+
#' @rdname get_activity
483+
#' @export
484+
485+
get_activity.matrix <- function(x,
486+
database,
487+
or_fun = c("mean", "median", "min", "max"),
488+
and_fun = c("min", "max", "mean", "median"),
489+
x_default = 1,
490+
as_data_frame = TRUE,
491+
.parallel = FALSE, ...) {
492+
493+
## benchmarking and exit handlers --------
494+
495+
start_time <- Sys.time()
496+
497+
on.exit(plan("sequential"))
498+
on.exit(message(paste("Elapsed:", Sys.time() - start_time)), add = TRUE)
499+
500+
## entry control --------
501+
502+
stopifnot(is.matrix(x))
503+
504+
if(!is.numeric(x)) stop("`x` must be a numeric matrix.", call. = FALSE)
505+
506+
if(is.null(colnames(x))) stop("Column names of `x` must be specified.", call. = FALSE)
507+
if(is.null(rownames(x))) stop("Row names of `x` must be specified.", call. = FALSE)
508+
509+
if(!is_reactDB(database)) {
510+
511+
stop(paste("`database` has to be a `reactDB` object.",
512+
"Please consult functions `as_reactDB()` and `extract_genes()`."),
513+
call. = FALSE)
514+
515+
}
516+
517+
or_fun <- match.arg(or_fun[1], c("mean", "median", "min", "max"))
518+
and_fun <- match.arg(and_fun[1], c("min", "max", "mean", "median"))
519+
520+
if(!is.na(x_default)) {
521+
522+
if(!is.numeric(x_default)) stop("`x_default` has to be a number or NA.", call. = FALSE)
523+
524+
x_default <- x_default[1]
525+
526+
}
527+
528+
stopifnot(is.logical(as_data_frame))
529+
as_data_frame <- as_data_frame[1]
530+
531+
## evaluation environment ---------
532+
533+
o_fun <- switch(or_fun,
534+
min = function(x, y) min(c(x, y), na.rm = TRUE),
535+
max = function(x, y) max(c(x, y), na.rm = TRUE),
536+
mean = function(x, y) mean(c(x, y), na.rm = TRUE),
537+
median = function(x, y) median(c(x, y), na.rm = TRUE))
538+
539+
a_fun <- switch(and_fun,
540+
min = function(x, y) min(c(x, y), na.rm = TRUE),
541+
max = function(x, y) max(c(x, y), na.rm = TRUE),
542+
mean = function(x, y) mean(c(x, y), na.rm = TRUE),
543+
median = function(x, y) median(c(x, y), na.rm = TRUE))
544+
545+
all_genes <- reduce(database$entrez_id, union)
546+
547+
miss_genes <- all_genes[!all_genes %in% colnames(x)]
548+
549+
miss_gene_lst <- set_names(rep(x_default, length(miss_genes)),
550+
miss_genes)
551+
fun_ev <-
552+
new_environment(data = c(list(`%AND%` = a_fun,
553+
`%OR%` = o_fun,
554+
`(` = `(`),
555+
as.list(miss_gene_lst)))
556+
557+
## calculation of the reaction activity scores ---------
558+
559+
row_data <- map(1:nrow(x), function(idx) x[idx, ])
560+
561+
row_data <- set_names(row_data,
562+
rownames(x))
563+
564+
if(.parallel) plan("multisession")
565+
566+
exp_globals <- c("eval_rules",
567+
"fun_ev",
568+
"database")
569+
570+
exp_packages <- c("generics",
571+
"rlang",
572+
"biggrExtra")
573+
574+
activity_mtx <-
575+
future_map(row_data,
576+
eval_rules,
577+
database = database,
578+
parent_env = fun_ev,
579+
.options = furrr_options(seed = TRUE,
580+
globals = exp_globals,
581+
packages = exp_packages))
582+
583+
activity_mtx <- reduce(activity_mtx, rbind)
584+
585+
rownames(activity_mtx) <- names(row_data)
586+
587+
if(!as_data_frame) return(activity_mtx)
588+
589+
return(as_tibble(rownames_to_column(as.data.frame(activity_mtx), "sample_id")))
590+
591+
}
592+
593+
#' @rdname get_activity
594+
#' @export
595+
596+
get_activity.data.frame <- function(x,
597+
database,
598+
id_col = NULL,
599+
variables = NULL,
600+
or_fun = c("mean", "median", "min", "max"),
601+
and_fun = c("min", "max", "mean", "median"),
602+
x_default = 1,
603+
as_data_frame = TRUE,
604+
.parallel = FALSE, ...) {
605+
606+
## minimal input control --------
607+
608+
stopifnot(is.data.frame(x))
609+
610+
if(!is.null(id_col)) {
611+
612+
stopifnot(is.character(id_col))
613+
id_col <- id_col[1]
614+
615+
if(!id_col %in% names(x)) {
616+
617+
stop("Variable specified by `id_col` is missing from `x`",
618+
call. = FALSE)
619+
620+
}
621+
622+
x <- column_to_rownames(x, id_col)
623+
624+
}
625+
626+
if(is.null(variables)) variables <- names(x)
627+
628+
if(!is.character(variables)) {
629+
630+
stop("`variables` has to be NULL or a character vector.", call. = FALSE)
631+
632+
}
633+
634+
missing_cols <- setdiff(variables, names(x))
635+
636+
if(length(missing_cols) > 0) {
637+
638+
if(length(missing_cols) > 20) {
639+
640+
missing_txt <- paste0(paste(missing_cols[1:20], collapse = ", "), ", ...")
641+
642+
} else {
643+
644+
missing_txt <- paste(missing_cols, collapse = ", ")
645+
646+
}
647+
648+
stop(paste("Some variables specified by the user are missing from `x`:",
649+
missing_txt),
650+
call. = FALSE)
651+
652+
}
653+
654+
x <- x[, variables, drop = FALSE]
655+
656+
class_check <- map_lgl(x, is.numeric)
657+
658+
if(any(!class_check)) {
659+
660+
error_vars <- names(x)[!class_check]
661+
662+
if(length(error_vars) > 20) {
663+
664+
err_var_txt <- paste0(paste(error_vars[1:20], collapse = ", "), ", ...")
665+
666+
} else {
667+
668+
err_var_txt <- paste(error_vars, collapse = ", ")
669+
670+
}
671+
672+
stop(paste("Some variables in `x` are not nomeric:", err_var_txt), call. = FALSE)
673+
674+
}
675+
676+
## calculation of the activity scores ---------
677+
678+
get_activity(as.matrix(x),
679+
database = database,
680+
or_fun = or_fun,
681+
and_fun = and_fun,
682+
x_default = x_default,
683+
as_data_frame = as_data_frame,
684+
.parallel = .parallel, ...)
685+
686+
}
687+
425688
# END --------

0 commit comments

Comments
 (0)