|
109 | 109 | #' BCA ("bca", default) or percentile ("perc"). |
110 | 110 | #' @param seed a seed for the random number generator. |
111 | 111 | #' @param .parallel logical, should th computation be run in parallel? |
| 112 | +#' @param ... additional argument, currently none. |
112 | 113 | #' |
113 | 114 | #' @md |
| 115 | +#' @export |
114 | 116 |
|
115 | 117 | get_regulation <- function(x, |
116 | 118 | err = NULL, |
|
126 | 128 | burn_in = 0, |
127 | 129 | ci_method = c("bca", "perc"), |
128 | 130 | seed = NULL, |
129 | | - .parallel = TRUE) { |
| 131 | + .parallel = TRUE, ...) { |
130 | 132 |
|
131 | 133 | ## benchmarking and exit handlers -------- |
132 | 134 |
|
|
219 | 221 | stopifnot(is.logical(return_mc)) |
220 | 222 | return_mc <- return_mc[1] |
221 | 223 |
|
| 224 | + if(!is.na(x_default)) { |
| 225 | + |
| 226 | + if(!is.numeric(x_default)) stop("`x_default` has to be a number or NA.", call. = FALSE) |
| 227 | + |
| 228 | + x_default <- x_default[1] |
| 229 | + |
| 230 | + } |
| 231 | + |
222 | 232 | ## building a parent evaluation environment ------- |
223 | 233 |
|
224 | 234 | message("Building an evaluation environment") |
|
422 | 432 |
|
423 | 433 | # Reaction activity scores with bare expression data -------- |
424 | 434 |
|
| 435 | +#' Calculate reaction activity scores based on gene expression data. |
| 436 | +#' |
| 437 | +#' @description |
| 438 | +#' Function `get_activity()` calculates activity scores of metabolic reactions |
| 439 | +#' by evaluation of gene - reaction association rules in a reaction annotation |
| 440 | +#' database given gene expression data. |
| 441 | +#' |
| 442 | +#' @details |
| 443 | +#' Function `get_activity()` takes a numeric matrix or a numeric data frame `x` |
| 444 | +#' with gene expression metrics to calculate activity scores of metabolic reactions. |
| 445 | +#' The scores are calculated by simple evaluation of gene - reaction association |
| 446 | +#' rules stored in a reaction annotation database data frame of class |
| 447 | +#' \code{\link{reactDB}} (created with \code{\link{as_reactDB}} or |
| 448 | +#' \code{\link{extract_genes}}). |
| 449 | +#' |
| 450 | +#' Of note, the gene expression metrics in `x` are not pre-processed prior to |
| 451 | +#' calculation of the reaction activity scores; the pre-processing steps are |
| 452 | +#' intended to be done by the user. |
| 453 | +#' Because evaluation of the gene - reaction association rules uses simple |
| 454 | +#' arithmetic with means, medians, minima or maxima of gene expression values, |
| 455 | +#' it's highly recommended to bring the expression values in `x` approximate |
| 456 | +#' equal scales, e.g. by computing Z-scores with \code{\link[microViz]{zScores}}. |
| 457 | +#' |
| 458 | +#' @return a numeric matrix or a data frame (if `as_data_frame = TRUE`) with |
| 459 | +#' reaction activity scores. |
| 460 | +#' Metabolic reactions named with their identifiers are in columns, |
| 461 | +#' samples are in rows. |
| 462 | +#' In the data frame output, sample identifiers are stored in the first column |
| 463 | +#' named `sample_id`. |
| 464 | +#' |
| 465 | +#' @inheritParams get_regulation |
| 466 | +#' @param x a numeric matrix or a data frame with gene expression metrics; genes |
| 467 | +#' in columns, samples in rows. |
| 468 | +#' @param as_data_frame logical, should the matrix of reaction activity scores |
| 469 | +#' be coerced to a data frame? |
| 470 | +#' @param id_col `NULL` or a character string specifying a variable serving as |
| 471 | +#' a sample identifier. If `id_col = NULL`, the sample identifiers will be |
| 472 | +#' extracted from row names of `x`. |
| 473 | +#' @param variables `NULL` or a character vector specifying gene expression |
| 474 | +#' variables used for calculation of reaction activity scores. If `NULL`, all |
| 475 | +#' variables in `x` except of the sample identifier will be used. |
| 476 | +#' @param ... additional arguments, currently none. |
| 477 | +#' |
| 478 | +#' @export |
| 479 | + |
| 480 | + get_activity <- function(x, ...) UseMethod("get_activity") |
| 481 | + |
| 482 | +#' @rdname get_activity |
| 483 | +#' @export |
| 484 | + |
| 485 | + get_activity.matrix <- function(x, |
| 486 | + database, |
| 487 | + or_fun = c("mean", "median", "min", "max"), |
| 488 | + and_fun = c("min", "max", "mean", "median"), |
| 489 | + x_default = 1, |
| 490 | + as_data_frame = TRUE, |
| 491 | + .parallel = FALSE, ...) { |
| 492 | + |
| 493 | + ## benchmarking and exit handlers -------- |
| 494 | + |
| 495 | + start_time <- Sys.time() |
| 496 | + |
| 497 | + on.exit(plan("sequential")) |
| 498 | + on.exit(message(paste("Elapsed:", Sys.time() - start_time)), add = TRUE) |
| 499 | + |
| 500 | + ## entry control -------- |
| 501 | + |
| 502 | + stopifnot(is.matrix(x)) |
| 503 | + |
| 504 | + if(!is.numeric(x)) stop("`x` must be a numeric matrix.", call. = FALSE) |
| 505 | + |
| 506 | + if(is.null(colnames(x))) stop("Column names of `x` must be specified.", call. = FALSE) |
| 507 | + if(is.null(rownames(x))) stop("Row names of `x` must be specified.", call. = FALSE) |
| 508 | + |
| 509 | + if(!is_reactDB(database)) { |
| 510 | + |
| 511 | + stop(paste("`database` has to be a `reactDB` object.", |
| 512 | + "Please consult functions `as_reactDB()` and `extract_genes()`."), |
| 513 | + call. = FALSE) |
| 514 | + |
| 515 | + } |
| 516 | + |
| 517 | + or_fun <- match.arg(or_fun[1], c("mean", "median", "min", "max")) |
| 518 | + and_fun <- match.arg(and_fun[1], c("min", "max", "mean", "median")) |
| 519 | + |
| 520 | + if(!is.na(x_default)) { |
| 521 | + |
| 522 | + if(!is.numeric(x_default)) stop("`x_default` has to be a number or NA.", call. = FALSE) |
| 523 | + |
| 524 | + x_default <- x_default[1] |
| 525 | + |
| 526 | + } |
| 527 | + |
| 528 | + stopifnot(is.logical(as_data_frame)) |
| 529 | + as_data_frame <- as_data_frame[1] |
| 530 | + |
| 531 | + ## evaluation environment --------- |
| 532 | + |
| 533 | + o_fun <- switch(or_fun, |
| 534 | + min = function(x, y) min(c(x, y), na.rm = TRUE), |
| 535 | + max = function(x, y) max(c(x, y), na.rm = TRUE), |
| 536 | + mean = function(x, y) mean(c(x, y), na.rm = TRUE), |
| 537 | + median = function(x, y) median(c(x, y), na.rm = TRUE)) |
| 538 | + |
| 539 | + a_fun <- switch(and_fun, |
| 540 | + min = function(x, y) min(c(x, y), na.rm = TRUE), |
| 541 | + max = function(x, y) max(c(x, y), na.rm = TRUE), |
| 542 | + mean = function(x, y) mean(c(x, y), na.rm = TRUE), |
| 543 | + median = function(x, y) median(c(x, y), na.rm = TRUE)) |
| 544 | + |
| 545 | + all_genes <- reduce(database$entrez_id, union) |
| 546 | + |
| 547 | + miss_genes <- all_genes[!all_genes %in% colnames(x)] |
| 548 | + |
| 549 | + miss_gene_lst <- set_names(rep(x_default, length(miss_genes)), |
| 550 | + miss_genes) |
| 551 | + fun_ev <- |
| 552 | + new_environment(data = c(list(`%AND%` = a_fun, |
| 553 | + `%OR%` = o_fun, |
| 554 | + `(` = `(`), |
| 555 | + as.list(miss_gene_lst))) |
| 556 | + |
| 557 | + ## calculation of the reaction activity scores --------- |
| 558 | + |
| 559 | + row_data <- map(1:nrow(x), function(idx) x[idx, ]) |
| 560 | + |
| 561 | + row_data <- set_names(row_data, |
| 562 | + rownames(x)) |
| 563 | + |
| 564 | + if(.parallel) plan("multisession") |
| 565 | + |
| 566 | + exp_globals <- c("eval_rules", |
| 567 | + "fun_ev", |
| 568 | + "database") |
| 569 | + |
| 570 | + exp_packages <- c("generics", |
| 571 | + "rlang", |
| 572 | + "biggrExtra") |
| 573 | + |
| 574 | + activity_mtx <- |
| 575 | + future_map(row_data, |
| 576 | + eval_rules, |
| 577 | + database = database, |
| 578 | + parent_env = fun_ev, |
| 579 | + .options = furrr_options(seed = TRUE, |
| 580 | + globals = exp_globals, |
| 581 | + packages = exp_packages)) |
| 582 | + |
| 583 | + activity_mtx <- reduce(activity_mtx, rbind) |
| 584 | + |
| 585 | + rownames(activity_mtx) <- names(row_data) |
| 586 | + |
| 587 | + if(!as_data_frame) return(activity_mtx) |
| 588 | + |
| 589 | + return(as_tibble(rownames_to_column(as.data.frame(activity_mtx), "sample_id"))) |
| 590 | + |
| 591 | + } |
| 592 | + |
| 593 | +#' @rdname get_activity |
| 594 | +#' @export |
| 595 | + |
| 596 | + get_activity.data.frame <- function(x, |
| 597 | + database, |
| 598 | + id_col = NULL, |
| 599 | + variables = NULL, |
| 600 | + or_fun = c("mean", "median", "min", "max"), |
| 601 | + and_fun = c("min", "max", "mean", "median"), |
| 602 | + x_default = 1, |
| 603 | + as_data_frame = TRUE, |
| 604 | + .parallel = FALSE, ...) { |
| 605 | + |
| 606 | + ## minimal input control -------- |
| 607 | + |
| 608 | + stopifnot(is.data.frame(x)) |
| 609 | + |
| 610 | + if(!is.null(id_col)) { |
| 611 | + |
| 612 | + stopifnot(is.character(id_col)) |
| 613 | + id_col <- id_col[1] |
| 614 | + |
| 615 | + if(!id_col %in% names(x)) { |
| 616 | + |
| 617 | + stop("Variable specified by `id_col` is missing from `x`", |
| 618 | + call. = FALSE) |
| 619 | + |
| 620 | + } |
| 621 | + |
| 622 | + x <- column_to_rownames(x, id_col) |
| 623 | + |
| 624 | + } |
| 625 | + |
| 626 | + if(is.null(variables)) variables <- names(x) |
| 627 | + |
| 628 | + if(!is.character(variables)) { |
| 629 | + |
| 630 | + stop("`variables` has to be NULL or a character vector.", call. = FALSE) |
| 631 | + |
| 632 | + } |
| 633 | + |
| 634 | + missing_cols <- setdiff(variables, names(x)) |
| 635 | + |
| 636 | + if(length(missing_cols) > 0) { |
| 637 | + |
| 638 | + if(length(missing_cols) > 20) { |
| 639 | + |
| 640 | + missing_txt <- paste0(paste(missing_cols[1:20], collapse = ", "), ", ...") |
| 641 | + |
| 642 | + } else { |
| 643 | + |
| 644 | + missing_txt <- paste(missing_cols, collapse = ", ") |
| 645 | + |
| 646 | + } |
| 647 | + |
| 648 | + stop(paste("Some variables specified by the user are missing from `x`:", |
| 649 | + missing_txt), |
| 650 | + call. = FALSE) |
| 651 | + |
| 652 | + } |
| 653 | + |
| 654 | + x <- x[, variables, drop = FALSE] |
| 655 | + |
| 656 | + class_check <- map_lgl(x, is.numeric) |
| 657 | + |
| 658 | + if(any(!class_check)) { |
| 659 | + |
| 660 | + error_vars <- names(x)[!class_check] |
| 661 | + |
| 662 | + if(length(error_vars) > 20) { |
| 663 | + |
| 664 | + err_var_txt <- paste0(paste(error_vars[1:20], collapse = ", "), ", ...") |
| 665 | + |
| 666 | + } else { |
| 667 | + |
| 668 | + err_var_txt <- paste(error_vars, collapse = ", ") |
| 669 | + |
| 670 | + } |
| 671 | + |
| 672 | + stop(paste("Some variables in `x` are not nomeric:", err_var_txt), call. = FALSE) |
| 673 | + |
| 674 | + } |
| 675 | + |
| 676 | + ## calculation of the activity scores --------- |
| 677 | + |
| 678 | + get_activity(as.matrix(x), |
| 679 | + database = database, |
| 680 | + or_fun = or_fun, |
| 681 | + and_fun = and_fun, |
| 682 | + x_default = x_default, |
| 683 | + as_data_frame = as_data_frame, |
| 684 | + .parallel = .parallel, ...) |
| 685 | + |
| 686 | + } |
| 687 | + |
425 | 688 | # END -------- |
0 commit comments