|
112 | 112 | #' @return A list of 3 matrices:\describe{ |
113 | 113 | #' \item{\code{mRNAProportions}}{(\code{nSamples} x (\code{nCellTypes+1})) the |
114 | 114 | #' proportion of mRNA coming from all cell types with a ref profile + the |
115 | | -#' uncharacterized other cell.} |
| 115 | +#' uncharacterized other cell. Note that when working with in silico bulk |
| 116 | +#' samples reconstructed, for example, from single-cell RNA-seq data, the |
| 117 | +#' 'true' proportions should be compared against these 'mRNAProportions', |
| 118 | +#' whereas when working with true bulk samples, the cell proportions should |
| 119 | +#' be compared against the 'cellFractions'.} |
116 | 120 | #' \item{\code{cellFractions}}{(\code{nSamples} x (\code{nCellTypes+1})) this |
117 | 121 | #' gives the proportion of cells from each cell type after accounting for |
118 | 122 | #' the mRNA / cell value.} |
@@ -392,18 +396,20 @@ EPIC <- function(bulk, reference=NULL, mRNA_cell=NULL, mRNA_cell_sub=NULL, |
392 | 396 | if (anyNA(tInds)){ |
393 | 397 | defaultInd <- match("default", names(mRNA_cell)) |
394 | 398 | if (is.na(defaultInd)){ |
395 | | - tStr <- paste(" and no default value is given for this mRNA per cell,", |
396 | | - "so we cannot estimate the cellFractions, only", |
397 | | - "the mRNA proportions") |
| 399 | + warning("mRNA_cell value unknown for some cell types: ", |
| 400 | + paste(colnames(mRNAProportions)[is.na(tInds)], collapse=", "), |
| 401 | + " and no default value is given for the mRNA per cell, so we cannot ", |
| 402 | + "estimate the cellFractions, only the mRNA proportions") |
398 | 403 | } else { |
399 | | - tStr <- paste(" - using the default value of", mRNA_cell[defaultInd], |
400 | | - "for these but this might bias the true cell proportions from", |
401 | | - "all cell types.") |
| 404 | + # warning("mRNA_cell value unknown for some cell types: ", |
| 405 | + # paste(colnames(mRNAProportions)[is.na(tInds)], collapse=", "), |
| 406 | + # " - using the default value of", mRNA_cell[defaultInd], " for these but ", |
| 407 | + # "this might bias the true cell proportions from all cell types.") |
| 408 | + # This warning is deliberately not emitted: it would always be raised when |
| 409 | + # the user doesn't define additional mRNA_cell values themselves. Instead, |
| 410 | + # this caveat is stated directly in the documentation. |
| 411 | + tInds[is.na(tInds)] <- defaultInd |
402 | 412 | } |
403 | | - warning("mRNA_cell value unknown for some cell types: ", |
404 | | - paste(colnames(mRNAProportions)[is.na(tInds)], collapse=", "), |
405 | | - tStr) |
406 | | - tInds[is.na(tInds)] <- defaultInd |
407 | 413 | } |
408 | 414 | cellFractions <- t( t(mRNAProportions) / mRNA_cell[tInds]) |
409 | 415 | cellFractions <- cellFractions / rowSums(cellFractions, na.rm=FALSE) |
@@ -465,15 +471,17 @@ merge_duplicates <- function(mat, warn=TRUE, in_type=NULL){ |
465 | 471 | if (warn){ |
466 | 472 | warning("There are ", length(dupl_genes), " duplicated gene names", |
467 | 473 | ifelse(!is.null(in_type), paste(" in the", in_type), ""), |
468 | | - ". We'll use the median value for each of these cases.") |
| 474 | + " (e.g., ", paste0("'", dupl_genes[1:(min(5, length(dupl_genes)))], |
| 475 | + "'", collapse=", "), "). We'll use the median value for ", |
| 476 | + "each of these cases.") |
469 | 477 | } |
470 | 478 | mat_dupl <- mat[rownames(mat) %in% dupl_genes,,drop=F] |
471 | 479 | mat_dupl_names <- rownames(mat_dupl) |
472 | 480 | mat <- mat[!dupl,,drop=F] |
473 | 481 | # First put the dupl cases in a separate matrix and keep only the unique |
474 | 482 | # gene names in the mat matrix. |
475 | | - mat[dupl_genes,] <- t(sapply(dupl_genes, FUN=function(cgene) |
476 | | - apply(mat_dupl[mat_dupl_names == cgene,,drop=F], MARGIN=2, FUN=median))) |
| 483 | + mat[match(dupl_genes, rownames(mat)),] <- t(sapply(dupl_genes, FUN=function(cgene) |
| 484 | + apply(mat_dupl[mat_dupl_names == cgene,,drop=F], MARGIN=2, FUN=stats::median))) |
477 | 485 | } |
478 | 486 | return(mat) |
479 | 487 | } |
0 commit comments