MonashBioinformaticsPlatform
diff --git a/‎.Rbuildignore‎
Lines changed: 5 additions & 5 deletions b/‎.Rbuildignore‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions b/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎MBP-logo.png‎
171 KB b/‎MBP-logo.png‎
171 KB
diff --git a/‎NEWS‎
Lines changed: 6 additions & 1 deletion b/‎NEWS‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎R/heatmap.R‎
Lines changed: 24 additions & 6 deletions b/‎R/heatmap.R‎
Lines changed: 24 additions & 6 deletions
diff --git a/‎R/plot.R‎
Lines changed: 3 additions & 7 deletions b/‎R/plot.R‎
Lines changed: 3 additions & 7 deletions
diff --git a/‎README.md‎
Lines changed: 11 additions & 0 deletions b/‎README.md‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎_pkgdown.yml‎
Lines changed: 3 additions & 0 deletions b/‎_pkgdown.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎inst/CITATION‎
Lines changed: 17 additions & 0 deletions b/‎inst/CITATION‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎man/composable_shiny_app.Rd‎
Lines changed: 7 additions & 2 deletions b/‎man/composable_shiny_app.Rd‎
Lines changed: 7 additions & 2 deletions
@@ -6,9 +6,9 @@
 ^test$
 ^test_output$
 ^paper
-^_pkgdown.yml$
-^index.md$
-^codemeta.json$
+^index\.md$
+^codemeta\.json$
 ^docs$
-^varistran.sublime-
-
+^varistran\.sublime-
+^MBP-logo.png$
+^_pkgdown\.yml$
@@ -10,7 +10,7 @@ Description: Transform RNA-Seq count data so that variance due to biological
 Authors@R: person("Paul", "Harrison", email = "[email protected]", role = c("aut", "cre"))
 Maintainer: Paul Harrison <[email protected]>
 URL: https://github.com/MonashBioinformaticsPlatform/varistran
-Version: 1.0.3
+Version: 1.0.4
 License: LGPL-2.1 | file LICENSE
 Depends:
     grid
@@ -27,4 +27,4 @@ Suggests:
     DESeq2,
     biomaRt,
     NBPSeq
-RoxygenNote: 6.0.1
+RoxygenNote: 7.1.0
@@ -1,11 +1,16 @@
 
-1.0.3
+1.0.4
 =====
 
 Increase robustness of heatmap to missing values.
 
 Baseline plot is not shown in heatmap if all zero.
 
+1.0.3
+=====
+
+No code changes. Updated README with references and supporting/contributing section.
+
 1.0.2
 =====
 
 
@@ -3,14 +3,20 @@
 #'
 #' Produces a heatmap as a grid grob.
 #'
-#' Clustering is performed using the "seriation" package, and is approximately a Travelling Salesman Problem ordering. If there are many features (more than a couple of thousand) clustering may be slow.
+#' This heatmap differs from other heatmaps in R in the method of clustering used:
 #'
-#' @param y A matrix of expression levels, such as a transformed counts matrix.
+#' 1. The distances used are cosine distances (i.e. the magnitude of log fold changes is not important, only the pattern).
+#'
+#' 2. \code{hclust()} is used to produce a clustering, as normal.
+#'
+#' 3. Branches in the hierarchical clustering are flipped to minimize sharp changes between neighbours, using the seriation package's OLO (Optimal Leaf Ordering) method.
+#'
+#' @param y A matrix of expression levels, such as a transformed counts matrix as produced by \code{varistran::vst}.
 #' @param cluster_samples Should samples (columns) be clustered?
 #' @param cluster_features Should features (rows) be clustered?
 #' @param sample_labels Names for each sample. If not given and y has column names, these will be used instead.
 #' @param feature_labels Names for each feature. If not given and y has row names, these will be used instead.
-#' @param baseline Baseline level for each row, to be subtracted when drawing the heatmap colors. If omitted, the row mean will be used.
+#' @param baseline Baseline level for each row, to be subtracted when drawing the heatmap colors. If omitted, the row mean will be used. Specify \code{baseline=0} to not subtract anything and not show a baseline bar graph.
 #' @param baseline_label Text description of what the baseline is.
 #' @param scale_label Text description of what the heatmap colors represent (after baseline is subtracted).
 #' @param n Show only this many rows. Rows are selected in order of greatest span of expression level.
@@ -20,6 +26,15 @@
 #' Additionally $info$row_order will contain row ordering and $info$col_order will contain column ordering.
 #' @author Paul Harrison.
 #'
+#' @examples
+#'
+#' # Generate some random data.
+#' counts <- matrix(rnbinom(1000, size=1/0.01, mu=100), ncol=10)
+#'
+#' y <- varistran::vst(counts, cpm=TRUE)
+#' print( varistran::plot_heatmap(y, n=20) )
+#'
+#'
 #' @export
 plot_heatmap <- function(
         y,
@@ -50,11 +65,14 @@ plot_heatmap <- function(
 
     feature_labels[is.na(feature_labels)] <- ""
 
-    if (!is.null(baseline))
+    if (!is.null(baseline)) {
+        if (length(baseline) == 1)
+            baseline <- rep(baseline, nrow(y))
+        stopifnot(length(baseline) == nrow(y))
         means <- baseline
-    else
+    } else {
         means <- rowMeans(y, na.rm=TRUE)
-
+    }
 
     # Show only a subset of rows, if desired
     if (n < nrow(y)) {    
 
@@ -133,17 +133,13 @@ plot_stability <- function(y, x=NULL, design=NULL, bins=20) {
 #'
 #' Produce a ggplot object containing a biplot of expression data.
 #'
-#' Biplot based on the Singular Value Decomposition of the matrix x. The
-#' dimensions corresponding to the two largest singular values are shown.
+#' Biplot based on the Singular Value Decomposition of the matrix x, after subtracting row means. The dimensions corresponding to the two largest singular values are shown.
 #'
 #' Genes are shown in blue and samples in red.
 #'
-#' The dot product of the gene and sample vectors approximates the difference
-#' from the average expression level of that gene in that sample.
+#' The dot product of the gene and sample vectors approximates the difference from the average expression level of that gene in that sample.
 #'
-#' Sample points (red) are scaled to have the same variance in the two
-#' dimensions. Therefore the gene points (blue) may have greater variance along
-#' dimension 1 if dimension 1 explains more of the variance than dimension 2.
+#' Sample points (red) are scaled to have the same variance in the two dimensions. Therefore the gene points (blue) may have greater variance along dimension 1 if dimension 1 explains more of the variance than dimension 2.
 #'
 #' @param x Matrix of expression levels, with features (eg genes) as rows and
 #' samples as columns. For example, you could use the output of varistran::vst
 
@@ -10,8 +10,12 @@ Varistran is an R package providing a Variance Stabilizing Transformation approp
 
 * [Poster for ABACBS 2015](doc/varistran-poster-abacbs-2015.pdf) [(on F1000, doi: 10.7490/f1000research.1110757.1)](http://f1000research.com/posters/4-1041)
 
+* [Publication in the Journal of Open Source Software](http://joss.theoj.org/papers/10.21105/joss.00257)
+
 Varistran is developed by Paul Harrison ([email protected], [@paulfharrison](https://twitter.com/paulfharrison)) for the [Monash Bioinformatics platform](https://platforms.monash.edu/bioinformatics/).
 
+<a href="https://platforms.monash.edu/bioinformatics/"><img src="MBP-logo.png" height="88"></a>
+
 ## Install
 
 Varistran is most easily installed from GitHub using devtools:
@@ -143,6 +147,13 @@ Pull requests gratefully considered.
 * [RNA Systems Laboratory, Monash University](http://rnasystems.erc.monash.edu)
 
 
+## Citing Varistran
+
+To cite this R package, use:
+
+> Harrison, Paul F. 2017. "Varistran: Anscombe's variance stabilizing transformation for RNA-seq gene expression data." *The Journal of Open Source Software* 2 (16). [doi:10.21105/joss.00257](http://dx.doi.org/10.21105/joss.00257)
+
+
 ## References
 
 Anscombe, Francis J. 1948. "The Transformation of Poisson, Binomial and Negative-Binomial Data." *Biometrika* 35 (3/4): 246–54.
 
@@ -13,6 +13,9 @@ reference:
     contents:
       - vst
       - shiny_report
+      - plot_stability
+      - plot_biplot
+      - plot_heatmap
 
   - title: Support functions
     contents:
 
@@ -0,0 +1,17 @@
+
+citEntry(
+    entry = "Article",
+    author = personList(as.person("Paul Francis Harrison")),
+    title = "Varistran: Anscombe's variance stabilizing transformation for {RNA}-seq gene expression data",
+    journal = "The Journal of Open Source Software",
+    year = "2017",
+    volume = "2",
+    number = "16",
+    doi = "10.21105/joss.00257",
+    url = "http://dx.doi.org/10.21105/joss.00257",
+    textVersion = paste(
+        "Paul F. Harrison (2017).",
+        "Varistran: Anscombe's variance stabilizing transformation for RNA-seq gene expression data.",
+        "The Journal of Open Source Software 2 (16).",
+        "doi:10.21105/joss.00257")
+)