Merge pull request #56 from lionel-/add-regression

lionel- · web-flow · commit 2930cbd85945 · 2018-12-30T11:09:15.000+01:00
Skip tests on CRAN
diff --git a/NEWS.md b/NEWS.md
@@ -2,7 +2,12 @@
 # vdiffr 0.2.99.9000
 
 This release of vdiffr features a major overhaul of the internals to
-make the package more robust and reliable across platforms:
+make the package more robust.
+
+
+## Cross-platform reliability
+
+vdiffr now works reliably across platforms:
 
 * svglite is now embedded in vdiffr to protect against updates of the
   SVG generation engine.
@@ -20,6 +25,34 @@ Now that vdiffr has a stable engine, the next release will focus on
 improving the Shiny UI.
 
 
+## Regression testing versus Unit testing
+
+Another important change is that figure mismatches are no longer
+reported as failures, except when the tests are run locally, on
+Travis, Appveyor, or any environment where the `CI` or `NOT_CRAN`
+environment variables are set. Because vdiffr is more of a
+monitoring than a unit testing tool, it shouldn't cause R CMD check
+failures on the CRAN machines.
+
+Despite our efforts to make vdiffr robust and reliable across
+platforms, checking the appearance of a figure is still inherently
+fragile. It is similar to testing for errors by matching exact error
+messages: these messages are susceptible to change at any
+time. Similarly, the appearance of plots depends on a lot of upstream
+code, such as the way margins and spacing are computed. vdiffr uses a
+special ggplot2 theme that should change very rarely, but there are
+just too many upstream factors that could cause breakages. For this
+reason, figure mismatches are not necessarily representative of actual
+failures.
+
+Visual testing is not an alternative to writing unit tests for the
+internal data transformations performed during the creation of your
+figure. It is more of a monitoring tool that allows you to quickly
+check how the appearance of your figures changes over time, and to
+manually assess whether changes reflect actual problems in your
+package.
+
+
 ## Features
 
 * vdiffr now advises user to run `manage_cases()` when a figure was
diff --git a/R/testthat-ui.R b/R/testthat-ui.R
@@ -1,13 +1,19 @@
 #' Does a figure look like its expected output?
 #'
-#' If the test has never been validated yet, the test is skipped. If
-#' the test has previously been validated but \code{fig} does not look
-#' like its expected output, an error is issued. Use
-#' [validate_cases()] or [manage_cases()] to (re)validate
-#' the test.
+#' @description
 #'
-#' `fig` can be a ggplot object, a recordedplot, a function to be
-#' called, or more generally any object with a `print` method.
+#' `expect_doppelganger()` takes a figure to check visually.
+#'
+#' * If the figure has yet to be validated, the test is skipped. Call
+#'   [manage_cases()] to validate the new figure, so vdiffr knows what
+#'   to compare against.
+#'
+#' * If the test has been validated, `fig` is compared to the
+#'   validated figure. If the plot differs, a failure is issued
+#'   (except on CRAN, see section on regression testing below).
+#'
+#'   Either fix the problem, or call [manage_cases()] to validate the
+#'   new figure appearance.
 #'
 #' @param title A brief description of what is being tested in the
 #'   figure. For instance: "Points and lines overlap".
@@ -17,7 +23,12 @@
 #'
 #'   The title is also used as file name for storing SVG (in a
 #'   sanitzed form, with special characters converted to `"-"`).
-#' @param fig A figure to test.
+#' @param fig A figure to test. This can be a ggplot object, a
+#'   recordedplot, or more generally any object with a `print` method.
+#'
+#'   For plots that can't be represented as printable objects, you can
+#'   pass a function. This function must construct the plot and print
+#'   it.
 #' @param path The path where the test case should be stored, relative
 #'   to the `tests/figs/` folder. If `NULL` (the default), the current
 #'   testthat context is used to create a subfolder. Supply an empty
@@ -31,6 +42,31 @@
 #'   in a deterministic way and write it to the target file. See
 #'   [write_svg()] (the default) for an example.
 #'
+#' @section Regression testing versus Unit testing:
+#'
+#' Failures to match a validated appearance are only reported when the
+#' tests are run locally, on Travis, Appveyor, or any environment
+#' where the `CI` or `NOT_CRAN` environment variables are set. Because
+#' vdiffr is more of a monitoring than a unit testing tool, it
+#' shouldn't cause R CMD check failures on the CRAN machines.
+#'
+#' Checking the appearance of a figure is inherently fragile. It is
+#' similar to testing for errors by matching exact error messages:
+#' these messages are susceptible to change at any time. Similarly,
+#' the appearance of plots depends on a lot of upstream code, such as
+#' the way margins and spacing are computed. vdiffr uses a special
+#' ggplot2 theme that should change very rarely, but there are just
+#' too many upstream factors that could cause breakages. For this
+#' reason, figure mismatches are not necessarily representative of
+#' actual failures.
+#'
+#' Visual testing is not an alternative to writing unit tests for the
+#' internal data transformations performed during the creation of your
+#' figure. It is more of a monitoring tool that allows you to quickly
+#' check how the appearance of your figures changes over time, and to
+#' manually assess whether changes reflect actual problems in your
+#' package.
+#'
 #' @section Debugging:
 #'
 #' It is sometimes difficult to understand the cause of a failure.
@@ -167,6 +203,7 @@ new_expectation <- function(msg, case, type, vdiffr_type) {
   classes <- c(class(exp), vdiffr_type)
   structure(exp, class = classes, vdiffr_case = case)
 }
+
 new_exp <- function(msg, case) {
   new_expectation(msg, case, "skip", "vdiffr_new")
 }
@@ -177,8 +214,10 @@ mismatch_exp <- function(msg, case) {
   if (is_vdiffr_stale()) {
     msg <- "The vdiffr engine is too old. Please update vdiffr and revalidate the figures."
     new_expectation(msg, case, "skip", "vdiffr_mismatch")
-  } else {
+  } else if (is_ci()) {
     new_expectation(msg, case, "failure", "vdiffr_mismatch")
+  } else {
+    new_expectation(msg, case, "skip", "vdiffr_mismatch")
   }
 }
 
diff --git a/R/utils.R b/R/utils.R
@@ -170,3 +170,7 @@ is_vdiffr_stale <- function() {
 hash_encode_url <- function(url){
   gsub("#", "%23", url)
 }
+
+is_ci <- function() {
+  nzchar(Sys.getenv("CI")) || nzchar(Sys.getenv("NOT_CRAN"))
+}
diff --git a/README.md b/README.md
@@ -41,6 +41,8 @@ that each plot is correct.
 
 1) Run `devtools::test()` to execute the tests as normal.
 
+When a figure doesn't match the saved version, vdiffr signals a failure when it is run interactively, or when it is run on Travis or Appveyor. Mismatches do not cause R CMD check to fail on CRAN machines. See the testing versus monitoring section below.
+
 
 ### Adding expectations
 
@@ -123,6 +125,23 @@ You can run the tests the usual way, for example with
 will be skipped. Failed tests will show as an error.
 
 
+### Testing versus Monitoring
+
+When a figure doesn't match its saved version, it is only reported as a failure under these circumstances:
+
+- When the `NOT_CRAN` environment variable is set. In particular, devtools sets this when running the tests interactively.
+
+- On Travis, Appveyor, or any environment where the `CI` environment variable is set.
+
+Otherwise, the failure is ignored. The motivation for this is that vdiffr is a monitoring tool and shouldn't cause R CMD check failures on the CRAN machines.
+
+Checking the appearance of a figure is inherently fragile. It is a bit like testing for errors by matching exact error messages. These messages are susceptible to change at any time. Similarly, the appearance of plots depends on a lot of upstream code, such as the way margins and spacing are computed. vdiffr uses a special ggplot2 theme that should change very rarely, but there are just too many upstream factors that could cause breakages. For this reason, figure mismatches are not necessarily representative of actual failures.
+
+Visual testing is not an alternative to writing unit tests for the internal data transformations performed during the creation of your figure. It is more of a monitoring tool that allows you to quickly check how the appearance of your figures changes over time, and to manually assess whether changes reflect actual problems in your packages.
+
+If you want vdiffr to fail on CRAN machines as well, just set the environment variable `CI` to `"true"` in a `setup-vdiffr.R` file in your testthat folder.
+
+
 ### RStudio integration
 
 An addin to launch `manage_cases()` is provided with vdiffr. Use the
diff --git a/man/expect_doppelganger.Rd b/man/expect_doppelganger.Rd
diff --git a/tests/mock.Rout.fail b/tests/mock.Rout.fail
@@ -43,3 +43,43 @@ Failed doppelganger: myplot (../figs//myplot.svg)
   Mi41Mg==)' />                                                             
   <defs>                                                                    
 
+
+Failed doppelganger: myplot (../figs//myplot.svg)
+
+< before                                                                    
+> after                                                                     
+@@ 50,4 / 50,5 @@                                                           
+  <rect x='641.72' y='401.98' width='20.80' height='118.58' style='stroke-wi
+  dth: 1.07; stroke: none; stroke-linecap: butt; fill: #595959;' clip-path='
+  url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3wyMi41Mg==)' />                          
+  <rect x='662.52' y='401.98' width='20.80' height='118.58' style='stroke-wi
+  dth: 1.07; stroke: none; stroke-linecap: butt; fill: #595959;' clip-path='
+  url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3wyMi41Mg==)' />                          
+> <line x1='417.09' y1='544.27' x2='417.09' y2='22.52' style='stroke-width: 
+: 1.07; stroke-linecap: butt;' clip-path='url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3w
+: yMi41Mg==)' />                                                            
+  <rect x='28.09' y='22.52' width='686.43' height='521.75' style='stroke-wid
+  th: 1.07; stroke: #333333;' clip-path='url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3wy
+  Mi41Mg==)' />                                                             
+  <defs>                                                                    
+
+
+Failed doppelganger: myplot (../figs//myplot.svg)
+
+< before                                                                    
+> after                                                                     
+@@ 50,4 / 50,5 @@                                                           
+  <rect x='641.72' y='401.98' width='20.80' height='118.58' style='stroke-wi
+  dth: 1.07; stroke: none; stroke-linecap: butt; fill: #595959;' clip-path='
+  url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3wyMi41Mg==)' />                          
+  <rect x='662.52' y='401.98' width='20.80' height='118.58' style='stroke-wi
+  dth: 1.07; stroke: none; stroke-linecap: butt; fill: #595959;' clip-path='
+  url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3wyMi41Mg==)' />                          
+> <line x1='417.09' y1='544.27' x2='417.09' y2='22.52' style='stroke-width: 
+: 1.07; stroke-linecap: butt;' clip-path='url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3w
+: yMi41Mg==)' />                                                            
+  <rect x='28.09' y='22.52' width='686.43' height='521.75' style='stroke-wid
+  th: 1.07; stroke: #333333;' clip-path='url(#cpMjguMDl8NzE0LjUyfDU0NC4yN3wy
+  Mi41Mg==)' />                                                             
+  <defs>                                                                    
+
diff --git a/tests/testthat/mock-pkg/tests/testthat/test-failed.R b/tests/testthat/mock-pkg/tests/testthat/test-failed.R
@@ -12,7 +12,7 @@ skip_if_maintenance <- function() {
   }
 }
 
-test_that("New plots work are collected", {
+test_that("mismatches are hard failures when NOT_CRAN is set", {
   skip_if_maintenance()
   expect_doppelganger("myplot", p1_fail, "")
 })
@@ -22,6 +22,18 @@ test_that("Duplicated expectations issue a warning", {
   expect_doppelganger("myplot", p1_fail, "")
 })
 
+test_that("mismatches are hard failures when CI is set", {
+  skip_if_maintenance()
+  withr::local_envvar(c(NOT_CRAN = "", CI = "true"))
+  expect_doppelganger("myplot", p1_fail, "")
+})
+
+test_that("mismatches are skipped when NOT_CRAN is unset", {
+  skip_if_maintenance()
+  withr::local_envvar(c(NOT_CRAN = "", CI = ""))
+  expect_doppelganger("myplot", p1_fail, "")
+})
+
 
 # Maintenance --------------------------------------------------------
 
diff --git a/tests/testthat/test-expectations.R b/tests/testthat/test-expectations.R
@@ -1,12 +1,18 @@
 
 context("Expectations")
 
-test_that("Mismatches fail", {
-  failed_result <- subset_results(test_results, "test-failed.R", "New plots work are collected")[[1]]
+test_that("Mismatches are skipped except on CI and interactively", {
+  notcran_result <- subset_results(test_results, "test-failed.R", "mismatches are hard failures when NOT_CRAN is set")[[1]]
+  expect_match(notcran_result$message, "Figures don't match: myplot.svg\n")
+  expect_is(notcran_result, "expectation_failure")
+
+  failed_result <- subset_results(test_results, "test-failed.R", "mismatches are hard failures when CI is set")[[1]]
   expect_match(failed_result$message, "Figures don't match: myplot.svg\n")
+  expect_is(failed_result, "expectation_failure")
 
-  class <- class(failed_result)[[1]]
-  expect_equal(class, "expectation_failure")
+  skipped_result <- subset_results(test_results, "test-failed.R", "mismatches are skipped when NOT_CRAN is unset")[[1]]
+  expect_match(skipped_result$message, "Figures don't match: myplot.svg\n")
+  expect_is(skipped_result, "expectation_skip")
 })
 
 test_that("Duplicated expectations issue warning", {

Original file line number	Diff line number	Diff line change
`@@ -170,3 +170,7 @@ is_vdiffr_stale <- function() {`
`170`	`170`	`hash_encode_url <- function(url){`
`171`	`171`	`gsub("#", "%23", url)`
`172`	`172`	`}`
	`173`	`+`
	`174`	`+is_ci <- function() {`
	`175`	`+ nzchar(Sys.getenv("CI")) \|\| nzchar(Sys.getenv("NOT_CRAN"))`
	`176`	`+}`