ModelOriented
diff --git a/‎DESCRIPTION‎
Lines changed: 2 additions & 1 deletion b/‎DESCRIPTION‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎NAMESPACE‎
Lines changed: 8 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎R/explain_forest.R‎
Lines changed: 20 additions & 6 deletions b/‎R/explain_forest.R‎
Lines changed: 20 additions & 6 deletions
diff --git a/‎R/measure_importance.R‎
Lines changed: 117 additions & 26 deletions b/‎R/measure_importance.R‎
Lines changed: 117 additions & 26 deletions
diff --git a/‎R/min_depth_distribution.R‎
Lines changed: 40 additions & 4 deletions b/‎R/min_depth_distribution.R‎
Lines changed: 40 additions & 4 deletions
@@ -20,10 +20,11 @@ Imports:
   ggrepel (>= 0.6.5),
   MASS (>= 7.3.47),
   randomForest (>= 4.6.12),
+  ranger (>= 0.9.0),
   reshape2 (>= 1.4.2),
   rmarkdown (>= 1.5)
 Suggests:
   knitr
 VignetteBuilder: knitr
-RoxygenNote: 6.0.1
+RoxygenNote: 6.1.1
 URL: https://github.com/MI2DataLab/randomForestExplainer
@@ -1,5 +1,13 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(measure_importance,randomForest)
+S3method(measure_importance,ranger)
+S3method(min_depth_distribution,randomForest)
+S3method(min_depth_distribution,ranger)
+S3method(min_depth_interactions,randomForest)
+S3method(min_depth_interactions,ranger)
+S3method(plot_predict_interaction,randomForest)
+S3method(plot_predict_interaction,ranger)
 export(explain_forest)
 export(important_variables)
 export(measure_importance)
 
@@ -22,15 +22,29 @@
 #'
 #' @export
 explain_forest <- function(forest, interactions = FALSE, data = NULL, vars = NULL, no_of_pred_plots = 3, pred_grid = 100,
-                             measures = if(forest$type == "classification")
-                               c("mean_min_depth", "accuracy_decrease", "gini_decrease", "no_of_nodes", "times_a_root") else
-                                 c("mean_min_depth", "mse_increase", "node_purity_increase", "no_of_nodes", "times_a_root")){
-  if(any(c("accuracy_decrease", "mse_increase") %in% measures) & dim(forest$importance)[2] == 1) {
+                           measures = NULL){
+  if(is.null(measures)){
+    if("randomForest" %in% class(forest)){
+      if(forest$type == "classification"){
+        measures <- c("mean_min_depth", "accuracy_decrease", "gini_decrease", "no_of_nodes", "times_a_root")
+      } else{
+        measures <- c("mean_min_depth", "mse_increase", "node_purity_increase", "no_of_nodes", "times_a_root")
+      }
+    } else if("ranger" %in% class(forest)){
+      measures <- c("mean_min_depth", forest$importance.mode, "no_of_nodes", "times_a_root")
+    }
+  }
+  if("randomForest" %in% class(forest) && dim(forest$importance)[2] == 1){
     stop(paste("Your forest does not contain information on local importance so",
-               paste(intersect(c("accuracy_decrease", "mse_increase"), measures), sep=", "),
+               ifelse(forest$type == "classification", "accuracy_decrease", "mse_increase"),
                "measure cannot be extracted.",
                "To add it regrow the forest with the option localImp = TRUE and run this function again."))
   }
+  if("ranger" %in% class(forest) && forest$importance.mode == "none"){
+   stop(paste("Your forest does not contain importance information so",
+              "importance cannot be extracted.",
+              "To add it regrow the forest with the option importance other than 'none' and run this function again."))
+  }
   environment <- new.env()
   environment$forest <- forest
   environment$data <- data
@@ -42,7 +56,7 @@ explain_forest <- function(forest, interactions = FALSE, data = NULL, vars = NUL
   directory <- getwd()
   path_to_templates <- file.path(path.package("randomForestExplainer"), "templates")
   template_name <- grep('explain_forest_template.rmd', list.files(path_to_templates),
-                   ignore.case = TRUE, value = TRUE)
+                        ignore.case = TRUE, value = TRUE)
 
   rmarkdown::render(file.path(path_to_templates, template_name),
                     "html_document", output_file = paste0(directory, "/Your_forest_explained.html"),
 
@@ -7,6 +7,7 @@ measure_min_depth <- function(min_depth_frame, mean_sample){
 }
 
 # Calculate the number of nodes split on each variable for a data frame with the whole forest
+# randomForest
 measure_no_of_nodes <- function(forest_table){
   `split var` <- NULL
   frame <- dplyr::group_by(forest_table, `split var`) %>% dplyr::summarize(n())
@@ -16,7 +17,19 @@ measure_no_of_nodes <- function(forest_table){
   return(frame)
 }
 
+# Calculate the number of nodes split on each variable for a data frame with the whole forest
+# ranger
+measure_no_of_nodes_ranger <- function(forest_table){
+  # Count, for every split variable, the rows of `forest_table` that use it.
+  # Returns a plain data frame with columns `variable` (character) and `no_of_nodes`.
+  # Local binding for the column name used in group_by() below
+  # (presumably there to keep R CMD check quiet about an undefined global)
+  splitvarName <- NULL
+  by_variable <- dplyr::group_by(forest_table, splitvarName)
+  node_counts <- dplyr::summarize(by_variable, n())
+  names(node_counts) <- c("variable", "no_of_nodes")
+  # Rows with a missing split variable are not counted as splits
+  has_variable <- !is.na(node_counts$variable)
+  node_counts <- as.data.frame(node_counts[has_variable, ])
+  node_counts$variable <- as.character(node_counts$variable)
+  node_counts
+}
+
 # Extract randomForest variable importance measures
+# randomForest
 measure_vimp <- function(forest, only_nonlocal = FALSE){
   if(forest$type == "classification"){
     if(dim(forest$importance)[2] == 1){
@@ -44,6 +57,20 @@ measure_vimp <- function(forest, only_nonlocal = FALSE){
   return(frame)
 }
 
+# Extract ranger variable importance measures
+# ranger
+measure_vimp_ranger <- function(forest){
+  # Build a two-column data frame from the importance stored in `forest`:
+  # the importance values (column named after forest$importance.mode) and `variable`.
+  # Fail early when the forest carries no importance information
+  if (forest$importance.mode == "none"){
+    stop("No variable importance available, regenerate forest by ranger(..., importance='impurity').")
+  }
+  vimp <- forest$variable.importance
+  frame <- data.frame(importance = vimp, variable = names(vimp), stringsAsFactors = FALSE)
+  # Rename the value column to the importance mode of the forest
+  # (the original note lists impurity, impurity_corrected and permutation)
+  names(frame)[1] <- forest$importance.mode
+  frame
+}
+
 # Calculate the number of trees using each variable for splitting
 measure_no_of_trees <- function(min_depth_frame){
   variable <- NULL
@@ -68,8 +95,8 @@ measure_times_a_root <- function(min_depth_frame){
 measure_p_value <- function(importance_frame){
   total_no_of_nodes <- sum(importance_frame$no_of_nodes)
   p_value <- unlist(lapply(importance_frame$no_of_nodes,
-                  function(x) stats::binom.test(x, total_no_of_nodes, 1/nrow(importance_frame),
-                                         alternative = "greater")$p.value))
+                           function(x) stats::binom.test(x, total_no_of_nodes, 1/nrow(importance_frame),
+                                                         alternative = "greater")$p.value))
   return(p_value)
 }
 
@@ -87,21 +114,25 @@ measure_p_value <- function(importance_frame){
 #'
 #' @return A data frame with rows corresponding to variables and columns to various measures of importance of variables
 #'
-#' @import dplyr
-#' @importFrom data.table rbindlist
-#'
 #' @examples
 #' forest <- randomForest::randomForest(Species ~ ., data = iris, localImp = TRUE, ntree = 300)
 #' measure_importance(forest)
 #'
 #' @export
 measure_importance <- function(forest, mean_sample = "top_trees", measures = NULL){
+  # S3 generic: the method is chosen by the class of `forest`
+  UseMethod("measure_importance")
+}
+
+#' @import dplyr
+#' @importFrom data.table rbindlist
+#' @export
+measure_importance.randomForest <- function(forest, mean_sample = "top_trees", measures = NULL){
   tree <- NULL; `split var` <- NULL; depth <- NULL
   if(is.null(measures)){
     if(forest$type == "classification"){
       measures <- c("mean_min_depth", "no_of_nodes", "accuracy_decrease",
                     "gini_decrease", "no_of_trees", "times_a_root", "p_value")
-    } else if(forest$type =="regression") {
+    } else if(forest$type =="regression"){
       measures <- c("mean_min_depth", "no_of_nodes", "mse_increase", "node_purity_increase",
                     "no_of_trees", "times_a_root", "p_value")
     }
@@ -130,7 +161,7 @@ measure_importance <- function(forest, mean_sample = "top_trees", measures = NUL
   }
   if(forest$type == "classification"){
     vimp <- c("accuracy_decrease", "gini_decrease")
-  } else if(forest$type =="regression") {
+  } else if(forest$type =="regression"){
     vimp <- c("mse_increase", "node_purity_increase")
   }
   if(all(vimp %in% measures)){
@@ -156,6 +187,54 @@ measure_importance <- function(forest, mean_sample = "top_trees", measures = NUL
   return(importance_frame)
 }
 
+#' @import dplyr
+#' @importFrom data.table rbindlist
+#' @export
+measure_importance.ranger <- function(forest, mean_sample = "top_trees", measures = NULL){
+  # Importance measures for a ranger forest: one row per variable, one column per
+  # requested measure. When `measures` is NULL a default set is used that includes the
+  # importance mode stored in the forest.
+  # Local bindings for the column names used in group_by()/summarize() below
+  tree <- NULL; splitvarName <- NULL; depth <- NULL
+  if(is.null(measures)){
+    measures <- c("mean_min_depth", "no_of_nodes", forest$importance.mode, "no_of_trees", "times_a_root", "p_value")
+  }
+  # measure_p_value() reads the no_of_nodes column, so that measure has to be present too
+  if(("p_value" %in% measures) && !("no_of_nodes" %in% measures)){
+    measures <- c(measures, "no_of_nodes")
+  }
+  # A forest grown with importance = "none" has no variable.importance. Without a
+  # fallback the frame would start with no rows and the merges below would silently
+  # return an empty result, so use the predictor names saved with the forest instead.
+  variables <- names(forest$variable.importance)
+  if(is.null(variables)){
+    variables <- forest$forest$independent.variable.names
+  }
+  importance_frame <- data.frame(variable = variables, stringsAsFactors = FALSE)
+  # Get objects necessary to calculate importance measures based on the tree structure
+  if(any(c("mean_min_depth", "no_of_nodes", "no_of_trees", "times_a_root", "p_value") %in% measures)){
+    forest_table <-
+      lapply(seq_len(forest$num.trees), function(i) ranger::treeInfo(forest, tree = i) %>%
+               calculate_tree_depth_ranger() %>% cbind(tree = i)) %>% rbindlist()
+    min_depth_frame <- dplyr::group_by(forest_table, tree, splitvarName) %>%
+      dplyr::summarize(min(depth))
+    colnames(min_depth_frame) <- c("tree", "variable", "minimal_depth")
+    # Drop rows without a split variable
+    min_depth_frame <- as.data.frame(min_depth_frame[!is.na(min_depth_frame$variable),])
+  }
+  # Add each importance measure to the table (if it was requested)
+  if("mean_min_depth" %in% measures){
+    importance_frame <- merge(importance_frame, measure_min_depth(min_depth_frame, mean_sample), all = TRUE)
+  }
+  if("no_of_nodes" %in% measures){
+    importance_frame <- merge(importance_frame, measure_no_of_nodes_ranger(forest_table), all = TRUE)
+    # Variables that were never used for a split get a count of zero instead of NA
+    importance_frame[is.na(importance_frame$no_of_nodes), "no_of_nodes"] <- 0
+  }
+  if(forest$importance.mode %in% measures){
+    importance_frame <- merge(importance_frame, measure_vimp_ranger(forest), all = TRUE)
+  }
+  if("no_of_trees" %in% measures){
+    importance_frame <- merge(importance_frame, measure_no_of_trees(min_depth_frame), all = TRUE)
+    importance_frame[is.na(importance_frame$no_of_trees), "no_of_trees"] <- 0
+  }
+  if("times_a_root" %in% measures){
+    importance_frame <- merge(importance_frame, measure_times_a_root(min_depth_frame), all = TRUE)
+    importance_frame[is.na(importance_frame$times_a_root), "times_a_root"] <- 0
+  }
+  if("p_value" %in% measures){
+    importance_frame$p_value <- measure_p_value(importance_frame)
+    importance_frame$variable <- as.factor(importance_frame$variable)
+  }
+  return(importance_frame)
+}
+
 #' Extract k most important variables in a random forest
 #'
 #' Get the names of k variables with highest sum of rankings based on the specified importance measures
@@ -174,13 +253,16 @@ measure_importance <- function(forest, mean_sample = "top_trees", measures = NUL
 #' important_variables(measure_importance(forest), k = 2)
 #'
 #' @export
-important_variables <- function(importance_frame, k = 15, measures = names(importance_frame)[2:5],
+important_variables <- function(importance_frame, k = 15,
+                                measures = names(importance_frame)[2:min(5, ncol(importance_frame))],
                                 ties_action = "all"){
   if("randomForest" %in% class(importance_frame)){
     importance_frame <- measure_importance(importance_frame)
     if("predicted" %in% measures){
       measures <- names(importance_frame)[2:5]
     }
+  } else if ("ranger" %in% class(importance_frame)){
+    importance_frame <- measure_importance(importance_frame)
   }
   rankings <- data.frame(variable = importance_frame$variable, mean_min_depth =
                            frankv(importance_frame$mean_min_depth, ties.method = "dense"),
@@ -232,7 +314,7 @@ plot_multi_way_importance <- function(importance_frame, x_measure = "mean_min_de
                                       min_no_of_trees = 0, no_of_labels = 10,
                                       main = "Multi-way importance plot"){
   variable <- NULL
-  if("randomForest" %in% class(importance_frame)){
+  if(any(c("randomForest", "ranger") %in% class(importance_frame))){
     importance_frame <- measure_importance(importance_frame)
   }
   data <- importance_frame[importance_frame$no_of_trees > min_no_of_trees, ]
@@ -294,14 +376,16 @@ plot_multi_way_importance <- function(importance_frame, x_measure = "mean_min_de
 #' plot_importance_ggpairs(frame, measures = c("mean_min_depth", "times_a_root"))
 #'
 #' @export
-plot_importance_ggpairs <- function(importance_frame, measures =
-                                      names(importance_frame)[c(2, 4, 5, 3, 7)],
+plot_importance_ggpairs <- function(importance_frame, measures = NULL,
                                     main = "Relations between measures of importance"){
-  if("randomForest" %in% class(importance_frame)){
+  if(any(c("randomForest", "ranger") %in% class(importance_frame))){
     importance_frame <- measure_importance(importance_frame)
-    if("predicted" %in% measures){
-      names(importance_frame)[c(2, 4, 5, 3, 7)]
-    }
+  }
+  if (is.null(measures)){
+    default_measures <- c("gini_decrease", "node_purity_increase", # randomForest
+                          "impurity", "impurity_corrected", "permutation", # ranger
+                          "mean_min_depth", "no_of_trees", "no_of_nodes", "p_value")
+    measures <- intersect(default_measures, colnames(importance_frame))
   }
   plot <- ggpairs(importance_frame[, measures]) + theme_bw()
   if(!is.null(main)){
@@ -315,7 +399,7 @@ plot_importance_ggpairs <- function(importance_frame, measures =
 #' Plot against each other rankings of variables according to various measures of importance
 #'
 #' @param importance_frame A result of using the function measure_importance() to a random forest or a randomForest object
-#' @param measures A character vector specifying the measures of importance to be used
+#' @param measures A character vector specifying the measures of importance to be used.
 #' @param main A string to be used as title of the plot
 #'
 #' @return A ggplot object
@@ -329,22 +413,29 @@ plot_importance_ggpairs <- function(importance_frame, measures =
 #' plot_importance_ggpairs(frame, measures = c("mean_min_depth", "times_a_root"))
 #'
 #' @export
-plot_importance_rankings <- function(importance_frame, measures =
-                                       names(importance_frame)[c(2, 4, 5, 3, 7)],
+plot_importance_rankings <- function(importance_frame, measures = NULL,
                                      main = "Relations between rankings according to different measures"){
-  if("randomForest" %in% class(importance_frame)){
+  if(any(c("randomForest", "ranger") %in% class(importance_frame))){
     importance_frame <- measure_importance(importance_frame)
-    if("predicted" %in% measures){
-      names(importance_frame)[c(2, 4, 5, 3, 7)]
-    }
   }
-  rankings <- data.frame(variable = importance_frame$variable, mean_min_depth =
-                           frankv(importance_frame$mean_min_depth, ties.method = "dense"),
-                         apply(importance_frame[, -c(1, 2)], 2,
+  if (is.null(measures)){
+    default_measures <- c("gini_decrease", "node_purity_increase", # randomForest
+                          "impurity", "impurity_corrected", "permutation", # ranger
+                          "mean_min_depth", "no_of_trees", "no_of_nodes", "p_value")
+    measures <- intersect(default_measures, colnames(importance_frame))
+  }
+  rankings <- data.frame(variable = importance_frame$variable,
+                         apply(importance_frame[, !colnames(importance_frame) %in% c("variable", "mean_min_depth", "p_value")], 2,
                                function(x) frankv(x, order = -1, ties.method = "dense")))
+  if ("mean_min_depth" %in% measures){
+    rankings$mean_min_depth = frankv(importance_frame$mean_min_depth, ties.method = "dense")
+  }
+  if ("p_value" %in% measures){
+    rankings$p_value = frankv(importance_frame$p_value, ties.method = "dense")
+  }
   plot <- ggpairs(rankings[, measures], lower = list(continuous = function(data, mapping){
     ggplot(data = data, mapping = mapping) + geom_point() +  geom_smooth(method = "loess")
-    }))+ theme_bw()
+  })) + theme_bw()
   if(!is.null(main)){
     plot <- plot + ggtitle(main)
   }
 
@@ -14,22 +14,43 @@ calculate_tree_depth <- function(frame){
   return(frame)
 }
 
+# Calculate the depth of each node in a single tree obtained from a forest with ranger::treeInfo
+calculate_tree_depth_ranger <- function(frame){
+  # Add a `depth` column to a single-tree data frame: 0 for the first row, and for
+  # every other row the depth of the row that lists it as a child, plus one.
+  if(!all(c("rightChild", "leftChild") %in% names(frame))){
+    stop("The data frame has to contain columns called 'rightChild' and 'leftChild'!
+         It should be a product of the function ranger::treeInfo().")
+  }
+  frame$depth <- NA
+  # The first row is treated as the root
+  frame$depth[1] <- 0
+  # seq_len(...)[-1] is empty for a single-node tree, whereas 2:nrow(frame) would
+  # count down (2, 1) and fail when assigning to the non-existent second row
+  for(i in seq_len(nrow(frame))[-1]){
+    # Relies on the parent row appearing before row i so its depth is already filled in
+    frame[i, "depth"] <-
+      frame[(!is.na(frame[, "leftChild"]) & frame[, "leftChild"] == frame[i, "nodeID"]) |
+              (!is.na(frame[, "rightChild"]) & frame[, "rightChild"] == frame[i, "nodeID"]), "depth"] + 1
+  }
+  return(frame)
+}
+
 #' Calculate minimal depth distribution of a random forest
 #'
 #' Get minimal depth values for all trees in a random forest
 #'
-#' @param forest A randomForest object
+#' @param forest A randomForest or ranger object
 #'
 #' @return A data frame with the value of minimal depth for every variable in every tree
 #'
-#' @import dplyr
-#' @importFrom data.table rbindlist
-#'
 #' @examples
 #' min_depth_distribution(randomForest::randomForest(Species ~ ., data = iris))
+#' min_depth_distribution(ranger::ranger(Species ~ ., data = iris))
 #'
 #' @export
 min_depth_distribution <- function(forest){
+  # S3 generic: the method is chosen by the class of `forest`
+  UseMethod("min_depth_distribution")
+}
+
+#' @import dplyr
+#' @importFrom data.table rbindlist
+#' @export
+min_depth_distribution.randomForest <- function(forest){
   tree <- NULL; `split var` <- NULL; depth <- NULL
   forest_table <-
     lapply(1:forest$ntree, function(i) randomForest::getTree(forest, k = i, labelVar = T) %>%
@@ -41,6 +62,21 @@ min_depth_distribution <- function(forest){
   return(min_depth_frame)
 }
 
+#' @import dplyr
+#' @importFrom data.table rbindlist
+#' @export
+min_depth_distribution.ranger <- function(forest){
+  # Minimal depth of every variable in every tree of a ranger forest.
+  # Returns a data frame with columns `tree`, `variable` and `minimal_depth`.
+  # Local bindings for the column names used in group_by()/summarize() below
+  tree <- NULL; splitvarName <- NULL; depth <- NULL
+  # One table for the whole forest: the nodes of each tree with their depth and tree index.
+  # seq_len() avoids the count-down that 1:n would produce for n = 0.
+  forest_table <-
+    lapply(seq_len(forest$num.trees), function(i) ranger::treeInfo(forest, tree = i) %>%
+             calculate_tree_depth_ranger() %>% cbind(tree = i)) %>% rbindlist()
+  # Smallest depth at which each split variable occurs within each tree
+  min_depth_frame <- dplyr::group_by(forest_table, tree, splitvarName) %>%
+    dplyr::summarize(min(depth))
+  colnames(min_depth_frame) <- c("tree", "variable", "minimal_depth")
+  # Drop rows without a split variable
+  min_depth_frame <- as.data.frame(min_depth_frame[!is.na(min_depth_frame$variable),])
+  return(min_depth_frame)
+}
+
 # Count the trees in which each variable had a given minimal depth
 min_depth_count <- function(min_depth_frame){
   tree <- NULL; minimal_depth <- NULL; variable <- NULL