feat: Support fit_power_law(implementation = "plfit.p") to compute the P-value (#1386)

aviator-app[bot] · web-flow · commit 7578190a84cd · 2024-08-22T12:17:07.000Z
diff --git a/R/fit.R b/R/fit.R
@@ -99,17 +99,19 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i
 #'   be used to calculate confidence intervals and log-likelihood. See
 #'   [stats4::mle-class()] for details.
 #'
-#'   If `implementation` is \sQuote{`plfit`}, then the result is a
-#'   named list with entries: \item{continuous}{Logical scalar, whether the
+#'   If `implementation` is \sQuote{`plfit`} or \sQuote{`plfit.p`}, then the result is a
+#'   named list with entries:
+#'   \item{continuous}{Logical scalar, whether the
 #'   fitted power-law distribution was continuous or discrete.}
-#'   \item{alpha}{Numeric scalar, the exponent of the fitted power-law
-#'   distribution.} \item{xmin}{Numeric scalar, the minimum value from which the
+#'   \item{alpha}{Numeric scalar, the exponent of the fitted power-law distribution.}
+#'   \item{xmin}{Numeric scalar, the minimum value from which the
 #'   power-law distribution was fitted. In other words, only the values larger
-#'   than `xmin` were used from the input vector.} \item{logLik}{Numeric
-#'   scalar, the log-likelihood of the fitted parameters.} \item{KS.stat}{Numeric
-#'   scalar, the test statistic of a Kolmogorov-Smirnov test that compares the
-#'   fitted distribution with the input vector. Smaller scores denote better
-#'   fit.} \item{KS.p}{Numeric scalar, the p-value of the Kolmogorov-Smirnov
+#'   than `xmin` were used from the input vector.}
+#'   \item{logLik}{Numeric scalar, the log-likelihood of the fitted parameters.}
+#'   \item{KS.stat}{Numeric scalar, the test statistic of a Kolmogorov-Smirnov test
+#'   that compares the fitted distribution with the input vector.
+#'   Smaller scores denote better fit.}
+#'   \item{KS.p}{Only for `plfit.p`. Numeric scalar, the p-value of the Kolmogorov-Smirnov
 #'   test. Small p-values (less than 0.05) indicate that the test rejected the
 #'   hypothesis that the original data could have been drawn from the fitted
 #'   power-law distribution.}
@@ -137,15 +139,25 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i
 #' fit1$logLik
 #' stats4::logLik(fit2)
 #'
-fit_power_law <- function(x, xmin = NULL, start = 2, force.continuous = FALSE,
-                          implementation = c("plfit", "R.mle"), ...) {
+fit_power_law <- function(
+    x,
+    xmin = NULL,
+    start = 2,
+    force.continuous = FALSE,
+    implementation = c("plfit", "R.mle", "plfit.p"),
+    ...) {
   implementation <- igraph.match.arg(implementation)
 
   if (implementation == "r.mle") {
     power.law.fit.old(x, xmin, start, ...)
-  } else if (implementation == "plfit") {
+  } else if (implementation %in% c("plfit", "plfit.p")) {
     if (is.null(xmin)) xmin <- -1
-    power.law.fit.new(x, xmin = xmin, force.continuous = force.continuous)
+    power.law.fit.new(
+      x,
+      xmin = xmin,
+      force.continuous = force.continuous,
+      p.value = (implementation == "plfit.p")
+    )
   }
 }
 
@@ -186,15 +198,15 @@ power.law.fit.old <- function(x, xmin = NULL, start = 2, ...) {
   alpha
 }
 
-power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE) {
+power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE, p.value = FALSE) {
   # Argument checks
   data <- as.numeric(data)
   xmin <- as.numeric(xmin)
   force.continuous <- as.logical(force.continuous)
 
   on.exit(.Call(R_igraph_finalizer))
   # Function call
-  res <- .Call(R_igraph_power_law_fit, data, xmin, force.continuous)
+  res <- .Call(R_igraph_power_law_fit_new, data, xmin, force.continuous, p.value)
 
   res
 }
diff --git a/man/fit_power_law.Rd b/man/fit_power_law.Rd
diff --git a/src/cpp11.cpp b/src/cpp11.cpp
@@ -361,7 +361,7 @@ extern SEXP R_igraph_path_length_hist(SEXP, SEXP);
 extern SEXP R_igraph_permute_vertices(SEXP, SEXP);
 extern SEXP R_igraph_personalized_pagerank(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 extern SEXP R_igraph_personalized_pagerank_vs(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
-extern SEXP R_igraph_power_law_fit(SEXP, SEXP, SEXP);
+extern SEXP R_igraph_power_law_fit_new(SEXP, SEXP, SEXP, SEXP);
 extern SEXP R_igraph_preference_game(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 extern SEXP R_igraph_pseudo_diameter(SEXP, SEXP, SEXP, SEXP);
 extern SEXP R_igraph_pseudo_diameter_dijkstra(SEXP, SEXP, SEXP, SEXP, SEXP);
@@ -824,7 +824,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"R_igraph_permute_vertices",                           (DL_FUNC) &R_igraph_permute_vertices,                            2},
     {"R_igraph_personalized_pagerank",                      (DL_FUNC) &R_igraph_personalized_pagerank,                       8},
     {"R_igraph_personalized_pagerank_vs",                   (DL_FUNC) &R_igraph_personalized_pagerank_vs,                    8},
-    {"R_igraph_power_law_fit",                              (DL_FUNC) &R_igraph_power_law_fit,                               3},
+    {"R_igraph_power_law_fit_new",                          (DL_FUNC) &R_igraph_power_law_fit_new,                           4},
     {"R_igraph_preference_game",                            (DL_FUNC) &R_igraph_preference_game,                             7},
     {"R_igraph_pseudo_diameter",                            (DL_FUNC) &R_igraph_pseudo_diameter,                             4},
     {"R_igraph_pseudo_diameter_dijkstra",                   (DL_FUNC) &R_igraph_pseudo_diameter_dijkstra,                    5},
diff --git a/src/rinterface_extra.c b/src/rinterface_extra.c
@@ -8331,6 +8331,60 @@ SEXP R_igraph_incident_edges(SEXP pgraph, SEXP pe, SEXP pmode) {
   return result;
 }
 
+SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEXP compute_pvalue)
+{
+  igraph_vector_t c_data;
+  igraph_plfit_result_t c_res;
+  igraph_real_t c_xmin;
+  igraph_bool_t c_force_continuous, c_compute_pvalue;
+  SEXP result, names;
+
+  SEXP r_result;
+
+  R_SEXP_to_vector(data, &c_data);
+  IGRAPH_R_CHECK_REAL(xmin);
+  c_xmin = REAL(xmin)[0];
+  IGRAPH_R_CHECK_BOOL(force_continuous);
+  c_force_continuous = LOGICAL(force_continuous)[0];
+  IGRAPH_R_CHECK_BOOL(compute_pvalue);
+  c_compute_pvalue = LOGICAL(compute_pvalue)[0];
+
+  // Can't use the generated `R_igraph_power_law_fit()` because we need `c_res` to compute the p-value
+  IGRAPH_R_CHECK(igraph_power_law_fit(&c_data, &c_res, c_xmin, c_force_continuous));
+
+  if (c_compute_pvalue) {
+    igraph_real_t p;
+    igraph_plfit_result_calculate_p_value(&c_res, &p, 0.001);
+
+    PROTECT(result=NEW_LIST(6));
+    PROTECT(names=NEW_CHARACTER(6));
+
+    SET_VECTOR_ELT(result, 5, Rf_ScalarReal(p));
+    SET_STRING_ELT(names, 5, Rf_mkChar("KS.p"));
+  } else {
+    PROTECT(result=NEW_LIST(5));
+    PROTECT(names=NEW_CHARACTER(5));
+  }
+
+  SET_VECTOR_ELT(result, 0, Rf_ScalarLogical(c_res.continuous));
+  SET_VECTOR_ELT(result, 1, Rf_ScalarReal(c_res.alpha));
+  SET_VECTOR_ELT(result, 2, Rf_ScalarReal(c_res.xmin));
+  SET_VECTOR_ELT(result, 3, Rf_ScalarReal(c_res.L));
+  SET_VECTOR_ELT(result, 4, Rf_ScalarReal(c_res.D));
+
+  SET_STRING_ELT(names, 0, Rf_mkChar("continuous"));
+  SET_STRING_ELT(names, 1, Rf_mkChar("alpha"));
+  SET_STRING_ELT(names, 2, Rf_mkChar("xmin"));
+  SET_STRING_ELT(names, 3, Rf_mkChar("logLik"));
+  SET_STRING_ELT(names, 4, Rf_mkChar("KS.stat"));
+  SET_NAMES(result, names);
+
+  r_result = result;
+
+  UNPROTECT(2);
+  return(r_result);
+}
+
 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C */
 /*                                                               C */
 /*  Given a HIERARCHIC CLUSTERING, described as a sequence of    C */