Skip to content

Commit 7578190

Browse files
feat: Support fit_power_law(implementation = "plfit.p") to compute the P-value (#1386)
2 parents f04cf68 + dc277f6 commit 7578190

File tree

4 files changed

+95
-27
lines changed

4 files changed

+95
-27
lines changed

R/fit.R

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,19 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i
9999
#' be used to calculate confidence intervals and log-likelihood. See
100100
#' [stats4::mle-class()] for details.
101101
#'
102-
#' If `implementation` is \sQuote{`plfit`}, then the result is a
103-
#' named list with entries: \item{continuous}{Logical scalar, whether the
102+
#' If `implementation` is \sQuote{`plfit`} or \sQuote{`plfit.p`}, then the result is a
103+
#' named list with entries:
104+
#' \item{continuous}{Logical scalar, whether the
104105
#' fitted power-law distribution was continuous or discrete.}
105-
#' \item{alpha}{Numeric scalar, the exponent of the fitted power-law
106-
#' distribution.} \item{xmin}{Numeric scalar, the minimum value from which the
106+
#' \item{alpha}{Numeric scalar, the exponent of the fitted power-law distribution.}
107+
#' \item{xmin}{Numeric scalar, the minimum value from which the
107108
#' power-law distribution was fitted. In other words, only the values larger
108-
#' than `xmin` were used from the input vector.} \item{logLik}{Numeric
109-
#' scalar, the log-likelihood of the fitted parameters.} \item{KS.stat}{Numeric
110-
#' scalar, the test statistic of a Kolmogorov-Smirnov test that compares the
111-
#' fitted distribution with the input vector. Smaller scores denote better
112-
#' fit.} \item{KS.p}{Numeric scalar, the p-value of the Kolmogorov-Smirnov
109+
#' than `xmin` were used from the input vector.}
110+
#' \item{logLik}{Numeric scalar, the log-likelihood of the fitted parameters.}
111+
#' \item{KS.stat}{Numeric scalar, the test statistic of a Kolmogorov-Smirnov test
112+
#' that compares the fitted distribution with the input vector.
113+
#' Smaller scores denote better fit.}
114+
#' \item{KS.p}{Only for `plfit.p`. Numeric scalar, the p-value of the Kolmogorov-Smirnov
113115
#' test. Small p-values (less than 0.05) indicate that the test rejected the
114116
#' hypothesis that the original data could have been drawn from the fitted
115117
#' power-law distribution.}
@@ -137,15 +139,25 @@ power.law.fit <- function(x, xmin = NULL, start = 2, force.continuous = FALSE, i
137139
#' fit1$logLik
138140
#' stats4::logLik(fit2)
139141
#'
140-
fit_power_law <- function(x, xmin = NULL, start = 2, force.continuous = FALSE,
141-
implementation = c("plfit", "R.mle"), ...) {
142+
fit_power_law <- function(
143+
x,
144+
xmin = NULL,
145+
start = 2,
146+
force.continuous = FALSE,
147+
implementation = c("plfit", "R.mle", "plfit.p"),
148+
...) {
142149
implementation <- igraph.match.arg(implementation)
143150

144151
if (implementation == "r.mle") {
145152
power.law.fit.old(x, xmin, start, ...)
146-
} else if (implementation == "plfit") {
153+
} else if (implementation %in% c("plfit", "plfit.p")) {
147154
if (is.null(xmin)) xmin <- -1
148-
power.law.fit.new(x, xmin = xmin, force.continuous = force.continuous)
155+
power.law.fit.new(
156+
x,
157+
xmin = xmin,
158+
force.continuous = force.continuous,
159+
p.value = (implementation == "plfit.p")
160+
)
149161
}
150162
}
151163

@@ -186,15 +198,15 @@ power.law.fit.old <- function(x, xmin = NULL, start = 2, ...) {
186198
alpha
187199
}
188200

189-
power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE) {
201+
power.law.fit.new <- function(data, xmin = -1, force.continuous = FALSE, p.value = FALSE) {
190202
# Argument checks
191203
data <- as.numeric(data)
192204
xmin <- as.numeric(xmin)
193205
force.continuous <- as.logical(force.continuous)
194206

195207
on.exit(.Call(R_igraph_finalizer))
196208
# Function call
197-
res <- .Call(R_igraph_power_law_fit, data, xmin, force.continuous)
209+
res <- .Call(R_igraph_power_law_fit_new, data, xmin, force.continuous, p.value)
198210

199211
res
200212
}

man/fit_power_law.Rd

Lines changed: 12 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cpp11.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ extern SEXP R_igraph_path_length_hist(SEXP, SEXP);
361361
extern SEXP R_igraph_permute_vertices(SEXP, SEXP);
362362
extern SEXP R_igraph_personalized_pagerank(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
363363
extern SEXP R_igraph_personalized_pagerank_vs(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
364-
extern SEXP R_igraph_power_law_fit(SEXP, SEXP, SEXP);
364+
extern SEXP R_igraph_power_law_fit_new(SEXP, SEXP, SEXP, SEXP);
365365
extern SEXP R_igraph_preference_game(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
366366
extern SEXP R_igraph_pseudo_diameter(SEXP, SEXP, SEXP, SEXP);
367367
extern SEXP R_igraph_pseudo_diameter_dijkstra(SEXP, SEXP, SEXP, SEXP, SEXP);
@@ -824,7 +824,7 @@ static const R_CallMethodDef CallEntries[] = {
824824
{"R_igraph_permute_vertices", (DL_FUNC) &R_igraph_permute_vertices, 2},
825825
{"R_igraph_personalized_pagerank", (DL_FUNC) &R_igraph_personalized_pagerank, 8},
826826
{"R_igraph_personalized_pagerank_vs", (DL_FUNC) &R_igraph_personalized_pagerank_vs, 8},
827-
{"R_igraph_power_law_fit", (DL_FUNC) &R_igraph_power_law_fit, 3},
827+
{"R_igraph_power_law_fit_new", (DL_FUNC) &R_igraph_power_law_fit_new, 4},
828828
{"R_igraph_preference_game", (DL_FUNC) &R_igraph_preference_game, 7},
829829
{"R_igraph_pseudo_diameter", (DL_FUNC) &R_igraph_pseudo_diameter, 4},
830830
{"R_igraph_pseudo_diameter_dijkstra", (DL_FUNC) &R_igraph_pseudo_diameter_dijkstra, 5},

src/rinterface_extra.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8331,6 +8331,60 @@ SEXP R_igraph_incident_edges(SEXP pgraph, SEXP pe, SEXP pmode) {
83318331
return result;
83328332
}
83338333

8334+
SEXP R_igraph_power_law_fit_new(SEXP data, SEXP xmin, SEXP force_continuous, SEXP compute_pvalue)
8335+
{
8336+
igraph_vector_t c_data;
8337+
igraph_plfit_result_t c_res;
8338+
igraph_real_t c_xmin;
8339+
igraph_bool_t c_force_continuous, c_compute_pvalue;
8340+
SEXP result, names;
8341+
8342+
SEXP r_result;
8343+
8344+
R_SEXP_to_vector(data, &c_data);
8345+
IGRAPH_R_CHECK_REAL(xmin);
8346+
c_xmin = REAL(xmin)[0];
8347+
IGRAPH_R_CHECK_BOOL(force_continuous);
8348+
c_force_continuous = LOGICAL(force_continuous)[0];
8349+
IGRAPH_R_CHECK_BOOL(compute_pvalue);
8350+
c_compute_pvalue = LOGICAL(compute_pvalue)[0];
8351+
8352+
// Can't use the generated `R_igraph_power_law_fit()` because we need `c_res` to compute the p-value
8353+
IGRAPH_R_CHECK(igraph_power_law_fit(&c_data, &c_res, c_xmin, c_force_continuous));
8354+
8355+
if (c_compute_pvalue) {
8356+
igraph_real_t p;
8357+
igraph_plfit_result_calculate_p_value(&c_res, &p, 0.001);
8358+
8359+
PROTECT(result=NEW_LIST(6));
8360+
PROTECT(names=NEW_CHARACTER(6));
8361+
8362+
SET_VECTOR_ELT(result, 5, Rf_ScalarReal(p));
8363+
SET_STRING_ELT(names, 5, Rf_mkChar("KS.p"));
8364+
} else {
8365+
PROTECT(result=NEW_LIST(5));
8366+
PROTECT(names=NEW_CHARACTER(5));
8367+
}
8368+
8369+
SET_VECTOR_ELT(result, 0, Rf_ScalarLogical(c_res.continuous));
8370+
SET_VECTOR_ELT(result, 1, Rf_ScalarReal(c_res.alpha));
8371+
SET_VECTOR_ELT(result, 2, Rf_ScalarReal(c_res.xmin));
8372+
SET_VECTOR_ELT(result, 3, Rf_ScalarReal(c_res.L));
8373+
SET_VECTOR_ELT(result, 4, Rf_ScalarReal(c_res.D));
8374+
8375+
SET_STRING_ELT(names, 0, Rf_mkChar("continuous"));
8376+
SET_STRING_ELT(names, 1, Rf_mkChar("alpha"));
8377+
SET_STRING_ELT(names, 2, Rf_mkChar("xmin"));
8378+
SET_STRING_ELT(names, 3, Rf_mkChar("logLik"));
8379+
SET_STRING_ELT(names, 4, Rf_mkChar("KS.stat"));
8380+
SET_NAMES(result, names);
8381+
8382+
r_result = result;
8383+
8384+
UNPROTECT(2);
8385+
return(r_result);
8386+
}
8387+
83348388
/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C */
83358389
/* C */
83368390
/* Given a HIERARCHIC CLUSTERING, described as a sequence of C */

0 commit comments

Comments
 (0)