@@ -6,6 +6,7 @@ import numpy as np
66cimport numpy as np
77
88from scipy.special import gammaln
9+ from scipy.stats import chi2
910
1011from libc.math cimport exp, log, pi, pow
1112
@@ -23,30 +24,45 @@ ctypedef np.float64_t floatTYPE_t
2324def _compute_kstar (floatTYPE_t id_sel ,
2425 DTYPE_t Nele ,
2526 DTYPE_t maxk ,
26- floatTYPE_t Dthr ,#=23.92812698,
27+ floatTYPE_t alpha , #=1e-6
2728 np.ndarray[DTYPE_t , ndim = 2 ] dist_indices,
28- np.ndarray[floatTYPE_t , ndim = 2 ] distances
29+ np.ndarray[floatTYPE_t , ndim = 2 ] distances,
30+ bint bonferroni_deloc ,
31+ bint bonferroni_loc ,
2932):
33+ """ Likelihhod ratio test to find the k*, ie the farthest neighbour for which the density is
34+ considered approximately constant. We include also bonferroni correction for multiple testing,
35+ in the delocalised version (alpha scaled by number of points) and the local one. In principle, for
36+ the local one, the threshold is updated at each new test and in principle one should check that
37+ all dL passes test with the new threshold. However, since the threshold is always increasing (as alpha is reduced),
38+ practically one has to check only for the last dL (againt the proper threshold).
3039
40+ """
3141
32- cdef floatTYPE_t dL, vvi, vvj
33- cdef DTYPE_t i, j, ksel
42+ cdef floatTYPE_t dL, vvi, vvj, thr
43+ cdef DTYPE_t i, j, ksel, h
3444 cdef np.ndarray[DTYPE_t, ndim = 1 ] kstar = np.empty(Nele, dtype = int )
3545 cdef floatTYPE_t prefactor = exp( id_sel / 2.0 * log(pi) - gammaln((id_sel + 2.0 ) / 2.0 ) )
46+ cdef floatTYPE_t alpha_eff = alpha / Nele if bonferroni_deloc else alpha
47+ cdef floatTYPE_t Dthr = chi2.isf(alpha_eff,1 )
48+ cdef np.ndarray[floatTYPE_t, ndim = 1 ] Dthr_loc_arr = np.array([chi2.isf(alpha_eff / (h+ 1 ), 1 ) for h in range (maxk)], dtype = float )
3649
3750 for i in range (Nele):
3851 j = 4
3952 dL = 0.0
40- while j < maxk and dL < Dthr:
53+ h = 0
54+ while j < maxk:
4155 ksel = j - 1
4256 vvi = prefactor * pow (distances[i, ksel], id_sel)
4357 vvj = prefactor * pow (distances[dist_indices[i, j], ksel], id_sel)
4458 dL = - 2.0 * ksel * ( log(vvi) + log(vvj) - 2.0 * log(vvi + vvj) + log(4 ) )
45- j = j + 1
46- if j == maxk:
47- kstar[i] = j - 1
48- else :
49- kstar[i] = j - 2
59+ thr = Dthr_loc_arr[h] if bonferroni_loc else Dthr
60+ if dL > thr:
61+ break
62+ else :
63+ j = j + 1
64+ h = h + 1
65+ kstar[i] = j - 1 # fall back to previous iteration where the test had passed
5066
5167 return kstar
5268
@@ -56,30 +72,38 @@ def _compute_kstar(floatTYPE_t id_sel,
5672def _compute_kstar_interp (floatTYPE_t id_sel ,
5773 DTYPE_t Nele ,
5874 DTYPE_t maxk ,
59- floatTYPE_t Dthr , #=23.92812698 ,
75+ floatTYPE_t alpha , #=1e-6 ,
6076 np.ndarray[DTYPE_t , ndim = 2 ] cross_dist_indices,
6177 np.ndarray[floatTYPE_t , ndim = 2 ] cross_distances,
62- np.ndarray[floatTYPE_t , ndim = 2 ] data_distances
78+ np.ndarray[floatTYPE_t , ndim = 2 ] data_distances,
79+ bint bonferroni_deloc ,
80+ bint bonferroni_loc ,
6381 ):
6482
6583
66- cdef floatTYPE_t dL, vvi, vvj
67- cdef DTYPE_t i, j, ksel
84+ cdef floatTYPE_t dL, vvi, vvj, thr
85+ cdef DTYPE_t i, j, ksel, h
6886 cdef np.ndarray[DTYPE_t, ndim = 1 ] kstar = np.empty(Nele, dtype = int )
6987 cdef floatTYPE_t prefactor = exp( id_sel / 2.0 * log(pi) - gammaln((id_sel + 2.0 ) / 2.0 ) )
88+ cdef floatTYPE_t alpha_eff = alpha / Nele if bonferroni_deloc else alpha
89+ cdef floatTYPE_t Dthr = chi2.isf(alpha_eff,1 )
90+ cdef np.ndarray[floatTYPE_t, ndim = 1 ] Dthr_loc_arr = np.array([chi2.isf(alpha_eff / (h+ 1 ), 1 ) for h in range (maxk)], dtype = float )
7091
7192 for i in range (Nele):
7293 j = 4
7394 dL = 0.0
74- while j < maxk and dL < Dthr:
95+ h = 0
96+ while j < maxk:
7597 ksel = j - 1
7698 vvi = prefactor * pow (cross_distances[i, ksel], id_sel)
7799 vvj = prefactor * pow (data_distances[cross_dist_indices[i, j], ksel], id_sel)
78100 dL = - 2.0 * ksel * ( log(vvi) + log(vvj) - 2.0 * log(vvi + vvj) + log(4 ) )
79- j = j + 1
80- if j == maxk:
81- kstar[i] = j - 1
82- else :
83- kstar[i] = j - 2
84-
85- return kstar
101+ thr = Dthr_loc_arr[h] if bonferroni_loc else Dthr
102+ if dL > thr:
103+ break
104+ else :
105+ j = j + 1
106+ h = h + 1
107+ kstar[i] = j - 1 # fall back to previous iteration where the test had passed
108+
109+ return kstar
0 commit comments