diff --git a/causallearn/score/LocalScoreFunction.py b/causallearn/score/LocalScoreFunction.py
index 488a2c45..32de5d36 100644
--- a/causallearn/score/LocalScoreFunction.py
+++ b/causallearn/score/LocalScoreFunction.py
@@ -194,11 +194,11 @@ def local_score_cv_general(
         score: local score
     """
-    Data = np.mat(Data)
+    Data = np.asarray(Data)
     PAi = list(PAi)

     T = Data.shape[0]
-    X = Data[:, [Xi]]
+    X = Data[:, Xi].reshape(-1, 1)
     var_lambda = parameters["lambda"]  # regularization parameter
     k = parameters["kfold"]  # k-fold cross validation
     n0 = math.floor(T / k)
@@ -206,24 +206,22 @@
     Thresh = 1e-5
     if len(PAi):
         PA = Data[:, PAi]

         # set the kernel for X
-        GX = np.sum(np.multiply(X, X), axis=1)
+        GX = np.multiply(X, X).reshape(-1, 1)
         Q = np.tile(GX, (1, T))
         R = np.tile(GX.T, (T, 1))
-        dists = Q + R - 2 * X * X.T
+        dists = Q + R - 2 * X @ X.T
         dists = dists - np.tril(dists)
         dists = np.reshape(dists, (T**2, 1))
-        width = np.sqrt(
-            0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0]
-        )  # median value
+        width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)]))
         width = width * 2
         theta = 1 / (width**2)
         Kx, _ = kernel(X, X, (theta, 1))  # Gaussian kernel
         H0 = (
-            np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / T
+            np.eye(T) - np.ones((T, T)) / T
         )  # for centering of the data in feature space
-        Kx = H0 * Kx * H0  # kernel matrix for X
+        Kx = H0 @ Kx @ H0  # kernel matrix for X
@@ -234,24 +232,25 @@
         # mx = len(IIx)

         # set the kernel for PA
-        Kpa = np.mat(np.ones((T, T)))
+        Kpa = np.ones((T, T))
         for m in range(PA.shape[1]):
-            G = np.sum((np.multiply(PA[:, m], PA[:, m])), axis=1)
+            G = np.multiply(PA[:, [m]], PA[:, [m]]).reshape(-1, 1)
             Q = np.tile(G, (1, T))
             R = np.tile(G.T, (T, 1))
-            dists = Q + R - 2 * PA[:, m] * PA[:, m].T
+            dists = Q + R - 2 * PA[:, [m]] @ PA[:, [m]].T
             dists = dists - np.tril(dists)
             dists = np.reshape(dists, (T**2, 1))
-            width = np.sqrt(
-                0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0]
-            )  # median value
+            width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)]))
            width = width * 2
             theta = 1 / (width**2)
-            Kpa = np.multiply(Kpa, kernel(PA[:, m], PA[:, m], (theta, 1))[0])
+            Kpa = np.multiply(
+                Kpa,
+                kernel(PA[:, m].reshape(-1, 1), PA[:, m].reshape(-1, 1), (theta, 1))[0],
+            )

         H0 = (
-            np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / T
+            np.eye(T) - np.ones((T, T)) / T
         )  # for centering of the data in feature space
-        Kpa = H0 * Kpa * H0  # kernel matrix for PA
+        Kpa = H0 @ Kpa @ H0  # kernel matrix for PA
@@ -317,40 +316,36 @@
                 raise ValueError("Not cover all logic path")

             n1 = T - nv
-            tmp1 = pdinv(Kpa_tr + n1 * var_lambda * np.mat(np.eye(n1)))
-            tmp2 = tmp1 * Kx_tr * tmp1
-            tmp3 = (
-                tmp1
-                * pdinv(np.mat(np.eye(n1)) + n1 * var_lambda**2 / gamma * tmp2)
-                * tmp1
-            )
+            tmp1 = pdinv(Kpa_tr + n1 * var_lambda * np.eye(n1))
+            tmp2 = tmp1 @ Kx_tr @ tmp1
+            tmp3 = tmp1 @ pdinv(np.eye(n1) + n1 * var_lambda**2 / gamma * tmp2) @ tmp1
             A = (
                 Kx_te
-                + Kpa_tr_te.T * tmp2 * Kpa_tr_te
-                - 2 * Kx_tr_te.T * tmp1 * Kpa_tr_te
-                - n1 * var_lambda**2 / gamma * Kx_tr_te.T * tmp3 * Kx_tr_te
+                + Kpa_tr_te.T @ tmp2 @ Kpa_tr_te
+                - 2 * Kx_tr_te.T @ tmp1 @ Kpa_tr_te
+                - n1 * var_lambda**2 / gamma * Kx_tr_te.T @ tmp3 @ Kx_tr_te
                 - n1
                 * var_lambda**2
                 / gamma
                 * Kpa_tr_te.T
-                * tmp1
-                * Kx_tr
-                * tmp3
-                * Kx_tr
-                * tmp1
-                * Kpa_tr_te
+                @ tmp1
+                @ Kx_tr
+                @ tmp3
+                @ Kx_tr
+                @ tmp1
+                @ Kpa_tr_te
                 + 2
                 * n1
                 * var_lambda**2
                 / gamma
                 * Kx_tr_te.T
-                * tmp3
-                * Kx_tr
-                * tmp1
-                * Kpa_tr_te
+                @ tmp3
+                @ Kx_tr
+                @ tmp1
+                @ Kpa_tr_te
             ) / gamma
-            B = n1 * var_lambda**2 / gamma * tmp2 + np.mat(np.eye(n1))
+            B
= n1 * var_lambda**2 / gamma * tmp2 + np.eye(n1) L = np.linalg.cholesky(B) C = np.sum(np.log(np.diag(L))) # CV = CV + (nv*nv*log(2*pi) + nv*C + nv*mx*log(gamma) + trace(A))/2; @@ -365,14 +360,12 @@ def local_score_cv_general( dists = Q + R - 2 * X * X.T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - width = np.sqrt( - 0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0] - ) # median value + width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) width = width * 2 theta = 1 / (width**2) Kx, _ = kernel(X, X, (theta, 1)) # Gaussian kernel - H0 = np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / ( + H0 = np.eye(T) - np.ones((T, T)) / ( T ) # for centering of the data in feature space Kx = H0 * Kx * H0 # kernel matrix for X @@ -423,10 +416,10 @@ def local_score_cv_general( - 1 / (gamma * n1) * Kx_tr_te.T - * pdinv(np.mat(np.eye(n1)) + 1 / (gamma * n1) * Kx_tr) + * pdinv(np.eye(n1) + 1 / (gamma * n1) * Kx_tr) * Kx_tr_te ) / gamma - B = 1 / (gamma * n1) * Kx_tr + np.mat(np.eye(n1)) + B = 1 / (gamma * n1) * Kx_tr + np.eye(n1) L = np.linalg.cholesky(B) C = np.sum(np.log(np.diag(L))) @@ -465,7 +458,7 @@ def local_score_cv_multi( """ T = Data.shape[0] - X = Data[:, parameters["dlabel"][Xi]] + X = Data[:, parameters["dlabel"][Xi]].reshape(-1, 1) var_lambda = parameters["lambda"] # regularization parameter k = parameters["kfold"] # k-fold cross validation n0 = math.floor(T / k) @@ -474,43 +467,39 @@ def local_score_cv_multi( if len(PAi): # set the kernel for X - GX = np.sum(np.multiply(X, X), axis=1) + GX = np.multiply(X, X).reshape(-1, 1) Q = np.tile(GX, (1, T)) R = np.tile(GX.T, (T, 1)) dists = Q + R - 2 * X * X.T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - width = np.sqrt( - 0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0] - ) # median value + width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) width = width * 3 ### theta = 1 / (width**2 * X.shape[1]) # Kx, _ = kernel(X, X, (theta, 1)) # Gaussian kernel - H0 = np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / ( + H0 = np.eye(T) - np.ones((T, T)) / ( T ) # for centering of the data in feature space Kx = H0 * Kx * H0 # kernel matrix for X # set the kernel for PA - Kpa = np.mat(np.ones((T, T))) + Kpa = np.ones((T, T), dtype=np.float64) for m in range(len(PAi)): - PA = Data[:, parameters["dlabel"][PAi[m]]] - G = np.sum((np.multiply(PA, PA)), axis=1) + PA = Data[:, parameters["dlabel"][PAi[m]]].reshape(-1, 1) + G = np.multiply(PA, PA).reshape(-1, 1) Q = np.tile(G, (1, T)) R = np.tile(G.T, (T, 1)) dists = Q + R - 2 * PA * PA.T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - width = np.sqrt( - 0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0] - ) # median value + width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) width = width * 3 ### theta = 1 / (width**2 * PA.shape[1]) Kpa = np.multiply(Kpa, kernel(PA, PA, (theta, 1))[0]) - H0 = np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / ( + H0 = np.eye(T) - np.ones((T, T)) / ( T ) # for centering of the data in feature space Kpa = H0 * Kpa * H0 # kernel matrix for PA @@ -577,40 +566,36 @@ def local_score_cv_multi( raise ValueError("Not cover all logic path") n1 = T - nv - tmp1 = pdinv(Kpa_tr + n1 * var_lambda * np.mat(np.eye(n1))) - tmp2 = tmp1 * Kx_tr * tmp1 - tmp3 = ( - tmp1 - * pdinv(np.mat(np.eye(n1)) + n1 * var_lambda**2 / gamma * tmp2) - * tmp1 - ) + tmp1 = pdinv(Kpa_tr + n1 * var_lambda * np.eye(n1)) + tmp2 = tmp1 @ Kx_tr @ tmp1 + tmp3 = tmp1 @ pdinv(np.eye(n1) + n1 * var_lambda**2 / gamma * tmp2) @ 
tmp1 A = ( Kx_te - + Kpa_tr_te.T * tmp2 * Kpa_tr_te - - 2 * Kx_tr_te.T * tmp1 * Kpa_tr_te - - n1 * var_lambda**2 / gamma * Kx_tr_te.T * tmp3 * Kx_tr_te + + Kpa_tr_te.T @ tmp2 @ Kpa_tr_te + - 2 * Kx_tr_te.T @ tmp1 @ Kpa_tr_te + - n1 * var_lambda**2 / gamma * Kx_tr_te.T @ tmp3 @ Kx_tr_te - n1 * var_lambda**2 / gamma * Kpa_tr_te.T - * tmp1 - * Kx_tr - * tmp3 - * Kx_tr - * tmp1 - * Kpa_tr_te + @ tmp1 + @ Kx_tr + @ tmp3 + @ Kx_tr + @ tmp1 + @ Kpa_tr_te + 2 * n1 * var_lambda**2 / gamma * Kx_tr_te.T - * tmp3 - * Kx_tr - * tmp1 - * Kpa_tr_te + @ tmp3 + @ Kx_tr + @ tmp1 + @ Kpa_tr_te ) / gamma - B = n1 * var_lambda**2 / gamma * tmp2 + np.mat(np.eye(n1)) + B = n1 * var_lambda**2 / gamma * tmp2 + np.eye(n1) L = np.linalg.cholesky(B) C = np.sum(np.log(np.diag(L))) # CV = CV + (nv*nv*log(2*pi) + nv*C + nv*mx*log(gamma) + trace(A))/2; @@ -625,14 +610,12 @@ def local_score_cv_multi( dists = Q + R - 2 * X * X.T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - width = np.sqrt( - 0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0] - ) # median value + width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) width = width * 3 ### theta = 1 / (width**2 * X.shape[1]) # Kx, _ = kernel(X, X, (theta, 1)) # Gaussian kernel - H0 = np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / ( + H0 = np.eye(T) - np.ones((T, T)) / ( T ) # for centering of the data in feature space Kx = H0 * Kx * H0 # kernel matrix for X @@ -679,10 +662,10 @@ def local_score_cv_multi( - 1 / (gamma * n1) * Kx_tr_te.T - * pdinv(np.mat(np.eye(n1)) + 1 / (gamma * n1) * Kx_tr) + * pdinv(np.eye(n1) + 1 / (gamma * n1) * Kx_tr) * Kx_tr_te ) / gamma - B = 1 / (gamma * n1) * Kx_tr + np.mat(np.eye(n1)) + B = 1 / (gamma * n1) * Kx_tr + np.eye(n1) L = np.linalg.cholesky(B) C = np.sum(np.log(np.diag(L))) @@ -715,20 +698,20 @@ def local_score_marginal_general( """ T = Data.shape[0] - X = Data[:, [Xi]] + X = Data[:, Xi].reshape(-1, 1) dX = X.shape[1] # set the kernel for X - GX = np.sum(np.multiply(X, X), axis=1) + GX = np.sum(np.multiply(X, X), axis=1).reshape(-1, 1) Q = np.tile(GX, (1, T)) R = np.tile(GX.T, (T, 1)) dists = Q + R - 2 * X * X.T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0]) + width = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) width = width * 2.5 # kernel width theta = 1 / (width**2) - H = np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / T + H = np.eye(T) - np.ones((T, T)) / T Kx, _ = kernel(X, X, (theta, 1)) Kx = H * Kx * H @@ -743,21 +726,19 @@ def local_score_marginal_general( if len(PAi): PA = Data[:, PAi] - widthPA = np.mat(np.empty((PA.shape[1], 1))) + widthPA = np.empty((PA.shape[1], 1)) # set the kernel for PA for m in range(PA.shape[1]): - G = np.sum((np.multiply(PA[:, m], PA[:, m])), axis=1) + G = np.multiply(PA[:, [m]], PA[:, [m]]).reshape(-1, 1) Q = np.tile(G, (1, T)) R = np.tile(G.T, (T, 1)) - dists = Q + R - 2 * PA[:, m] * PA[:, m].T + dists = Q + R - 2 * PA[:, [m]] * PA[:, [m]].T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - widthPA[m] = np.sqrt( - 0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0] - ) + widthPA[m] = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) widthPA = widthPA * 2.5 # kernel width - covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]]) + covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]], dtype=object) logtheta0 = np.vstack([np.log(widthPA), 0, np.log(np.sqrt(0.1))]) logtheta, fvals, iter = minimize( logtheta0, @@ -765,34 +746,34 @@ 
def local_score_marginal_general( -300, covfunc, PA, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), ) nlml, dnlml = gpr_multi_new( logtheta, covfunc, PA, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), nargout=2, ) else: - covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]]) - PA = np.mat(np.zeros((T, 1))) - logtheta0 = np.mat([100, 0, np.log(np.sqrt(0.1))]).T + covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]], dtype=object) + PA = np.zeros((T, 1)) + logtheta0 = np.array([[100], [0], [np.log(np.sqrt(0.1))]]) logtheta, fvals, iter = minimize( logtheta0, "gpr_multi_new", -300, covfunc, PA, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), ) nlml, dnlml = gpr_multi_new( logtheta, covfunc, PA, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), nargout=2, ) score = nlml # negative log-likelihood @@ -821,20 +802,20 @@ def local_score_marginal_multi( score: local score """ T = Data.shape[0] - X = Data[:, parameters["dlabel"][Xi]] + X = Data[:, parameters["dlabel"][Xi]].reshape(-1, 1) dX = X.shape[1] # set the kernel for X - GX = np.sum(np.multiply(X, X), axis=1) + GX = np.sum(np.multiply(X, X), axis=1).reshape(-1, 1) Q = np.tile(GX, (1, T)) R = np.tile(GX.T, (T, 1)) dists = Q + R - 2 * X * X.T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - widthX = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0]) + widthX = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) widthX = widthX * 2.5 # kernel width theta = 1 / (widthX**2) - H = np.mat(np.eye(T)) - np.mat(np.ones((T, T))) / T + H = np.eye(T) - np.ones((T, T)) / T Kx, _ = kernel(X, X, (theta, 1)) Kx = H * Kx * H @@ -847,28 +828,27 @@ def local_score_marginal_multi( eix = eix[:, IIx] if len(PAi): - widthPA_all = np.mat(np.empty((1, 0))) + widthPA_all = np.empty((1, 0)) # set the kernel for PA - PA_all = np.mat(np.empty((Data.shape[0], 0))) + PA_all = np.empty((Data.shape[0], 0)) for m in range(len(PAi)): - PA = Data[:, parameters["dlabel"][PAi[m]]] + PA = Data[:, parameters["dlabel"][PAi[m]]].reshape(-1, 1) PA_all = np.hstack([PA_all, PA]) - G = np.sum((np.multiply(PA, PA)), axis=1) + G = np.sum((np.multiply(PA, PA)), axis=1).reshape(-1, 1) Q = np.tile(G, (1, T)) R = np.tile(G.T, (T, 1)) dists = Q + R - 2 * PA * PA.T dists = dists - np.tril(dists) dists = np.reshape(dists, (T**2, 1)) - widthPA = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)], axis=1)[0, 0]) + widthPA = np.sqrt(0.5 * np.median(dists[np.where(dists > 0)])) widthPA_all = np.hstack( [ widthPA_all, - widthPA - * np.mat(np.ones((1, np.size(parameters["dlabel"][PAi[m]])))), + widthPA * np.ones((1, np.size(parameters["dlabel"][PAi[m]]))), ] ) widthPA_all = widthPA_all * 2.5 # kernel width - covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]]) + covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]], dtype=object) logtheta0 = np.vstack([np.log(widthPA_all.T), 0, np.log(np.sqrt(0.1))]) logtheta, fvals, iter = minimize( logtheta0, @@ -876,35 +856,35 @@ def local_score_marginal_multi( -300, covfunc, PA_all, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), ) nlml, 
dnlml = gpr_multi_new( logtheta, covfunc, PA_all, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), nargout=2, ) else: - covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]]) - PA = np.mat(np.zeros((T, 1))) - logtheta0 = np.mat([100, 0, np.log(np.sqrt(0.1))]).T + covfunc = np.asarray(["covSum", ["covSEard", "covNoise"]], dtype=object) + PA = np.zeros((T, 1)) + logtheta0 = np.array([[100], [0], [np.log(np.sqrt(0.1))]]) logtheta, fvals, iter = minimize( logtheta0, "gpr_multi_new", -300, covfunc, PA, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), ) nlml, dnlml = gpr_multi_new( logtheta, covfunc, PA, - 2 * np.sqrt(T) * eix * np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), + 2 * np.sqrt(T) * eix @ np.diag(np.sqrt(eig_Kx)) / np.sqrt(eig_Kx[0]), nargout=2, ) score = nlml # negative log-likelihood - return score + return score \ No newline at end of file diff --git a/causallearn/search/ScoreBased/GES.py b/causallearn/search/ScoreBased/GES.py index c9475755..018bbf93 100644 --- a/causallearn/search/ScoreBased/GES.py +++ b/causallearn/search/ScoreBased/GES.py @@ -8,8 +8,13 @@ from typing import Union -def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = None, - parameters: Optional[Dict[str, Any]] = None, node_names: Union[List[str], None] = None,) -> Dict[str, Any]: +def ges( + X: ndarray, + score_func: str = "local_score_BIC", + maxP: Optional[float] = None, + parameters: Optional[Dict[str, Any]] = None, + node_names: Union[List[str], None] = None, +) -> Dict[str, Any]: """ Perform greedy equivalence search (GES) algorithm @@ -39,61 +44,88 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = if X.shape[0] < X.shape[1]: warnings.warn("The number of features is much larger than the sample size!") - X = np.mat(X) - if score_func == 'local_score_CV_general': # % k-fold negative cross validated likelihood based on regression in RKHS + if ( + score_func == "local_score_CV_general" + ): # % k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: - parameters = {'kfold': 10, # 10 fold cross validation - 'lambda': 0.01} # regularization parameter + parameters = { + "kfold": 10, # 10 fold cross validation + "lambda": 0.01, + } # regularization parameter if maxP is None: maxP = X.shape[1] / 2 # maximum number of parents N = X.shape[1] # number of variables - localScoreClass = LocalScoreClass(data=X, local_score_fun=local_score_cv_general, parameters=parameters) + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_general, parameters=parameters + ) - elif score_func == 'local_score_marginal_general': # negative marginal likelihood based on regression in RKHS + elif ( + score_func == "local_score_marginal_general" + ): # negative marginal likelihood based on regression in RKHS parameters = {} if maxP is None: maxP = X.shape[1] / 2 # maximum number of parents N = X.shape[1] # number of variables - localScoreClass = LocalScoreClass(data=X, local_score_fun=local_score_marginal_general, parameters=parameters) + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_general, parameters=parameters + ) - elif score_func == 'local_score_CV_multi': # k-fold negative cross validated likelihood based on regression in RKHS + elif ( + score_func == "local_score_CV_multi" + ): # k-fold 
negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: - parameters = {'kfold': 10, 'lambda': 0.01, 'dlabel': {}} # regularization parameter + parameters = { + "kfold": 10, + "lambda": 0.01, + "dlabel": {}, + } # regularization parameter for i in range(X.shape[1]): - parameters['dlabel'][i] = i + parameters["dlabel"][i] = i if maxP is None: - maxP = len(parameters['dlabel']) / 2 - N = len(parameters['dlabel']) - localScoreClass = LocalScoreClass(data=X, local_score_fun=local_score_cv_multi, parameters=parameters) + maxP = len(parameters["dlabel"]) / 2 + N = len(parameters["dlabel"]) + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_multi, parameters=parameters + ) - elif score_func == 'local_score_marginal_multi': # negative marginal likelihood based on regression in RKHS + elif ( + score_func == "local_score_marginal_multi" + ): # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: - parameters = {'dlabel': {}} + parameters = {"dlabel": {}} for i in range(X.shape[1]): - parameters['dlabel'][i] = i + parameters["dlabel"][i] = i if maxP is None: - maxP = len(parameters['dlabel']) / 2 - N = len(parameters['dlabel']) - localScoreClass = LocalScoreClass(data=X, local_score_fun=local_score_marginal_multi, parameters=parameters) + maxP = len(parameters["dlabel"]) / 2 + N = len(parameters["dlabel"]) + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_multi, parameters=parameters + ) - elif score_func == 'local_score_BIC' or score_func == 'local_score_BIC_from_cov': # Greedy equivalence search with BIC score + elif ( + score_func == "local_score_BIC" or score_func == "local_score_BIC_from_cov" + ): # Greedy equivalence search with BIC score if maxP is None: maxP = X.shape[1] / 2 N = X.shape[1] # number of variables parameters = {} parameters["lambda_value"] = 2 - localScoreClass = LocalScoreClass(data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters) + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) - elif score_func == 'local_score_BDeu': # Greedy equivalence search with BDeu score + elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score if maxP is None: maxP = X.shape[1] / 2 N = X.shape[1] # number of variables - localScoreClass = LocalScoreClass(data=X, local_score_fun=local_score_BDeu, parameters=None) + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BDeu, parameters=None + ) else: - raise Exception('Unknown function!') + raise Exception("Unknown function!") score_func = localScoreClass if node_names is None: @@ -113,8 +145,9 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = ## -------------------------------------------------------------------- ## forward greedy search - record_local_score = [[] for i in range( - N)] # record the local score calculated each time. Thus when we transition to the second phase, + record_local_score = [ + [] for i in range(N) + ] # record the local score calculated each time. 
Thus when we transition to the second phase,
     # many of the operators can be scored without an explicit call to the scoring function
     # record_local_score{trial}{j} records the local scores of Xj under different parent sets
     score_new = score
@@ -132,17 +165,27 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] =
         min_desc = []
         for i in range(N):
             for j in range(N):
-                if (G.graph[i, j] == Endpoint.NULL.value and G.graph[j, i] == Endpoint.NULL.value
-                        and i != j and len(np.where(G.graph[j, :] == Endpoint.ARROW.value)[0]) <= maxP):
+                if (
+                    G.graph[i, j] == Endpoint.NULL.value
+                    and G.graph[j, i] == Endpoint.NULL.value
+                    and i != j
+                    and len(np.where(G.graph[j, :] == Endpoint.ARROW.value)[0]) <= maxP
+                ):
                     # find a pair (Xi, Xj) that is not adjacent in the current graph, and restrict the number of parents
-                    Tj = np.intersect1d(np.where(G.graph[:, j] == Endpoint.TAIL.value)[0],
-                                        np.where(G.graph[j, :] == Endpoint.TAIL.value)[0])  # neighbors of Xj
+                    Tj = np.intersect1d(
+                        np.where(G.graph[:, j] == Endpoint.TAIL.value)[0],
+                        np.where(G.graph[j, :] == Endpoint.TAIL.value)[0],
+                    )  # neighbors of Xj

-                    Ti = np.union1d(np.where(G.graph[:, i] != Endpoint.NULL.value)[0],
-                                    np.where(G.graph[i, :] != Endpoint.NULL.value)[0])  # adjacent to Xi
+                    Ti = np.union1d(
+                        np.where(G.graph[:, i] != Endpoint.NULL.value)[0],
+                        np.where(G.graph[i, :] != Endpoint.NULL.value)[0],
+                    )  # adjacent to Xi
                     NTi = np.setdiff1d(np.arange(N), Ti)
-                    T0 = np.intersect1d(Tj, NTi)  # find the neighbours of Xj that are not adjacent to Xi
+                    T0 = np.intersect1d(
+                        Tj, NTi
+                    )  # find the neighbours of Xj that are not adjacent to Xi
                     # for any subset of T0
                     sub = Combinatorial(T0.tolist())  # find all the subsets for T0
                     S = np.zeros(len(sub))
@@ -151,34 +194,49 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] =
                     # 1: only check the first condition
                     # 2: check nothing and is not valid.
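The rescoring inside this loop leans on the decomposability of the score: an Insert(Xi, Xj, T) only changes Xj's parent set, so insert_changed_score reduces to two local-score calls. A condensed sketch of that rule (here `local_score(j, parents)` is a stand-in callable, not the library's exact interface):

    # Chickering-style delta for Insert(Xi, Xj, T): only Xj is rescored.
    # NA = neighbours of Xj that are adjacent to Xi; Paj = current parents of Xj.
    def insert_delta(local_score, i, j, NA, T, Paj):
        base = set(NA) | set(T) | set(Paj)
        return local_score(j, sorted(base | {i})) - local_score(j, sorted(base))

Because the local scores here are negative likelihoods, a negative delta (the chscore < min_chscore bookkeeping below) means the insertion improves the model.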
for k in range(len(sub)): - if (S[k] < 2): # S indicate whether we need to check subset(k) - V1 = insert_validity_test1(G, i, j, sub[k]) # Insert operator validation test:condition 1 - if (V1): - if (not S[k]): - V2 = insert_validity_test2(G, i, j, - sub[k]) # Insert operator validation test:condition 2 + if S[k] < 2: # S indicate whether we need to check subset(k) + V1 = insert_validity_test1( + G, i, j, sub[k] + ) # Insert operator validation test:condition 1 + if V1: + if not S[k]: + V2 = insert_validity_test2( + G, i, j, sub[k] + ) # Insert operator validation test:condition 2 else: V2 = 1 - if (V2): - Idx = find_subset_include(sub[k], sub) # find those subsets that include sub(k) + if V2: + Idx = find_subset_include( + sub[k], sub + ) # find those subsets that include sub(k) S[np.where(Idx == 1)] = 1 - chscore, desc, record_local_score = insert_changed_score(X, G, i, j, sub[k], - record_local_score, - score_func, - parameters) + chscore, desc, record_local_score = ( + insert_changed_score( + X, + G, + i, + j, + sub[k], + record_local_score, + score_func, + parameters, + ) + ) # calculate the changed score after Insert operator # desc{count} saves the corresponding (i,j,sub{k}) # sub{k}: - if (chscore < min_chscore): + if chscore < min_chscore: min_chscore = chscore min_desc = desc else: - Idx = find_subset_include(sub[k], sub) # find those subsets that include sub(k) + Idx = find_subset_include( + sub[k], sub + ) # find those subsets that include sub(k) S[np.where(Idx == 1)] = 2 - if (len(min_desc) != 0): + if len(min_desc) != 0: score_new = score + min_chscore - if (score - score_new <= 0): + if score - score_new <= 0: break G = insert(G, min_desc[0], min_desc[1], min_desc[2]) update1.append([min_desc[0], min_desc[1], min_desc[2]]) @@ -206,35 +264,54 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = min_desc = [] for i in range(N): for j in range(N): - if ((G.graph[j, i] == Endpoint.TAIL.value and G.graph[i, j] == Endpoint.TAIL.value) - or G.graph[j, i] == Endpoint.ARROW.value): # if Xi - Xj or Xi -> Xj - Hj = np.intersect1d(np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], - np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj - Hi = np.union1d(np.where(G.graph[i, :] != Endpoint.NULL.value)[0], - np.where(G.graph[:, i] != Endpoint.NULL.value)[0]) # adjacent to Xi - H0 = np.intersect1d(Hj, Hi) # find the neighbours of Xj that are adjacent to Xi + if ( + G.graph[j, i] == Endpoint.TAIL.value + and G.graph[i, j] == Endpoint.TAIL.value + ) or G.graph[ + j, i + ] == Endpoint.ARROW.value: # if Xi - Xj or Xi -> Xj + Hj = np.intersect1d( + np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], + np.where(G.graph[j, :] == Endpoint.TAIL.value)[0], + ) # neighbors of Xj + Hi = np.union1d( + np.where(G.graph[i, :] != Endpoint.NULL.value)[0], + np.where(G.graph[:, i] != Endpoint.NULL.value)[0], + ) # adjacent to Xi + H0 = np.intersect1d( + Hj, Hi + ) # find the neighbours of Xj that are adjacent to Xi # for any subset of H0 sub = Combinatorial(H0.tolist()) # find all the subsets for H0 S = np.ones(len(sub)) # S indicate whether we need to check sub{k}. 
# 1: check the condition, # 2: check nothing and is valid; for k in range(len(sub)): - if (S[k] == 1): - V = delete_validity_test(G, i, j, sub[k]) # Delete operator validation test - if (V): + if S[k] == 1: + V = delete_validity_test( + G, i, j, sub[k] + ) # Delete operator validation test + if V: # find those subsets that include sub(k) Idx = find_subset_include(sub[k], sub) S[np.where(Idx == 1)] = 2 # and set their S to 2 else: V = 1 - if (V): - chscore, desc, record_local_score = delete_changed_score(X, G, i, j, sub[k], - record_local_score, score_func, - parameters) + if V: + chscore, desc, record_local_score = delete_changed_score( + X, + G, + i, + j, + sub[k], + record_local_score, + score_func, + parameters, + ) # calculate the changed score after Insert operator # desc{count} saves the corresponding (i,j,sub{k}) - if (chscore < min_chscore): + if chscore < min_chscore: min_chscore = chscore min_desc = desc @@ -251,5 +328,12 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = score_new = score break - Record = {'update1': update1, 'update2': update2, 'G_step1': G_step1, 'G_step2': G_step2, 'G': G, 'score': score} + Record = { + "update1": update1, + "update2": update2, + "G_step1": G_step1, + "G_step2": G_step2, + "G": G, + "score": score, + } return Record diff --git a/causallearn/utils/DAG2CPDAG.py b/causallearn/utils/DAG2CPDAG.py index 7a619a02..2604b1f7 100644 --- a/causallearn/utils/DAG2CPDAG.py +++ b/causallearn/utils/DAG2CPDAG.py @@ -25,10 +25,11 @@ def dag2cpdag(G: Dag) -> GeneralGraph: # order the edges in G nodes_order = list( - map(lambda x: G.node_map[x], G.get_causal_ordering())) # Perform a topological sort on the nodes of G + map(lambda x: G.node_map[x], G.get_causal_ordering()) + ) # Perform a topological sort on the nodes of G # nodes_order(1) is the node which has the highest order # nodes_order(N) is the node which has the lowest order - edges_order = np.mat([[], []], dtype=np.int64).T + edges_order = np.empty((0, 2), dtype=np.int64) # edges_order(1,:) is the edge which has the highest order # edges_order(M,:) is the edge which has the lowest order M = G.get_num_edges() # the number of edges in this DAG @@ -37,30 +38,44 @@ def dag2cpdag(G: Dag) -> GeneralGraph: while edges_order.shape[0] < M: for ny in range(N - 1, -1, -1): j = nodes_order[ny] - inci_all = np.where(G.graph[j, :] == 1)[0] # all the edges that incident to j + inci_all = np.where(G.graph[j, :] == 1)[ + 0 + ] # all the edges that incident to j if len(inci_all) != 0: if len(edges_order) != 0: - inci = edges_order[np.where(edges_order[:, 1] == j)[0], 0] # ordered edge that incident to j - if len(set(inci_all) - set(inci.T.tolist()[0])) != 0: + inci = edges_order[ + np.where(edges_order[:, 1] == j)[0], 0 + ] # ordered edge that incident to j + if len(set(inci_all) - set(inci.tolist())) != 0: break else: break for nx in range(N): i = nodes_order[nx] if len(edges_order) != 0: - if (len(np.intersect1d(np.where(edges_order[:, 1] == j)[0], - np.where(edges_order[:, 0] == i)[0])) == 0 and G.graph[j, i] == 1): + if ( + len( + np.intersect1d( + np.where(edges_order[:, 1] == j)[0], + np.where(edges_order[:, 0] == i)[0], + ) + ) + == 0 + and G.graph[j, i] == 1 + ): break else: if G.graph[j, i] == 1: break - edges_order = np.r_[edges_order, np.mat([i, j])] + edges_order = np.r_[edges_order, np.array([[i, j]])] ## ---------------------------------------------------------------- sign_edges = np.zeros(M) # 0 means unknown, 1 means compelled, -1 means reversible while 
len(np.where(sign_edges == 0)[0]) != 0:
         ss = 0
-        for m in range(M - 1, -1, -1):  # let x->y be the lowest ordered edge that is labeled "unknown"
+        for m in range(
+            M - 1, -1, -1
+        ):  # let x->y be the lowest ordered edge that is labeled "unknown"
             if sign_edges[m] == 0:
                 i = edges_order[m, 0]
                 j = edges_order[m, 1]
@@ -70,42 +85,82 @@ def dag2cpdag(G: Dag) -> GeneralGraph:
             for m in range(len(k)):
                 if sign_edges[idk[m]] == 1:
                     if G.graph[j, k[m]] != 1:  # if w is not a parent of y
-                        _id = np.where(edges_order[:, 1] == j)[0]  # label every edge that incident into y with "complled"
+                        _id = np.where(edges_order[:, 1] == j)[
+                            0
+                        ]  # label every edge incident into y with "compelled"
                         sign_edges[_id] = 1
                         ss = 1
                         break
                     else:
-                        _id = np.intersect1d(np.where(edges_order[:, 0] == k[m, 0])[0],
-                                             np.where(edges_order[:, 1] == j)[0])  # label w->y with "complled"
+                        _id = np.intersect1d(
+                            np.where(edges_order[:, 0] == k[m])[0],
+                            np.where(edges_order[:, 1] == j)[0],
+                        )  # label w->y with "compelled"
                         sign_edges[_id] = 1

             if ss:
                 continue
             z = np.where(G.graph[j, :] == 1)[0]
-            if (len(np.intersect1d(np.setdiff1d(z, i),
-                                   np.union1d(np.union1d(np.where(G.graph[i, :] == 0)[0], np.where(G.graph[i, :] == -1)[0]),
-                                              np.intersect1d(np.where(G.graph[i, :] == -1)[0],
-                                                             np.where(G.graph[:, i] == -1)[0])))) != 0):
-                _id = np.intersect1d(np.where(edges_order[:, 0] == i)[0], np.where(edges_order[:, 1] == j)[0])
+            if (
+                len(
+                    np.intersect1d(
+                        np.setdiff1d(z, i),
+                        np.union1d(
+                            np.union1d(
+                                np.where(G.graph[i, :] == 0)[0],
+                                np.where(G.graph[i, :] == -1)[0],
+                            ),
+                            np.intersect1d(
+                                np.where(G.graph[i, :] == -1)[0],
+                                np.where(G.graph[:, i] == -1)[0],
+                            ),
+                        ),
+                    )
+                )
+                != 0
+            ):
+                _id = np.intersect1d(
+                    np.where(edges_order[:, 0] == i)[0], np.where(edges_order[:, 1] == j)[0]
+                )
                 sign_edges[_id] = 1  # label x->y with "compelled"
                 id1 = np.where(edges_order[:, 1] == j)[0]
                 id2 = np.intersect1d(np.where(sign_edges == 0)[0], id1)
-                sign_edges[id2] = 1  # label all "unknown" edges incident into y with "complled"
+                sign_edges[id2] = (
+                    1  # label all "unknown" edges incident into y with "compelled"
+                )
             else:
-                _id = np.intersect1d(np.where(edges_order[:, 0] == i)[0], np.where(edges_order[:, 1] == j)[0])
+                _id = np.intersect1d(
+                    np.where(edges_order[:, 0] == i)[0], np.where(edges_order[:, 1] == j)[0]
+                )
                 sign_edges[_id] = -1  # label x->y with "reversible"
                 id1 = np.where(edges_order[:, 1] == j)[0]
                 id2 = np.intersect1d(np.where(sign_edges == 0)[0], id1)
-                sign_edges[id2] = -1  # label all "unknown" edges incident into y with "reversible"
+                sign_edges[id2] = (
+                    -1
+                )  # label all "unknown" edges incident into y with "reversible"

     # create CPDAG according to the labelled edges
     nodes = G.get_nodes()
     CPDAG = GeneralGraph(nodes)
     for m in range(M):
         if sign_edges[m] == 1:
-            CPDAG.add_edge(Edge(nodes[edges_order[m, 0]], nodes[edges_order[m, 1]], Endpoint.TAIL, Endpoint.ARROW))
+            CPDAG.add_edge(
+                Edge(
+                    nodes[edges_order[m, 0]],
+                    nodes[edges_order[m, 1]],
+                    Endpoint.TAIL,
+                    Endpoint.ARROW,
+                )
+            )
         else:
-            CPDAG.add_edge(Edge(nodes[edges_order[m, 0]], nodes[edges_order[m, 1]], Endpoint.TAIL, Endpoint.TAIL))
+            CPDAG.add_edge(
+                Edge(
+                    nodes[edges_order[m, 0]],
+                    nodes[edges_order[m, 1]],
+                    Endpoint.TAIL,
+                    Endpoint.TAIL,
+                )
+            )

     return CPDAG
diff --git a/causallearn/utils/GESUtils.py b/causallearn/utils/GESUtils.py
index 20735676..ee0e1138 100644
--- a/causallearn/utils/GESUtils.py
+++ b/causallearn/utils/GESUtils.py
@@ -2,8 +2,6 @@
 from copy import deepcopy
 from typing import List

-import numpy.matlib
-
 from causallearn.graph.Edge import
Edge from causallearn.graph.Endpoint import Endpoint from causallearn.score.LocalScoreFunction import * @@ -59,12 +57,18 @@ def insert_validity_test1(G, i, j, T) -> int: V = 0 # condition 1 - Tj = np.intersect1d(np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], - np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj - Ti = np.union1d(np.where(G.graph[:, i] != Endpoint.NULL.value)[0], - np.where(G.graph[i, :] != Endpoint.NULL.value)[0]) # adjacent to Xi; + Tj = np.intersect1d( + np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], + np.where(G.graph[j, :] == Endpoint.TAIL.value)[0], + ) # neighbors of Xj + Ti = np.union1d( + np.where(G.graph[:, i] != Endpoint.NULL.value)[0], + np.where(G.graph[i, :] != Endpoint.NULL.value)[0], + ) # adjacent to Xi; NA = np.intersect1d(Tj, Ti) # find the neighbours of Xj and are adjacent to Xi - V = check_clique(G, list(np.union1d(NA, T).astype(int))) # check whether it is a clique + V = check_clique( + G, list(np.union1d(NA, T).astype(int)) + ) # check whether it is a clique return V @@ -81,7 +85,9 @@ def check_clique(G, subnode) -> int: # check whether node subnode is a clique i row, col = np.where(Gs == Endpoint.ARROW.value) Gs[row, col] = Endpoint.TAIL.value Gs[col, row] = Endpoint.TAIL.value - if np.all((np.eye(ns) - np.ones((ns, ns))) == Gs): # check whether it is a clique + if np.all( + (np.eye(ns) - np.ones((ns, ns))) == Gs + ): # check whether it is a clique s = 1 else: s = 0 @@ -92,10 +98,14 @@ def insert_validity_test2(G, i, j, T) -> int: # V=Insert_validity_test(G, X, Y, T,1); % do validity test for the operator Insert; V=1 means valid, V=0 mean invalid; # here G is CPDAG V = 0 - Tj = np.intersect1d(np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], - np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj - Ti = np.union1d(np.where(G.graph[i, :] != Endpoint.NULL.value)[0], - np.where(G.graph[:, i] != Endpoint.NULL.value)[0]) # adjacent to Xi; + Tj = np.intersect1d( + np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], + np.where(G.graph[j, :] == Endpoint.TAIL.value)[0], + ) # neighbors of Xj + Ti = np.union1d( + np.where(G.graph[i, :] != Endpoint.NULL.value)[0], + np.where(G.graph[:, i] != Endpoint.NULL.value)[0], + ) # adjacent to Xi; NA = np.intersect1d(Tj, Ti) # find the neighbours of Xj and are adjacent to Xi # condition 2: every semi-directed path from Xj to Xi contains a node in union(NA,T) @@ -113,47 +123,55 @@ def insert_vc2_new(G, j, i, NAT): # validity test for condition 2 of Insert ope start = j target = i # stack(1)=start; % initialize the stack - stack = [{'value': start, 'pa': {}}] + stack = [{"value": start, "pa": {}}] sign = 1 # If every semi-pathway contains a node in NAT, than sign=1; count = 1 while len(stack): top = stack[0] stack = stack[1:] # pop - if top['value'] == target: # if find the target, search that pathway to see whether NAT is in that pathway + if ( + top["value"] == target + ): # if find the target, search that pathway to see whether NAT is in that pathway curr = top ss = 0 while True: - if len(curr['pa']): - if curr['pa']['value'] in NAT: # contains a node in NAT + if len(curr["pa"]): + if curr["pa"]["value"] in NAT: # contains a node in NAT ss = 1 break else: break - curr = curr['pa'] + curr = curr["pa"] if not ss: # do not include NAT sign = 0 break else: - child = np.concatenate((np.where(G.graph[:, top['value']] == Endpoint.ARROW.value)[0], - np.intersect1d(np.where(G.graph[top['value'], :] == Endpoint.TAIL.value)[0], - np.where(G.graph[:, top['value']] == 
Endpoint.TAIL.value)[0]))) + child = np.concatenate( + ( + np.where(G.graph[:, top["value"]] == Endpoint.ARROW.value)[0], + np.intersect1d( + np.where(G.graph[top["value"], :] == Endpoint.TAIL.value)[0], + np.where(G.graph[:, top["value"]] == Endpoint.TAIL.value)[0], + ), + ) + ) sign_child = np.ones(len(child)) # check each child, whether it has appeared before in the same pathway for k in range(len(child)): curr = top while True: - if len(curr['pa']): - if curr['pa']['value'] == child[k]: + if len(curr["pa"]): + if curr["pa"]["value"] == child[k]: sign_child[k] = 0 # has appeared in that path before break else: break - curr = curr['pa'] + curr = curr["pa"] for k in range(len(sign_child)): if sign_child[k]: - stack.insert(0, {'value': child[k], 'pa': top}) # push + stack.insert(0, {"value": child[k], "pa": top}) # push return sign @@ -173,10 +191,14 @@ def find_subset_include(s0, sub): def insert_changed_score(Data, G, i, j, T, record_local_score, score_func, parameters): # calculate the changed score after the insert operator: i->j - Tj = np.intersect1d(np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], - np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj - Ti = np.union1d(np.where(G.graph[i, :] != Endpoint.NULL.value)[0], - np.where(G.graph[:, i] != Endpoint.NULL.value)[0]) # adjacent to Xi; + Tj = np.intersect1d( + np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], + np.where(G.graph[j, :] == Endpoint.TAIL.value)[0], + ) # neighbors of Xj + Ti = np.union1d( + np.where(G.graph[i, :] != Endpoint.NULL.value)[0], + np.where(G.graph[:, i] != Endpoint.NULL.value)[0], + ) # adjacent to Xi; NA = np.intersect1d(Tj, Ti) # find the neighbours of Xj and are adjacent to Xi Paj = np.where(G.graph[j, :] == Endpoint.ARROW.value)[0] # find the parents of Xj # the function local_score() calculates the local score @@ -197,12 +219,15 @@ def insert_changed_score(Data, G, i, j, T, record_local_score, score_func, param score1 = record_local_score[j][r0][-1] s1 = 1 - if not np.setxor1d(record_local_score[j][r0][0:-1], - tmp2).size: # notice the difference between 0*0 empty matrix and 1*0 empty matrix + if not np.setxor1d( + record_local_score[j][r0][0:-1], tmp2 + ).size: # notice the difference between 0*0 empty matrix and 1*0 empty matrix score2 = record_local_score[j][r0][-1] s2 = 1 else: - if (not np.setxor1d(record_local_score[j][r0][0:-1], [-1]).size) and (not tmp2.size): + if (not np.setxor1d(record_local_score[j][r0][0:-1], [-1]).size) and ( + not tmp2.size + ): score2 = record_local_score[j][r0][-1] s2 = 1 @@ -236,7 +261,9 @@ def insert(G, i, j, T): nodes = G.get_nodes() G.add_edge(Edge(nodes[i], nodes[j], Endpoint.TAIL, Endpoint.ARROW)) - for k in range(len(T)): # directing the previous undirected edge between T and Xj as T->Xj + for k in range( + len(T) + ): # directing the previous undirected edge between T and Xj as T->Xj if G.get_edge(nodes[T[k]], nodes[j]) is not None: G.remove_edge(G.get_edge(nodes[T[k]], nodes[j])) G.add_edge(Edge(nodes[T[k]], nodes[j], Endpoint.TAIL, Endpoint.ARROW)) @@ -250,14 +277,18 @@ def delete_validity_test(G, i, j, H): V = 0 # condition 1 - Hj = np.intersect1d(np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], - np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj - Hi = np.union1d(np.where(G.graph[i, :] != Endpoint.NULL.value)[0], - np.where(G.graph[:, i] != Endpoint.NULL.value)[0]) # adjacent to Xi; + Hj = np.intersect1d( + np.where(G.graph[:, j] == Endpoint.TAIL.value)[0], + np.where(G.graph[j, :] == Endpoint.TAIL.value)[0], + ) # 
neighbors of Xj
+    Hi = np.union1d(
+        np.where(G.graph[i, :] != Endpoint.NULL.value)[0],
+        np.where(G.graph[:, i] != Endpoint.NULL.value)[0],
+    )  # adjacent to Xi;
     NA = np.intersect1d(Hj, Hi)  # find the neighbours of Xj that are adjacent to Xi

     s1 = check_clique(G, list(set(NA) - set(H)))  # check whether it is a clique
-    if (s1):
+    if s1:
         V = 1
     return V
@@ -265,12 +296,18 @@ def delete_changed_score(Data, G, i, j, H, record_local_score, score_func, param
     # calculate the changed score after the Delete operator
-    Hj = np.intersect1d(np.where(G.graph[:, j] == Endpoint.TAIL.value)[0],
-                        np.where(G.graph[j, :] == Endpoint.TAIL.value)[0])  # neighbors of Xj
-    Hi = np.union1d(np.where(G.graph[i, :] != Endpoint.NULL.value)[0],
-                    np.where(G.graph[:, i] != Endpoint.NULL.value)[0])  # adjacent to Xi;
+    Hj = np.intersect1d(
+        np.where(G.graph[:, j] == Endpoint.TAIL.value)[0],
+        np.where(G.graph[j, :] == Endpoint.TAIL.value)[0],
+    )  # neighbors of Xj
+    Hi = np.union1d(
+        np.where(G.graph[i, :] != Endpoint.NULL.value)[0],
+        np.where(G.graph[:, i] != Endpoint.NULL.value)[0],
+    )  # adjacent to Xi;
     NA = np.intersect1d(Hj, Hi)  # find the neighbours of Xj that are adjacent to Xi
-    Paj = np.union1d(np.where(G.graph[j, :] == Endpoint.ARROW.value)[0], [i])  # find the parents of Xj
+    Paj = np.union1d(
+        np.where(G.graph[j, :] == Endpoint.ARROW.value)[0], [i]
+    )  # find the parents of Xj
     # the function local_score() calculates the local score
     tmp1 = set(NA) - set(H)
     tmp2 = set.union(tmp1, set(Paj))
@@ -289,8 +326,9 @@ def delete_changed_score(Data, G, i, j, H, record_local_score, score_func, param
             score1 = record_local_score[j][r0][-1]
             s1 = 1

-        if set(record_local_score[j][r0][
-               0:-1]) == tmp2:  # notice the difference between 0*0 empty matrix and 1*0 empty matrix
+        if (
+            set(record_local_score[j][r0][0:-1]) == tmp2
+        ):  # notice the difference between 0*0 empty matrix and 1*0 empty matrix
             score2 = record_local_score[j][r0][-1]
             s2 = 1
         else:
@@ -356,11 +394,13 @@ def dist2(x, c):
     ndata, dimx = x.shape
     ncentres, dimc = c.shape
     if dimx != dimc:
-        raise Exception('Data dimension does not match dimension of centres')
+        raise Exception("Data dimension does not match dimension of centres")

-    n2 = (np.mat(np.ones((ncentres, 1))) * np.sum(np.multiply(x, x).T, axis=0)).T + \
-         np.mat(np.ones((ndata, 1))) * np.sum(np.multiply(c, c).T, axis=0) - \
-         2 * (x * c.T)
+    n2 = (
+        (np.ones((ncentres, 1)) * np.sum(np.multiply(x, x).T, axis=0)).T
+        + np.ones((ndata, 1)) * np.sum(np.multiply(c, c).T, axis=0)
+        - 2 * (x @ c.T)
+    )

     # Rounding errors occasionally cause negative entries in n2
     n2[np.where(n2 < 0)] = 0
@@ -375,9 +415,9 @@ def pdinv(A):
         invU = np.eye(numData).dot(np.linalg.inv(U))
         Ainv = invU.dot(invU.T)
     except numpy.linalg.LinAlgError as e:
-        warnings.warn('Matrix is not positive definite in pdinv, inverting using svd')
+        warnings.warn("Matrix is not positive definite in pdinv, inverting using svd")
         u, s, vh = np.linalg.svd(A, full_matrices=True)
         Ainv = vh.T.dot(np.diag(1 / s)).dot(u.T)
     except Exception as e:
         raise e
-    return np.mat(Ainv)
+    return Ainv
diff --git a/causallearn/utils/ScoreUtils.py b/causallearn/utils/ScoreUtils.py
index 6bd075aa..1b937451 100644
--- a/causallearn/utils/ScoreUtils.py
+++ b/causallearn/utils/ScoreUtils.py
@@ -10,7 +10,7 @@ def kernel(x, xKern, theta):
     # KERNEL Compute the rbf kernel
     n2 = dist2(x, xKern)
-    if (theta[0] == 0):
+    if theta[0] == 0:
         theta[0] = 2 / np.median(n2[np.where(np.tril(n2) > 0)])
     theta_new = theta[0]
     wi2 = theta[0] / 2
@@ -36,12 +36,14 @@ def dist2(x, c):
     ndata, dimx = x.shape
     ncentres, dimc = c.shape
-    if (dimx != dimc):
-        raise Exception('Data dimension does not match dimension of centres')
+    if dimx != dimc:
+        raise Exception("Data dimension does not match dimension of centres")

-    n2 = (np.mat(np.ones((ncentres, 1))) * np.sum(np.multiply(x, x).T, axis=0)).T + \
-         np.mat(np.ones((ndata, 1))) * np.sum(np.multiply(c, c).T, axis=0) - \
-         2 * (x * c.T)
+    n2 = (
+        (np.ones((ncentres, 1)) * np.sum(np.multiply(x, x).T, axis=0)).T
+        + np.ones((ndata, 1)) * np.sum(np.multiply(c, c).T, axis=0)
+        - 2 * (x @ c.T)
+    )

     # Rounding errors occasionally cause negative entries in n2
     n2[np.where(n2 < 0)] = 0
@@ -56,52 +58,52 @@ def pdinv(A):
         invU = np.eye(numData).dot(np.linalg.inv(U))
         Ainv = invU.dot(invU.T)
     except numpy.linalg.LinAlgError as e:
-        warnings.warn('Matrix is not positive definite in pdinv, inverting using svd')
+        warnings.warn("Matrix is not positive definite in pdinv, inverting using svd")
         u, s, vh = np.linalg.svd(A, full_matrices=True)
         Ainv = vh.T.dot(np.diag(1 / s)).dot(u.T)
     except Exception as e:
         raise e
-    return np.mat(Ainv)
+    return Ainv


 def eigdec(x, N, evals_only=False):
     # EIGDEC	Sorted eigendecomposition
     #
-    # 	Description
-    # 	EVALS = EIGDEC(X, N computes the largest N eigenvalues of the
-    # 	matrix X in descending order.  [EVALS, EVEC] = EIGDEC(X, N) also
-    # 	computes the corresponding eigenvectors.
+    #   Description
+    #   EVALS = EIGDEC(X, N) computes the largest N eigenvalues of the
+    #   matrix X in descending order.  [EVALS, EVEC] = EIGDEC(X, N) also
+    #   computes the corresponding eigenvectors.
     #
-    # 	See also
-    # 	PCA, PPCA
+    #   See also
+    #   PCA, PPCA
     #
-    if (N != np.round(N) or N < 1 or N > x.shape[1]):
-        raise Exception('Number of PCs must be integer, >0, < dim')
+    if N != np.round(N) or N < 1 or N > x.shape[1]:
+        raise Exception("Number of PCs must be integer, >0, < dim")

     # Find the eigenvalues of the data covariance matrix
-    if (evals_only):
+    if evals_only:
         # Use eig function as always more efficient than eigs here
         temp_evals, _ = np.linalg.eig(x)
     else:
         # Use eig function unless fraction of eigenvalues required is tiny
-        if ((N / x.shape[1]) > 0.04):
+        if (N / x.shape[1]) > 0.04:
             temp_evals, temp_evec = np.linalg.eig(x)
         else:
-            temp_evals, temp_evec = scipy.sparse.linalg.eigs(x, k=N, which='LM')
+            temp_evals, temp_evec = scipy.sparse.linalg.eigs(x, k=N, which="LM")

     # Eigenvalues nearly always returned in descending order, but just to make sure.....
     evals = np.sort(-temp_evals)
     perm = np.argsort(-temp_evals)
     evals = -evals[0:N]
-    if (not evals_only):
-        if (np.all(evals == temp_evals[0:N])):
+    if not evals_only:
+        if np.all(evals == temp_evals[0:N]):
             # Originals were in order
-            evec = temp_evec[:, 0: N]
+            evec = temp_evec[:, 0:N]
         else:
             # Need to reorder the eigenvectors
-            evec = np.empty_like(temp_evec[:, 0: N])
+            evec = np.empty_like(temp_evec[:, 0:N])
             for i in range(N):
                 evec[:, i] = temp_evec[:, perm[i]]
@@ -179,9 +181,9 @@ def minimize(X, f, length, *varargin):
     red = 1

     if length > 0:
-        S = 'Linesearch'
+        S = "Linesearch"
     else:
-        S = 'Function evaluation'
+        S = "Function evaluation"

     i = 0  # zero the run length counter
     ls_failed = 0  # no previous line search has failed
@@ -213,7 +215,7 @@ def minimize(X, f, length, *varargin):
             df3 = df0
             success = False

-            while (not success and M > 0):
+            while not success and M > 0:
                 try:
                     M = M - 1
                     i = i + (1 if length < 0 else 0)  # count epochs?!
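These dist2/pdinv hunks are the crux of the whole migration: np.matrix overloads `*` as the matrix product, while on plain ndarrays `*` is elementwise with broadcasting, which is why the products above had to become `@`. A standalone illustration (not part of the patch) of the two behaviours:

    import numpy as np

    a = np.arange(6.0).reshape(2, 3)
    b = np.arange(6.0).reshape(3, 2)
    print((a @ b).shape)    # (2, 2): matrix product, what np.mat's `*` used to mean

    x = np.arange(4.0).reshape(4, 1)
    print((x * x.T).shape)  # (4, 4): broadcasting turns column*row into an outer
                            # product, so `X * X.T` still equals `X @ X.T` for a
                            # single column -- but not for general matrices

This is also why any `H0 * Kx * H0` centering products left over from the np.mat days are worth auditing: on ndarrays they are elementwise, not the intended H0 @ Kx @ H0.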
@@ -221,8 +223,13 @@ def minimize(X, f, length, *varargin): temp.extend(varargin) temp.extend([None, 2]) f3, df3 = feval(temp) - if np.isnan(f3) or np.isinf(f3) or np.any(np.isnan(df3)) or np.any(np.isinf(df3)): - raise Exception('') + if ( + np.isnan(f3) + or np.isinf(f3) + or np.any(np.isnan(df3)) + or np.any(np.isinf(df3)) + ): + raise Exception("") success = True except Exception as e: # catch any error which occurred in f x3 = (x2 + x3) / 2 # bisect and try again @@ -232,7 +239,9 @@ def minimize(X, f, length, *varargin): F0 = f3 dF0 = df3 d3 = (df3.T * s)[0, 0] # new slope - if d3 > SIG * d0 or f3 > f0 + x3 * RHO * d0 or M == 0: # are we done extrapolating? + if ( + d3 > SIG * d0 or f3 > f0 + x3 * RHO * d0 or M == 0 + ): # are we done extrapolating? break x1 = x2 # move point 2 to point 1 @@ -243,8 +252,12 @@ def minimize(X, f, length, *varargin): d2 = d3 A = 6 * (f1 - f2) + 3 * (d2 + d1) * (x2 - x1) # make cubic extrapolation B = 3 * (f2 - f1) - (2 * d1 + d2) * (x2 - x1) - x3 = x1 - d1 * (x2 - x1) ** 2 / (B + np.sqrt(B * B - A * d1 * (x2 - x1))) # num. error possible, ok! - if not np.isreal(x3) or np.isnan(x3) or np.isinf(x3) or x3 < 0: # num prob | wrong sign? + x3 = x1 - d1 * (x2 - x1) ** 2 / ( + B + np.sqrt(B * B - A * d1 * (x2 - x1)) + ) # num. error possible, ok! + if ( + not np.isreal(x3) or np.isnan(x3) or np.isinf(x3) or x3 < 0 + ): # num prob | wrong sign? x3 = x2 * EXT # extrapolate maximum amount elif x3 > x2 * EXT: # new point beyond extrapolation limit? x3 = x2 * EXT # extrapolate maximum amount @@ -252,7 +265,9 @@ def minimize(X, f, length, *varargin): x3 = x2 + INT * (x2 - x1) # end extrapolation - while (abs(d3) > -SIG * d0 or f3 > f0 + x3 * RHO * d0) and M > 0: # keep interpolating + while ( + abs(d3) > -SIG * d0 or f3 > f0 + x3 * RHO * d0 + ) and M > 0: # keep interpolating if d3 > 0 or f3 > f0 + x3 * RHO * d0: # choose subinterval x4 = x3 # move point 3 to point 4 f4 = f3 @@ -263,16 +278,22 @@ def minimize(X, f, length, *varargin): d2 = d3 if f4 > f0: - x3 = x2 - (0.5 * d2 * (x4 - x2) ** 2) / (f4 - f2 - d2 * (x4 - x2)) # quadratic interpolation + x3 = x2 - (0.5 * d2 * (x4 - x2) ** 2) / ( + f4 - f2 - d2 * (x4 - x2) + ) # quadratic interpolation else: A = 6 * (f2 - f4) / (x4 - x2) + 3 * (d4 + d2) # cubic interpolation B = 3 * (f4 - f2) - (2 * d2 + d4) * (x4 - x2) - x3 = x2 + (np.sqrt(B * B - A * d2 * (x4 - x2) ** 2) - B) / A # num. error possible, ok! + x3 = ( + x2 + (np.sqrt(B * B - A * d2 * (x4 - x2) ** 2) - B) / A + ) # num. error possible, ok! 
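For context, minimize() reads as a conjugate-gradient line search in the style of the GPML minimize.m routine: bracket a minimum by cubic extrapolation, then shrink the bracket by quadratic/cubic interpolation until the slope condition (SIG) and the sufficient-decrease condition (RHO) hold. The extrapolation step above, isolated as a sketch with the same variable names ((x1, f1, d1) and (x2, f2, d2) being the two bracketing points and their slopes):

    import numpy as np

    def cubic_extrapolate(x1, f1, d1, x2, f2, d2):
        # fit a cubic through both points with matching slopes, then jump to
        # its stationary point (the same A, B, x3 formulas as in the code above)
        A = 6 * (f1 - f2) + 3 * (d2 + d1) * (x2 - x1)
        B = 3 * (f2 - f1) - (2 * d1 + d2) * (x2 - x1)
        return x1 - d1 * (x2 - x1) ** 2 / (B + np.sqrt(B * B - A * d1 * (x2 - x1)))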
if np.isnan(x3) or np.isinf(x3): x3 = (x2 + x4) / 2 # if we had a numerical problem then bisect - x3 = max(min(x3, x4 - INT * (x4 - x2)), x2 + INT * (x4 - x2)) # don't accept too close + x3 = max( + min(x3, x4 - INT * (x4 - x2)), x2 + INT * (x4 - x2) + ) # don't accept too close temp = [f, X + x3 * s] temp.extend(varargin) temp.extend([None, 2]) @@ -286,24 +307,28 @@ def minimize(X, f, length, *varargin): d3 = (df3.T * s)[0, 0] # new slope # end interpolation - if (abs(d3) < -SIG * d0 and f3 < f0 + x3 * RHO * d0): # if line search succeeded + if abs(d3) < -SIG * d0 and f3 < f0 + x3 * RHO * d0: # if line search succeeded X = X + x3 * s f0 = f3 fX = np.vstack([fX, f0]) # update variables - s = ((df3.T * df3)[0, 0] - df0.T * df3[0, 0]) / (df0.T * df0)[0, 0] * s - df3 # Polack-Ribiere CG direction + s = ((df3.T * df3)[0, 0] - df0.T * df3[0, 0]) / (df0.T * df0)[ + 0, 0 + ] * s - df3 # Polack-Ribiere CG direction df0 = df3 # swap derivatives d3 = d0 d0 = (df0.T * s)[0, 0] - if (d0 > 0): # new slope must be negative + if d0 > 0: # new slope must be negative s = -df0 d0 = -(s.T * s)[0, 0] - x3 = x3 * min(RATIO, d3 / (d0 - sys.float_info.min)) # slope ratio but max RATIO + x3 = x3 * min( + RATIO, d3 / (d0 - sys.float_info.min) + ) # slope ratio but max RATIO ls_failed = 0 # this line search did not fail else: X = X0 # restore best point so far f0 = F0 df0 = dF0 - if (ls_failed or i > abs(length)): # line search failed twice in a row + if ls_failed or i > abs(length): # line search failed twice in a row break # or we ran out of time, so we give up s = -df0 # try steepest d0 = -(s.T * s)[0, 0] @@ -313,72 +338,98 @@ def minimize(X, f, length, *varargin): def feval(parameters): - if parameters[0] == 'covSum': - if (len(parameters) == 1): + if parameters[0] == "covSum": + if len(parameters) == 1: return cov_sum() - elif (len(parameters) == 2): + elif len(parameters) == 2: return cov_sum(parameters[1]) - elif (len(parameters) == 3): + elif len(parameters) == 3: return cov_sum(parameters[1], parameters[2]) - elif (len(parameters) == 4): + elif len(parameters) == 4: return cov_sum(parameters[1], parameters[2], parameters[3]) - elif (len(parameters) == 5): + elif len(parameters) == 5: return cov_sum(parameters[1], parameters[2], parameters[3], parameters[4]) - elif (len(parameters) == 6): - return cov_sum(parameters[1], parameters[2], parameters[3], parameters[4], parameters[5]) - elif parameters[0] == 'covNoise': - if (len(parameters) == 1): + elif len(parameters) == 6: + return cov_sum( + parameters[1], + parameters[2], + parameters[3], + parameters[4], + parameters[5], + ) + elif parameters[0] == "covNoise": + if len(parameters) == 1: return cov_noise() - elif (len(parameters) == 2): + elif len(parameters) == 2: return cov_noise(parameters[1]) - elif (len(parameters) == 3): + elif len(parameters) == 3: return cov_noise(parameters[1], parameters[2]) - elif (len(parameters) == 4): + elif len(parameters) == 4: return cov_noise(parameters[1], parameters[2], parameters[3]) else: return cov_noise(parameters[1], parameters[2], parameters[3], parameters[4]) - elif parameters[0] == 'covSEard': - if (len(parameters) == 1): + elif parameters[0] == "covSEard": + if len(parameters) == 1: return cov_seard() - elif (len(parameters) == 2): + elif len(parameters) == 2: return cov_seard(parameters[1]) - elif (len(parameters) == 3): + elif len(parameters) == 3: return cov_seard(parameters[1], parameters[2]) - elif (len(parameters) == 4): + elif len(parameters) == 4: return cov_seard(parameters[1], parameters[2], 
parameters[3]) else: return cov_seard(parameters[1], parameters[2], parameters[3], parameters[4]) - elif parameters[0] == 'covSum': - if (len(parameters) == 1): + elif parameters[0] == "covSum": + if len(parameters) == 1: return cov_sum() - elif (len(parameters) == 2): + elif len(parameters) == 2: return cov_sum(parameters[1]) - elif (len(parameters) == 3): + elif len(parameters) == 3: return cov_sum(parameters[1], parameters[2]) - elif (len(parameters) == 4): + elif len(parameters) == 4: return cov_sum(parameters[1], parameters[2], parameters[3]) - elif (len(parameters) == 5): + elif len(parameters) == 5: return cov_sum(parameters[1], parameters[2], parameters[3], parameters[4]) - elif (len(parameters) == 6): - return cov_sum(parameters[1], parameters[2], parameters[3], parameters[4], parameters[5]) - elif parameters[0] == 'gpr_multi_new': - if (len(parameters) == 1): + elif len(parameters) == 6: + return cov_sum( + parameters[1], + parameters[2], + parameters[3], + parameters[4], + parameters[5], + ) + elif parameters[0] == "gpr_multi_new": + if len(parameters) == 1: return gpr_multi_new() - elif (len(parameters) == 2): + elif len(parameters) == 2: return gpr_multi_new(parameters[1]) - elif (len(parameters) == 3): + elif len(parameters) == 3: return gpr_multi_new(parameters[1], parameters[2]) - elif (len(parameters) == 4): + elif len(parameters) == 4: return gpr_multi_new(parameters[1], parameters[2], parameters[3]) - elif (len(parameters) == 5): - return gpr_multi_new(parameters[1], parameters[2], parameters[3], parameters[4]) - elif (len(parameters) == 6): - return gpr_multi_new(parameters[1], parameters[2], parameters[3], parameters[4], parameters[5]) - elif (len(parameters) == 7): - return gpr_multi_new(parameters[1], parameters[2], parameters[3], parameters[4], parameters[5], - parameters[6]) + elif len(parameters) == 5: + return gpr_multi_new( + parameters[1], parameters[2], parameters[3], parameters[4] + ) + elif len(parameters) == 6: + return gpr_multi_new( + parameters[1], + parameters[2], + parameters[3], + parameters[4], + parameters[5], + ) + elif len(parameters) == 7: + return gpr_multi_new( + parameters[1], + parameters[2], + parameters[3], + parameters[4], + parameters[5], + parameters[6], + ) else: - raise Exception('Undefined function') + raise Exception("Undefined function") def gpr_multi_new(logtheta=None, covfunc=None, x=None, y=None, xstar=None, nargout=1): @@ -418,7 +469,9 @@ def gpr_multi_new(logtheta=None, covfunc=None, x=None, y=None, xstar=None, nargo n, D = x.shape n, m = y.shape if eval(feval(covfunc)) != logtheta.shape[0]: - raise Exception('Error: Number of parameters do not agree with covariance function') + raise Exception( + "Error: Number of parameters do not agree with covariance function" + ) temp = list(covfunc.copy()) temp.append(logtheta) @@ -429,13 +482,25 @@ def gpr_multi_new(logtheta=None, covfunc=None, x=None, y=None, xstar=None, nargo alpha = solve_chol(L.T, y) if ( - logtheta is not None and covfunc is not None and x is not None and y is not None and xstar is None): # if no test cases, compute the negative log marginal likelihood - out1 = 0.5 * np.trace(y.T * alpha) + m * np.sum(np.log(np.diag(L)), axis=0) + 0.5 * m * n * np.log( - 2 * np.pi) + logtheta is not None + and covfunc is not None + and x is not None + and y is not None + and xstar is None + ): # if no test cases, compute the negative log marginal likelihood + out1 = ( + 0.5 * np.trace(y.T @ alpha) + + m * np.sum(np.log(np.diag(L)), axis=0) + + 0.5 * m * n * np.log(2 * np.pi) + ) if 
nargout == 2: # ... and if requested, its partial derivatives - out2 = np.mat(np.zeros((logtheta.shape[0], 1))) # set the size of the derivative vector - W = m * (np.linalg.inv(L.T) * ( - np.linalg.inv(L) * np.mat(np.eye(n)))) - alpha * alpha.T # precompute for convenience + out2 = np.zeros( + (logtheta.shape[0], 1) + ) # set the size of the derivative vector + W = ( + m * (np.linalg.inv(L.T) * (np.linalg.inv(L) * np.eye(n))) + - alpha @ alpha.T + ) # precompute for convenience for i in range(len(out2) - 1, len(out2)): temp = list(covfunc.copy()) temp.append(logtheta) @@ -453,7 +518,7 @@ def gpr_multi_new(logtheta=None, covfunc=None, x=None, y=None, xstar=None, nargo if nargout == 2: v = np.linalg.inv(L) * Kstar - v = np.mat(v) + v = v out2 = Kss - np.sum(np.multiply(v, v), axis=0).T if nargout == 1: @@ -476,16 +541,16 @@ def solve_chol(A, B): # takes precedence over the matlab code in this file. if A is None or B is None: - raise Exception('Wrong number of arguments.') + raise Exception("Wrong number of arguments.") - if (A.shape[0] != A.shape[1] or A.shape[0] != B.shape[0]): - raise Exception('Wrong sizes of matrix arguments.') + if A.shape[0] != A.shape[1] or A.shape[0] != B.shape[0]: + raise Exception("Wrong sizes of matrix arguments.") - res = np.linalg.inv(A) * (np.linalg.inv(A.T) * B) + res = np.linalg.inv(A) @ (np.linalg.inv(A.T) @ B) return res -K = np.mat(np.empty((0, 0))) +K = np.empty((0, 0)) def cov_noise(logtheta=None, x=None, z=None, nargout=1): @@ -501,22 +566,24 @@ def cov_noise(logtheta=None, x=None, z=None, nargout=1): # # For more help on design of covariance functions, see "covFunctions". - if (logtheta is None and x is None and z is None): # report number of parameters - A = '1' + if logtheta is None and x is None and z is None: # report number of parameters + A = "1" return A - s2 = np.exp(2 * logtheta)[0, 0] # noise variance + s2 = np.exp(2 * logtheta)[0] # noise variance - if (logtheta is not None and x is not None and z is None): # compute covariance matrix - A = s2 * np.mat(np.eye(x.shape[0])) - elif (nargout == 2): # compute test set covariances + if ( + logtheta is not None and x is not None and z is None + ): # compute covariance matrix + A = s2 * np.eye(x.shape[0]) + elif nargout == 2: # compute test set covariances A = s2 B = 0 # zeros cross covariance by independence else: # compute derivative matrix - A = 2 * s2 * np.mat(np.eye(x.shape[0])) + A = 2 * s2 * np.eye(x.shape[0]) - if (nargout == 2): + if nargout == 2: return A, B else: return A @@ -541,8 +608,8 @@ def cov_seard(loghyper=None, x=None, z=None, nargout=1): # For more help on design of covariance functions, see "covFunctions". 
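For reference, the covariance cov_seard evaluates is the ARD squared-exponential k(x, x') = sf2 * exp(-0.5 * sum_d (x_d - x'_d)^2 / ell_d^2), with loghyper stacking the D log length scales followed by the log signal std. A pointwise sketch (an independent reference implementation under those assumptions, not the vectorized sq_dist path used below):

    import numpy as np

    def k_seard(x, xp, loghyper):
        # loghyper[:D] = log length scales, loghyper[D] = log(sf)
        x, xp, loghyper = (np.asarray(v, float).ravel() for v in (x, xp, loghyper))
        D = x.size
        ell = np.exp(loghyper[:D])        # characteristic length scales
        sf2 = np.exp(2 * loghyper[D])     # signal variance
        return sf2 * np.exp(-0.5 * np.sum(((x - xp) / ell) ** 2))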
global K - if (loghyper is None and x is None and z is None): - A = '(D+1)' + if loghyper is None and x is None and z is None: + A = "(D+1)" return A # report number of parameters @@ -551,24 +618,24 @@ def cov_seard(loghyper=None, x=None, z=None, nargout=1): ell = np.exp(loghyper[0:D]) # characteristic length scale sf2 = np.exp(2 * loghyper[D]) # signal variance - if (loghyper is not None and x is not None): - K = sf2 * np.exp(-sq_dist(np.mat(np.diag(1 / ell) * x.T)) / 2) + if loghyper is not None and x is not None: + K = sf2 * np.exp(-sq_dist(np.diag(1 / ell) * x.T) / 2) A = K elif nargout == 2: # compute test set covariances - A = sf2 * np.mat(np.ones((z, 1))) - B = sf2 * np.exp(-sq_dist(np.mat(np.diag(1 / ell)) * x.T, np.mat(np.diag(1 / ell)) * z) / 2) + A = sf2 * np.ones((z, 1)) + B = sf2 * np.exp(-sq_dist(np.diag(1 / ell) * x.T, np.diag(1 / ell) * z) / 2) else: # check for correct dimension of the previously calculated kernel matrix - if (K.shape[0] != n or K.shape[1] != n): - K = sf2 * np.exp(-sq_dist(np.mat(np.diag(1 / ell) * x.T)) / 2) + if K.shape[0] != n or K.shape[1] != n: + K = sf2 * np.exp(-sq_dist(np.diag(1 / ell) * x.T) / 2) if z <= D: # length scale parameters A = np.multiply(K, sq_dist(x[:, z].T / ell[z])) else: # magnitude parameter A = 2 * K - K = np.mat(np.empty((0, 0))) + K = np.empty((0, 0)) - if (nargout == 2): + if nargout == 2: return A, B else: return A @@ -600,30 +667,34 @@ def sq_dist(a, b=None, Q=None): # where a is of size D by n, b is of size D by m (or empty), C and Q are of # size n by m and c is of size D by 1. - if b is None or len(b) == 0: # input arguments are taken to be identical if b is missing or empty + if ( + b is None or len(b) == 0 + ): # input arguments are taken to be identical if b is missing or empty b = a D, n = a.shape d, m = b.shape if d != D: - raise Exception('Error: column lengths must agree.') + raise Exception("Error: column lengths must agree.") if Q is None: - C = np.mat(np.zeros((n, m))) + C = np.zeros((n, m)) for d in range(D): - temp = np.tile(b[d, :], (n, 1)) - np.tile(a[d, :].T, (1, m)) + temp = np.tile(b[d, :], (n, 1)) - np.tile(a[d, :].reshape(-1, 1), (1, m)) C = C + np.multiply(temp, temp) else: if (n, m) == Q.shape: - C = np.mat(np.zeros((D, 1))) + C = np.zeros((D, 1)) for d in range(D): - temp = np.tile(b[d, :], (n, 1)) - np.tile(a[d, :].T, (1, m)) + temp = np.tile(b[d, :], (n, 1)) - np.tile( + a[d, :].reshape(-1, 1), (1, m) + ) temp = np.multiply(temp, temp) temp = np.multiply(temp, Q) C[d] = np.sum(temp) else: - raise Exception('Third argument has wrong size.') + raise Exception("Third argument has wrong size.") return C @@ -640,10 +711,10 @@ def cov_sum(covfunc, logtheta=None, x=None, z=None, nargout=1): f = covfunc[i] j.append([feval([f])]) - if (logtheta is None and x is None and z is None): # report number of parameters + if logtheta is None and x is None and z is None: # report number of parameters A = j[0][0] for i in range(1, len(covfunc)): - A = A + '+' + j[i][0] + A = A + "+" + j[i][0] return A @@ -655,8 +726,10 @@ def cov_sum(covfunc, logtheta=None, x=None, z=None, nargout=1): v.append(i) v = np.asarray(v) - if (logtheta is not None and x is not None and z is None): # compute covariance matrix - A = np.mat(np.zeros((n, n))) # allocate space for covariance matrix + if ( + logtheta is not None and x is not None and z is None + ): # compute covariance matrix + A = np.zeros((n, n)) # allocate space for covariance matrix for i in range(len(covfunc)): # iteration over summand functions f = covfunc[i] temp = [f] @@ 
-666,10 +739,11 @@ def cov_sum(covfunc, logtheta=None, x=None, z=None, nargout=1): A = A + feval(temp) if ( - logtheta is not None and x is not None and z is not None): # compute derivative matrix or test set covariances + logtheta is not None and x is not None and z is not None + ): # compute derivative matrix or test set covariances if nargout == 2: # compute test set cavariances - A = np.mat(np.zeros((z, 1))) - B = np.mat(np.zeros((x.shape[0], z))) # allocate space + A = np.zeros((z, 1)) + B = np.zeros((x.shape[0], z)) # allocate space for i in range(len(covfunc)): f = covfunc[i] temp = [f] @@ -683,7 +757,9 @@ def cov_sum(covfunc, logtheta=None, x=None, z=None, nargout=1): B = B + BB else: # compute derivative matrices i = v[z] # which covariance function - j = np.sum(np.where(v[0:z] == i, 1, 0)) # which parameter in that covariance + j = np.sum( + np.where(v[0:z] == i, 1, 0) + ) # which parameter in that covariance f = covfunc[i] temp = [f] t = logtheta[np.where(v == i)] @@ -692,7 +768,7 @@ def cov_sum(covfunc, logtheta=None, x=None, z=None, nargout=1): temp.append(j) A = feval(temp) - if (nargout == 2): + if nargout == 2: return A, B else: return A diff --git a/tests/TestLocalScore.py b/tests/TestLocalScore.py index f8061be3..b2937843 100644 --- a/tests/TestLocalScore.py +++ b/tests/TestLocalScore.py @@ -2,6 +2,7 @@ from causallearn.utils.GESUtils import * from causallearn.score.LocalScoreFunctionClass import LocalScoreClass + class TestLocalScore(unittest.TestCase): np.random.seed(10) @@ -9,21 +10,25 @@ class TestLocalScore(unittest.TestCase): X_prime = np.random.randn(300, 1) Y = X + 0.5 * np.random.randn(300, 1) Z = Y + 0.5 * np.random.randn(300, 1) - data = np.mat(np.hstack((X, X_prime, Y, Z))) + data = np.hstack((X, X_prime, Y, Z)) np.random.seed(10) X_slash = np.random.randint(0, 10, (300, 1)) X_prime_slash = np.random.randint(0, 10, (300, 1)) Y_slash = X_slash + np.random.randint(0, 10, (300, 1)) Z_slash = Y_slash + np.random.randint(0, 10, (300, 1)) - data_slash = np.mat(np.hstack((X_slash, X_prime_slash, Y_slash, Z_slash))) + data_slash = np.hstack((X_slash, X_prime_slash, Y_slash, Z_slash)) def test_local_score_marginal_multi(self): - parameters = {'dlabel': {}} + parameters = {"dlabel": {}} for i in range(self.data.shape[1]): - parameters['dlabel'][i] = i + parameters["dlabel"][i] = i - localScoreClass = LocalScoreClass(data=self.data, local_score_fun=local_score_marginal_multi, parameters=parameters) + localScoreClass = LocalScoreClass( + data=self.data, + local_score_fun=local_score_marginal_multi, + parameters=parameters, + ) q = localScoreClass.score(0, [0]) p = localScoreClass.score(0, [1]) @@ -32,11 +37,17 @@ def test_local_score_marginal_multi(self): assert q < v < p def test_local_score_CV_multi(self): - parameters = {'kfold': 10, 'lambda': 0.01, 'dlabel': {}} # regularization parameter + parameters = { + "kfold": 10, + "lambda": 0.01, + "dlabel": {}, + } # regularization parameter for i in range(self.data.shape[1]): - parameters['dlabel'][i] = i + parameters["dlabel"][i] = i - localScoreClass = LocalScoreClass(data=self.data, local_score_fun=local_score_cv_multi, parameters=parameters) + localScoreClass = LocalScoreClass( + data=self.data, local_score_fun=local_score_cv_multi, parameters=parameters + ) q = localScoreClass.score(0, [0]) p = localScoreClass.score(0, [1]) @@ -48,7 +59,9 @@ def test_local_score_BIC(self): parameters = {} parameters["lambda_value"] = 2 - localScoreClass = LocalScoreClass(data=self.data, local_score_fun=local_score_BIC, 
parameters=parameters) + localScoreClass = LocalScoreClass( + data=self.data, local_score_fun=local_score_BIC, parameters=parameters + ) q = localScoreClass.score(0, [0]) p = localScoreClass.score(0, [1]) @@ -57,10 +70,16 @@ def test_local_score_BIC(self): assert q < v < p def test_local_score_CV_general(self): - parameters = {'kfold': 10, # 10 fold cross validation - 'lambda': 0.01} # regularization parameter + parameters = { + "kfold": 10, # 10 fold cross validation + "lambda": 0.01, + } # regularization parameter - localScoreClass = LocalScoreClass(data=self.data, local_score_fun=local_score_cv_general, parameters=parameters) + localScoreClass = LocalScoreClass( + data=self.data, + local_score_fun=local_score_cv_general, + parameters=parameters, + ) q = localScoreClass.score(0, [0]) p = localScoreClass.score(0, [1]) @@ -71,7 +90,11 @@ def test_local_score_CV_general(self): def test_local_score_marginal_general(self): parameters = {} - localScoreClass = LocalScoreClass(data=self.data, local_score_fun=local_score_marginal_general, parameters=parameters) + localScoreClass = LocalScoreClass( + data=self.data, + local_score_fun=local_score_marginal_general, + parameters=parameters, + ) q = localScoreClass.score(0, [0]) p = localScoreClass.score(0, [1]) @@ -80,7 +103,9 @@ def test_local_score_marginal_general(self): assert q < v < p def test_local_score_BDeu(self): - localScoreClass = LocalScoreClass(data=self.data_slash, local_score_fun=local_score_BDeu, parameters=None) + localScoreClass = LocalScoreClass( + data=self.data_slash, local_score_fun=local_score_BDeu, parameters=None + ) q = localScoreClass.score(0, [0]) p = localScoreClass.score(0, [1])
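
Editor's note: the behavioral core of this patch is the migration from np.matrix to plain ndarray: np.mat(...) wrappers are dropped, matrix products switch from * to @, and column slices gain reshape(-1, 1) because ndarray indexing drops the dimension that np.matrix used to keep. The standalone sketch below (not part of the patch; variable names are illustrative) demonstrates the two semantic differences the hunks above compensate for.

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.standard_normal((5, 1))      # a 2-D column of samples

    # On np.matrix, `*` meant matrix multiplication; on ndarray it is
    # elementwise, so Gram-style products must use `@`:
    gram = X @ X.T                       # (5, 5) outer product, replaces X * X.T

    # ndarray slicing drops a dimension where np.matrix kept it, hence the
    # reshape(-1, 1) calls added throughout the diff:
    col = X[:, 0]                        # shape (5,)  -- now 1-D
    col2d = X[:, 0].reshape(-1, 1)       # shape (5, 1) -- matches old np.mat behavior

    # Centering matrix built from plain ndarrays, as in the kernel code:
    T = X.shape[0]
    H0 = np.eye(T) - np.ones((T, T)) / T
    K_centered = H0 @ gram @ H0          # H0 * gram * H0 would now be elementwise

One consequence worth double-checking in review: any `*` left between two 2-D ndarrays after this migration (for example the np.linalg.inv(L.T) * (np.linalg.inv(L) * np.eye(n)) term in gpr_multi_new, or np.diag(1 / ell) * x.T inside the sq_dist call sites) now multiplies elementwise rather than as matrices.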