@@ -650,136 +650,13 @@ def _sample_ou(
         Xc_init = X_init - self.B[:, None]
         for iter_ou in range(self.n_iter_ou):
             noise = self.ampli * self.rng.normal(0, 1, size=(n_variables, n_samples))
-
-            # Xc_lag = np.roll(Xc, 1, axis=1)
-            # Z_back = Xc - self.A @ Xc_lag
-            # Z_fore = np.roll(Z_back, -1, axis=1)
-            # Z_back[:, 0] = 0
-            # Z_fore[:, -1] = 0
-            # Xc = Xc - gamma * self.omega_inv @ Z_back * dt
-            # - gamma * self.A.T @ self.omega_inv @ Z_fore * dt
-            # + np.sqrt(2 * gamma[:, None] * dt) * noise
             grad_X = self.gradient_X_centered_loglik(Xc)
 
-            # print("Xc")
-            # print(Xc)
-            # print(gamma)
-            # print(grad_X)
-            # print(noise)
             Xc = Xc + self.dt * gamma * grad_X + np.sqrt(2 * gamma * self.dt) * noise
 
-            # from matplotlib import pyplot as plt
-            # plt.figure(figsize=(24, 4))
-            # plt.plot(Xc[0], label="before")
-            # plt.plot((self.dt * gamma * grad_X)[0], label="grad")
-            # plt.plot(Xc[0], label="after")
-            # plt.ylim(-2, 2)
-            # plt.legend()
-            # plt.show()
-
             Xc[~mask_na] = Xc_init[~mask_na]
         X = Xc + self.B[:, None]
 
         self.fit_distribution(X)
 
         return X
-
-    # def _sample_ou(
-    #     self,
-    #     X: ArrayLike,
-    #     mask_na: ArrayLike,
-    #     rng: int,
-    #     dt: float = 2e-2,
-    # ) -> ArrayLike:
-    #     """
-    #     Samples the Gaussian distribution under the constraint that not na values must remain
-    #     unchanged, using a projected Ornstein-Uhlenbeck process.
-    #     The sampled distribution tends to the target one in the limit dt -> 0 and
-    #     n_iter_ou x dt -> infty.
-    #     Called by `impute_sample_ts`.
-    #     X = A X_t-1 + B + E where E ~ N(0, omega)
-
-    #     Parameters
-    #     ----------
-    #     df : ArrayLike
-    #         Inital dataframe to be imputed, which should have been already imputed using a simple
-    #         method. This first imputation will be used as an initial guess.
-    #     mask_na : ArrayLike
-    #         Boolean dataframe indicating which coefficients should be resampled.
-    #     rng : int
-    #         Random number generator to be used (for reproducibility).
-    #     n_iter_ou : int
-    #         Number of iterations for the OU process, a large value decreases the sample bias
-    #         but increases the computation time.
-    #     dt : float
-    #         Process integration time step, a large value increases the sample bias and can make
-    #         the algorithm unstable, but compensates for a smaller n_iter_ou. By default, 2e-2.
-    #     ampli : float
-    #         Amplification of the noise, if less than 1 the variance is reduced. By default, 1.
-
-    #     Returns
-    #     -------
-    #     ArrayLike
-    #         DataFrame after Ornstein-Uhlenbeck process.
-    #     """
-    #     n_variables, n_samples = X.shape
-
-    #     X_init = X.copy()
-    #     gamma = np.diagonal(self.omega)
-
-    #     # list_X = []
-    #     list_B = []
-    #     list_XX_lag = []
-    #     list_XX = []
-    #     list_ZZ = []
-    #     for iter_ou in range(self.n_iter_ou):
-    #         noise = self.ampli * rng.normal(0, 1, size=(n_variables, n_samples))
-    #         Xc = X - self.B[:, None]
-    #         Xc_lag = np.roll(Xc, 1, axis=1)
-    #         Z_back = Xc - self.A @ Xc_lag
-    #         Z_back[:, 0] = 0
-    #         Z_front = np.roll(Z_back, -1, axis=1)
-    #         # X = X - self.omega_inv @ Z_back * dt
-    #         - self.A.T @ self.omega_inv @ Z_front * dt + noise * np.sqrt(2 * dt)
-    #         X = X - gamma * self.omega_inv @ Z_back * dt
-    #         - gamma * self.A.T @ self.omega_inv @ Z_front * dt
-    #         + np.sqrt(2 * gamma[:, None] * dt) * noise
-    #         X[~mask_na] = X_init[~mask_na]
-    #         if iter_ou > self.n_iter_ou - 50:
-    #             X_lag = np.roll(X, 1, axis=1)
-    #             B = np.mean(X - self.A @ X_lag, axis=1)
-
-    #             Xc = X - self.B[:, None]
-    #             Xc_lag = X_lag - self.B[:, None]
-
-    #             XX = X @ X.T
-    #             XX_lag = Xc[:, 1:] @ X_lag[:, :-1].T
-
-    #             Z_back = Xc - self.A @ Xc_lag
-    #             Z_back[:, 0] = 0
-
-    #             list_B.append(B)
-    #             list_XX.append(XX)
-    #             list_XX_lag.append(XX_lag)
-    #             list_ZZ.append(Z_back @ Z_back.T)
-
-    #     B_stack = np.stack(list_B, axis=1)
-    #     self.B = np.mean(B_stack, axis=1)
-
-    #     XX_stack = np.stack(list_XX, axis=2)
-    #     # XX_mean = np.mean(XX_stack, axis=2)
-    #     XX_lag_stack = np.stack(list_XX_lag, axis=2)
-    #     # XX_lag_mean = np.mean(XX_lag_stack, axis=2)
-
-    #     # MANOVA formula
-    #     cov_B = np.cov(B_stack, bias=True)
-    #     cov_XX = np.mean(XX_stack, axis=2) + cov_B
-    #     cov_XX_lag = np.mean(XX_lag_stack, axis=2) + cov_B
-    #     self.A = cov_XX_lag @ invert_robust(cov_XX, epsilon=1e-2)
-    #     ZZ_stack = np.stack(list_ZZ, axis=2)
-
-    #     # Biased estimation of omega, ignoring intra-group covariances
-    #     self.omega = np.mean(ZZ_stack, axis=2)  # / n_variables
-    #     self.omega_inv = invert_robust(self.omega, epsilon=1e-2)
-
-    #     return X
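For orientation, the retained loop is a projected Langevin / Ornstein-Uhlenbeck sampler: each iteration takes a preconditioned gradient step on the centered Gaussian log-likelihood, adds matched Gaussian noise, and re-projects the observed entries onto their original values. Below is a minimal, self-contained sketch of that update for a static (non-autoregressive) Gaussian target; `toy_sample_ou` and its arguments are illustrative stand-ins, not part of this codebase, and the AR(1) structure handled by `gradient_X_centered_loglik` is simplified away.

```python
import numpy as np

def toy_sample_ou(X, mask_na, omega, n_iter_ou=1000, dt=2e-2, ampli=1.0, seed=0):
    """Toy projected OU sampler (hypothetical, not the repository's API).

    Resamples the entries of X where mask_na is True from a Gaussian with
    covariance omega, holding the observed entries (mask_na False) fixed.
    X has shape (n_variables, n_samples); omega is (n_variables, n_variables).
    """
    rng = np.random.default_rng(seed)
    omega_inv = np.linalg.inv(omega)
    gamma = np.diagonal(omega)[:, None]  # per-variable step scaling, as in the loop above
    mu = X.mean(axis=1, keepdims=True)   # stands in for self.B[:, None]
    Xc = X - mu
    Xc_init = Xc.copy()
    for _ in range(n_iter_ou):
        noise = ampli * rng.normal(size=X.shape)
        grad_X = -omega_inv @ Xc          # gradient of the centered Gaussian log-likelihood
        Xc = Xc + dt * gamma * grad_X + np.sqrt(2 * gamma * dt) * noise
        Xc[~mask_na] = Xc_init[~mask_na]  # projection: observed entries stay fixed
    return Xc + mu
```

As the deleted docstring notes, the sampled distribution tends to the target one in the limit dt -> 0 with n_iter_ou x dt -> infty, so dt trades sample bias against the number of iterations needed.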