 EPS = np.finfo(float).eps
 
 
+@make_insert_doc()
 class ULSIF(BaseAdaptEstimator):
     """
-    RULSIF: Relative least-squares importance fitting
+    ULSIF: Unconstrained Least-Squares Importance Fitting
 
-    RULSIF is an instance-based method for domain adaptation.
+    ULSIF is an instance-based method for domain adaptation.
 
     The purpose of the algorithm is to correct the difference between
     input distributions of source and target domains. This is done by
@@ -33,12 +34,12 @@ class ULSIF(BaseAdaptEstimator):
     Where:
 
     - :math:`x, x_i` are input instances.
-    - :math:`X_T` is the target input data.
+    - :math:`X_T` is the target input data of size :math:`n_T`.
    - :math:`\\theta_i` are the basis function coefficients.
    - :math:`K(x, x_i) = \\text{exp}(-\\gamma ||x - x_i||^2)`
       for instance if ``kernel="rbf"``.
 
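For intuition, here is a minimal NumPy sketch of this weighting model. It is an editorial illustration, not the adapt API: ``importance_weights`` is a hypothetical helper, with ``theta`` and ``centers`` standing for the fitted coefficients and the kernel centers.

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

def importance_weights(X, centers, theta, gamma=1.0):
    # w(x) = sum_i theta_i * K(x, x_i), evaluated for every row of X
    K = rbf_kernel(X, centers, gamma=gamma)  # shape (n_samples, n_centers)
    return K.dot(theta).ravel()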
-    KLIEP algorithm consists in finding the optimal :math:`\\theta` according to
+    The ULSIF algorithm consists in finding the optimal :math:`\\theta` according to
     the quadratic problem
 
     .. math::
@@ -50,26 +51,28 @@ class ULSIF(BaseAdaptEstimator):
 
     .. math::
 
-        H_{ll'} = \\frac{\\alpha}{n_s} \\sum_{x_i \\in X_S} K(x_i, x_l) K(x_i, x_l') + \\frac{1-\\alpha}{n_t} \\sum_{x_i \\in X_T} K(x_i, x_l) K(x_i, x_l')
+        H_{ll'} = \\frac{1}{n_s} \\sum_{x_i \\in X_S} K(x_i, x_l) K(x_i, x_l') \\
         h_{l} = \\frac{1}{n_T} \\sum_{x_i \\in X_T} K(x_i, x_l)
 
-    Where:
-
-    - :math:`X_T` is the source input data of size :math:`n_T`.
 
     The above optimization problem is solved by the closed-form expression
 
-    - :math:`\\hat{\\theta} = (H + \\lambda I_{n_s})^{-1} h`
+    .. math::
+
+        \\hat{\\theta} = (H + \\lambda I)^{-1} h
 
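A minimal NumPy sketch of this closed form (again an illustration, assuming ``centers`` is the subset of target points used as kernel centers; ``fit_theta`` is a hypothetical name, not the adapt API):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

def fit_theta(Xs, Xt, centers, gamma=1.0, lamb=1.0):
    Ks = rbf_kernel(Xs, centers, gamma=gamma)  # K(x_i, x_l) over source points
    Kt = rbf_kernel(Xt, centers, gamma=gamma)  # K(x_i, x_l) over target points
    H = Ks.T.dot(Ks) / len(Xs)                 # H_{ll'} from the source sum
    h = Kt.mean(axis=0).reshape(-1, 1)         # h_l from the target sum
    theta = np.linalg.solve(H + lamb * np.eye(len(centers)), h)
    return np.maximum(theta, 0)                # clip negative coefficients, as in `_fit` below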
-    Furthemore the method admits a leave one out cross validation score that has a clossed expression
+    Furthermore, the method admits a leave-one-out cross-validation score that has a closed-form expression
     and can be used to select the appropriate parameters of the kernel function :math:`K` (typically, the parameter
     :math:`\\gamma` of the Gaussian kernel). The parameter is then selected using
     cross-validation on the :math:`J` score defined as follows:
-    :math:`J = -\\frac{\\alpha}{2|X_S|} \\sum_{x \\in X_S} w(x)^2 - \\frac{1-\\alpha}{2|X_T|} \\sum_{x \\in X_T} w(x)^2`
+
+    .. math::
+
+        J = -\\frac{1}{2 n_s} \\sum_{x \\in X_S} w(x)^2
 
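As a rough illustration of that selection loop (using the plain J-score above rather than the leave-one-out shortcut), reusing the hypothetical ``fit_theta`` and ``importance_weights`` helpers from the earlier sketches:

import numpy as np

def select_gamma(Xs, Xt, centers, gammas, lamb=1.0):
    best_gamma, best_j = None, -np.inf
    for gamma in gammas:
        theta = fit_theta(Xs, Xt, centers, gamma=gamma, lamb=lamb)
        w = importance_weights(Xs, centers, theta, gamma=gamma)
        j = -0.5 * np.mean(w ** 2)  # J = -1/(2 n_s) * sum_x w(x)^2; larger is better
        if j > best_j:
            best_gamma, best_j = gamma, j
    return best_gamma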
     Finally, an estimator is fitted using the reweighted labeled source instances.
 
-    RULSIF method has been originally introduced for **unsupervised**
+    The ULSIF method was originally introduced for **unsupervised**
     DA but can be extended to **supervised** DA by simply adding labeled
     target data to the training set.
 
@@ -81,9 +84,10 @@ class ULSIF(BaseAdaptEstimator):
         ‘linear’, ‘poly’, ‘polynomial’, ‘rbf’,
         ‘laplacian’, ‘sigmoid’, ‘cosine’]
 
-    sigmas : float or list of float (default=None)
-        Deprecated, please use the ``gamma`` parameter
-        instead. (See below).
+    lambdas : float or list of float (default=1.)
+        Regularization parameter of the least-squares
+        problem. If a list is given, the best lambda is
+        selected based on the unsupervised Leave-One-Out
+        J-score.
 
     max_centers : int (default=100)
         Maximum number of target instances used to
@@ -131,6 +135,7 @@ class ULSIF(BaseAdaptEstimator):
         parameter description).
         If a list is given, the LCV process is performed to
         select the best parameter ``degree``.
+
     Attributes
     ----------
     weights_ : numpy array
@@ -156,55 +161,38 @@ class ULSIF(BaseAdaptEstimator):
 
     Examples
     --------
-    >>> import numpy as np
-    >>> from adapt.instance_based import KLIEP
-    >>> np.random.seed(0)
-    >>> Xs = np.random.randn(50) * 0.1
-    >>> Xs = np.concatenate((Xs, Xs + 1.))
-    >>> Xt = np.random.randn(100) * 0.1
-    >>> ys = np.array([-0.2 * x if x < 0.5 else 1. for x in Xs])
-    >>> yt = -0.2 * Xt
-    >>> kliep = KLIEP(sigmas=[0.1, 1, 10], random_state=0)
-    >>> kliep.fit_estimator(Xs.reshape(-1, 1), ys)
-    >>> np.abs(kliep.predict(Xt.reshape(-1, 1)).ravel() - yt).mean()
-    0.09388...
-    >>> kliep.fit(Xs.reshape(-1, 1), ys, Xt.reshape(-1, 1))
-    Fitting weights...
-    Cross Validation process...
-    Parameter sigma = 0.1000 -- J-score = 0.059 (0.001)
-    Parameter sigma = 1.0000 -- J-score = 0.427 (0.003)
-    Parameter sigma = 10.0000 -- J-score = 0.704 (0.017)
-    Fitting estimator...
-    >>> np.abs(kliep.predict(Xt.reshape(-1, 1)).ravel() - yt).mean()
-    0.00302...
+    >>> from sklearn.linear_model import RidgeClassifier
+    >>> from adapt.utils import make_classification_da
+    >>> from adapt.instance_based import ULSIF
+    >>> Xs, ys, Xt, yt = make_classification_da()
+    >>> model = ULSIF(RidgeClassifier(0.), Xt=Xt, kernel="rbf",
+    ...               lambdas=[0.1, 1., 10.], gamma=[0.1, 1., 10.], random_state=0)
+    >>> model.fit(Xs, ys);
+    >>> model.score(Xt, yt)
+    0.71
+
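As a possible follow-up to this example (not part of the commit), the fitted importance weights can be inspected through ``predict_weights``, which is documented further down in this class:

src_weights = model.predict_weights()  # weights of the fitted source data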
     See also
     --------
-    KMM
+    RULSIF
+    KLIEP
+
     References
     ----------
-    .. [1] `[1] <https://proceedings.neurips.cc/paper/2011/file/
-           d1f255a373a3cef72e03aa9d980c7eca-Paper.pdf>`_ \
-           M. Yamada, T. Suzuki, T. Kanamori, H. Hachiya and M. Sugiyama. \
-           "Relative Density-Ratio Estimation
-           for Robust Distribution Comparison". In NIPS 2011
+    .. [1] `[1] <https://www.jmlr.org/papers/volume10/kanamori09a/kanamori09a.pdf>`_ \
+           Takafumi Kanamori, Shohei Hido and Masashi Sugiyama. \
+           "A Least-squares Approach to Direct Importance Estimation". In JMLR 2009
     """
     def __init__(self,
                  estimator=None,
                  Xt=None,
                  kernel="rbf",
-                 sigmas=None,
-                 lambdas=None,
+                 lambdas=1.,
                  max_centers=100,
                  copy=True,
                  verbose=1,
                  random_state=None,
                  **params):
 
-        if sigmas is not None:
-            warnings.warn("The `sigmas` argument is deprecated, "
-                          "please use the `gamma` argument instead.",
-                          DeprecationWarning)
-
         names = self._get_param_names()
         kwargs = {k: v for k, v in locals().items() if k in names}
         kwargs.update(params)
@@ -239,10 +227,6 @@ def fit_weights(self, Xs, Xt, **kwargs):
         kernel_params = {k: v for k, v in self.__dict__.items()
                          if k in KERNEL_PARAMS[self.kernel]}
 
-        # Handle deprecated sigmas (will be removed)
-        if (self.sigmas is not None) and (not "gamma" in kernel_params):
-            kernel_params["gamma"] = self.sigmas
-
         kernel_params_dict = {k: (v if hasattr(v, "__iter__") else [v]) for k, v in kernel_params.items()}
         lambdas_params_dict = {"lamb": (self.lambdas if hasattr(self.lambdas, "__iter__") else [self.lambdas])}
         options = kernel_params_dict
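For context, the cross-product of such option lists could be enumerated with something like the following sketch (hypothetical, not the library's internal code):

import itertools

options = {"gamma": [0.1, 1., 10.], "lamb": [0.1, 1., 10.]}
keys = list(options)
for values in itertools.product(*options.values()):
    params = dict(zip(keys, values))  # e.g. {"gamma": 0.1, "lamb": 0.1}
    # fit thetas and compute the LOO J-score for this combination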
@@ -343,7 +327,7 @@ def predict_weights(self, X=None):
         Return fitted source weights
 
         If ``None``, the fitted source weights are returned.
-        Else, sample weights are computing using the fitted
+        Else, sample weights are computed using the fitted
         ``thetas_`` and the chosen ``centers_``.
 
         Parameters
@@ -393,4 +377,4 @@ def _fit(self, Xs, Xt, kernel_params,lamb):
         h = h.reshape(-1, 1)
         theta = np.linalg.solve(H + lamb * np.eye(n_centers), h)
         theta[theta < 0] = 0
-        return theta, centers
+        return theta, centers