1111
1212def Graph (data ,
1313 n_pca = None ,
14+ rank_threshold = None ,
1415 sample_idx = None ,
1516 adaptive_k = None ,
1617 precomputed = None ,
1718 knn = 5 ,
18- decay = 10 ,
19+ decay = 40 ,
1920 bandwidth = None ,
2021 bandwidth_scale = 1.0 ,
2122 anisotropy = 0 ,
@@ -53,19 +54,29 @@ def Graph(data,
5354 ----------
5455 data : array-like, shape=[n_samples,n_features]
5556 accepted types: `numpy.ndarray`, `scipy.sparse.spmatrix`.
56- TODO: accept pandas dataframes
57+ TODO: accept pandas dataframes
5758
58- n_pca : `int` or `None`, optional (default: `None`)
59+ n_pca : {`int`, `None`, `bool`, 'auto'}, optional (default: `None`)
5960 number of PC dimensions to retain for graph building.
60- If `None`, uses the original data.
61+ If n_pca in `[None, False, 0]`, uses the original data.
62+ If 'auto' or `True` then estimate using a singular value threshold
6163 Note: if data is sparse, uses SVD instead of PCA
6264 TODO: should we subtract and store the mean?
6365
66+ rank_threshold : `float`, 'auto' or `None`, optional (default: `None`)
67+ threshold to use when estimating rank for
68+ `n_pca in [True, 'auto']`.
69+ If 'auto', this threshold is
70+ s_max * eps * max(n_samples, n_features)
71+ where s_max is the maximum singular value of the data matrix
72+ and eps is numerical precision. [press2007]_.
73+
6474 knn : `int`, optional (default: 5)
6575 Number of nearest neighbors (including self) to use to build the graph
6676
67- decay : `int` or `None`, optional (default: 10)
68- Rate of alpha decay to use. If `None`, alpha decay is not used.
77+ decay : `int` or `None`, optional (default: 40)
78+ Rate of alpha decay to use. If `None`, alpha decay is not used and a vanilla
79+ k-Nearest Neighbors graph is returned.
6980
7081 bandwidth : `float`, list-like,`callable`, or `None`, optional (default: `None`)
7182 Fixed bandwidth to use. If given, overrides `knn`. Can be a single
@@ -91,14 +102,14 @@ def Graph(data,
91102 on time and memory constraints.
92103
93104 kernel_symm : string, optional (default: '+')
94- Defines method of MNN symmetrization.
105+ Defines method of kernel symmetrization.
95106 '+' : additive
96107 '*' : multiplicative
97- 'theta ' : min-max
108+ 'mnn' : min-max MNN symmetrization
98109 'none' : no symmetrization
99110
100111 theta: float (default: None)
101- Min-max symmetrization constant or matrix. Only used if kernel_symm='theta '.
112+ Min-max symmetrization constant or matrix. Only used if kernel_symm='mnn'.
102113 K = `theta * min(K, K.T) + (1 - theta) * max(K, K.T)`
103114
104115 precomputed : {'distance', 'affinity', 'adjacency', `None`}, optional (default: `None`)
@@ -155,6 +166,12 @@ def Graph(data,
155166 Raises
156167 ------
157168 ValueError : if selected parameters are incompatible.
169+
170+ References
171+ ----------
172+ .. [press2007] W. Press, S. Teukolsky, W. Vetterling and B. Flannery,
173+ “Numerical Recipes (3rd edition)”,
174+ Cambridge University Press, 2007, page 795.
158175 """
159176 tasklogger .set_level (verbose )
160177 if sample_idx is not None and len (np .unique (sample_idx )) == 1 :
0 commit comments