Skip to content

Commit aebd3bf

Browse files
authored
Merge pull request #657 from yzhao062/development
V2.0.7
2 parents d81080e + 9cb0981 commit aebd3bf

File tree

19 files changed

+277
-41
lines changed

19 files changed

+277
-41
lines changed

.github/workflows/testing.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,14 @@ jobs:
4747
coverage run --source=pyod -m pytest
4848
4949
- name: Coverage report
50+
shell: bash
5051
env:
5152
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
5253
COVERALLS_SERVICE_NAME: github-actions
5354
run: |
54-
coveralls --service=github || echo "Coveralls failed to submit - retrying..." && coveralls --service=github
55+
coveralls --service=github --no-fail || true
56+
echo "Coveralls upload attempted with --no-fail; retrying once in 15s for best effort."
57+
sleep 15
58+
coveralls --service=github --no-fail || true
5559
5660

CHANGES.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,4 +203,8 @@ v<2.0.4>, <04/29/2025> -- Mistakenly we skipped 2.0.4.
203203
v<2.0.5>, <04/29/2025> -- Add wheel for better installation.
204204
v<2.0.6>, <09/04/2025> -- Finally, add the auto model selector (#616).
205205
v<2.0.6>, <12/01/2025> -- Precaution for new sklearn breaking change (#649).
206-
v<2.0.7>, <01/04/2026> -- Fix compatability issue of new sklearn.
206+
v<2.0.7>, <01/04/2026> -- Fix compatibility issue of new sklearn.
207+
v<2.0.7>, <02/27/2026> -- Improve NearestNeighbors consistency/performance in KNN, ABOD, SOD, and LUNAR (issue #654).
208+
v<2.0.7>, <02/27/2026> -- VAE default output activation changed to identity and added identity activation support/tests (issue #651).
209+
v<2.0.7>, <02/27/2026> -- Fix package_data/MANIFEST configuration to include auto model selector JSON resources in distributions (issue #642).
210+
v<2.0.7>, <02/27/2026> -- In BaseDeepLearningDetector, explicitly ignore y during unsupervised fit to avoid batch tuple/list device errors in DL detectors (issue #591).

MANIFEST.in

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ prune notebooks
33
prune pyod/test
44
prune README.md
55
include README.rst
6-
include requirements.txt
6+
include requirements.txt
7+
recursive-include pyod/utils/model_analysis_jsons *.json

pyod/models/abod.py

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
import numpy as np
1212
from numba import njit
13-
from sklearn.neighbors import KDTree
1413
from sklearn.neighbors import NearestNeighbors
1514
from sklearn.utils import check_array
1615
from sklearn.utils.validation import check_is_fitted
@@ -117,6 +116,31 @@ class ABOD(BaseDetector):
117116
- 'default': original ABOD with all training points, which could be
118117
slow
119118
119+
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional (default='auto')
120+
Algorithm used to compute nearest neighbors when ``method='fast'``.
121+
122+
- 'ball_tree' will use BallTree
123+
- 'kd_tree' will use KDTree
124+
- 'brute' will use a brute-force search
125+
- 'auto' will attempt to choose the most appropriate algorithm
126+
127+
leaf_size : int, optional (default=30)
128+
Leaf size passed to nearest-neighbor tree backends when applicable.
129+
This can affect construction/query speed and memory usage.
130+
131+
metric : str or callable, optional (default='minkowski')
132+
Distance metric used for nearest-neighbor computation in fast mode.
133+
134+
p : int, optional (default=2)
135+
Power parameter for the Minkowski metric when ``metric='minkowski'``.
136+
137+
metric_params : dict, optional (default=None)
138+
Additional keyword arguments for the metric function.
139+
140+
n_jobs : int, optional (default=1)
141+
Number of parallel jobs for nearest-neighbor search.
142+
If ``-1``, all available CPU cores are used.
143+
120144
Attributes
121145
----------
122146
decision_scores_ : numpy array of shape (n_samples,)
@@ -137,10 +161,19 @@ class ABOD(BaseDetector):
137161
``threshold_`` on ``decision_scores_``.
138162
"""
139163

140-
def __init__(self, contamination=0.1, n_neighbors=5, method='fast'):
164+
def __init__(self, contamination=0.1, n_neighbors=5, method='fast',
165+
algorithm='auto', leaf_size=30, metric='minkowski', p=2,
166+
metric_params=None, n_jobs=1, **kwargs):
141167
super(ABOD, self).__init__(contamination=contamination)
142168
self.method = method
143169
self.n_neighbors = n_neighbors
170+
self.algorithm = algorithm
171+
self.leaf_size = leaf_size
172+
self.metric = metric
173+
self.p = p
174+
self.metric_params = metric_params
175+
self.n_jobs = n_jobs
176+
self.kwargs = kwargs
144177

145178
def fit(self, X, y=None):
146179
"""Fit detector. y is ignored in unsupervised methods.
@@ -208,12 +241,19 @@ def _fit_fast(self):
208241
check_parameter(self.n_neighbors, 1, self.n_train_,
209242
include_left=True, include_right=True)
210243

211-
self.tree_ = KDTree(self.X_train_)
212-
213-
neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
214-
neigh.fit(self.X_train_)
215-
ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
216-
return_distance=False)
244+
self.neigh_ = NearestNeighbors(n_neighbors=self.n_neighbors,
245+
algorithm=self.algorithm,
246+
leaf_size=self.leaf_size,
247+
metric=self.metric,
248+
p=self.p,
249+
metric_params=self.metric_params,
250+
n_jobs=self.n_jobs,
251+
**self.kwargs)
252+
self.neigh_.fit(self.X_train_)
253+
self.tree_ = self.neigh_
254+
ind_arr = self.neigh_.kneighbors(self.X_train_,
255+
n_neighbors=self.n_neighbors,
256+
return_distance=False)
217257

218258
for i in range(self.n_train_):
219259
curr_pt = self.X_train_[i, :]
@@ -293,12 +333,13 @@ def _decision_function_fast(self, X):
293333
294334
"""
295335

296-
check_is_fitted(self, ['tree_'])
336+
check_is_fitted(self, ['neigh_'])
297337
# initialize the output score
298338
pred_score = np.zeros([X.shape[0], 1])
299339

300340
# get the indexes of the X's k nearest training points
301-
_, ind_arr = self.tree_.query(X, k=self.n_neighbors)
341+
_, ind_arr = self.neigh_.kneighbors(X, n_neighbors=self.n_neighbors,
342+
return_distance=True)
302343

303344
for i in range(X.shape[0]):
304345
curr_pt = X[i, :]

pyod/models/base_dl.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,10 @@ def fit(self, X, y=None):
186186
if self.preprocessing:
187187
self.X_mean = np.mean(X, axis=0)
188188
self.X_std = np.std(X, axis=0)
189-
train_set = TorchDataset(X=X, y=y,
189+
train_set = TorchDataset(X=X, y=None,
190190
mean=self.X_mean, std=self.X_std)
191191
else:
192-
train_set = TorchDataset(X=X, y=y)
192+
train_set = TorchDataset(X=X, y=None)
193193

194194
# create data loader
195195
train_loader = torch.utils.data.DataLoader(

pyod/models/knn.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -232,26 +232,23 @@ def decision_function(self, X):
232232
-------
233233
anomaly_scores : numpy array of shape (n_samples,)
234234
The anomaly score of the input samples.
235+
236+
Notes
237+
-----
238+
This method performs batched neighbor queries through the fitted
239+
``NearestNeighbors`` estimator, so runtime behavior follows the
240+
configured neighbor-search parameters (e.g., ``algorithm``,
241+
``metric``, and ``n_jobs``).
235242
"""
236-
check_is_fitted(self, ['tree_', 'decision_scores_',
237-
'threshold_', 'labels_'])
243+
check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
238244

239245
X = check_array(X)
240246

241-
# initialize the output score
242-
pred_scores = np.zeros([X.shape[0], 1])
243-
244-
for i in range(X.shape[0]):
245-
x_i = X[i, :]
246-
x_i = np.asarray(x_i).reshape(1, x_i.shape[0])
247-
248-
# get the distance of the current point
249-
dist_arr, _ = self.tree_.query(x_i, k=self.n_neighbors)
250-
dist = self._get_dist_by_method(dist_arr)
251-
pred_score_i = dist[-1]
252-
253-
# record the current item
254-
pred_scores[i, :] = pred_score_i
247+
# Use the fitted NearestNeighbors object for batch querying so
248+
# query-time behavior is consistent with fit-time configuration.
249+
dist_arr, _ = self.neigh_.kneighbors(
250+
X, n_neighbors=self.n_neighbors, return_distance=True)
251+
pred_scores = self._get_dist_by_method(dist_arr)
255252

256253
return pred_scores.ravel()
257254

pyod/models/lunar.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ class LUNAR(BaseDetector):
124124
125125
n_neighbors: int, optional (default = 5)
126126
Number of neighbors to use by default for k neighbors queries.
127+
In the implementation, the constructor argument name is
128+
``n_neighbours`` for backward compatibility.
127129
128130
negative_sampling: str in ['UNIFORM', 'SUBSPACE', 'MIXED'], optional (default = 'MIXED')
129131
Type of negative samples to use between:
@@ -158,6 +160,27 @@ class LUNAR(BaseDetector):
158160
verbose: int in {0,1}, optional (default = 0):
159161
To view or hide training progress
160162
163+
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional (default='auto')
164+
Algorithm used to compute nearest neighbors.
165+
166+
leaf_size : int, optional (default=30)
167+
Leaf size passed to nearest-neighbor tree backends when applicable.
168+
This can affect construction/query speed and memory usage.
169+
170+
metric : str or callable, optional (default='minkowski')
171+
Distance metric used for nearest-neighbor computation.
172+
173+
p : int, optional (default=2)
174+
Power parameter for the Minkowski metric when
175+
``metric='minkowski'``.
176+
177+
metric_params : dict, optional (default=None)
178+
Additional keyword arguments for the metric function.
179+
180+
n_jobs : int, optional (default=1)
181+
Number of parallel jobs for nearest-neighbor search.
182+
If ``-1``, all available CPU cores are used.
183+
161184
Attributes
162185
----------
163186
"""
@@ -166,7 +189,9 @@ def __init__(self, model_type="WEIGHT", n_neighbours=5,
166189
negative_sampling="MIXED",
167190
val_size=0.1, scaler=MinMaxScaler(), epsilon=0.1,
168191
proportion=1.0,
169-
n_epochs=200, lr=0.001, wd=0.1, verbose=0, contamination=0.1):
192+
n_epochs=200, lr=0.001, wd=0.1, verbose=0, contamination=0.1,
193+
algorithm='auto', leaf_size=30, metric='minkowski', p=2,
194+
metric_params=None, n_jobs=1, **kwargs):
170195
super(LUNAR, self).__init__(contamination=contamination)
171196

172197
self.model_type = model_type
@@ -180,6 +205,13 @@ def __init__(self, model_type="WEIGHT", n_neighbours=5,
180205
self.wd = wd
181206
self.val_size = val_size
182207
self.verbose = verbose
208+
self.algorithm = algorithm
209+
self.leaf_size = leaf_size
210+
self.metric = metric
211+
self.p = p
212+
self.metric_params = metric_params
213+
self.n_jobs = n_jobs
214+
self.kwargs = kwargs
183215
self.device = torch.device(
184216
'cuda' if torch.cuda.is_available() else 'cpu')
185217

@@ -239,7 +271,14 @@ def fit(self, X, y=None):
239271
val_x = np.vstack((val_x, neg_val_x))
240272
val_y = np.hstack((val_y, neg_val_y))
241273

242-
self.neigh = NearestNeighbors(n_neighbors=self.n_neighbours + 1)
274+
self.neigh = NearestNeighbors(n_neighbors=self.n_neighbours + 1,
275+
algorithm=self.algorithm,
276+
leaf_size=self.leaf_size,
277+
metric=self.metric,
278+
p=self.p,
279+
metric_params=self.metric_params,
280+
n_jobs=self.n_jobs,
281+
**self.kwargs)
243282
self.neigh.fit(train_x)
244283

245284
# nearest neighbours of training set

pyod/models/sod.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,27 @@ class SOD(BaseDetector):
6262
specifies the lower limit for selecting subspace.
6363
0.8 is set as default as suggested in the original paper.
6464
65+
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional (default='auto')
66+
Algorithm used to compute nearest neighbors.
67+
68+
leaf_size : int, optional (default=30)
69+
Leaf size passed to nearest-neighbor tree backends when applicable.
70+
This can affect construction/query speed and memory usage.
71+
72+
metric : str or callable, optional (default='minkowski')
73+
Distance metric used for nearest-neighbor computation.
74+
75+
p : int, optional (default=2)
76+
Power parameter for the Minkowski metric when
77+
``metric='minkowski'``.
78+
79+
metric_params : dict, optional (default=None)
80+
Additional keyword arguments for the metric function.
81+
82+
n_jobs : int, optional (default=1)
83+
Number of parallel jobs for nearest-neighbor search.
84+
If ``-1``, all available CPU cores are used.
85+
6586
contamination : float in (0., 0.5), optional (default=0.1)
6687
The amount of contamination of the data set, i.e.
6788
the proportion of outliers in the data set. Used when fitting to
@@ -88,7 +109,9 @@ class SOD(BaseDetector):
88109
"""
89110

90111
def __init__(self, contamination=0.1, n_neighbors=20, ref_set=10,
91-
alpha=0.8):
112+
alpha=0.8, algorithm='auto', leaf_size=30,
113+
metric='minkowski', p=2, metric_params=None, n_jobs=1,
114+
**kwargs):
92115
super(SOD, self).__init__(contamination=contamination)
93116
if isinstance(n_neighbors, int):
94117
check_parameter(n_neighbors, low=1, param_name='n_neighbors')
@@ -110,6 +133,13 @@ def __init__(self, contamination=0.1, n_neighbors=20, ref_set=10,
110133
self.n_neighbors = n_neighbors
111134
self.ref_set = ref_set
112135
self.alpha = alpha
136+
self.algorithm = algorithm
137+
self.leaf_size = leaf_size
138+
self.metric = metric
139+
self.p = p
140+
self.metric_params = metric_params
141+
self.n_jobs = n_jobs
142+
self.kwargs = kwargs
113143

114144
def fit(self, X, y=None):
115145
"""Fit detector. y is ignored in unsupervised methods.
@@ -165,7 +195,14 @@ def _snn(self, X):
165195
snn_indices : numpy array of shape (n_shared_nearest_neighbors,)
166196
The indices of top k shared nearest neighbors for each observation.
167197
"""
168-
knn = NearestNeighbors(n_neighbors=self.n_neighbors)
198+
knn = NearestNeighbors(n_neighbors=self.n_neighbors,
199+
algorithm=self.algorithm,
200+
leaf_size=self.leaf_size,
201+
metric=self.metric,
202+
p=self.p,
203+
metric_params=self.metric_params,
204+
n_jobs=self.n_jobs,
205+
**self.kwargs)
169206
knn.fit(X)
170207
# Get the knn index
171208
ind = knn.kneighbors(return_distance=False)

pyod/models/vae.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,10 @@ class VAE(BaseDeepLearningDetector):
163163
hidden_activation_name : str, optional (default='relu')
164164
The activation function used in hidden layers.
165165
166-
output_activation_name : str, optional (default='sigmoid')
166+
output_activation_name : str, optional (default='identity')
167167
The activation function used in output layer.
168+
``identity`` is the default to avoid constraining reconstruction
169+
outputs to a bounded range when preprocessing is enabled.
168170
169171
batch_norm : boolean, optional (default=False)
170172
Whether to apply Batch Normalization,
@@ -213,7 +215,7 @@ def __init__(self, contamination=0.1, preprocessing=True,
213215
decoder_neuron_list=[32, 64, 128],
214216
latent_dim=2,
215217
hidden_activation_name='relu',
216-
output_activation_name='sigmoid',
218+
output_activation_name='identity',
217219
batch_norm=False, dropout_rate=0.2):
218220
super(VAE, self).__init__(contamination=contamination,
219221
preprocessing=preprocessing,

pyod/test/test_abod.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,20 @@ def test_fit_predict_score(self):
135135
def test_model_clone(self):
136136
clone_clf = clone(self.clf)
137137

138+
def test_fast_mode_tree_and_neighbor_model_consistent(self):
139+
assert (hasattr(self.clf, 'neigh_') and self.clf.neigh_ is not None)
140+
assert (self.clf.tree_ is self.clf.neigh_)
141+
142+
def test_fast_mode_neighbor_params_propagation(self):
143+
for algorithm in ['auto', 'kd_tree', 'brute']:
144+
clf = ABOD(contamination=self.contamination, n_neighbors=5,
145+
method='fast', algorithm=algorithm, n_jobs=-1)
146+
clf.fit(self.X_train)
147+
assert_equal(clf.neigh_.algorithm, algorithm)
148+
assert_equal(clf.neigh_.n_jobs, -1)
149+
pred_scores = clf.decision_function(self.X_test)
150+
assert_equal(pred_scores.shape[0], self.X_test.shape[0])
151+
138152
def tearDown(self):
139153
pass
140154

0 commit comments

Comments
 (0)