Skip to content

Commit 2b81c5d

Browse files
authored
Merge pull request #26 from selimfirat/develop
Release v0.3.4
2 parents 4ef6ee5 + 797324b commit 2b81c5d

File tree

8 files changed

+67
-13
lines changed

8 files changed

+67
-13
lines changed

CHANGES.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ v<0.2.0> <06/10/2023> -- Upgraded some libraries like numpy,pandas and small ref
33
v<0.3.0>, <05/23/2025> -- Updated dependencies to latest versions, improved documentation, added quick start example in README. Added Inqmad model. Special thanks to @TechyNilesh @Joaggi @onixlas.
44
v<0.3.1>, <06/04/2025> -- Enhanced README with comprehensive community engagement section, educational content, third-party integrations, and developer community highlights. Fixed sublist formatting in reStructuredText. Improved error handling for Inqmad model imports.
55
v<0.3.2>, <06/14/2025> -- Updated PyOD version and added Python 3.13 support. Fixed Flaticon link in documentation. Enhanced error handling for optional JAX dependencies. Updated rrcf version to 0.4.4 and improved quantization in RelativeEntropy model. Improved test coverage and configuration. Added deprecation warning tests and enhanced model methods array handling. Special thanks to @onixlas for multiple contributions.
6-
v<0.3.3>, <06/19/2025> -- Fixed critical bugs in ReferenceWindowModel: resolved data mutation issues by ensuring reference window data is properly copied, fixed window_length parameter naming to window_size, and improved reference window label management. Enhanced test coverage and fixed minor typo in bibtex reference. Special thanks to @onixlas for bug reports and fixes.
6+
v<0.3.3>, <06/19/2025> -- Fixed critical bugs in ReferenceWindowModel: resolved data mutation issues by ensuring reference window data is properly copied, fixed window_length parameter naming to window_size, and improved reference window label management. Enhanced test coverage and fixed minor typo in bibtex reference. Special thanks to @onixlas for bug reports and fixes.
7+
v<0.3.4>, <06/24/2025> -- Fixed critical reference window reset bug in ReferenceWindowModel (#25), preventing incorrect window reinitialization during sliding window operations. Added validation for single class presence in AUROCMetric evaluation to prevent evaluation errors. Fixed hash seed type casting issue in StreamhashProjector's _hash_string method. Enhanced CI infrastructure with Python 3.13 support and updated dependency management. Special thanks to @onixlas for discovering and reporting the reference window bug.

README.rst

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -191,13 +191,10 @@ Quick Links
191191
Contributors
192192
============
193193

194-
.. raw:: html
195-
196-
<div align="center">
197-
<a href="https://github.com/selimfirat/pysad/graphs/contributors">
198-
<img src="https://contrib.rocks/image?repo=selimfirat/pysad" alt="Contributors" />
199-
</a>
200-
</div>
194+
.. image:: https://contrib.rocks/image?repo=selimfirat/pysad
195+
:target: https://github.com/selimfirat/pysad/graphs/contributors
196+
:alt: Contributors
197+
:align: center
201198

202199
We thank all our contributors for their valuable input and efforts to make PySAD better!
203200

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "pysad"
7-
version = "0.3.3"
7+
version = "0.3.4"
88
dependencies = [
99
"numpy",
1010
"scikit-learn",
@@ -20,7 +20,7 @@ maintainers = [
2020
{name = "Selim Firat Yilmaz", email = "yilmazselimfirat@gmail.com"}
2121
]
2222
description = "PySAD is an open-source python framework for anomaly detection on streaming multivariate data."
23-
readme = "README.rst"
23+
readme = {file = "README.rst", content-type = "text/x-rst"}
2424
license = {file = "LICENSE"}
2525
keywords = ["outlier detection", "anomaly detection", "machine learning"]
2626
classifiers = [

pysad/evaluation/metrics.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ class AUROCMetric(BaseSKLearnMetric):
6363
"""
6464

6565
def _evaluate(self, y_true, y_pred):
66+
# Check if only one class is present
67+
if len(set(y_true)) <= 1:
68+
raise ValueError("Only one class present in y_true. ROC AUC score is not defined in that case.")
6669
return roc_auc_score(y_true, y_pred)
6770

6871

pysad/models/integrations/reference_window_model.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ def fit_partial(self, X, y=None):
6464
self.cur_window_y.append(y)
6565

6666
if not self.initial_ref_window and len(
67-
self.cur_window_X) < self.window_size:
67+
self.cur_window_X) < self.window_size and (
68+
self.reference_window_X is None or len(self.reference_window_X) < self.window_size):
6869
self.reference_window_X = self.cur_window_X.copy()
6970
self.reference_window_y = self.cur_window_y.copy() if y is not None else None
7071
self._fit_model()

pysad/transform/projection/streamhash_projector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def transform_partial(self, X):
5353

5454
def _hash_string(self, k, s):
5555
import mmh3
56-
hash_value = int(mmh3.hash(s, signed=False, seed=k)) / (2.0 ** 32 - 1)
56+
hash_value = int(mmh3.hash(s, signed=False, seed=int(k))) / (2.0 ** 32 - 1)
5757
s = self.density
5858
if hash_value <= s / 2.0:
5959
return -1 * self.constant

pysad/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@
2121
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
2222
#
2323

24-
__version__ = '0.3.3'
24+
__version__ = '0.3.4'

tests/models/test_pyod_integrations.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,3 +271,55 @@ def test_reference_window_model_issue_23_fix():
271271
expected = [1.0, 2.0]
272272
assert ref_values == expected, \
273273
f"Expected {expected}, but got {ref_values}. Duplication detected!"
274+
275+
276+
def test_reference_window_model_issue_25_fix():
277+
"""Test for the fix of issue #25: Reference window reset bug.
278+
279+
The bug: When cur_window_X length is less than window_size after sliding,
280+
the reference_window_X is incorrectly reset to just the current window
281+
instead of maintaining the properly sized reference window.
282+
283+
Test scenario: window_size=4, sliding_size=2, streaming data [1,2,3,4,5,6,7,8,...]
284+
The critical test is after sliding occurs and cur_window_X becomes small again,
285+
the reference_window_X should NOT be reset to just the current window.
286+
"""
287+
from pysad.models.integrations.reference_window_model import ReferenceWindowModel
288+
from pyod.models.iforest import IForest
289+
import numpy as np
290+
291+
window_size = 4
292+
sliding_size = 2
293+
294+
model = ReferenceWindowModel(
295+
model_cls=IForest,
296+
window_size=window_size,
297+
sliding_size=sliding_size,
298+
initial_window_X=None
299+
)
300+
301+
# Stream 8 samples: fills the initial window and then triggers sliding
302+
for i in range(1, 9):
303+
model.fit_partial(np.array([float(i)]))
304+
305+
# At this point, after several sliding operations, the reference window
306+
# should be properly sized (window_size=4) and should NOT be reset to
307+
# just the current window when cur_window_X is small
308+
309+
# Verify reference window is properly sized
310+
assert len(model.reference_window_X) == window_size, \
311+
f"Reference window should be size {window_size}, got {len(model.reference_window_X)}"
312+
313+
# Verify reference window is not just the current window
314+
ref_values = [x[0] for x in model.reference_window_X]
315+
cur_values = [x[0] for x in model.cur_window_X] if model.cur_window_X else []
316+
317+
# The reference window should not be identical to the current window
318+
# (this would indicate the bug where reference_window_X gets reset)
319+
assert ref_values != cur_values, \
320+
f"Reference window should not be reset to current window. Got ref={ref_values}, cur={cur_values}"
321+
322+
# Reference window should still hold a full window_size elements after
323+
# sliding, not just the current partial window (only the length is checked here)
324+
assert len(ref_values) == window_size, \
325+
f"Reference window should maintain size {window_size}, got {len(ref_values)}"

0 commit comments

Comments
 (0)