Skip to content

Commit 4645268

Browse files
author
Jaime Céspedes Sisniega
authored
Merge pull request #141 from IFCA/fix-adwin
Fix ADWIN variance error
2 parents 481d24f + ec2c01b commit 4645268

File tree

2 files changed

+55
-24
lines changed

2 files changed

+55
-24
lines changed

frouros/detectors/concept_drift/streaming/window_based/adwin.py

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ def compress(self, num_items_deleted: int) -> None:
145145

146146
idx_start = self.array_size - num_items_deleted
147147
# fmt: off
148-
self.total[idx_start:self.array_size] = 0
149-
self.variance[idx_start:self.array_size] = 0
148+
self.total[idx_start:self.array_size] = 0.0
149+
self.variance[idx_start:self.array_size] = 0.0
150150
# fmt: on
151151

152152
self.idx -= num_items_deleted
@@ -168,6 +168,7 @@ def __init__(
168168
clock: int = 32,
169169
delta: float = 0.002,
170170
m: int = 5,
171+
min_window_size: int = 5,
171172
min_num_instances: int = 10,
172173
) -> None:
173174
"""Init method.
@@ -179,6 +180,9 @@ def __init__(
179180
:param m: controls the amount of memory used and
180181
the closeness of the cutpoints checked
181182
:type m: int
183+
:param min_window_size: minimum number of instances
184+
per window to start looking for changes
185+
:type min_window_size: int
182186
:param min_num_instances: minimum number of instances
183187
to start looking for changes
184188
:type min_num_instances: int
@@ -187,6 +191,7 @@ def __init__(
187191
self.clock = clock
188192
self.delta = delta
189193
self.m = m
194+
self.min_window_size = min_window_size
190195

191196
@property
192197
def clock(self) -> int:
@@ -252,6 +257,27 @@ def m(self, value: int) -> None:
252257
raise ValueError("m value must be greater than 0.")
253258
self._m = value
254259

260+
@property
261+
def min_window_size(self) -> int:
262+
"""Minimum window size value property.
263+
264+
:return: minimum window size value for each window
265+
:rtype: int
266+
"""
267+
return self._min_window_size
268+
269+
@min_window_size.setter
270+
def min_window_size(self, value: int) -> None:
271+
"""Minimum window size value setter.
272+
273+
:param value: value to be set
274+
:type value: int
275+
:raises ValueError: Value error exception
276+
"""
277+
if value < 1:
278+
raise ValueError("min_window_size value must be greater than 0.")
279+
self._min_window_size = value
280+
255281

256282
class ADWIN(WindowBased):
257283
"""ADWIN (ADaptive WINdowing) [bifet2007learning]_ detector.
@@ -358,6 +384,15 @@ def variance(self, value: float) -> None:
358384
# raise ValueError("variance value must be greater or equal than 0.0.")
359385
self._additional_vars["variance"] = value
360386

387+
@property
388+
def variance_window(self) -> float:
389+
"""Variance in window value property.
390+
391+
:return: variance in window value
392+
:rtype: float
393+
"""
394+
return self.variance / self.width
395+
361396
@property
362397
def width(self) -> int:
363398
"""Width value property.
@@ -422,15 +457,15 @@ def num_max_buckets(self, value: int) -> None:
422457
self._additional_vars["num_max_buckets"] = value
423458

424459
def _insert_bucket(self, value: float) -> None:
425-
self._insert_bucket_data(variance=0, value=value, bucket=self.buckets[0])
460+
self._insert_bucket_data(variance=0.0, value=value, bucket=self.buckets[0])
426461
self.width += 1
427462
incremental_variance = (
428463
(self.width - 1)
429464
* (value - self.total / (self.width - 1))
430465
* (value - self.total / (self.width - 1))
431466
/ self.width
432467
if self.width > 1
433-
else 0
468+
else 0.0
434469
)
435470
self.variance += incremental_variance
436471
self.total += value
@@ -454,9 +489,10 @@ def _delete_bucket(self) -> int:
454489
self.width -= bucket_size
455490
self.total -= bucket.total[0]
456491
bucket_mean = bucket.total[0] / bucket_size
492+
window_mean = self.total / self.width
457493
incremental_variance = bucket.variance[0] + bucket_size * self.width * (
458-
bucket_mean - self.total / self.width
459-
) * (bucket_mean - self.total / self.width) / (bucket_size + self.width)
494+
bucket_mean - window_mean
495+
) * (bucket_mean - window_mean) / (bucket_size + self.width)
460496
self.variance -= incremental_variance
461497

462498
bucket.remove()
@@ -504,18 +540,16 @@ def _compress_buckets(self) -> None:
504540
idx += 1
505541

506542
def _calculate_threshold(self, w0_instances: int, w1_instances: int) -> float:
507-
# NOTE: Review this formula
508-
delta_prime = self.config.delta / np.log( # type: ignore
509-
w0_instances + w1_instances
510-
)
511-
# Has highlighted in river library, the use of the inverse (reciprocal)
543+
delta_prime = np.log(2 * np.log(self.width) / self.config.delta) # type: ignore
544+
# As highlighted in the river library, the use of the reciprocal
512545
# of m avoids extra divisions
513-
m_inv = 1 / (w0_instances - self._min_instances) + 1 / (
514-
w1_instances - self._min_instances
546+
min_window_size = self.config.min_window_size + 1 # type: ignore
547+
m_reciprocal = 1 / (w0_instances - min_window_size) + 1 / (
548+
w1_instances - min_window_size
515549
)
516550
epsilon = (
517-
np.sqrt(2 * m_inv * self.variance * delta_prime)
518-
+ 2 / 3 * delta_prime * m_inv
551+
np.sqrt(2 * m_reciprocal * self.variance_window * delta_prime)
552+
+ 2 / 3 * delta_prime * m_reciprocal
519553
)
520554
return epsilon
521555

@@ -525,7 +559,6 @@ def _update(self, value: Union[int, float], **kwargs) -> None:
525559
self.num_instances += 1
526560
self._insert_bucket(value=value)
527561

528-
# self.drift = False
529562
if (
530563
self.num_instances % self.config.clock == 0 # type: ignore
531564
and self.width > self.config.min_num_instances # type: ignore
@@ -546,6 +579,7 @@ def _update(self, value: Union[int, float], **kwargs) -> None:
546579
bucket = self.buckets[i]
547580
for j in range(bucket.idx - 1):
548581
bucket_size = self._bucket_size(index=i)
582+
549583
w0_instances += bucket_size
550584
w1_instances -= bucket_size
551585
w0_total += bucket.total[j]
@@ -556,8 +590,11 @@ def _update(self, value: Union[int, float], **kwargs) -> None:
556590
break
557591

558592
if (
559-
w1_instances > self._min_instances
560-
and w0_instances > self._min_instances # type: ignore
593+
w1_instances > self.config.min_window_size # type: ignore
594+
and (
595+
w0_instances
596+
> self.config.min_window_size # type: ignore
597+
)
561598
):
562599
w0_mean = w0_total / w0_instances
563600
w1_mean = w1_total / w1_instances
@@ -571,8 +608,6 @@ def _update(self, value: Union[int, float], **kwargs) -> None:
571608
if self.width > 0:
572609
w0_instances -= self._delete_bucket()
573610
flag_exit = True
574-
# FIXME: Reset here? # pylint: disable=fixme
575-
# self._reset()
576611
break
577612

578613
def reset(self) -> None:

frouros/detectors/concept_drift/streaming/window_based/base.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ class WindowBased(ConceptDriftStreamingBase):
1818

1919
config_type = WindowBaseConfig
2020

21-
@abc.abstractmethod
22-
def reset(self) -> None:
23-
"""Reset method."""
24-
2521
@abc.abstractmethod
2622
def _update(self, value: Union[int, float], **kwargs) -> None:
2723
pass

0 commit comments

Comments
 (0)