11"""Synthetic datasets module."""
22
3- from typing import Tuple , Iterator
3+ from typing import Tuple , Iterator , Optional
44
55import numpy as np # type: ignore
66
1111class SEA (BaseDatasetGenerator ):
1212 """SEA generator [street2001streaming]_.
1313
14+ :param seed: seed value, defaults to None
15+ :type seed: Optional[int]
16+
1417 :References:
1518
1619 .. [street2001streaming] Street, W. Nick, and YongSeog Kim.
1720 "A streaming ensemble algorithm (SEA) for large-scale classification."
1821 Proceedings of the seventh ACM SIGKDD international conference on Knowledge
1922 discovery and data mining. 2001.
23+
24+ :Example:
25+
26+ >>> from frouros.datasets.synthetic import SEA
27+ >>> sea = SEA(seed=31)
28+ >>> dataset = sea.generate_dataset(block=1, noise=0.1, num_samples=5)
29+ >>> for X, y in dataset:
30+ ... print(X, y)
31+ [2.86053822 9.58105567 7.70312932] 0
32+ [2.08165462 1.36917049 9.08373802] 0
33+ [8.36483632 1.12172604 8.3489916 ] 0
34+ [2.44680795 1.36231348 7.22094455] 1
35+ [1.28477715 2.20364007 5.19211202] 1
2036 """
2137
22- block_map = {1 : 8.0 , 2 : 9.0 , 3 : 7.0 , 4 : 9.5 }
def __init__(self, seed: Optional[int] = None) -> None:
    """Init method.

    :param seed: seed value, defaults to None
    :type seed: Optional[int]
    """
    super().__init__(seed=seed)
    # Maps each valid block number (1, 2, 3 or 4) to the threshold
    # that generate_dataset looks up for that block.
    self._block_map = {1: 8.0, 2: 9.0, 3: 7.0, 4: 9.5}
2346
2447 @staticmethod
2548 def _generate_sample (threshold : float , noise : float ) -> Tuple [np .ndarray , int ]:
@@ -35,17 +58,17 @@ def generate_dataset(
3558 ) -> Iterator [Tuple [np .ndarray , int ]]:
3659 """Generate dataset.
3760
38- :param block: block to generate samples from
61+ :param block: block to generate samples from, must be 1, 2, 3 or 4
3962 :type block: int
40- :param noise: ratio of samples with a noisy class
63+ :param noise: ratio of samples with a noisy class, defaults to 0.1
4164 :type noise: float
42- :param num_samples: number of samples to generate
65+ :param num_samples: number of samples to generate, defaults to 12500
4366 :type num_samples: int
4467 :return: generator with the samples
4568 :rtype: Iterator[Tuple[np.ndarray, int]]
4669 """
4770 try :
48- threshold = self .block_map [block ]
71+ threshold = self ._block_map [block ]
4972 except KeyError as e :
5073 raise InvalidBlockError ("block must be 1, 2, 3 or 4." ) from e
5174 if num_samples < 1 :
0 commit comments