Skip to content

Commit 07c0953

Browse files
Refactored methods and added their proper documentation #195
1 parent 22fe4ef commit 07c0953

File tree

8 files changed

+541
-238
lines changed

8 files changed

+541
-238
lines changed

instance_selection/_CNN.py

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,29 @@
33
# @Filename: CNN.py
44
# @Author: Daniel Puente Ramírez
55
# @Time: 19/11/21 07:13
6-
# @Version: 4.0
6+
# @Version: 5.0
77

88
import numpy as np
99
import pandas as pd
1010

1111
from .utils import transform, delete_multiple_element
1212

1313

14-
def check_store(store, sample, store_classes):
    """
    Return the class of the stored sample nearest to ``sample``.

    The Euclidean distance between ``sample`` and every element of
    ``store`` is computed, and the class found at the position of the
    smallest distance is returned. When several stored samples are equally
    close, the earliest one in ``store`` wins.

    :param store: the list of samples that have been stored
    :param sample: the sample we want to classify
    :param store_classes: the classes of the samples in the store, aligned
        index-by-index with ``store``
    :return: The class of the nearest neighbor.
    :raises ValueError: if ``store`` is empty.
    """
    # len() rather than truthiness: store may be a NumPy array, whose
    # truth value is ambiguous.
    if len(store) == 0:
        raise ValueError("store must contain at least one sample")
    # One norm per stored sample keeps the result independent of the
    # samples' shape.
    distances = [np.linalg.norm(s - sample) for s in store]
    # argmin yields the first index of the minimum, i.e. the earliest
    # nearest neighbour.
    return store_classes[int(np.argmin(distances))]
14+
class CNN:
15+
"""
16+
Hart, P. (1968). The condensed nearest neighbor rule (corresp.). IEEE
17+
transactions on information theory, 14(3), 515-516.
2218
19+
Parameters
20+
----------
2321
24-
class CNN:
22+
"""
2523

2624
def __init__(self):
2725
self.x_attr = None
2826

2927
def filter(self, samples, y):
3028
"""
31-
Hart, P. (1968). The condensed nearest neighbor rule (corresp.). IEEE
32-
transactions on information theory, 14(3), 515-516.
33-
3429
Implementation of The Condensed Nearest Neighbor Rule
3530
3631
The first sample of each class is placed in *store*. Thus we only have
@@ -48,6 +43,7 @@ def filter(self, samples, y):
4843
Extracted from:
4944
The condensed nearest neighbor rule. IEEE Transactions on Information
5045
Theory ( Volume: 14, Issue: 3, May 1968)
46+
5147
:param samples: DataFrame.
5248
:param y: DataFrame.
5349
:return: the input dataset with the remaining samples.
@@ -61,7 +57,7 @@ def filter(self, samples, y):
6157
handbag = []
6258

6359
for sample_class, sample in zip(samples.target, samples.data):
64-
nn_class = check_store(store, sample, store_classes)
60+
nn_class = self._check_store(store, sample, store_classes)
6561

6662
if nn_class == sample_class:
6763
handbag.append((sample_class, sample))
@@ -75,7 +71,7 @@ def filter(self, samples, y):
7571
indexes = []
7672
for index, s2 in enumerate(handbag):
7773
sample_class, sample = s2
78-
nn_class = check_store(store, sample, store_classes)
74+
nn_class = self._check_store(store, sample, store_classes)
7975
if nn_class != sample_class:
8076
store.append(sample)
8177
store_classes.append(sample_class)
@@ -88,3 +84,24 @@ def filter(self, samples, y):
8884
int))
8985

9086
return samples, y
87+
88+
@staticmethod
def _check_store(store, sample, store_classes):
    """
    Return the class of the stored sample nearest to ``sample``.

    The Euclidean distance between ``sample`` and every element of
    ``store`` is computed, and the class found at the position of the
    smallest distance is returned. When several stored samples are equally
    close, the earliest one in ``store`` wins.

    :param store: the list of samples that have been stored
    :param sample: the sample we want to classify
    :param store_classes: the classes of the samples in the store, aligned
        index-by-index with ``store``
    :return: The class of the nearest neighbor.
    :raises ValueError: if ``store`` is empty.
    """
    # len() rather than truthiness: store may be a NumPy array, whose
    # truth value is ambiguous.
    if len(store) == 0:
        raise ValueError("store must contain at least one sample")
    # One norm per stored sample keeps the result independent of the
    # samples' shape.
    distances = [np.linalg.norm(s - sample) for s in store]
    # argmin yields the first index of the minimum, i.e. the earliest
    # nearest neighbour.
    return store_classes[int(np.argmin(distances))]

0 commit comments

Comments
 (0)