Skip to content

Commit 25b5c68

Browse files
Updated model classifier selection + added references to literature #151
1 parent 90616ba commit 25b5c68

File tree

5 files changed

+87
-30
lines changed

5 files changed

+87
-30
lines changed

semisupervised/CoTraining.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
 # @Filename: CoTraining.py
 # @Author: Daniel Puente Ramírez
 # @Time: 22/12/21 09:27
-# @Version: 3.0
+# @Version: 4.0

 from math import ceil, floor

@@ -14,16 +14,35 @@
 class CoTraining:
-
-    def __init__(self, p=1, n=3, k=30, u=75, random_state=None):
+    """Blum, A., & Mitchell, T. (1998, July). Combining labeled and unlabeled
+    data with co-training. In Proceedings of the eleventh annual conference
+    on Computational learning theory (pp. 92-100).
+    """
+
+    def __init__(self, p=1, n=3, k=30, u=75, random_state=None,
+                 c1=None, c1_params=None,
+                 c2=None, c2_params=None,
+                 ):
         self.p = p
         self.n = n
         self.k = k
         self.u = u
         self.random_state = random_state
         self.size_x1 = 0
-        self.h1 = GaussianNB()
-        self.h2 = GaussianNB()
+
+        classifiers = [c1, c2]
+        classifiers_params = [c1_params, c2_params]
+        configs = []
+        for c, cp in zip(classifiers, classifiers_params):
+            if c is not None:
+                if cp is not None:
+                    configs.append(c(**cp))
+                else:
+                    configs.append(c())
+            else:
+                configs.append(GaussianNB())
+
+        self.h1, self.h2 = configs

     def fit(self, samples, y):
         labeled, u, y = split(samples, y)

semisupervised/DemocraticCoLearning.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
 # @Filename: DemocraticCoLearning.py
 # @Author: Daniel Puente Ramírez
 # @Time: 29/12/21 15:39
-# @Version: 3.0
+# @Version: 4.0

 import copy
 from math import sqrt

@@ -38,7 +38,10 @@ class DemocraticCoLearning:
     Intelligence (pp. 594-602). IEEE.
     """

-    def __init__(self, n_neighbors=3, random_state=None):
+    def __init__(self, random_state=None,
+                 c1=None, c1_params=None,
+                 c2=None, c2_params=None,
+                 c3=None, c3_params=None):
         self.const = 1.96  # 95%
         self.random_state = random_state if random_state is not None else \
             np.random.randint(low=0, high=10e5, size=1)[0]

@@ -49,9 +52,21 @@ def __init__(self, n_neighbors=3, random_state=None):
         self.w2 = 0
         self.w3 = 0

-        self.h1 = MultinomialNB()
-        self.h2 = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1, p=2)
-        self.h3 = DecisionTreeClassifier(random_state=self.random_state)
+        classifiers = [c1, c2, c3]
+        classifiers_params = [c1_params, c2_params, c3_params]
+        default_classifiers = [MultinomialNB, KNeighborsClassifier,
+                               DecisionTreeClassifier]
+        configs = []
+        for index, (c, cp) in enumerate(zip(classifiers, classifiers_params)):
+            if c is not None:
+                if cp is not None:
+                    configs.append(c(**cp))
+                else:
+                    configs.append(c())
+            else:
+                configs.append(default_classifiers[index]())
+
+        self.h1, self.h2, self.h3 = configs

     def fit(self, samples, y):
         labeled, u, y = split(samples, y)

semisupervised/DensityPeaks.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,20 @@
 from sklearn.preprocessing import LabelEncoder
 from sklearn.semi_supervised import SelfTrainingClassifier
 from sklearn.svm import SVC
+from .utils import split
+from instance_selection import ENN


 class STDPNF:
+    """
+    Li, J., Zhu, Q., & Wu, Q. (2019). A self-training method based on density
+    peaks and an extended parameter-free local noise filter for k nearest
+    neighbor. Knowledge-Based Systems, 184, 104895.
+
+    Wu, D., Shang, M., Luo, X., Xu, J., Yan, H., Deng, W., & Wang, G. (2018).
+    Self-training semi-supervised classification based on density peaks of
+    data. Neurocomputing, 275, 180-191.
+    """

     def __init__(self,
                  dc=None,

semisupervised/TriTraining.py

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
 # @Filename: TriTraining.py
 # @Author: Daniel Puente Ramírez
 # @Time: 27/12/21 10:25
-# @Version: 3.0
+# @Version: 4.0

 from math import floor, ceil

@@ -27,23 +27,31 @@ def measure_error(classifier_j, classifier_k, labeled_data):


 class TriTraining:
-    def __init__(self, learn, random_state=None):
-        if learn == '3-NN':
-            self.hj = KNeighborsClassifier(n_neighbors=3, n_jobs=-1, p=2)
-            self.hk = KNeighborsClassifier(n_neighbors=3, n_jobs=-1, p=2)
-            self.hi = KNeighborsClassifier(n_neighbors=3, n_jobs=-1, p=2)
-        elif learn == 'DecisionTree Classifier':
-            self.hj = DecisionTreeClassifier(random_state=random_state)
-            self.hk = DecisionTreeClassifier(random_state=random_state)
-            self.hi = DecisionTreeClassifier(random_state=random_state)
-        elif learn == 'RandomForest Classifier':
-            self.hj = RandomForestClassifier(random_state=random_state)
-            self.hk = RandomForestClassifier(random_state=random_state)
-            self.hi = RandomForestClassifier(random_state=random_state)
-        else:
-            self.hj = GaussianNB()
-            self.hk = GaussianNB()
-            self.hi = GaussianNB()
+    """Zhou, Z. H., & Li, M. (2005). Tri-training: Exploiting unlabeled data
+    using three classifiers. IEEE Transactions on knowledge and Data
+    Engineering, 17(11), 1529-1541.
+    """
+
+    def __init__(self, random_state=None,
+                 c1=None, c1_params=None,
+                 c2=None, c2_params=None,
+                 c3=None, c3_params=None):
+
+        classifiers = [c1, c2, c3]
+        classifiers_params = [c1_params, c2_params, c3_params]
+        default_classifiers = [KNeighborsClassifier, DecisionTreeClassifier,
+                               RandomForestClassifier]
+        configs = []
+        for index, (c, cp) in enumerate(zip(classifiers, classifiers_params)):
+            if c is not None:
+                if cp is not None:
+                    configs.append(c(**cp))
+                else:
+                    configs.append(c())
+            else:
+                configs.append(default_classifiers[index]())
+
+        self.hj, self.hk, self.hi = configs

         self.random_state = random_state if random_state is not None else \
             np.random.randint(low=0, high=10e5, size=1)[0]

semisupervised/utils/_split.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
 # @Time: 4/2/22 11:54

 import numpy as np
+import pandas as pd


 def split(samples, y):

@@ -20,10 +21,13 @@ def split(samples, y):
     y {Numpy array} -- real labels
     """

+    if isinstance(y, pd.DataFrame):
+        y = y.to_numpy()
+
     labeled_indexes = y != (-1 or np.NaN or None)

-    L = samples[labeled_indexes].to_numpy()
-    U = samples[~labeled_indexes].to_numpy()
+    L = samples.iloc[labeled_indexes].to_numpy()
+    U = samples.iloc[~labeled_indexes].to_numpy()
     y = y[labeled_indexes]

     assert len(L) == len(y), f"L {len(L)} != {len(y)} y"

0 commit comments

Comments
 (0)