Skip to content

Commit 21b3d2c

Browse files
authored
Merge pull request #9 from jlgarridol/development
Update to version 1.0.3
2 parents 51a132e + f2563cb commit 21b3d2c

File tree

11 files changed

+228
-80
lines changed

11 files changed

+228
-80
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [1.0.3] - 2023-03-29
8+
9+
### Added
10+
- Methods now support datasets with no unlabeled data. In this case, the method returns the same result as the base estimator.
11+
12+
### Changed
13+
- In OneHotEncoder, the `sparse` parameter is now `sparse_output` to avoid a FutureWarning.
14+
15+
### Fixed
16+
17+
- CoForest is now more similar to the original paper.
18+
- TriTraining can use at least 3 n_jobs. Fixed the bug that allowed using as many n_jobs as CPUs in the machine.
19+
720
## [1.0.2] - 2023-02-17
821

922
### Fixed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,12 @@ Citing
4545
César García-Osorio and
4646
Juan J. Rodríguez and
4747
Jesus Maudes},
48-
title = {jlgarridol/sslearn: Zenodo Indexed},
49-
month = jan,
48+
title = {jlgarridol/sslearn: V1.0.2},
49+
month = feb,
5050
year = 2023,
5151
publisher = {Zenodo},
52-
version = {1.0.1},
53-
doi = {10.5281/zenodo.7565222},
54-
url = {https://doi.org/10.5281/zenodo.7565222}
52+
version = {1.0.2},
53+
doi = {10.5281/zenodo.7650049},
54+
url = {https://doi.org/10.5281/zenodo.7650049}
5555
}
5656
```

pytest.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ filterwarnings =
55
ignore:divide by zero
66
ignore:X does not have valid feature names
77
ignore:invalid value encountered in divide
8+
ignore:Poolsize
9+
ignore:y contains no unlabeled samples

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ joblib==1.2.0
22
numpy==1.23.3
33
pandas==1.4.3
44
scikit_learn==1.2.0
5-
scipy==1.9.3
5+
scipy==1.10.1
66
statsmodels==0.13.2

sslearn/base.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,13 @@
2020
from sklearn.ensemble._base import _set_random_states
2121
from sklearn.utils import check_random_state
2222

23-
24-
25-
2623
def get_dataset(X, y):
2724

2825
is_df = False
2926
if isinstance(X, pd.DataFrame):
3027
is_df = True
3128
columns = X.columns
3229

33-
3430
X = check_array(X)
3531
y = check_array(y, ensure_2d=False, dtype=y.dtype.type)
3632

sslearn/model_selection/_split.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ def artificial_ssl_dataset(X, y, label_rate=0.1, random_state=None, **kwards):
7676
y_unlabel: ndarray
7777
The true label for each y in the same order.
7878
"""
79-
assert (label_rate > 0) and (label_rate < 100),\
80-
"Label rate must be in (0, 100)."
79+
assert (label_rate > 0) and (label_rate < 1),\
80+
"Label rate must be in (0, 1)."
8181
assert "test_size" not in kwards and "train_size" not in kwards,\
8282
"Test size and train size are illegal parameters in this method."
8383
X_label, X_unlabel, y_label, true_label = \

sslearn/utils.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,25 @@
33
import math
44

55
import pandas as pd
6-
import json
76

87
from statsmodels.stats.proportion import proportion_confint
9-
import scipy.stats as st
108
from sklearn.tree import DecisionTreeClassifier
119
from sklearn.base import ClassifierMixin
1210

13-
import sslearn
14-
1511

1612
def safe_division(dividend, divisor, epsilon):
1713
if divisor == 0:
1814
return dividend / epsilon
1915
return dividend / divisor
2016

2117

22-
def confidence_interval(X, hyp, y, alpha=.95 ):
18+
def confidence_interval(X, hyp, y, alpha=.95):
2319
data = hyp.predict(X)
2420

2521
successes = np.count_nonzero(data == y)
2622
trials = X.shape[0]
2723
li, hi = proportion_confint(successes, trials, alpha=1 - alpha, method="wilson")
2824
return li, hi
29-
3025

3126

3227
def choice_with_proportion(predictions, class_predicted, proportion, extra=0):
@@ -69,6 +64,27 @@ def is_int(x):
6964
return isinstance(x, (int, np.integer)) and not isinstance(x, bool)
7065

7166

67+
def mode(y):
68+
"""Calculate the mode of a list of values
69+
70+
Parameters
71+
----------
72+
y : array-like of shape (n_samples, n_estimators)
73+
array of values
74+
75+
Returns
76+
-------
77+
mode: array-like of shape (n_samples,)
78+
array of mode of each label
79+
count: array-like of shape (n_samples,)
80+
array of count of the mode of each label
81+
"""
82+
array = pd.DataFrame(np.array(y))
83+
mode = array.mode(axis=0).loc[0, :]
84+
count = array.apply(lambda x: x.value_counts().max())
85+
return mode.values, count.values
86+
87+
7288
def check_n_jobs(n_jobs):
7389
"""Check `n_jobs` parameter according to the scikit-learn convention.
7490
From sktime: BSD 3-Clause
@@ -101,9 +117,10 @@ def calc_number_per_class(y_label):
101117
number_per_class = dict()
102118
for c in classes:
103119
number_per_class[c] = math.ceil(proportion[c] * factor)
104-
120+
105121
return number_per_class
106122

123+
107124
def check_classifier(base_classifier, can_be_list=True, collection_size=None):
108125

109126
if base_classifier is None:
@@ -114,7 +131,7 @@ def check_classifier(base_classifier, can_be_list=True, collection_size=None):
114131
raise AttributeError(f"base_classifier is a list of classifiers, but its length ({len(base_classifier)}) is different from expected ({collection_size})")
115132
for i, bc in enumerate(base_classifier):
116133
base_classifier[i] = check_classifier(bc, False)
117-
return list(base_classifier) # Transform to list
134+
return list(base_classifier) # Transform to list
118135
else:
119136
if not isinstance(base_classifier, ClassifierMixin):
120137
raise AttributeError(f"base_classifier must be a ClassifierMixin, but found {type(base_classifier)}")

0 commit comments

Comments
 (0)