Skip to content

Commit fc563be

Browse files
committed
make release-tag: Merge branch 'master' into stable
2 parents a58fcaa + b86507a commit fc563be

29 files changed

+461
-65
lines changed

HISTORY.md

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,39 @@
11
# History
22

3+
## 0.2.4 - 2020-01-30
4+
5+
### New Primitives
6+
7+
* Add RangeScaler and RangeUnscaler primitives - [Issue #232](https://github.com/HDI-Project/MLPrimitives/issues/232) by @csala
8+
9+
### Primitive Improvements
10+
11+
* Extract input_shape from X in keras.Sequential - [Issue #223](https://github.com/HDI-Project/MLPrimitives/issues/223) by @csala
12+
13+
### Bug Fixes
14+
15+
* mlprimitives.custom.text.TextCleaner fails if text is empty - [Issue #228](https://github.com/HDI-Project/MLPrimitives/issues/228) by @csala
16+
* Error when loading the reviews dataset - [Issue #230](https://github.com/HDI-Project/MLPrimitives/issues/230) by @csala
17+
* Curate dependencies: specify an explicit prompt-toolkit version range - [Issue #224](https://github.com/HDI-Project/MLPrimitives/issues/224) by @csala
18+
319
## 0.2.3 - 2019-11-14
420

521
### New Primitives
622

7-
Add primitive to make window_sequences based on cutoff times - [Issue #217](https://github.com/HDI-Project/MLPrimitives/issues/217) by @csala
8-
Create a keras LSTM based TimeSeriesClassifier primitive - [Issue #218](https://github.com/HDI-Project/MLPrimitives/issues/218) by @csala
9-
Add pandas DataFrame primitives - [Issue #214](https://github.com/HDI-Project/MLPrimitives/issues/214) by @csala
10-
Add featuretools.EntitySet.normalize_entity primitive - [Issue #209](https://github.com/HDI-Project/MLPrimitives/issues/209) by @csala
23+
* Add primitive to make window_sequences based on cutoff times - [Issue #217](https://github.com/HDI-Project/MLPrimitives/issues/217) by @csala
24+
* Create a keras LSTM based TimeSeriesClassifier primitive - [Issue #218](https://github.com/HDI-Project/MLPrimitives/issues/218) by @csala
25+
* Add pandas DataFrame primitives - [Issue #214](https://github.com/HDI-Project/MLPrimitives/issues/214) by @csala
26+
* Add featuretools.EntitySet.normalize_entity primitive - [Issue #209](https://github.com/HDI-Project/MLPrimitives/issues/209) by @csala
1127

1228
### Primitive Improvements
1329

14-
Make featuretools.EntitySet.entity_from_dataframe entityset arg optional - [Issue #208](https://github.com/HDI-Project/MLPrimitives/issues/208) by @csala
30+
* Make featuretools.EntitySet.entity_from_dataframe entityset arg optional - [Issue #208](https://github.com/HDI-Project/MLPrimitives/issues/208) by @csala
1531

16-
Add text regression dataset - [Issue #206](https://github.com/HDI-Project/MLPrimitives/issues/206) by @csala
32+
* Add text regression dataset - [Issue #206](https://github.com/HDI-Project/MLPrimitives/issues/206) by @csala
1733

1834
### Bug Fixes
1935

20-
pandas.DataFrame.resample crash when grouping by integer columns - [Issue #211](https://github.com/HDI-Project/MLPrimitives/issues/211) by @csala
36+
* pandas.DataFrame.resample crash when grouping by integer columns - [Issue #211](https://github.com/HDI-Project/MLPrimitives/issues/211) by @csala
2137

2238
## 0.2.2 - 2019-10-08
2339

mlprimitives/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
__author__ = 'MIT Data To AI Lab'
66
__email__ = 'dailabmit@gmail.com'
7-
__version__ = '0.2.3'
7+
__version__ = '0.2.4.dev1'
88

99
import os
1010

mlprimitives/adapters/keras.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,24 @@ def __init__(self, layers, loss, optimizer, classification, callbacks=tuple(),
7878

7979
self.callbacks = callbacks
8080

81+
def _setdefault(self, kwargs, key, value):
82+
if key in kwargs:
83+
return
84+
85+
if key in self.hyperparameters and self.hyperparameters[key] is None:
86+
kwargs[key] = value
87+
88+
def _augment_hyperparameters(self, X, kwargs):
89+
shape = np.asarray(X)[0].shape
90+
length = shape[0]
91+
self._setdefault(kwargs, 'input_shape', shape)
92+
self._setdefault(kwargs, 'input_dim', length)
93+
self._setdefault(kwargs, 'input_length', length)
94+
95+
return kwargs
96+
8197
def fit(self, X, y, **kwargs):
98+
self._augment_hyperparameters(X, kwargs)
8299
self.model = self._build_model(**kwargs)
83100

84101
if self.classification:

mlprimitives/cli.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import sys
99
import warnings
1010

11+
import pandas as pd
1112
from mlblocks import add_primitives_path, get_primitives_paths
1213

1314
from mlprimitives.evaluation import score_pipeline
@@ -18,7 +19,7 @@
1819
def _logging_setup(verbosity=1):
1920
logger = logging.getLogger()
2021
log_level = (3 - verbosity) * 10
21-
fmt = '%(asctime)s - %(levelname)s - %(message)s'
22+
fmt = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
2223
formatter = logging.Formatter(fmt)
2324
logger.setLevel(log_level)
2425
logger.propagate = False
@@ -30,10 +31,36 @@ def _logging_setup(verbosity=1):
3031

3132

3233
def _test(args):
33-
for pipeline in args.pipeline:
34-
print('Scoring pipeline: {}'.format(pipeline))
35-
score, stdev = score_pipeline(pipeline, args.splits, args.random_state, args.dataset)
36-
print('Obtained Score: {:.4f} +/- {:.4f}'.format(score, stdev))
34+
results = pd.DataFrame(columns=['pipeline', 'mean', 'std', 'error'])
35+
try:
36+
for pipeline in args.pipeline:
37+
print('Scoring pipeline: {}'.format(pipeline))
38+
pipeline_name = os.path.basename(pipeline)
39+
try:
40+
score, std = score_pipeline(
41+
pipeline,
42+
args.splits,
43+
args.random_state,
44+
args.dataset
45+
)
46+
47+
print('Obtained Score: {:.4f} +/- {:.4f}'.format(score, std))
48+
results = results.append({
49+
'pipeline': pipeline_name,
50+
'mean': score,
51+
'std': std,
52+
}, ignore_index=True)
53+
54+
except Exception as ex:
55+
results = results.append({
56+
'pipeline': pipeline_name,
57+
'error': ex,
58+
}, ignore_index=True)
59+
60+
except KeyboardInterrupt:
61+
pass
62+
63+
print(results.to_string(index=False))
3764

3865

3966
def _get_primitives(pattern):

mlprimitives/custom/preprocessing.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,41 @@ def fit(self, classes):
2828

2929
def decode(self, y):
    """Return ``y`` passed through the label encoder's inverse transform.

    Args:
        y: encoded labels, forwarded unchanged to
            ``self._label_encoder.inverse_transform``.

    Returns:
        Whatever ``inverse_transform`` returns for ``y``.
    """
    encoder = self._label_encoder
    return encoder.inverse_transform(y)
31+
32+
33+
class RangeScaler():
    """Linearly rescale data from its observed range into ``[out_min, out_max]``.

    ``fit`` records the minimum and maximum of the data along axis 0 and
    ``scale`` maps that range onto the requested output range. ``scale``
    also returns the observed ``(min, max)`` pair so that the transformation
    can later be reverted.

    Args:
        out_min: lower bound of the output range.
        out_max: upper bound of the output range.
    """

    # Learned state; stays None until ``fit`` has been called.
    _data_min = None
    _data_scale = None
    _data_range = None

    def __init__(self, out_min, out_max):
        self._out_min = out_min
        self._out_scale = out_max - out_min

    def fit(self, X):
        """Learn the minimum and maximum of ``X`` along axis 0.

        Args:
            X: data exposing ``min(axis=0)`` and ``max(axis=0)``, e.g. a
                numpy array or a pandas DataFrame.
        """
        data_max = X.max(axis=0)
        self._data_min = X.min(axis=0)
        self._data_scale = data_max - self._data_min
        self._data_range = (self._data_min, data_max)

    def scale(self, X):
        """Rescale ``X`` into the output range.

        Features whose fitted minimum and maximum are equal have a span of
        zero; they are mapped to ``out_min`` instead of producing NaN/inf
        through a division by zero.

        Args:
            X: data to rescale, compatible with what was passed to ``fit``.

        Returns:
            tuple: ``(rescaled, data_range)`` where ``data_range`` is the
            ``(min, max)`` pair learned by ``fit``.
        """
        span = self._data_scale
        # ``span == 0`` counts as 1 exactly where the span is zero, so the
        # divisor becomes 1 for constant features and is unchanged elsewhere.
        divisor = span + (span == 0)

        scaled = (X - self._data_min) / divisor
        rescaled = (scaled * self._out_scale) + self._out_min

        return rescaled, self._data_range
54+
55+
56+
class RangeUnscaler():
    """Map values from ``[out_min, out_max]`` back onto a given data range.

    This applies the inverse of a linear range scaling: values are first
    normalised with respect to the output range and then stretched onto the
    ``(min, max)`` range supplied to ``fit``.

    Args:
        out_min: lower bound of the scaled range.
        out_max: upper bound of the scaled range.
    """

    def __init__(self, out_min, out_max):
        self._out_min = out_min
        self._out_scale = out_max - out_min

    def fit(self, data_range):
        """Store the target range.

        Args:
            data_range: indexable whose item ``0`` is the minimum and whose
                item ``1`` is the maximum of the range to restore.
        """
        lower = data_range[0]
        upper = data_range[1]
        self._data_min = lower
        self._data_scale = upper - lower

    def unscale(self, X):
        """Return ``X`` mapped from the scaled range back to the data range.

        Args:
            X: values expressed in the ``[out_min, out_max]`` range.

        Returns:
            The values of ``X`` expressed in the range given to ``fit``.
        """
        fraction = (X - self._out_min) / self._out_scale
        stretched = fraction * self._data_scale
        return stretched + self._data_min

mlprimitives/custom/text.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ def get_stopwords(cls, language_code):
8282
return []
8383

8484
def _remove_stopwords(self, text):
85+
if text == '':
86+
return text
87+
8588
if self.language_code:
8689
language_code = self.language_code
8790

mlprimitives/datasets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ def load_reviews():
327327
X = _load_csv(dataset_path, 'data')
328328
y = X.pop('evaluation').values
329329

330-
return Dataset(load_reviews.__doc__, X, y, r2_score)
330+
return Dataset(load_reviews.__doc__, X, y, r2_score, 'text', 'regression', 'univariate')
331331

332332

333333
def load_umls():

mlprimitives/pipelines/keras.Sequential.LSTMTextClassifier.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
"maxlen": 100
4040
},
4141
"keras.Sequential.LSTMTextClassifier#1": {
42-
"input_length": 100,
42+
"epochs": 1,
4343
"verbose": true,
4444
"validation_split": 0.2,
4545
"callbacks": [

mlprimitives/pipelines/keras.Sequential.MLPBinaryClassifier.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
"primitives": [
1212
"mlprimitives.custom.preprocessing.ClassEncoder",
1313
"mlprimitives.custom.feature_extraction.CategoricalEncoder",
14-
"mlprimitives.custom.counters.count_features",
1514
"keras.Sequential.MLPBinaryClassifier",
1615
"mlprimitives.custom.preprocessing.ClassDecoder"
1716
]

mlprimitives/pipelines/keras.Sequential.MLPMultiClassClassifier.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
},
1111
"primitives": [
1212
"mlprimitives.custom.counters.UniqueCounter",
13-
"mlprimitives.custom.counters.count_features",
1413
"keras.Sequential.MLPMultiClassClassifier"
1514
],
1615
"input_names": {

0 commit comments

Comments
 (0)