
Commit aa6b787

Merge pull request #11 from sintel-dev/process_signals_pre_release
Process signals pre release
2 parents: 66ef507 + 20b9c05

14 files changed: +514 -745 lines

setup.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
     history = history_file.read()


 install_requires = [
-    'mlblocks>=0.4.0,<0.5',
+    'mlblocks>=0.4.1,<0.5',
     'pandas>=1,<2',
     'numpy>=1.17.4,<1.19',
     'scipy>=1.3.3,<2',

sigpro/__init__.py

Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@

 from mlblocks import discovery

-from sigpro.process_signals import process_signals
+from sigpro.core import SigPro

 _BASE_PATH = os.path.abspath(os.path.dirname(__file__))
 MLBLOCKS_PRIMITIVES = os.path.join(_BASE_PATH, 'primitives')
@@ -46,4 +46,4 @@ def get_primitives(name=None, primitive_type=None, primitive_subtype=None):
     return discovery.find_primitives(name or 'sigpro', filters)


-__all__ = ('process_signals', )
+__all__ = ('SigPro', )

sigpro/core.py

Lines changed: 244 additions & 0 deletions

# -*- coding: utf-8 -*-
"""Process Signals core functionality."""

from collections import Counter
from copy import deepcopy

import pandas as pd
from mlblocks import MLPipeline, load_primitive

DEFAULT_INPUT = [
    {
        'name': 'readings',
        'keyword': 'data',
        'type': 'pandas.DataFrame'
    },
    {
        'name': 'feature_columns',
        'default': None,
        'type': 'list'
    }
]

DEFAULT_OUTPUT = [
    {
        'name': 'readings',
        'type': 'pandas.DataFrame'
    },
    {
        'name': 'feature_columns',
        'type': 'list'
    }
]


class SigPro:
    """SigPro class applies multiple transformation and aggregation primitives.

    The SigPro class is responsible for applying a collection of primitives specified by the
    user in order to create features for the given data.

    It is given a list of transformations and aggregations, each composed of dictionaries
    with the following specification:

        * ``name``:
            Name of the transformation / aggregation.
        * ``primitive``:
            Name of the primitive to apply.
        * ``init_params``:
            Dictionary containing the initialization parameters for the primitive.

    SigPro will build an ``mlblocks.MLPipeline`` and generate the features by first applying
    the transformations and then computing the aggregations.

    Args:
        data (pandas.DataFrame):
            Dataframe with a column that contains signal values.
        transformations (list):
            List of dictionaries containing the transformation primitives.
        aggregations (list):
            List of dictionaries containing the aggregation primitives.
        values_column_name (str):
            The name of the column that contains the signal values. Defaults to ``values``.
        keep_columns (Union[bool, list]):
            Whether to keep non-feature columns in the output DataFrame or not.
            If a list of column names is passed, those columns are kept.
        input_is_dataframe (bool):
            Whether the input data is a ``pandas.DataFrame``. Defaults to ``True``.
    """

    def _build_pipeline(self):
        """Build Pipeline function.

        Given a list of transformations and aggregations, build a pipeline whose outputs are
        the aggregations, named after the specified transformation and aggregation names.
        These lists are composed of dictionaries with the following specification:

            * ``name``:
                Name of the transformation / aggregation.
            * ``primitive``:
                Name of the primitive to apply.
            * ``init_params``:
                Dictionary containing the initialization parameters for the primitive.

        Args:
            transformations (list):
                List of dictionaries containing the transformation primitives.
            aggregations (list):
                List of dictionaries containing the aggregation primitives.

        Returns:
            mlblocks.MLPipeline:
                An ``MLPipeline`` object that first applies all the transformations
                and then produces as output the aggregations specified.
        """
        primitives = []
        init_params = {}
        prefix = []
        outputs = []
        counter = Counter()

        for transformation in self.transformations:
            name = transformation.get('name')
            if name is None:
                name = transformation['primitive'].split('.')[-1]

            prefix.append(name)
            primitive = transformation['primitive']
            counter[primitive] += 1
            primitive_name = f'{primitive}#{counter[primitive]}'
            primitives.append(primitive)
            params = transformation.get('init_params')
            if params:
                init_params[primitive_name] = params

        prefix = '.'.join(prefix) if prefix else ''

        for aggregation in self.aggregations:
            name = aggregation.get('name')
            if name is None:
                name = aggregation['primitive'].split('.')[-1]

            aggregation_name = f'{prefix}.{name}' if prefix else name

            primitive = aggregation['primitive']
            counter[primitive] += 1
            primitive_name = f'{primitive}#{counter[primitive]}'
            primitives.append(primitive)

            primitive = load_primitive(primitive)
            primitive_outputs = primitive['produce']['output']

            params = aggregation.get('init_params')
            if params:
                init_params[primitive_name] = params

            if name.lower() == 'sigpro':
                primitive = MLPipeline([primitive], init_params={'sigpro.SigPro#1': params})
                primitive_outputs = primitive.get_outputs()

            # primitive_outputs = getattr(self, primitive_outputs)()
            if not isinstance(primitive_outputs, str):
                for output in primitive_outputs:
                    output = output['name']
                    outputs.append({
                        'name': f'{aggregation_name}.{output}',
                        'variable': f'{primitive_name}.{output}'
                    })

        outputs = {'default': outputs} if outputs else None

        return MLPipeline(
            primitives,
            init_params=init_params,
            outputs=outputs
        )

    def __init__(self, transformations, aggregations, values_column_name='values',
                 keep_columns=False, input_is_dataframe=True):

        self.transformations = transformations
        self.aggregations = aggregations
        self.values_column_name = values_column_name
        self.keep_columns = keep_columns
        self.input_is_dataframe = input_is_dataframe
        self.pipeline = self._build_pipeline()

    def _apply_pipeline(self, row):
        """Apply an ``mlblocks.MLPipeline`` to a row.

        Apply an ``MLPipeline`` to a row of a ``pd.DataFrame``. This function can
        be combined with the ``pd.DataFrame.apply`` method to be applied to the
        entire data frame.

        Args:
            row (pd.Series):
                Row to apply the pipeline to.
        """
        context = row.to_dict()
        amplitude_values = context.pop(self.values_column_name)
        output = self.pipeline.predict(
            amplitude_values=amplitude_values,
            **context,
        )
        output_names = self.pipeline.get_output_names()

        # ensure that we can iterate over output
        output = output if isinstance(output, tuple) else (output, )

        return pd.Series(dict(zip(output_names, output)))

    def process_signal(self, data=None, feature_columns=None, **kwargs):
        """Apply multiple transformation and aggregation primitives.

        Args:
            data (pandas.DataFrame):
                Dataframe with a column that contains signal values.
            feature_columns (list):
                List of column names from the input data frame that must be considered as
                features and should not be dropped.

        Returns:
            tuple:
                pandas.DataFrame:
                    A data frame with the new feature columns generated by applying the
                    primitives. If ``keep_columns`` is ``True`` the original columns are
                    kept in the data frame, otherwise they are dropped.
                list:
                    A list with the names of the generated features.
        """
        if data is None:
            row = pd.Series(kwargs)
            values = self._apply_pipeline(row).values
            return values if len(values) > 1 else values[0]

        features = data.apply(
            self._apply_pipeline,
            axis=1
        )
        data = pd.concat([data, features], axis=1)

        if feature_columns:
            feature_columns = feature_columns + list(features.columns)
        else:
            feature_columns = list(features.columns)

        if isinstance(self.keep_columns, list):
            data = data[self.keep_columns + feature_columns]
        elif not self.keep_columns:
            data = data[feature_columns]

        return data, feature_columns

    def get_input_args(self):
        """Return the pipeline input args."""
        if self.input_is_dataframe:
            return deepcopy(DEFAULT_INPUT)

        return self.pipeline.get_predict_args()

    def get_output_args(self):
        """Return the pipeline output args."""
        if self.input_is_dataframe:
            return deepcopy(DEFAULT_OUTPUT)

        return self.pipeline.get_outputs()
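
As a quick illustration of how the new class is meant to be used, here is a minimal sketch; the transformation and aggregation primitive names below are assumed to exist in the sigpro package and are not introduced by this commit, and the exact feature column names depend on the primitives' declared outputs.

import pandas as pd

from sigpro import SigPro

# Hypothetical primitives; any sigpro transformation/aggregation pair would do.
transformations = [
    {'name': 'identity', 'primitive': 'sigpro.transformations.amplitude.identity.identity'}
]
aggregations = [
    {'name': 'mean', 'primitive': 'sigpro.aggregations.amplitude.statistical.mean'}
]

processor = SigPro(transformations, aggregations, values_column_name='values')

# Each row holds one signal as an array-like under the ``values`` column.
readings = pd.DataFrame({
    'turbine_id': ['T001', 'T001'],
    'values': [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
})

# keep_columns=False (the default) drops the non-feature columns, so only the
# generated features are returned. Feature names follow the
# '<transformation>.<aggregation>.<output>' pattern built in _build_pipeline,
# e.g. 'identity.mean.<output_name>'.
features_df, feature_columns = processor.process_signal(readings)
print(feature_columns)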

sigpro/primitives/sigpro.process_signals.json

Lines changed: 0 additions & 47 deletions
This file was deleted.
Lines changed: 35 additions & 0 deletions

{
    "name": "sigpro.SigPro",
    "primitive": "sigpro.SigPro",
    "classifiers": {
        "type": "preprocessor",
        "subtype": "feature_extractor"
    },
    "produce": {
        "method": "process_signal",
        "args": "get_input_args",
        "output": "get_output_args"
    },
    "hyperparameters": {
        "fixed": {
            "keep_columns": {
                "type": "bool or list",
                "default": false
            },
            "values_column_name": {
                "type": "str",
                "default": "values"
            },
            "transformations": {
                "type": "list"
            },
            "aggregations": {
                "type": "list"
            },
            "input_is_dataframe": {
                "type": "bool",
                "default": true
            }
        }
    }
}
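
Because this annotation exposes ``sigpro.SigPro`` as a regular MLBlocks primitive, the ``name.lower() == 'sigpro'`` branch in ``_build_pipeline`` above allows one SigPro to be nested inside another as an aggregation. The sketch below shows what such an entry might look like; the inner primitive names are assumptions, not part of this diff.

# Hypothetical nested configuration: an aggregation whose primitive is the
# sigpro.SigPro annotation itself, carrying its own transformations and
# aggregations through init_params.
nested_aggregation = {
    'name': 'SigPro',  # triggers the name.lower() == 'sigpro' branch
    'primitive': 'sigpro.SigPro',
    'init_params': {
        'input_is_dataframe': False,
        'transformations': [
            {'primitive': 'sigpro.transformations.amplitude.identity.identity'}
        ],
        'aggregations': [
            {'primitive': 'sigpro.aggregations.amplitude.statistical.mean'}
        ],
    },
}
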
Lines changed: 30 additions & 0 deletions

{
    "name": "sigpro.transformations.amplitude.spectrum.power_spectrum",
    "primitive": "sigpro.transformations.amplitude.spectrum.power_spectrum",
    "classifiers": {
        "type": "transformation",
        "subtype": "amplitude"
    },
    "produce": {
        "args": [
            {
                "name": "amplitude_values",
                "type": "numpy.ndarray"
            },
            {
                "name": "sampling_frequency",
                "type": "float"
            }
        ],
        "output": [
            {
                "name": "amplitude_values",
                "type": "numpy.ndarray"
            },
            {
                "name": "frequency_values",
                "type": "numpy.ndarray"
            }
        ]
    }
}
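
The implementation of ``power_spectrum`` itself is not part of this annotation. As a rough illustration only, a function matching the declared args and outputs could look like the NumPy sketch below; this is an assumption, not the code shipped in ``sigpro.transformations.amplitude.spectrum``.

import numpy as np


def power_spectrum(amplitude_values, sampling_frequency):
    # Illustrative sketch matching the annotation's signature: power of the
    # positive-frequency spectrum plus the corresponding frequency bins.
    # The real sigpro implementation may differ.
    amplitude_values = np.asarray(amplitude_values, dtype=float)
    spectrum = np.fft.rfft(amplitude_values)
    frequency_values = np.fft.rfftfreq(
        len(amplitude_values), d=1.0 / sampling_frequency)
    return np.abs(spectrum) ** 2, frequency_values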
