@@ -163,19 +163,28 @@ def __init__(self, transformations, aggregations, values_column_name='values',
         self.input_is_dataframe = input_is_dataframe
         self.pipeline = self._build_pipeline()
 
-    def _apply_pipeline(self, row):
+    def _apply_pipeline(self, window, is_series=False):
167167 """Apply a ``mlblocks.MLPipeline`` to a row.
168168
-        Apply a ``MLPipeline`` to a row of a ``pd.DataFrame``, this function can
+        Apply a ``MLPipeline`` to a window of a ``pd.DataFrame``; this function can
         be combined with the ``pd.DataFrame.apply`` method to be applied to the
         entire data frame.
 
         Args:
-            row (pd.Series):
-                Row used to apply the pipeline to.
+            window (pd.Series or pd.DataFrame):
+                Row or window of rows used to apply the pipeline to.
+            is_series (bool):
+                Whether ``window`` is formatted as a series or a dataframe.
         """
-        context = row.to_dict()
-        amplitude_values = context.pop(self.values_column_name)
+        if is_series:
+            context = window.to_dict()
+            amplitude_values = context.pop(self.values_column_name)
+        else:
+            context = {} if window.empty else {
+                k: v for k, v in window.iloc[0].to_dict().items() if k != self.values_column_name
+            }
+            amplitude_values = list(window[self.values_column_name])
+
         output = self.pipeline.predict(
             amplitude_values=amplitude_values,
             **context,
@@ -187,12 +196,19 @@ def _apply_pipeline(self, row):
 
         return pd.Series(dict(zip(output_names, output)))
 
-    def process_signal(self, data=None, feature_columns=None, **kwargs):
+    def process_signal(self, data=None, window=None, time_index=None, groupby_index=None,
+                       feature_columns=None, **kwargs):
191201 """Apply multiple transformation and aggregation primitives.
192202
193203 Args:
194204 data (pandas.DataFrame):
195205 Dataframe with a column that contains signal values.
+            window (str):
+                Size of the window as a pandas offset alias, e.g. ``'1h'``.
+            time_index (str):
+                Column in ``data`` that represents the time index.
+            groupby_index (str or list[str]):
+                Column(s) to group by before applying the window.
             feature_columns (list):
                 List of column names from the input data frame that must be considered as
                 features and should not be dropped.
@@ -207,15 +223,25 @@ def process_signal(self, data=None, feature_columns=None, **kwargs):
                 A list with the feature names generated.
         """
         if data is None:
-            row = pd.Series(kwargs)
-            values = self._apply_pipeline(row).values
+            window = pd.Series(kwargs)
+            values = self._apply_pipeline(window, is_series=True).values
             return values if len(values) > 1 else values[0]
 
-        features = data.apply(
-            self._apply_pipeline,
-            axis=1
-        )
-        data = pd.concat([data, features], axis=1)
+        data = data.copy()
+        if window is not None and groupby_index is not None:
+            features = data.set_index(time_index).groupby(groupby_index).resample(
+                rule=window, **kwargs
+            ).apply(
+                self._apply_pipeline
+            ).reset_index()
+            data = features
+        else:
+            features = data.apply(
+                self._apply_pipeline,
+                axis=1,
+                is_series=True
+            )
+            data = pd.concat([data, features], axis=1)
 
         if feature_columns:
             feature_columns = feature_columns + list(features.columns)
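
For readers unfamiliar with the pandas pattern the new windowed branch relies on, here is a minimal standalone sketch of the same idea: group rows by an entity column, bucket them into fixed-size time windows, and reduce each window to one row of named features. The column names, the ``'1h'`` window and the toy ``summarize`` function are made up for illustration, and ``pd.Grouper`` is used instead of ``groupby(...).resample(...)`` only to keep the sketch self-contained; it is not the exact code path added in this diff.

```python
import pandas as pd

# Toy signal data: two sensors, one reading every 15 minutes.
data = pd.DataFrame({
    'timestamp': list(pd.date_range('2024-01-01', periods=8, freq='15min')) * 2,
    'sensor_id': ['a'] * 8 + ['b'] * 8,
    'values': range(16),
})


def summarize(window):
    # Stand-in for _apply_pipeline: reduce one window of rows to named features.
    return pd.Series({
        'mean_value': window['values'].mean(),
        'n_samples': len(window),
    })


# Group by sensor, bucket rows into 1-hour windows, one feature row per window.
features = (
    data
    .groupby(['sensor_id', pd.Grouper(key='timestamp', freq='1h')])
    .apply(summarize)
    .reset_index()
)
print(features)
```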