diff --git a/doc/conf.py b/doc/conf.py index 078549f..59f20c6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,8 +10,6 @@ import importlib.metadata from datetime import datetime -# project root -# sys.path.insert(0, os.path.abspath("..")) # General information about the project. project = "fastcan" copyright = f"2024 - {datetime.now().year}, fastcan developers (MIT License)" @@ -59,6 +57,7 @@ # generate autosummary even if no references autosummary_generate = True +autosummary_ignore_module_all = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" diff --git a/doc/index.rst b/doc/index.rst index 06fde70..fd01887 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,30 +17,13 @@ Architecture Diagram API Reference ------------- -.. automodule:: fastcan +.. autosummary:: + :toctree: generated/ + :template: module.rst - .. rubric:: Classes - - .. autosummary:: - :toctree: generated/ - - FastCan - - .. rubric:: Functions - - .. autosummary:: - :toctree: generated/ - - refine - minibatch - - .. rubric:: Submodules - - .. autosummary:: - :toctree: generated/ - - narx - utils + fastcan + fastcan.narx + fastcan.utils Useful Links ------------ diff --git a/doc/templates/autosummary/class.rst b/doc/templates/class.rst similarity index 100% rename from doc/templates/autosummary/class.rst rename to doc/templates/class.rst diff --git a/doc/templates/autosummary/function.rst b/doc/templates/function.rst similarity index 100% rename from doc/templates/autosummary/function.rst rename to doc/templates/function.rst diff --git a/doc/templates/autosummary/module.rst b/doc/templates/module.rst similarity index 89% rename from doc/templates/autosummary/module.rst rename to doc/templates/module.rst index 14ad518..b8c2515 100644 --- a/doc/templates/autosummary/module.rst +++ b/doc/templates/module.rst @@ -8,6 +8,7 @@ .. autosummary:: :toctree: + :template: class.rst {% for item in classes %} {{ item }} {%- endfor %} @@ -21,6 +22,7 @@ .. autosummary:: :toctree: + :template: function.rst {% for item in functions %} {{ item }} {%- endfor %} diff --git a/fastcan/__init__.py b/fastcan/__init__.py index ae06fbc..cc4a6c8 100644 --- a/fastcan/__init__.py +++ b/fastcan/__init__.py @@ -1,6 +1,5 @@ """ -The implementation of fast canonical correlation analysis based feature selection -algorithm. +Fast canonical correlation analysis based greedy search algorithm. """ # Authors: The fastcan developers diff --git a/fastcan/_cancorr_fast.pyx b/fastcan/_cancorr_fast.pyx index 1bc5e6e..3e24f2e 100644 --- a/fastcan/_cancorr_fast.pyx +++ b/fastcan/_cancorr_fast.pyx @@ -1,5 +1,5 @@ """ -Fast feature selection with sum squared canonical correlation coefficients +Fast forward greedy search with sum squared canonical correlation coefficients. """ # Authors: The fastcan developers # SPDX-License-Identifier: MIT diff --git a/fastcan/_fastcan.py b/fastcan/_fastcan.py index cedb3d7..e5579bc 100644 --- a/fastcan/_fastcan.py +++ b/fastcan/_fastcan.py @@ -1,5 +1,5 @@ """ -Feature selection +Fast canonical correlation analysis based selector. """ # Authors: The fastcan developers diff --git a/fastcan/_minibatch.py b/fastcan/_minibatch.py index 305f6d1..0e7daef 100644 --- a/fastcan/_minibatch.py +++ b/fastcan/_minibatch.py @@ -1,5 +1,5 @@ """ -Feature selection with mini-batch. +Mini-batch selection. 
""" # Authors: The fastcan developers diff --git a/fastcan/_refine.py b/fastcan/_refine.py index a58e947..ec40288 100644 --- a/fastcan/_refine.py +++ b/fastcan/_refine.py @@ -1,5 +1,5 @@ """ -Refine fastcan selection results +Refine fastcan selection results. """ # Authors: The fastcan developers diff --git a/fastcan/meson.build b/fastcan/meson.build index 402f933..1ac9d8b 100644 --- a/fastcan/meson.build +++ b/fastcan/meson.build @@ -15,10 +15,4 @@ py.extension_module( install: true ) -py.extension_module( - '_narx_fast', - '_narx_fast.pyx', - subdir: 'fastcan', - cython_args: cython_args, - install: true -) \ No newline at end of file +subdir('narx') \ No newline at end of file diff --git a/fastcan/narx.py b/fastcan/narx.py deleted file mode 100644 index 4ee5ccc..0000000 --- a/fastcan/narx.py +++ /dev/null @@ -1,1521 +0,0 @@ -""" -The module related to nonlinear autoregressive exogenous (NARX) model for system -identification. -""" - -# Authors: The fastcan developers -# SPDX-License-Identifier: MIT - -import math -import warnings -from itertools import combinations_with_replacement -from numbers import Integral - -import numpy as np -from scipy.optimize import least_squares -from scipy.stats import rankdata -from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin -from sklearn.linear_model import LinearRegression -from sklearn.utils import check_array, check_consistent_length, column_or_1d -from sklearn.utils._param_validation import Interval, StrOptions, validate_params -from sklearn.utils.validation import ( - _check_sample_weight, - check_is_fitted, - validate_data, -) - -from ._fastcan import FastCan -from ._narx_fast import ( # type: ignore[attr-defined] - _predict_step, - _update_cfd, - _update_terms, -) -from ._refine import refine -from .utils import mask_missing_values - - -@validate_params( - {"X": ["array-like"], "ids": ["array-like"]}, - prefer_skip_nested_validation=True, -) -def make_time_shift_features(X, ids): - """Make time shift features. - - Parameters - ---------- - X : array-likeof shape (n_samples, n_features) - The data to transform, column by column. - - ids : array-like of shape (n_outputs, 2) - The unique id numbers of output features, which are - (feature_idx, delay). - - Returns - ------- - out : ndarray of shape (n_samples, n_outputs) - The matrix of features, where `n_outputs` is the number of time shift - features generated from the combination of inputs. - - Examples - -------- - >>> from fastcan.narx import make_time_shift_features - >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]] - >>> ids = [[0, 0], [0, 1], [1, 1]] - >>> make_time_shift_features(X, ids) - array([[ 1., nan, nan], - [ 3., 1., 2.], - [ 5., 3., 4.], - [ 7., 5., 6.]]) - """ - X = check_array(X, ensure_2d=True, dtype=float, ensure_all_finite="allow-nan") - ids = check_array(ids, ensure_2d=True, dtype=int) - n_samples = X.shape[0] - n_outputs = ids.shape[0] - out = np.zeros([n_samples, n_outputs]) - for i, id_temp in enumerate(ids): - out[:, i] = np.r_[ - np.full(id_temp[1], np.nan), - X[: -id_temp[1] or None, id_temp[0]], - ] - - return out - - -@validate_params( - { - "n_features": [ - Interval(Integral, 1, None, closed="left"), - ], - "max_delay": [ - Interval(Integral, 0, None, closed="left"), - ], - "include_zero_delay": ["boolean", "array-like"], - }, - prefer_skip_nested_validation=True, -) -def make_time_shift_ids( - n_features=1, - max_delay=1, - include_zero_delay=False, -): - """Generate ids for time shift features. 
- (variable_index, delay_number) - - Parameters - ---------- - n_features: int, default=1 - The number of input features. - - max_delay : int, default=1 - The maximum delay of time shift features. - - include_zero_delay : {bool, array-like} of shape (n_features,) default=False - Whether to include the original (zero-delay) features. - - Returns - ------- - ids : array-like of shape (`n_output_features_`, 2) - The unique id numbers of output features. - - Examples - -------- - >>> from fastcan.narx import make_time_shift_ids - >>> make_time_shift_ids(2, max_delay=3, include_zero_delay=[True, False]) - array([[0, 0], - [0, 1], - [0, 2], - [0, 3], - [1, 1], - [1, 2], - [1, 3]]) - """ - if isinstance(include_zero_delay, bool): - return np.stack( - np.meshgrid( - range(n_features), - range(not include_zero_delay, max_delay + 1), - indexing="ij", - ), - -1, - ).reshape(-1, 2) - - include_zero_delay = check_array(include_zero_delay, ensure_2d=False, dtype=bool) - if include_zero_delay.shape[0] != n_features: - raise ValueError( - f"The length of `include_zero_delay`={include_zero_delay} " - f"should be equal to `n_features`={n_features}." - ) - - ids = np.stack( - np.meshgrid( - range(n_features), - range(max_delay + 1), - indexing="ij", - ), - -1, - ).reshape(-1, 2) - exclude_zero_delay_idx = np.where(~include_zero_delay)[0] - mask = np.isin(ids[:, 0], exclude_zero_delay_idx) & (ids[:, 1] == 0) - return ids[~mask] - - -@validate_params( - {"X": ["array-like"], "ids": ["array-like"]}, - prefer_skip_nested_validation=True, -) -def make_poly_features(X, ids): - """Make polynomial features. - - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - The data to transform, column by column. - - ids : array-like of shape (n_outputs, degree) - The unique id numbers of output features, which are formed - of non-negative int values. - - Returns - ------- - out.T : ndarray of shape (n_samples, n_outputs) - The matrix of features, where n_outputs is the number of polynomial - features generated from the combination of inputs. - - Examples - -------- - >>> from fastcan.narx import make_poly_features - >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]] - >>> ids = [[0, 0], [0, 1], [1, 1], [0, 2]] - >>> make_poly_features(X, ids) - array([[ 1., 1., 1., 2.], - [ 1., 3., 9., 4.], - [ 1., 5., 25., 6.], - [ 1., 7., 49., 8.]]) - """ - X = check_array(X, ensure_2d=True, dtype=float, ensure_all_finite="allow-nan") - ids = check_array(ids, ensure_2d=True, dtype=int) - n_samples = X.shape[0] - n_outputs, degree = ids.shape - - # Generate polynomial features - out = np.ones([n_outputs, n_samples]) - unique_features = np.unique(ids) - unique_features = unique_features[unique_features != 0] - for i in range(degree): - for j in unique_features: - mask = ids[:, i] == j - out[mask, :] *= X[:, j - 1] - - return out.T - - -@validate_params( - { - "n_features": [ - Interval(Integral, 1, None, closed="left"), - ], - "degree": [ - None, - Interval(Integral, 1, None, closed="left"), - ], - }, - prefer_skip_nested_validation=True, -) -def make_poly_ids( - n_features=1, - degree=1, -): - """Generate ids for polynomial features. - (variable_index, variable_index, ...) - variable_index starts from 1, and 0 represents constant. - - Parameters - ---------- - n_features: int, default=1 - The number of input features. - - degree : int, default=1 - The maximum degree of polynomial features. - - Returns - ------- - ids : array-like of shape (n_outputs, degree) - The unique id numbers of output features. 
- - Examples - -------- - >>> from fastcan.narx import make_poly_ids - >>> make_poly_ids(2, degree=3) - array([[0, 0, 1], - [0, 0, 2], - [0, 1, 1], - [0, 1, 2], - [0, 2, 2], - [1, 1, 1], - [1, 1, 2], - [1, 2, 2], - [2, 2, 2]]) - """ - n_outputs = math.comb(n_features + degree, degree) - 1 - if n_outputs > np.iinfo(np.intp).max: - msg = ( - "The output that would result from the current configuration would" - f" have {n_outputs} features which is too large to be" - f" indexed by {np.intp().dtype.name}." - ) - raise ValueError(msg) - - ids = np.array( - list( - combinations_with_replacement( - range(n_features + 1), - degree, - ) - ) - ) - - const_id = np.where((ids == 0).all(axis=1)) - return np.delete(ids, const_id, 0) # remove the constant feature - - -def _valiate_time_shift_poly_ids( - time_shift_ids, poly_ids, n_samples=None, n_features=None, n_outputs=None -): - if n_samples is None: - n_samples = np.inf - if n_features is None: - n_features = np.inf - if n_outputs is None: - n_outputs = np.inf - - # Validate time_shift_ids - time_shift_ids_ = check_array( - time_shift_ids, - ensure_2d=True, - dtype=int, - ) - if time_shift_ids_.shape[1] != 2: - raise ValueError( - "time_shift_ids should have shape (n_variables, 2), " - f"but got {time_shift_ids_.shape}." - ) - if (time_shift_ids_[:, 0].min() < 0) or ( - time_shift_ids_[:, 0].max() >= n_features + n_outputs - ): - raise ValueError( - "The element x of the first column of time_shift_ids should " - f"satisfy 0 <= x < {n_features + n_outputs}." - ) - if (time_shift_ids_[:, 1].min() < 0) or (time_shift_ids_[:, 1].max() >= n_samples): - raise ValueError( - "The element x of the second column of time_shift_ids should " - f"satisfy 0 <= x < {n_samples}." - ) - # Validate poly_ids - poly_ids_ = check_array( - poly_ids, - ensure_2d=True, - dtype=int, - ) - if (poly_ids_.min() < 0) or (poly_ids_.max() > time_shift_ids_.shape[0]): - raise ValueError( - "The element x of poly_ids should " - f"satisfy 0 <= x <= {time_shift_ids_.shape[0]}." - ) - return time_shift_ids_, poly_ids_ - - -def _validate_feat_delay_ids( - feat_ids, delay_ids, n_samples=None, n_features=None, n_outputs=None -): - """Validate feat_ids and delay_ids.""" - if n_samples is None: - n_samples = np.inf - if n_features is None: - n_features = np.inf - if n_outputs is None: - n_outputs = np.inf - - # Validate feat_ids - feat_ids_ = check_array( - feat_ids, - ensure_2d=True, - dtype=np.int32, - order="C", - ) - if (feat_ids_.min() < -1) or (feat_ids_.max() > n_features + n_outputs - 1): - raise ValueError( - "The element x of feat_ids should " - f"satisfy -1 <= x <= {n_features + n_outputs - 1}." - ) - # Check if any row of feat_ids only contains -1 - if np.all(feat_ids_ == -1, axis=1).any(): - raise ValueError("`feat_ids` should not contain rows that only have -1.") - # Validate delay_ids - delay_ids_ = check_array( - delay_ids, - ensure_2d=True, - dtype=np.int32, - order="C", - ) - if delay_ids_.shape != feat_ids_.shape: - raise ValueError( - "The shape of delay_ids should be equal to " - f"the shape of feat_ids {feat_ids_.shape}, " - f"but got {delay_ids_.shape}." - ) - if ((delay_ids_ == -1) != (feat_ids_ == -1)).any(): - raise ValueError( - "The element x of delay_ids should be -1 " - "if and only if the element x of feat_ids is -1." - ) - if (delay_ids_.min() < -1) or (delay_ids_.max() >= n_samples): - raise ValueError( - f"The element x of delay_ids should satisfy -1 <= x < {n_samples}." 
- ) - return feat_ids_, delay_ids_ - - -@validate_params( - { - "feat_ids": ["array-like"], - "delay_ids": ["array-like"], - }, - prefer_skip_nested_validation=True, -) -def fd2tp(feat_ids, delay_ids): - """ - Convert feat_ids and delay_ids to time_shift_ids and poly_ids. - The polynomial terms, e.g., x0(k-1)^2, x0(k-2)x1(k-3), can be - represented by two ways: - - #. feat_ids and delay_ids, e.g., [[0, 0], [0, 1]] and [[1, 1], [2, 3]] - - #. time_shift_ids and poly_ids, e.g., [[0, 1], [0, 2], [1, 3]] and [[1, 1], [2, 3]] - - For feat_ids, [0, 0] and [0, 1] represent x0*x0 and x0*x1, while - for delay_ids, [1, 1] and [2, 3] represent the delays of features in feat_ids. - - For time_shift_ids, [0, 1], [0, 2], and [1, 3] represents x0(k-1), x0(k-2), - and x1(k-3), respectively. For poly_ids, [1, 1] and [2, 3] represent the first - variable multiplying the first variable given by time_shift_ids, i.e., - x0(k-1)*x0(k-1), and the second variable multiplying the third variable, i.e., - x0(k-1)*x1(k-3). - - Parameters - ---------- - feat_ids : array-like of shape (n_terms, degree), default=None - The unique id numbers of features to form polynomial terms. - The id -1 stands for the constant 1. - The id 0 to n are the index of features. - - delay_ids : array-like of shape (n_terms, degree), default=None - The delays of each feature in polynomial terms. - The id -1 stands for empty. - The id 0 stands for 0 delay. - The positive integer id k stands for k-th delay. - - Returns - ------- - time_shift_ids : array-like of shape (n_variables, 2), default=None - The unique id numbers of time shift variables, which are - (feature_idx, delay). - - poly_ids : array-like of shape (n_polys, degree), default=None - The unique id numbers of polynomial terms, excluding the intercept. - The id 0 stands for the constant 1. - The id 1 to n are the index+1 of time_shift_ids. - - Examples - -------- - >>> from fastcan.narx import fd2tp - >>> # Encode x0(k-1), x0(k-2)x1(k-3) - >>> feat_ids = [[-1, 0], [0, 1]] - >>> delay_ids = [[-1, 1], [2, 3]] - >>> time_shift_ids, poly_ids = fd2tp(feat_ids, delay_ids) - >>> print(time_shift_ids) - [[0 1] - [0 2] - [1 3]] - >>> print(poly_ids) - [[0 1] - [2 3]] - """ - _feat_ids, _delay_ids = _validate_feat_delay_ids(feat_ids, delay_ids) - featd = np.c_[_feat_ids.flatten(), _delay_ids.flatten()] - # Ensure featd has at least one [-1, -1] - time_shift_ids = np.unique(np.r_[[[-1, -1]], featd], axis=0) - poly_ids = np.array( - [np.where((time_shift_ids == row).all(axis=1))[0][0] for row in featd] - ).reshape(_feat_ids.shape) - time_shift_ids = time_shift_ids[time_shift_ids[:, 0] != -1] - return time_shift_ids, poly_ids - - -@validate_params( - { - "time_shift_ids": ["array-like"], - "poly_ids": ["array-like"], - }, - prefer_skip_nested_validation=True, -) -def tp2fd(time_shift_ids, poly_ids): - """ - Convert time_shift_ids and poly_ids to feat_ids and delay_ids. - The polynomial terms, e.g., x0(k-1)^2, x0(k-2)x1(k-3), can be - represented by two ways: - - #. feat_ids and delay_ids, e.g., [[0, 0], [0, 1]] and [[1, 1], [2, 3]] - - #. time_shift_ids and poly_ids, e.g., [[0, 1], [0, 2], [1, 3]] and [[1, 1], [2, 3]] - - For feat_ids, [0, 0] and [0, 1] represent x0*x0 and x0*x1, while - for delay_ids, [1, 1] and [2, 3] represent the delays of features in feat_ids. - - For time_shift_ids, [0, 1], [0, 2], and [1, 3] represents x0(k-1), x0(k-2), - and x1(k-3), respectively. 
For poly_ids, [1, 1] and [2, 3] represent the first - variable multiplying the first variable given by time_shift_ids, i.e., - x0(k-1)*x0(k-1), and the second variable multiplying the third variable, i.e., - x0(k-1)*x1(k-3). - - Parameters - ---------- - time_shift_ids : array-like of shape (n_variables, 2) - The unique id numbers of time shift variables, which are - (feature_idx, delay). - - poly_ids : array-like of shape (n_polys, degree) - The unique id numbers of polynomial terms, excluding the intercept. - The id 0 stands for the constant 1. - The id 1 to n are the index+1 of time_shift_ids. - - Returns - ------- - feat_ids : array-like of shape (n_terms, degree), default=None - The unique id numbers of features to form polynomial terms. - The id -1 stands for the constant 1. - The id 0 to n are the index of features. - - delay_ids : array-like of shape (n_terms, degree), default=None - The delays of each feature in polynomial terms. - The id -1 stands for empty. - The id 0 stands for 0 delay. - The positive integer id k stands for k-th delay. - - Examples - -------- - >>> from fastcan.narx import tp2fd - >>> # Encode x0(k-1), x0(k-2)x1(k-3) - >>> time_shift_ids = [[0, 1], [0, 2], [1, 3]] - >>> poly_ids = [[0, 1], [2, 3]] - >>> feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids) - >>> print(feat_ids) - [[-1 0] - [ 0 1]] - >>> print(delay_ids) - [[-1 1] - [ 2 3]] - """ - _time_shift_ids, _poly_ids = _valiate_time_shift_poly_ids( - time_shift_ids, - poly_ids, - ) - feat_ids = np.full_like(_poly_ids, -1, dtype=int) - delay_ids = np.full_like(_poly_ids, -1, dtype=int) - for i, poly_id in enumerate(_poly_ids): - for j, variable_id in enumerate(poly_id): - if variable_id != 0: - feat_ids[i, j] = _time_shift_ids[variable_id - 1, 0] - delay_ids[i, j] = _time_shift_ids[variable_id - 1, 1] - feat_ids = feat_ids - delay_ids = delay_ids - return feat_ids, delay_ids - - -class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator): - """The Nonlinear Autoregressive eXogenous (NARX) model class. - For example, a (polynomial) NARX model is like - y(k) = y(k-1)*u(k-1) + u(k-1)^2 + u(k-2) + 1.5 - where y(k) is the system output at the k-th time step, - u(k) is the system input at the k-th time step, - u and y is called features, - u(k-1) is called a (time shift) variable, - u(k-1)^2 is called a (polynomial) term, and - 1.5 is called an intercept. - - Parameters - ---------- - feat_ids : array-like of shape (n_terms, degree), default=None - The unique id numbers of features to form polynomial terms. - The id -1 stands for the constant 1. - The id 0 to `n_features_in_`-1 are the input features. - The id `n_features_in_` to `n_features_in_ + n_outputs_`-1 are - the output features. - E.g., for a 2-input 1-output system, the feat_ids [[-1, 0], [-1, 1], [0, 2]] - may represent the polynomial terms 1*u(k-1, 0), 1*u(k, 1), - and u(k-1, 0)*y(k-2, 0). - - delay_ids : array-like of shape (n_terms, degree), default=None - The delays of each feature in polynomial terms. - The id -1 stands for empty. - The id 0 stands for 0 delay. - The positive integer id k stands for k-th delay. - E.g., for the polynomial terms 1*u(k-1, 0), 1*u(k, 1), - and u(k-1, 0)*y(k-2, 0), the delay_ids [[-1, 1], [-1, 0], [1, 2]]. - - output_ids : array-like of shape (n_polys,), default=None - The id numbers indicate which output the polynomial term belongs to. - It is useful in multi-output case. - - fit_intercept : bool, default=True - Whether to fit the intercept. If set to False, intercept will be zeros. 
- - Attributes - ---------- - coef_ : array of shape (`n_features_in_`,) - Estimated coefficients for the linear regression problem. - - intercept_ : array of shape (`n_outputs_`,) - Independent term in the linear model. - - n_features_in_ : int - Number of features seen during :term:`fit`. - - n_outputs_ : int - Number of outputs seen during :term:`fit`. - - feature_names_in_ : ndarray of shape (`n_features_in_`,) - Names of features seen during :term:`fit`. Defined only when `X` - has feature names that are all strings. - - max_delay_ : int - The maximum time delay of the time shift variables. - - feat_ids_ : array-like of shape (n_terms, degree) - The unique id numbers of features to form polynomial terms. - The id -1 stands for the constant 1. - - delay_ids_ : array-like of shape (n_terms, degree) - The delays of each feature in polynomial terms. - The id -1 stands for empty. - - References - ---------- - * Billings, Stephen A. (2013). - Nonlinear System Identification: Narmax Methods in the Time, Frequency, - and Spatio-Temporal Domains. - - Examples - -------- - >>> import numpy as np - >>> from fastcan.narx import NARX, print_narx - >>> rng = np.random.default_rng(12345) - >>> n_samples = 1000 - >>> max_delay = 3 - >>> e = rng.normal(0, 0.1, n_samples) - >>> u = rng.uniform(0, 1, n_samples+max_delay) # input - >>> y = np.zeros(n_samples+max_delay) # output - >>> for i in range(max_delay, n_samples+max_delay): - ... y[i] = 0.5*y[i-1] + 0.7*u[i-2] + 1.5*u[i-1]*u[i-3] + 1 - >>> y = y[max_delay:]+e - >>> X = u[max_delay:].reshape(-1, 1) - >>> feat_ids = [[-1, 1], # 1*y - ... [-1, 0], # 1*u - ... [0, 0]] # u^2 - >>> delay_ids = [[-1, 1], # 1*y(k-1) - ... [-1, 2], # 1*u(k-2) - ... [1, 3]] # u(k-1)*u(k-3) - >>> narx = NARX(feat_ids=feat_ids, - ... delay_ids=delay_ids).fit(X, y, coef_init="one_step_ahead") - >>> print_narx(narx) - | yid | Term | Coef | - ======================================= - | 0 | Intercept | 1.008 | - | 0 | y_hat[k-1,0] | 0.498 | - | 0 | X[k-2,0] | 0.701 | - | 0 | X[k-1,0]*X[k-3,0] | 1.496 | - """ - - _parameter_constraints: dict = { - "feat_ids": [None, "array-like"], - "delay_ids": [None, "array-like"], - "output_ids": [None, "array-like"], - "fit_intercept": ["boolean"], - } - - def __init__( - self, - *, # keyword call only - feat_ids=None, - delay_ids=None, - output_ids=None, - fit_intercept=True, - ): - self.feat_ids = feat_ids - self.delay_ids = delay_ids - self.output_ids = output_ids - self.fit_intercept = fit_intercept - - @validate_params( - { - "coef_init": [None, StrOptions({"one_step_ahead"}), "array-like"], - "sample_weight": ["array-like", None], - }, - prefer_skip_nested_validation=True, - ) - def fit(self, X, y, sample_weight=None, coef_init=None, **params): - """ - Fit narx model. - - Parameters - ---------- - X : {array-like, sparse matrix} of shape (n_samples, `n_features_in_`) or None - Training data. - - y : array-like of shape (n_samples,) or (n_samples, `n_outputs_`) - Target values. Will be cast to X's dtype if necessary. - - sample_weight : array-like of shape (n_samples,), default=None - Individual weights for each sample. - - coef_init : array-like of shape (n_terms,), default=None - The initial values of coefficients and intercept for optimization. - When `coef_init` is None, the model will be a One-Step-Ahead NARX. - When `coef_init` is `one_step_ahead`, the model will be a Multi-Step-Ahead - NARX whose coefficients and intercept are initialized by the a - One-Step-Ahead NARX. 
- When `coef_init` is an array, the model will be a Multi-Step-Ahead - NARX whose coefficients and intercept are initialized by the array. - - .. note:: - When coef_init is `one_step_ahead`, the model will be trained as a - Multi-Step-Ahead NARX, rather than a One-Step-Ahead NARX. - - **params : dict - Keyword arguments passed to - `scipy.optimize.least_squares`. - - Returns - ------- - self : object - Fitted Estimator. - """ - check_X_params = dict( - dtype=float, order="C", ensure_all_finite="allow-nan", ensure_min_features=0 - ) - check_y_params = dict( - ensure_2d=False, dtype=float, order="C", ensure_all_finite="allow-nan" - ) - X, y = validate_data( - self, X, y, validate_separately=(check_X_params, check_y_params) - ) - check_consistent_length(X, y) - sample_weight = _check_sample_weight( - sample_weight, X, dtype=X.dtype, ensure_non_negative=True - ) - if y.ndim == 1: - y = y.reshape(-1, 1) - self.n_outputs_ = y.shape[1] - n_samples, n_features = X.shape - - if self.feat_ids is None: - if n_features == 0: - feat_ids_ = make_poly_ids(self.n_outputs_, 1) - 1 - else: - feat_ids_ = make_poly_ids(n_features, 1) - 1 - else: - feat_ids_ = self.feat_ids - - if self.delay_ids is None: - delay_ids_ = np.copy(feat_ids_) - delay_ids_[(feat_ids_ > -1) & (feat_ids_ < n_features)] = 0 - delay_ids_[(feat_ids_ >= n_features)] = 1 - else: - delay_ids_ = self.delay_ids - - # Validate feat_ids and delay_ids - self.feat_ids_, self.delay_ids_ = _validate_feat_delay_ids( - feat_ids_, - delay_ids_, - n_samples=n_samples, - n_features=n_features, - n_outputs=self.n_outputs_, - ) - - n_terms = self.feat_ids_.shape[0] - # Validate output_ids - if self.output_ids is None: - self.output_ids_ = np.zeros(n_terms, dtype=np.int32) - else: - self.output_ids_ = column_or_1d( - self.output_ids, - dtype=np.int32, - warn=True, - ) - if len(self.output_ids_) != n_terms: - raise ValueError( - "The length of output_ids should be equal to " - f"the number of polynomial terms, {n_terms}, " - f"but got {len(self.output_ids_)}." - ) - if (self.output_ids_.min() < 0) or ( - self.output_ids_.max() >= self.n_outputs_ - ): - raise ValueError( - "The element x of output_ids should " - f"satisfy 0 <= x < {self.n_outputs_}." - ) - # Check if self.output_ids_ contains all values from 0 to n_outputs-1 - if set(self.output_ids_) != set(range(self.n_outputs_)): - warnings.warn( - f"output_ids got {self.output_ids_}, which does not " - f"contain all values from 0 to {self.n_outputs_ - 1}." 
- "The prediction for the missing outputs will be a constant" - "(i.e., intercept).", - UserWarning, - ) - - self.max_delay_ = self.delay_ids_.max() - - if isinstance(coef_init, (type(None), str)): - # fit a one-step-ahead NARX model - time_shift_ids, poly_ids = fd2tp(self.feat_ids_, self.delay_ids_) - xy_hstack = np.c_[X, y] - osa_narx = LinearRegression(fit_intercept=self.fit_intercept) - time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids) - poly_terms = make_poly_features(time_shift_vars, poly_ids) - # Remove missing values - poly_terms_masked, y_masked, sample_weight_masked = mask_missing_values( - poly_terms, y, sample_weight - ) - coef = np.zeros(n_terms, dtype=float) - intercept = np.zeros(self.n_outputs_, dtype=float) - for i in range(self.n_outputs_): - output_i_mask = self.output_ids_ == i - if np.sum(output_i_mask) == 0: - if self.fit_intercept: - intercept[i] = np.mean(y_masked[:, i]) - else: - intercept[i] = 0.0 - continue - osa_narx.fit( - poly_terms_masked[:, output_i_mask], - y_masked[:, i], - sample_weight_masked, - ) - coef[output_i_mask] = osa_narx.coef_ - intercept[i] = osa_narx.intercept_ - if coef_init is None: - self.coef_ = coef - self.intercept_ = intercept - return self - - if self.fit_intercept: - coef_init = np.r_[coef, intercept] - else: - coef_init = coef - else: - coef_init = check_array( - coef_init, - ensure_2d=False, - dtype=float, - ) - if self.fit_intercept: - n_coef_intercept = n_terms + self.n_outputs_ - else: - n_coef_intercept = n_terms - if coef_init.shape[0] != n_coef_intercept: - raise ValueError( - "`coef_init` should have the shape of " - f"({(n_coef_intercept,)}), but got {coef_init.shape}." - ) - - grad_yyd_ids, grad_coef_ids, grad_feat_ids, grad_delay_ids = NARX._get_cfd_ids( - self.feat_ids_, self.delay_ids_, self.output_ids_, X.shape[1] - ) - sample_weight_sqrt = np.sqrt(sample_weight).reshape(-1, 1) - res = least_squares( - NARX._loss, - x0=coef_init, - jac=NARX._grad, - args=( - _predict_step, - X, - y, - self.feat_ids_, - self.delay_ids_, - self.output_ids_, - self.fit_intercept, - sample_weight_sqrt, - grad_yyd_ids, - grad_coef_ids, - grad_feat_ids, - grad_delay_ids, - ), - **params, - ) - if self.fit_intercept: - self.coef_ = res.x[: -self.n_outputs_] - self.intercept_ = res.x[-self.n_outputs_ :] - else: - self.coef_ = res.x - self.intercept_ = np.zeros(self.n_outputs_, dtype=float) - return self - - @staticmethod - def _predict( - expression, - X, - y_ref, - coef, - intercept, - feat_ids, - delay_ids, - output_ids, - ): - n_samples = X.shape[0] - n_ref, n_outputs = y_ref.shape - max_delay = np.max(delay_ids) - y_hat = np.zeros((n_samples, n_outputs), dtype=float) - at_init = True - init_k = 0 - for k in range(n_samples): - if not np.all(np.isfinite(X[k])): - at_init = True - init_k = k + 1 - y_hat[k] = np.nan - continue - if k - init_k == max_delay: - at_init = False - - if at_init: - y_hat[k] = y_ref[k % n_ref] - else: - y_hat[k] = intercept - expression( - X, - y_hat, - y_hat[k], - coef, - feat_ids, - delay_ids, - output_ids, - k, - ) - if np.any(y_hat[k] > 1e20): - y_hat[k:] = 1e20 - return y_hat - return y_hat - - @staticmethod - def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in): - """ - Get ids of CFD (Coef, Feature, and Delay) matrix to update dyn(k)/dx. - Maps coefficients to their corresponding features and delays. 
- """ - - # Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay - # axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx] - # axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx] - # axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx] - grad_yyd_ids = [] - grad_coef_ids = [] - grad_feat_ids = [] - grad_delay_ids = [] - - for coef_id, (term_feat_ids, term_delay_ids) in enumerate( - zip(feat_ids, delay_ids) - ): - row_y_id = output_ids[coef_id] # y(k, id) - for var_id, (feat_id, delay_id) in enumerate( - zip(term_feat_ids, term_delay_ids) - ): - if feat_id >= n_features_in and delay_id > 0: - col_y_id = feat_id - n_features_in # y(k-1, id) - grad_yyd_ids.append([row_y_id, col_y_id, delay_id - 1]) - grad_coef_ids.append(coef_id) - grad_feat_ids.append(np.delete(term_feat_ids, var_id)) - grad_delay_ids.append(np.delete(term_delay_ids, var_id)) - - return ( - np.array(grad_yyd_ids, dtype=np.int32), - np.array(grad_coef_ids, dtype=np.int32), - np.array(grad_feat_ids, dtype=np.int32), - np.array(grad_delay_ids, dtype=np.int32), - ) - - @staticmethod - def _update_dydx( - X, - y_hat, - coef, - feat_ids, - delay_ids, - output_ids, - fit_intercept, - grad_yyd_ids, - grad_coef_ids, - grad_feat_ids, - grad_delay_ids, - ): - """ - Computation of the Jacobian matrix dydx. - - Returns - ------- - dydx : ndarray of shape (n_samples, n_outputs, n_x) - Jacobian matrix of the outputs with respect to coefficients and intercepts. - """ - n_samples, n_y = y_hat.shape - max_delay = np.max(delay_ids) - n_coefs = feat_ids.shape[0] - if fit_intercept: - n_x = n_coefs + n_y # Total number of coefficients and intercepts - y_ids = np.r_[output_ids, np.arange(n_y)] - else: - n_x = n_coefs - y_ids = output_ids - - x_ids = np.arange(n_x) - - dydx = np.zeros((n_samples, n_y, n_x), dtype=float) - at_init = True - init_k = 0 - for k in range(n_samples): - if not np.all(np.isfinite(X[k])): - at_init = True - init_k = k + 1 - continue - if k - init_k == max_delay: - at_init = False - - if at_init: - continue - # Compute terms for time step k - terms = np.ones(n_x, dtype=float) - _update_terms( - X, - y_hat, - terms, - feat_ids, - delay_ids, - k, - ) - - # Update constant terms of Jacobian - dydx[k, y_ids, x_ids] = terms - - # Update dynamic terms of Jacobian - if max_delay > 0 and grad_yyd_ids.size > 0: - cfd = np.zeros((n_y, n_y, max_delay), dtype=float) - _update_cfd( - X, - y_hat, - cfd, - coef, - grad_yyd_ids, - grad_coef_ids, - grad_feat_ids, - grad_delay_ids, - k, - ) - for d in range(max_delay): - dydx[k] += cfd[:, :, d] @ dydx[k - d - 1] - - # Handle divergence - if np.any(dydx[k] > 1e20): - dydx[k:] = 1e20 - break - - return dydx - - @staticmethod - def _loss( - coef_intercept, - expression, - X, - y, - feat_ids, - delay_ids, - output_ids, - fit_intercept, - sample_weight_sqrt, - *args, - ): - # Sum of squared errors - n_outputs = y.shape[1] - if fit_intercept: - coef = coef_intercept[:-n_outputs] - intercept = coef_intercept[-n_outputs:] - else: - coef = coef_intercept - intercept = np.zeros(n_outputs, dtype=float) - - y_hat = NARX._predict( - expression, - X, - y, - coef, - intercept, - feat_ids, - delay_ids, - output_ids, - ) - - y_masked, y_hat_masked, sample_weight_sqrt_masked = mask_missing_values( - y, y_hat, sample_weight_sqrt - ) - - return (sample_weight_sqrt_masked * (y_hat_masked - y_masked)).sum(axis=1) - - @staticmethod - def _grad( - coef_intercept, - expression, - X, - y, - feat_ids, - delay_ids, - output_ids, - fit_intercept, - sample_weight_sqrt, - grad_yyd_ids, - 
grad_coef_ids, - grad_feat_ids, - grad_delay_ids, - ): - # Sum of squared errors - n_outputs = y.shape[1] - if fit_intercept: - coef = coef_intercept[:-n_outputs] - intercept = coef_intercept[-n_outputs:] - else: - coef = coef_intercept - intercept = np.zeros(n_outputs, dtype=float) - - y_hat = NARX._predict( - expression, - X, - y, - coef, - intercept, - feat_ids, - delay_ids, - output_ids, - ) - dydx = NARX._update_dydx( - X, - y_hat, - coef, - feat_ids, - delay_ids, - output_ids, - fit_intercept, - grad_yyd_ids, - grad_coef_ids, - grad_feat_ids, - grad_delay_ids, - ) - - mask_valid = mask_missing_values(y, y_hat, sample_weight_sqrt, return_mask=True) - - sample_weight_sqrt_masked = sample_weight_sqrt[mask_valid] - dydx_masked = dydx[mask_valid] - - return dydx_masked.sum(axis=1) * sample_weight_sqrt_masked - - @validate_params( - { - "y_init": [None, "array-like"], - }, - prefer_skip_nested_validation=True, - ) - def predict(self, X, y_init=None): - """ - Predict using the linear model. - - Parameters - ---------- - X : array-like of shape (n_samples, `n_features_in_`) - Samples. - - y_init : array-like of shape (n_init, `n_outputs_`), default=None - The initial values for the prediction of y. - It should at least have one sample. - - Returns - ------- - y_hat : array-like of shape (n_samples,) - Returns predicted values. - """ - check_is_fitted(self) - - X = validate_data( - self, - X, - dtype=float, - order="C", - reset=False, - ensure_all_finite="allow-nan", - ensure_min_features=0, - ) - if y_init is None: - y_init = np.zeros((self.max_delay_, self.n_outputs_)) - else: - y_init = check_array( - y_init, - ensure_2d=False, - dtype=float, - ensure_min_samples=0, - ensure_all_finite="allow-nan", - ) - if y_init.ndim == 1: - y_init = y_init.reshape(-1, 1) - if y_init.shape[1] != self.n_outputs_: - raise ValueError( - f"`y_init` should have {self.n_outputs_} outputs " - f"but got {y_init.shape[1]}." - ) - y_hat = NARX._predict( - _predict_step, - X, - y_init, - self.coef_, - self.intercept_, - self.feat_ids_, - self.delay_ids_, - self.output_ids_, - ) - if self.n_outputs_ == 1: - y_hat = y_hat.flatten() - return y_hat - - def __sklearn_tags__(self): - tags = super().__sklearn_tags__() - tags.input_tags.allow_nan = True - return tags - - -@validate_params( - { - "narx": [NARX], - "term_space": [Interval(Integral, 1, None, closed="left")], - "coef_space": [Interval(Integral, 1, None, closed="left")], - "float_precision": [Interval(Integral, 0, None, closed="left")], - }, - prefer_skip_nested_validation=True, -) -def print_narx( - narx, - term_space=20, - coef_space=10, - float_precision=3, -): - """Print a NARX model as a Table which contains y idx, Term, and Coef. - y idx is used to indicate which output (assuming it is a multi-output case) - the term belongs to. - - Parameters - ---------- - narx : NARX model - The NARX model to be printed. - - term_space: int, default=20 - The space for the column of Term. - - coef_space: int, default=10 - The space for the column of Coef. - - float_precision: int, default=3 - The number of places after the decimal for Coef. - - Returns - ------- - table : str - The table of output index, terms, and coefficients of the NARX model. 
- - Examples - -------- - >>> from sklearn.datasets import load_diabetes - >>> from fastcan.narx import print_narx, NARX - >>> X, y = load_diabetes(return_X_y=True) - >>> print_narx(NARX().fit(X, y), term_space=10, coef_space=5, float_precision=0) - | yid | Term |Coef | - ======================== - | 0 |Intercept | 152 | - | 0 | X[k,0] | -10 | - | 0 | X[k,1] |-240 | - | 0 | X[k,2] | 520 | - | 0 | X[k,3] | 324 | - | 0 | X[k,4] |-792 | - | 0 | X[k,5] | 477 | - | 0 | X[k,6] | 101 | - | 0 | X[k,7] | 177 | - | 0 | X[k,8] | 751 | - | 0 | X[k,9] | 68 | - """ - check_is_fitted(narx) - - def _get_term_str(term_feat_ids, term_delay_ids): - term_str = "" - for _, (feat_id, delay_id) in enumerate(zip(term_feat_ids, term_delay_ids)): - if -1 < feat_id < narx.n_features_in_: - if delay_id == 0: - term_str += f"*X[k,{feat_id}]" - else: - term_str += f"*X[k-{delay_id},{feat_id}]" - elif feat_id >= narx.n_features_in_: - term_str += f"*y_hat[k-{delay_id},{feat_id - narx.n_features_in_}]" - return term_str[1:] - - yid_space = 5 - print( - f"|{'yid':^{yid_space}}" - + f"|{'Term':^{term_space}}" - + f"|{'Coef':^{coef_space}}|" - ) - print("=" * (yid_space + term_space + coef_space + 4)) - for i in range(narx.n_outputs_): - print( - f"|{i:^{yid_space}}|" - + f"{'Intercept':^{term_space}}|" - + f"{narx.intercept_[i]:^{coef_space}.{float_precision}f}|" - ) - for i, term_id in enumerate(zip(narx.feat_ids_, narx.delay_ids_)): - print( - f"|{narx.output_ids_[i]:^{yid_space}}|" - + f"{_get_term_str(*term_id):^{term_space}}|" - + f"{narx.coef_[i]:^{coef_space}.{float_precision}f}|" - ) - - -@validate_params( - { - "X": ["array-like"], - "y": ["array-like"], - "n_terms_to_select": [ - Interval(Integral, 1, None, closed="left"), - "array-like", - ], - "max_delay": [ - Interval(Integral, 0, None, closed="left"), - ], - "poly_degree": [ - Interval(Integral, 1, None, closed="left"), - ], - "fit_intercept": ["boolean"], - "include_zero_delay": [None, "array-like"], - "static_indices": [None, "array-like"], - "refine_verbose": ["verbose"], - "refine_drop": [ - None, - Interval(Integral, 1, None, closed="left"), - StrOptions({"all"}), - ], - "refine_max_iter": [ - None, - Interval(Integral, 1, None, closed="left"), - ], - }, - prefer_skip_nested_validation=True, -) -def make_narx( - X, - y, - n_terms_to_select, - max_delay=1, - poly_degree=1, - *, - fit_intercept=True, - include_zero_delay=None, - static_indices=None, - refine_verbose=1, - refine_drop=None, - refine_max_iter=None, - **params, -): - """Find `time_shift_ids`, `poly_ids`, `output_ids` for a NARX model. - - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - Feature matrix. - - y : array-like of shape (n_samples,) or (n_samples, n_outputs) - Target vector or matrix. - - n_terms_to_select : int or array-like of shape (n_outputs,) - The parameter is the absolute number of polynomial terms to select for - each output. If `n_terms_to_select` is an integer, it is the - same for all outputs. - - max_delay : int, default=1 - The maximum delay of time shift features. - - poly_degree : int, default=1 - The maximum degree of polynomial features. - - fit_intercept : bool, default=True - Whether to fit the intercept. If set to False, intercept will be zeros. - - include_zero_delay : {None, array-like} of shape (n_features,) default=None - Whether to include the original (zero-delay) features. - - static_indices : {None, array-like} of shape (n_static_features,) default=None - The indices of static features without time delay. - - .. 
note:: - If the corresponding include_zero_delay of the static features is False, the - static feature will be excluded from candidate features. - - refine_verbose : int, default=1 - The verbosity level of refine. - - refine_drop : int or "all", default=None - The number of the selected features dropped for the consequencing - reselection. If `drop` is None, no refining will be performed. - - refine_max_iter : int, default=None - The maximum number of valid iterations in the refining process. - - **params : dict - Keyword arguments passed to - `fastcan.FastCan`. - - Returns - ------- - narx : NARX - NARX instance. - - Examples - -------- - >>> import numpy as np - >>> from sklearn.metrics import mean_squared_error - >>> from fastcan.narx import make_narx, print_narx - >>> rng = np.random.default_rng(12345) - >>> n_samples = 1000 - >>> max_delay = 3 - >>> e = rng.normal(0, 0.1, n_samples) - >>> u0 = rng.uniform(0, 1, n_samples+max_delay) # input - >>> u1 = rng.normal(0, 0.1, n_samples) # static input (i.e. no time shift) - >>> y = np.zeros(n_samples+max_delay) # output - >>> for i in range(max_delay, n_samples+max_delay): - ... y[i] = (0.5*y[i-1] + 0.3*u0[i]**2 + 2*u0[i-1]*u0[i-3] + - ... 1.5*u0[i-2]*u1[i-max_delay] + 1) - >>> y = y[max_delay:]+e - >>> X = np.c_[u0[max_delay:], u1] - >>> narx = make_narx(X=X, - ... y=y, - ... n_terms_to_select=4, - ... max_delay=3, - ... poly_degree=2, - ... static_indices=[1], - ... eta=True, - ... verbose=0, - ... refine_verbose=0, - ... refine_drop=1) - >>> print(f"{mean_squared_error(y, narx.fit(X, y).predict(X)):.4f}") - 0.0289 - >>> print_narx(narx) - | yid | Term | Coef | - ======================================= - | 0 | Intercept | 1.050 | - | 0 | y_hat[k-1,0] | 0.484 | - | 0 | X[k,0]*X[k,0] | 0.306 | - | 0 | X[k-1,0]*X[k-3,0] | 2.000 | - | 0 | X[k-2,0]*X[k,1] | 1.528 | - """ - X = check_array( - X, - dtype=float, - ensure_2d=True, - ensure_all_finite="allow-nan", - ensure_min_features=0, - ) - y = check_array(y, dtype=float, ensure_2d=False, ensure_all_finite="allow-nan") - check_consistent_length(X, y) - if y.ndim == 1: - y = y.reshape(-1, 1) - n_outputs = y.shape[1] - if isinstance(n_terms_to_select, Integral): - n_terms_to_select = np.full(n_outputs, n_terms_to_select, dtype=int) - else: - n_terms_to_select = column_or_1d(n_terms_to_select, dtype=int, warn=True) - if len(n_terms_to_select) != n_outputs: - raise ValueError( - "The length of `n_terms_to_select` should be equal to " - f"the number of outputs, {n_outputs}, but got " - f"{len(n_terms_to_select)}." 
- ) - - xy_hstack = np.c_[X, y] - n_features = X.shape[1] - - if include_zero_delay is None: - _include_zero_delay = [True] * n_features + [False] * n_outputs - else: - _include_zero_delay = include_zero_delay + [False] * n_outputs - - time_shift_ids_all = make_time_shift_ids( - n_features=xy_hstack.shape[1], - max_delay=max_delay, - include_zero_delay=_include_zero_delay, - ) - - time_shift_ids_all = np.delete( - time_shift_ids_all, - ( - np.isin(time_shift_ids_all[:, 0], static_indices) - & (time_shift_ids_all[:, 1] > 0) - ), - 0, - ) - time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids_all) - - poly_ids_all = make_poly_ids( - time_shift_ids_all.shape[0], - poly_degree, - ) - poly_terms = make_poly_features(time_shift_vars, poly_ids_all) - - # Remove missing values - poly_terms_masked, y_masked = mask_missing_values(poly_terms, y) - - selected_poly_ids = [] - for i in range(n_outputs): - csf = FastCan( - n_terms_to_select[i], - **params, - ).fit(poly_terms_masked, y_masked[:, i]) - if refine_drop is not None: - indices, _ = refine( - csf, drop=refine_drop, max_iter=refine_max_iter, verbose=refine_verbose - ) - support = np.zeros(shape=poly_ids_all.shape[0], dtype=bool) - support[indices] = True - else: - support = csf.get_support() - selected_poly_ids.append(poly_ids_all[support]) - - selected_poly_ids = np.vstack(selected_poly_ids) - - time_shift_ids = time_shift_ids_all[ - np.unique(selected_poly_ids[selected_poly_ids.nonzero()]) - 1, : - ] - poly_ids = ( - rankdata( - np.r_[[[0] * poly_degree], selected_poly_ids], - method="dense", - ).reshape(-1, poly_degree)[1:] - - 1 - ) - - output_ids = [i for i in range(n_outputs) for _ in range(n_terms_to_select[i])] - feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids) - - return NARX( - feat_ids=feat_ids, - delay_ids=delay_ids, - output_ids=output_ids, - fit_intercept=fit_intercept, - ) diff --git a/fastcan/narx/__init__.py b/fastcan/narx/__init__.py new file mode 100644 index 0000000..9149e6d --- /dev/null +++ b/fastcan/narx/__init__.py @@ -0,0 +1,32 @@ +""" +Nonlinear autoregressive exogenous (NARX) model for system identification. +""" + +# Authors: The fastcan developers +# SPDX-License-Identifier: MIT + +from ._base import NARX +from ._feature import ( + fd2tp, + make_poly_features, + make_poly_ids, + make_time_shift_features, + make_time_shift_ids, + tp2fd, +) +from ._utils import ( + make_narx, + print_narx, +) + +__all__ = [ + "NARX", + "fd2tp", + "make_narx", + "make_poly_features", + "make_poly_ids", + "make_time_shift_features", + "make_time_shift_ids", + "print_narx", + "tp2fd", +] diff --git a/fastcan/narx/_base.py b/fastcan/narx/_base.py new file mode 100644 index 0000000..4df4001 --- /dev/null +++ b/fastcan/narx/_base.py @@ -0,0 +1,698 @@ +""" +NARX model. 
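# --- Editor's note: illustrative sketch, not part of the patch. ---
# The new fastcan/narx/__init__.py above re-exports the same public names the
# old single-file fastcan/narx.py exposed (see its __all__), so downstream
# imports are unchanged by the split into private submodules such as
# _base.py and _feature.py. Assuming the built package is installed:
from fastcan.narx import NARX, make_narx, print_narx
from fastcan.narx import make_poly_ids, make_time_shift_features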
+""" + +# Authors: The fastcan developers +# SPDX-License-Identifier: MIT + +import warnings + +import numpy as np +from scipy.optimize import least_squares +from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin +from sklearn.linear_model import LinearRegression +from sklearn.utils import check_array, check_consistent_length, column_or_1d +from sklearn.utils._param_validation import StrOptions, validate_params +from sklearn.utils.validation import ( + _check_sample_weight, + check_is_fitted, + validate_data, +) + +from ..utils import mask_missing_values +from ._feature import ( + _validate_feat_delay_ids, + fd2tp, + make_poly_features, + make_poly_ids, + make_time_shift_features, +) +from ._narx_fast import ( # type: ignore[attr-defined] + _predict_step, + _update_cfd, + _update_terms, +) + + +class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator): + """The Nonlinear Autoregressive eXogenous (NARX) model class. + For example, a (polynomial) NARX model is like + y(k) = y(k-1)*u(k-1) + u(k-1)^2 + u(k-2) + 1.5 + where y(k) is the system output at the k-th time step, + u(k) is the system input at the k-th time step, + u and y is called features, + u(k-1) is called a (time shift) variable, + u(k-1)^2 is called a (polynomial) term, and + 1.5 is called an intercept. + + Parameters + ---------- + feat_ids : array-like of shape (n_terms, degree), default=None + The unique id numbers of features to form polynomial terms. + The id -1 stands for the constant 1. + The id 0 to `n_features_in_`-1 are the input features. + The id `n_features_in_` to `n_features_in_ + n_outputs_`-1 are + the output features. + E.g., for a 2-input 1-output system, the feat_ids [[-1, 0], [-1, 1], [0, 2]] + may represent the polynomial terms 1*u(k-1, 0), 1*u(k, 1), + and u(k-1, 0)*y(k-2, 0). + + delay_ids : array-like of shape (n_terms, degree), default=None + The delays of each feature in polynomial terms. + The id -1 stands for empty. + The id 0 stands for 0 delay. + The positive integer id k stands for k-th delay. + E.g., for the polynomial terms 1*u(k-1, 0), 1*u(k, 1), + and u(k-1, 0)*y(k-2, 0), the delay_ids [[-1, 1], [-1, 0], [1, 2]]. + + output_ids : array-like of shape (n_polys,), default=None + The id numbers indicate which output the polynomial term belongs to. + It is useful in multi-output case. + + fit_intercept : bool, default=True + Whether to fit the intercept. If set to False, intercept will be zeros. + + Attributes + ---------- + coef_ : array of shape (`n_features_in_`,) + Estimated coefficients for the linear regression problem. + + intercept_ : array of shape (`n_outputs_`,) + Independent term in the linear model. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + n_outputs_ : int + Number of outputs seen during :term:`fit`. + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + max_delay_ : int + The maximum time delay of the time shift variables. + + feat_ids_ : array-like of shape (n_terms, degree) + The unique id numbers of features to form polynomial terms. + The id -1 stands for the constant 1. + + delay_ids_ : array-like of shape (n_terms, degree) + The delays of each feature in polynomial terms. + The id -1 stands for empty. + + References + ---------- + * Billings, Stephen A. (2013). + Nonlinear System Identification: Narmax Methods in the Time, Frequency, + and Spatio-Temporal Domains. 
+ + Examples + -------- + >>> import numpy as np + >>> from fastcan.narx import NARX, print_narx + >>> rng = np.random.default_rng(12345) + >>> n_samples = 1000 + >>> max_delay = 3 + >>> e = rng.normal(0, 0.1, n_samples) + >>> u = rng.uniform(0, 1, n_samples+max_delay) # input + >>> y = np.zeros(n_samples+max_delay) # output + >>> for i in range(max_delay, n_samples+max_delay): + ... y[i] = 0.5*y[i-1] + 0.7*u[i-2] + 1.5*u[i-1]*u[i-3] + 1 + >>> y = y[max_delay:]+e + >>> X = u[max_delay:].reshape(-1, 1) + >>> feat_ids = [[-1, 1], # 1*y + ... [-1, 0], # 1*u + ... [0, 0]] # u^2 + >>> delay_ids = [[-1, 1], # 1*y(k-1) + ... [-1, 2], # 1*u(k-2) + ... [1, 3]] # u(k-1)*u(k-3) + >>> narx = NARX(feat_ids=feat_ids, + ... delay_ids=delay_ids).fit(X, y, coef_init="one_step_ahead") + >>> print_narx(narx) + | yid | Term | Coef | + ======================================= + | 0 | Intercept | 1.008 | + | 0 | y_hat[k-1,0] | 0.498 | + | 0 | X[k-2,0] | 0.701 | + | 0 | X[k-1,0]*X[k-3,0] | 1.496 | + """ + + _parameter_constraints: dict = { + "feat_ids": [None, "array-like"], + "delay_ids": [None, "array-like"], + "output_ids": [None, "array-like"], + "fit_intercept": ["boolean"], + } + + def __init__( + self, + *, # keyword call only + feat_ids=None, + delay_ids=None, + output_ids=None, + fit_intercept=True, + ): + self.feat_ids = feat_ids + self.delay_ids = delay_ids + self.output_ids = output_ids + self.fit_intercept = fit_intercept + + @validate_params( + { + "coef_init": [None, StrOptions({"one_step_ahead"}), "array-like"], + "sample_weight": ["array-like", None], + }, + prefer_skip_nested_validation=True, + ) + def fit(self, X, y, sample_weight=None, coef_init=None, **params): + """ + Fit narx model. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, `n_features_in_`) or None + Training data. + + y : array-like of shape (n_samples,) or (n_samples, `n_outputs_`) + Target values. Will be cast to X's dtype if necessary. + + sample_weight : array-like of shape (n_samples,), default=None + Individual weights for each sample. + + coef_init : array-like of shape (n_terms,), default=None + The initial values of coefficients and intercept for optimization. + When `coef_init` is None, the model will be a One-Step-Ahead NARX. + When `coef_init` is `one_step_ahead`, the model will be a Multi-Step-Ahead + NARX whose coefficients and intercept are initialized by the a + One-Step-Ahead NARX. + When `coef_init` is an array, the model will be a Multi-Step-Ahead + NARX whose coefficients and intercept are initialized by the array. + + .. note:: + When coef_init is `one_step_ahead`, the model will be trained as a + Multi-Step-Ahead NARX, rather than a One-Step-Ahead NARX. + + **params : dict + Keyword arguments passed to + `scipy.optimize.least_squares`. + + Returns + ------- + self : object + Fitted Estimator. 
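# --- Editor's note: illustrative sketch, not part of the patch. ---
# The three coef_init modes documented above, assuming narx is an unfitted
# single-output NARX with three terms and fit_intercept=True, and X, y are
# training data:
narx.fit(X, y)                              # one-step-ahead NARX
narx.fit(X, y, coef_init="one_step_ahead")  # multi-step-ahead, initialized
                                            # from a one-step-ahead fit
narx.fit(X, y, coef_init=[0.5, 0.7, 1.5, 1.0], max_nfev=50)
# the last call starts from explicit [coef..., intercept] values; extra
# keyword arguments such as max_nfev are forwarded to
# scipy.optimize.least_squares.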
+ """ + check_X_params = dict( + dtype=float, order="C", ensure_all_finite="allow-nan", ensure_min_features=0 + ) + check_y_params = dict( + ensure_2d=False, dtype=float, order="C", ensure_all_finite="allow-nan" + ) + X, y = validate_data( + self, X, y, validate_separately=(check_X_params, check_y_params) + ) + check_consistent_length(X, y) + sample_weight = _check_sample_weight( + sample_weight, X, dtype=X.dtype, ensure_non_negative=True + ) + if y.ndim == 1: + y = y.reshape(-1, 1) + self.n_outputs_ = y.shape[1] + n_samples, n_features = X.shape + + if self.feat_ids is None: + if n_features == 0: + feat_ids_ = make_poly_ids(self.n_outputs_, 1) - 1 + else: + feat_ids_ = make_poly_ids(n_features, 1) - 1 + else: + feat_ids_ = self.feat_ids + + if self.delay_ids is None: + delay_ids_ = np.copy(feat_ids_) + delay_ids_[(feat_ids_ > -1) & (feat_ids_ < n_features)] = 0 + delay_ids_[(feat_ids_ >= n_features)] = 1 + else: + delay_ids_ = self.delay_ids + + # Validate feat_ids and delay_ids + self.feat_ids_, self.delay_ids_ = _validate_feat_delay_ids( + feat_ids_, + delay_ids_, + n_samples=n_samples, + n_features=n_features, + n_outputs=self.n_outputs_, + ) + + n_terms = self.feat_ids_.shape[0] + # Validate output_ids + if self.output_ids is None: + self.output_ids_ = np.zeros(n_terms, dtype=np.int32) + else: + self.output_ids_ = column_or_1d( + self.output_ids, + dtype=np.int32, + warn=True, + ) + if len(self.output_ids_) != n_terms: + raise ValueError( + "The length of output_ids should be equal to " + f"the number of polynomial terms, {n_terms}, " + f"but got {len(self.output_ids_)}." + ) + if (self.output_ids_.min() < 0) or ( + self.output_ids_.max() >= self.n_outputs_ + ): + raise ValueError( + "The element x of output_ids should " + f"satisfy 0 <= x < {self.n_outputs_}." + ) + # Check if self.output_ids_ contains all values from 0 to n_outputs-1 + if set(self.output_ids_) != set(range(self.n_outputs_)): + warnings.warn( + f"output_ids got {self.output_ids_}, which does not " + f"contain all values from 0 to {self.n_outputs_ - 1}." 
+ "The prediction for the missing outputs will be a constant" + "(i.e., intercept).", + UserWarning, + ) + + self.max_delay_ = self.delay_ids_.max() + + if isinstance(coef_init, (type(None), str)): + # fit a one-step-ahead NARX model + time_shift_ids, poly_ids = fd2tp(self.feat_ids_, self.delay_ids_) + xy_hstack = np.c_[X, y] + osa_narx = LinearRegression(fit_intercept=self.fit_intercept) + time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids) + poly_terms = make_poly_features(time_shift_vars, poly_ids) + # Remove missing values + poly_terms_masked, y_masked, sample_weight_masked = mask_missing_values( + poly_terms, y, sample_weight + ) + coef = np.zeros(n_terms, dtype=float) + intercept = np.zeros(self.n_outputs_, dtype=float) + for i in range(self.n_outputs_): + output_i_mask = self.output_ids_ == i + if np.sum(output_i_mask) == 0: + if self.fit_intercept: + intercept[i] = np.mean(y_masked[:, i]) + else: + intercept[i] = 0.0 + continue + osa_narx.fit( + poly_terms_masked[:, output_i_mask], + y_masked[:, i], + sample_weight_masked, + ) + coef[output_i_mask] = osa_narx.coef_ + intercept[i] = osa_narx.intercept_ + if coef_init is None: + self.coef_ = coef + self.intercept_ = intercept + return self + + if self.fit_intercept: + coef_init = np.r_[coef, intercept] + else: + coef_init = coef + else: + coef_init = check_array( + coef_init, + ensure_2d=False, + dtype=float, + ) + if self.fit_intercept: + n_coef_intercept = n_terms + self.n_outputs_ + else: + n_coef_intercept = n_terms + if coef_init.shape[0] != n_coef_intercept: + raise ValueError( + "`coef_init` should have the shape of " + f"({(n_coef_intercept,)}), but got {coef_init.shape}." + ) + + grad_yyd_ids, grad_coef_ids, grad_feat_ids, grad_delay_ids = NARX._get_cfd_ids( + self.feat_ids_, self.delay_ids_, self.output_ids_, X.shape[1] + ) + sample_weight_sqrt = np.sqrt(sample_weight).reshape(-1, 1) + res = least_squares( + NARX._loss, + x0=coef_init, + jac=NARX._grad, + args=( + _predict_step, + X, + y, + self.feat_ids_, + self.delay_ids_, + self.output_ids_, + self.fit_intercept, + sample_weight_sqrt, + grad_yyd_ids, + grad_coef_ids, + grad_feat_ids, + grad_delay_ids, + ), + **params, + ) + if self.fit_intercept: + self.coef_ = res.x[: -self.n_outputs_] + self.intercept_ = res.x[-self.n_outputs_ :] + else: + self.coef_ = res.x + self.intercept_ = np.zeros(self.n_outputs_, dtype=float) + return self + + @staticmethod + def _predict( + expression, + X, + y_ref, + coef, + intercept, + feat_ids, + delay_ids, + output_ids, + ): + n_samples = X.shape[0] + n_ref, n_outputs = y_ref.shape + max_delay = np.max(delay_ids) + y_hat = np.zeros((n_samples, n_outputs), dtype=float) + at_init = True + init_k = 0 + for k in range(n_samples): + if not np.all(np.isfinite(X[k])): + at_init = True + init_k = k + 1 + y_hat[k] = np.nan + continue + if k - init_k == max_delay: + at_init = False + + if at_init: + y_hat[k] = y_ref[k % n_ref] + else: + y_hat[k] = intercept + expression( + X, + y_hat, + y_hat[k], + coef, + feat_ids, + delay_ids, + output_ids, + k, + ) + if np.any(y_hat[k] > 1e20): + y_hat[k:] = 1e20 + return y_hat + return y_hat + + @staticmethod + def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in): + """ + Get ids of CFD (Coef, Feature, and Delay) matrix to update dyn(k)/dx. + Maps coefficients to their corresponding features and delays. 
+ """ + + # Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay + # axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx] + # axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx] + # axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx] + grad_yyd_ids = [] + grad_coef_ids = [] + grad_feat_ids = [] + grad_delay_ids = [] + + for coef_id, (term_feat_ids, term_delay_ids) in enumerate( + zip(feat_ids, delay_ids) + ): + row_y_id = output_ids[coef_id] # y(k, id) + for var_id, (feat_id, delay_id) in enumerate( + zip(term_feat_ids, term_delay_ids) + ): + if feat_id >= n_features_in and delay_id > 0: + col_y_id = feat_id - n_features_in # y(k-1, id) + grad_yyd_ids.append([row_y_id, col_y_id, delay_id - 1]) + grad_coef_ids.append(coef_id) + grad_feat_ids.append(np.delete(term_feat_ids, var_id)) + grad_delay_ids.append(np.delete(term_delay_ids, var_id)) + + return ( + np.array(grad_yyd_ids, dtype=np.int32), + np.array(grad_coef_ids, dtype=np.int32), + np.array(grad_feat_ids, dtype=np.int32), + np.array(grad_delay_ids, dtype=np.int32), + ) + + @staticmethod + def _update_dydx( + X, + y_hat, + coef, + feat_ids, + delay_ids, + output_ids, + fit_intercept, + grad_yyd_ids, + grad_coef_ids, + grad_feat_ids, + grad_delay_ids, + ): + """ + Computation of the Jacobian matrix dydx. + + Returns + ------- + dydx : ndarray of shape (n_samples, n_outputs, n_x) + Jacobian matrix of the outputs with respect to coefficients and intercepts. + """ + n_samples, n_y = y_hat.shape + max_delay = np.max(delay_ids) + n_coefs = feat_ids.shape[0] + if fit_intercept: + n_x = n_coefs + n_y # Total number of coefficients and intercepts + y_ids = np.r_[output_ids, np.arange(n_y)] + else: + n_x = n_coefs + y_ids = output_ids + + x_ids = np.arange(n_x) + + dydx = np.zeros((n_samples, n_y, n_x), dtype=float) + at_init = True + init_k = 0 + for k in range(n_samples): + if not np.all(np.isfinite(X[k])): + at_init = True + init_k = k + 1 + continue + if k - init_k == max_delay: + at_init = False + + if at_init: + continue + # Compute terms for time step k + terms = np.ones(n_x, dtype=float) + _update_terms( + X, + y_hat, + terms, + feat_ids, + delay_ids, + k, + ) + + # Update constant terms of Jacobian + dydx[k, y_ids, x_ids] = terms + + # Update dynamic terms of Jacobian + if max_delay > 0 and grad_yyd_ids.size > 0: + cfd = np.zeros((n_y, n_y, max_delay), dtype=float) + _update_cfd( + X, + y_hat, + cfd, + coef, + grad_yyd_ids, + grad_coef_ids, + grad_feat_ids, + grad_delay_ids, + k, + ) + for d in range(max_delay): + dydx[k] += cfd[:, :, d] @ dydx[k - d - 1] + + # Handle divergence + if np.any(dydx[k] > 1e20): + dydx[k:] = 1e20 + break + + return dydx + + @staticmethod + def _loss( + coef_intercept, + expression, + X, + y, + feat_ids, + delay_ids, + output_ids, + fit_intercept, + sample_weight_sqrt, + *args, + ): + # Sum of squared errors + n_outputs = y.shape[1] + if fit_intercept: + coef = coef_intercept[:-n_outputs] + intercept = coef_intercept[-n_outputs:] + else: + coef = coef_intercept + intercept = np.zeros(n_outputs, dtype=float) + + y_hat = NARX._predict( + expression, + X, + y, + coef, + intercept, + feat_ids, + delay_ids, + output_ids, + ) + + y_masked, y_hat_masked, sample_weight_sqrt_masked = mask_missing_values( + y, y_hat, sample_weight_sqrt + ) + + return (sample_weight_sqrt_masked * (y_hat_masked - y_masked)).sum(axis=1) + + @staticmethod + def _grad( + coef_intercept, + expression, + X, + y, + feat_ids, + delay_ids, + output_ids, + fit_intercept, + sample_weight_sqrt, + grad_yyd_ids, + 
grad_coef_ids, + grad_feat_ids, + grad_delay_ids, + ): + # Sum of squared errors + n_outputs = y.shape[1] + if fit_intercept: + coef = coef_intercept[:-n_outputs] + intercept = coef_intercept[-n_outputs:] + else: + coef = coef_intercept + intercept = np.zeros(n_outputs, dtype=float) + + y_hat = NARX._predict( + expression, + X, + y, + coef, + intercept, + feat_ids, + delay_ids, + output_ids, + ) + dydx = NARX._update_dydx( + X, + y_hat, + coef, + feat_ids, + delay_ids, + output_ids, + fit_intercept, + grad_yyd_ids, + grad_coef_ids, + grad_feat_ids, + grad_delay_ids, + ) + + mask_valid = mask_missing_values(y, y_hat, sample_weight_sqrt, return_mask=True) + + sample_weight_sqrt_masked = sample_weight_sqrt[mask_valid] + dydx_masked = dydx[mask_valid] + + return dydx_masked.sum(axis=1) * sample_weight_sqrt_masked + + @validate_params( + { + "y_init": [None, "array-like"], + }, + prefer_skip_nested_validation=True, + ) + def predict(self, X, y_init=None): + """ + Predict using the linear model. + + Parameters + ---------- + X : array-like of shape (n_samples, `n_features_in_`) + Samples. + + y_init : array-like of shape (n_init, `n_outputs_`), default=None + The initial values for the prediction of y. + It should at least have one sample. + + Returns + ------- + y_hat : array-like of shape (n_samples,) + Returns predicted values. + """ + check_is_fitted(self) + + X = validate_data( + self, + X, + dtype=float, + order="C", + reset=False, + ensure_all_finite="allow-nan", + ensure_min_features=0, + ) + if y_init is None: + y_init = np.zeros((self.max_delay_, self.n_outputs_)) + else: + y_init = check_array( + y_init, + ensure_2d=False, + dtype=float, + ensure_min_samples=0, + ensure_all_finite="allow-nan", + ) + if y_init.ndim == 1: + y_init = y_init.reshape(-1, 1) + if y_init.shape[1] != self.n_outputs_: + raise ValueError( + f"`y_init` should have {self.n_outputs_} outputs " + f"but got {y_init.shape[1]}." + ) + y_hat = NARX._predict( + _predict_step, + X, + y_init, + self.coef_, + self.intercept_, + self.feat_ids_, + self.delay_ids_, + self.output_ids_, + ) + if self.n_outputs_ == 1: + y_hat = y_hat.flatten() + return y_hat + + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.input_tags.allow_nan = True + return tags diff --git a/fastcan/narx/_feature.py b/fastcan/narx/_feature.py new file mode 100644 index 0000000..a97889e --- /dev/null +++ b/fastcan/narx/_feature.py @@ -0,0 +1,512 @@ +""" +Time-series features for NARX model. +""" + +# Authors: The fastcan developers +# SPDX-License-Identifier: MIT + +import math +from itertools import combinations_with_replacement +from numbers import Integral + +import numpy as np +from sklearn.utils import check_array +from sklearn.utils._param_validation import Interval, validate_params + + +@validate_params( + {"X": ["array-like"], "ids": ["array-like"]}, + prefer_skip_nested_validation=True, +) +def make_time_shift_features(X, ids): + """Make time shift features. + + Parameters + ---------- + X : array-likeof shape (n_samples, n_features) + The data to transform, column by column. + + ids : array-like of shape (n_outputs, 2) + The unique id numbers of output features, which are + (feature_idx, delay). + + Returns + ------- + out : ndarray of shape (n_samples, n_outputs) + The matrix of features, where `n_outputs` is the number of time shift + features generated from the combination of inputs. 
+ + Examples + -------- + >>> from fastcan.narx import make_time_shift_features + >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]] + >>> ids = [[0, 0], [0, 1], [1, 1]] + >>> make_time_shift_features(X, ids) + array([[ 1., nan, nan], + [ 3., 1., 2.], + [ 5., 3., 4.], + [ 7., 5., 6.]]) + """ + X = check_array(X, ensure_2d=True, dtype=float, ensure_all_finite="allow-nan") + ids = check_array(ids, ensure_2d=True, dtype=int) + n_samples = X.shape[0] + n_outputs = ids.shape[0] + out = np.zeros([n_samples, n_outputs]) + for i, id_temp in enumerate(ids): + out[:, i] = np.r_[ + np.full(id_temp[1], np.nan), + X[: -id_temp[1] or None, id_temp[0]], + ] + + return out + + +@validate_params( + { + "n_features": [ + Interval(Integral, 1, None, closed="left"), + ], + "max_delay": [ + Interval(Integral, 0, None, closed="left"), + ], + "include_zero_delay": ["boolean", "array-like"], + }, + prefer_skip_nested_validation=True, +) +def make_time_shift_ids( + n_features=1, + max_delay=1, + include_zero_delay=False, +): + """Generate ids for time shift features. + (variable_index, delay_number) + + Parameters + ---------- + n_features: int, default=1 + The number of input features. + + max_delay : int, default=1 + The maximum delay of time shift features. + + include_zero_delay : {bool, array-like} of shape (n_features,) default=False + Whether to include the original (zero-delay) features. + + Returns + ------- + ids : array-like of shape (`n_output_features_`, 2) + The unique id numbers of output features. + + Examples + -------- + >>> from fastcan.narx import make_time_shift_ids + >>> make_time_shift_ids(2, max_delay=3, include_zero_delay=[True, False]) + array([[0, 0], + [0, 1], + [0, 2], + [0, 3], + [1, 1], + [1, 2], + [1, 3]]) + """ + if isinstance(include_zero_delay, bool): + return np.stack( + np.meshgrid( + range(n_features), + range(not include_zero_delay, max_delay + 1), + indexing="ij", + ), + -1, + ).reshape(-1, 2) + + include_zero_delay = check_array(include_zero_delay, ensure_2d=False, dtype=bool) + if include_zero_delay.shape[0] != n_features: + raise ValueError( + f"The length of `include_zero_delay`={include_zero_delay} " + f"should be equal to `n_features`={n_features}." + ) + + ids = np.stack( + np.meshgrid( + range(n_features), + range(max_delay + 1), + indexing="ij", + ), + -1, + ).reshape(-1, 2) + exclude_zero_delay_idx = np.where(~include_zero_delay)[0] + mask = np.isin(ids[:, 0], exclude_zero_delay_idx) & (ids[:, 1] == 0) + return ids[~mask] + + +@validate_params( + {"X": ["array-like"], "ids": ["array-like"]}, + prefer_skip_nested_validation=True, +) +def make_poly_features(X, ids): + """Make polynomial features. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data to transform, column by column. + + ids : array-like of shape (n_outputs, degree) + The unique id numbers of output features, which are formed + of non-negative int values. + + Returns + ------- + out.T : ndarray of shape (n_samples, n_outputs) + The matrix of features, where n_outputs is the number of polynomial + features generated from the combination of inputs. 
+ + Examples + -------- + >>> from fastcan.narx import make_poly_features + >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]] + >>> ids = [[0, 0], [0, 1], [1, 1], [0, 2]] + >>> make_poly_features(X, ids) + array([[ 1., 1., 1., 2.], + [ 1., 3., 9., 4.], + [ 1., 5., 25., 6.], + [ 1., 7., 49., 8.]]) + """ + X = check_array(X, ensure_2d=True, dtype=float, ensure_all_finite="allow-nan") + ids = check_array(ids, ensure_2d=True, dtype=int) + n_samples = X.shape[0] + n_outputs, degree = ids.shape + + # Generate polynomial features + out = np.ones([n_outputs, n_samples]) + unique_features = np.unique(ids) + unique_features = unique_features[unique_features != 0] + for i in range(degree): + for j in unique_features: + mask = ids[:, i] == j + out[mask, :] *= X[:, j - 1] + + return out.T + + +@validate_params( + { + "n_features": [ + Interval(Integral, 1, None, closed="left"), + ], + "degree": [ + None, + Interval(Integral, 1, None, closed="left"), + ], + }, + prefer_skip_nested_validation=True, +) +def make_poly_ids( + n_features=1, + degree=1, +): + """Generate ids for polynomial features. + (variable_index, variable_index, ...) + variable_index starts from 1, and 0 represents constant. + + Parameters + ---------- + n_features: int, default=1 + The number of input features. + + degree : int, default=1 + The maximum degree of polynomial features. + + Returns + ------- + ids : array-like of shape (n_outputs, degree) + The unique id numbers of output features. + + Examples + -------- + >>> from fastcan.narx import make_poly_ids + >>> make_poly_ids(2, degree=3) + array([[0, 0, 1], + [0, 0, 2], + [0, 1, 1], + [0, 1, 2], + [0, 2, 2], + [1, 1, 1], + [1, 1, 2], + [1, 2, 2], + [2, 2, 2]]) + """ + n_outputs = math.comb(n_features + degree, degree) - 1 + if n_outputs > np.iinfo(np.intp).max: + msg = ( + "The output that would result from the current configuration would" + f" have {n_outputs} features which is too large to be" + f" indexed by {np.intp().dtype.name}." + ) + raise ValueError(msg) + + ids = np.array( + list( + combinations_with_replacement( + range(n_features + 1), + degree, + ) + ) + ) + + const_id = np.where((ids == 0).all(axis=1)) + return np.delete(ids, const_id, 0) # remove the constant feature + + +def _valiate_time_shift_poly_ids( + time_shift_ids, poly_ids, n_samples=None, n_features=None, n_outputs=None +): + if n_samples is None: + n_samples = np.inf + if n_features is None: + n_features = np.inf + if n_outputs is None: + n_outputs = np.inf + + # Validate time_shift_ids + time_shift_ids_ = check_array( + time_shift_ids, + ensure_2d=True, + dtype=int, + ) + if time_shift_ids_.shape[1] != 2: + raise ValueError( + "time_shift_ids should have shape (n_variables, 2), " + f"but got {time_shift_ids_.shape}." + ) + if (time_shift_ids_[:, 0].min() < 0) or ( + time_shift_ids_[:, 0].max() >= n_features + n_outputs + ): + raise ValueError( + "The element x of the first column of time_shift_ids should " + f"satisfy 0 <= x < {n_features + n_outputs}." + ) + if (time_shift_ids_[:, 1].min() < 0) or (time_shift_ids_[:, 1].max() >= n_samples): + raise ValueError( + "The element x of the second column of time_shift_ids should " + f"satisfy 0 <= x < {n_samples}." + ) + # Validate poly_ids + poly_ids_ = check_array( + poly_ids, + ensure_2d=True, + dtype=int, + ) + if (poly_ids_.min() < 0) or (poly_ids_.max() > time_shift_ids_.shape[0]): + raise ValueError( + "The element x of poly_ids should " + f"satisfy 0 <= x <= {time_shift_ids_.shape[0]}." 
+ ) + return time_shift_ids_, poly_ids_ + + +def _validate_feat_delay_ids( + feat_ids, delay_ids, n_samples=None, n_features=None, n_outputs=None +): + """Validate feat_ids and delay_ids.""" + if n_samples is None: + n_samples = np.inf + if n_features is None: + n_features = np.inf + if n_outputs is None: + n_outputs = np.inf + + # Validate feat_ids + feat_ids_ = check_array( + feat_ids, + ensure_2d=True, + dtype=np.int32, + order="C", + ) + if (feat_ids_.min() < -1) or (feat_ids_.max() > n_features + n_outputs - 1): + raise ValueError( + "The element x of feat_ids should " + f"satisfy -1 <= x <= {n_features + n_outputs - 1}." + ) + # Check if any row of feat_ids only contains -1 + if np.all(feat_ids_ == -1, axis=1).any(): + raise ValueError("`feat_ids` should not contain rows that only have -1.") + # Validate delay_ids + delay_ids_ = check_array( + delay_ids, + ensure_2d=True, + dtype=np.int32, + order="C", + ) + if delay_ids_.shape != feat_ids_.shape: + raise ValueError( + "The shape of delay_ids should be equal to " + f"the shape of feat_ids {feat_ids_.shape}, " + f"but got {delay_ids_.shape}." + ) + if ((delay_ids_ == -1) != (feat_ids_ == -1)).any(): + raise ValueError( + "The element x of delay_ids should be -1 " + "if and only if the element x of feat_ids is -1." + ) + if (delay_ids_.min() < -1) or (delay_ids_.max() >= n_samples): + raise ValueError( + f"The element x of delay_ids should satisfy -1 <= x < {n_samples}." + ) + return feat_ids_, delay_ids_ + + +@validate_params( + { + "feat_ids": ["array-like"], + "delay_ids": ["array-like"], + }, + prefer_skip_nested_validation=True, +) +def fd2tp(feat_ids, delay_ids): + """ + Convert feat_ids and delay_ids to time_shift_ids and poly_ids. + The polynomial terms, e.g., x0(k-1)^2, x0(k-2)x1(k-3), can be + represented by two ways: + + #. feat_ids and delay_ids, e.g., [[0, 0], [0, 1]] and [[1, 1], [2, 3]] + + #. time_shift_ids and poly_ids, e.g., [[0, 1], [0, 2], [1, 3]] and [[1, 1], [2, 3]] + + For feat_ids, [0, 0] and [0, 1] represent x0*x0 and x0*x1, while + for delay_ids, [1, 1] and [2, 3] represent the delays of features in feat_ids. + + For time_shift_ids, [0, 1], [0, 2], and [1, 3] represents x0(k-1), x0(k-2), + and x1(k-3), respectively. For poly_ids, [1, 1] and [2, 3] represent the first + variable multiplying the first variable given by time_shift_ids, i.e., + x0(k-1)*x0(k-1), and the second variable multiplying the third variable, i.e., + x0(k-1)*x1(k-3). + + Parameters + ---------- + feat_ids : array-like of shape (n_terms, degree), default=None + The unique id numbers of features to form polynomial terms. + The id -1 stands for the constant 1. + The id 0 to n are the index of features. + + delay_ids : array-like of shape (n_terms, degree), default=None + The delays of each feature in polynomial terms. + The id -1 stands for empty. + The id 0 stands for 0 delay. + The positive integer id k stands for k-th delay. + + Returns + ------- + time_shift_ids : array-like of shape (n_variables, 2), default=None + The unique id numbers of time shift variables, which are + (feature_idx, delay). + + poly_ids : array-like of shape (n_polys, degree), default=None + The unique id numbers of polynomial terms, excluding the intercept. + The id 0 stands for the constant 1. + The id 1 to n are the index+1 of time_shift_ids. 
+ + Examples + -------- + >>> from fastcan.narx import fd2tp + >>> # Encode x0(k-1), x0(k-2)x1(k-3) + >>> feat_ids = [[-1, 0], [0, 1]] + >>> delay_ids = [[-1, 1], [2, 3]] + >>> time_shift_ids, poly_ids = fd2tp(feat_ids, delay_ids) + >>> print(time_shift_ids) + [[0 1] + [0 2] + [1 3]] + >>> print(poly_ids) + [[0 1] + [2 3]] + """ + _feat_ids, _delay_ids = _validate_feat_delay_ids(feat_ids, delay_ids) + featd = np.c_[_feat_ids.flatten(), _delay_ids.flatten()] + # Ensure featd has at least one [-1, -1] + time_shift_ids = np.unique(np.r_[[[-1, -1]], featd], axis=0) + poly_ids = np.array( + [np.where((time_shift_ids == row).all(axis=1))[0][0] for row in featd] + ).reshape(_feat_ids.shape) + time_shift_ids = time_shift_ids[time_shift_ids[:, 0] != -1] + return time_shift_ids, poly_ids + + +@validate_params( + { + "time_shift_ids": ["array-like"], + "poly_ids": ["array-like"], + }, + prefer_skip_nested_validation=True, +) +def tp2fd(time_shift_ids, poly_ids): + """ + Convert time_shift_ids and poly_ids to feat_ids and delay_ids. + The polynomial terms, e.g., x0(k-1)^2, x0(k-2)x1(k-3), can be + represented by two ways: + + #. feat_ids and delay_ids, e.g., [[0, 0], [0, 1]] and [[1, 1], [2, 3]] + + #. time_shift_ids and poly_ids, e.g., [[0, 1], [0, 2], [1, 3]] and [[1, 1], [2, 3]] + + For feat_ids, [0, 0] and [0, 1] represent x0*x0 and x0*x1, while + for delay_ids, [1, 1] and [2, 3] represent the delays of features in feat_ids. + + For time_shift_ids, [0, 1], [0, 2], and [1, 3] represents x0(k-1), x0(k-2), + and x1(k-3), respectively. For poly_ids, [1, 1] and [2, 3] represent the first + variable multiplying the first variable given by time_shift_ids, i.e., + x0(k-1)*x0(k-1), and the second variable multiplying the third variable, i.e., + x0(k-1)*x1(k-3). + + Parameters + ---------- + time_shift_ids : array-like of shape (n_variables, 2) + The unique id numbers of time shift variables, which are + (feature_idx, delay). + + poly_ids : array-like of shape (n_polys, degree) + The unique id numbers of polynomial terms, excluding the intercept. + The id 0 stands for the constant 1. + The id 1 to n are the index+1 of time_shift_ids. + + Returns + ------- + feat_ids : array-like of shape (n_terms, degree), default=None + The unique id numbers of features to form polynomial terms. + The id -1 stands for the constant 1. + The id 0 to n are the index of features. + + delay_ids : array-like of shape (n_terms, degree), default=None + The delays of each feature in polynomial terms. + The id -1 stands for empty. + The id 0 stands for 0 delay. + The positive integer id k stands for k-th delay. 
+ + Examples + -------- + >>> from fastcan.narx import tp2fd + >>> # Encode x0(k-1), x0(k-2)x1(k-3) + >>> time_shift_ids = [[0, 1], [0, 2], [1, 3]] + >>> poly_ids = [[0, 1], [2, 3]] + >>> feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids) + >>> print(feat_ids) + [[-1 0] + [ 0 1]] + >>> print(delay_ids) + [[-1 1] + [ 2 3]] + """ + _time_shift_ids, _poly_ids = _valiate_time_shift_poly_ids( + time_shift_ids, + poly_ids, + ) + feat_ids = np.full_like(_poly_ids, -1, dtype=int) + delay_ids = np.full_like(_poly_ids, -1, dtype=int) + for i, poly_id in enumerate(_poly_ids): + for j, variable_id in enumerate(poly_id): + if variable_id != 0: + feat_ids[i, j] = _time_shift_ids[variable_id - 1, 0] + delay_ids[i, j] = _time_shift_ids[variable_id - 1, 1] + feat_ids = feat_ids + delay_ids = delay_ids + return feat_ids, delay_ids diff --git a/fastcan/_narx_fast.pyx b/fastcan/narx/_narx_fast.pyx similarity index 98% rename from fastcan/_narx_fast.pyx rename to fastcan/narx/_narx_fast.pyx index 85d46ca..5543f0b 100644 --- a/fastcan/_narx_fast.pyx +++ b/fastcan/narx/_narx_fast.pyx @@ -1,5 +1,5 @@ """ -Fast gradient computation for narx +Fast computation of prediction and gradient for narx. """ # Authors: The fastcan developers # SPDX-License-Identifier: MIT diff --git a/fastcan/narx/_utils.py b/fastcan/narx/_utils.py new file mode 100644 index 0000000..4379571 --- /dev/null +++ b/fastcan/narx/_utils.py @@ -0,0 +1,349 @@ +"""NARX-related utilities.""" + +# Authors: The fastcan developers +# SPDX-License-Identifier: MIT + +from numbers import Integral + +import numpy as np +from scipy.stats import rankdata +from sklearn.utils import check_array, check_consistent_length, column_or_1d +from sklearn.utils._param_validation import Interval, StrOptions, validate_params +from sklearn.utils.validation import check_is_fitted + +from .._fastcan import FastCan +from .._refine import refine +from ..utils import mask_missing_values +from ._base import NARX +from ._feature import ( + make_poly_features, + make_poly_ids, + make_time_shift_features, + make_time_shift_ids, + tp2fd, +) + + +@validate_params( + { + "narx": [NARX], + "term_space": [Interval(Integral, 1, None, closed="left")], + "coef_space": [Interval(Integral, 1, None, closed="left")], + "float_precision": [Interval(Integral, 0, None, closed="left")], + }, + prefer_skip_nested_validation=True, +) +def print_narx( + narx, + term_space=20, + coef_space=10, + float_precision=3, +): + """Print a NARX model as a Table which contains y idx, Term, and Coef. + y idx is used to indicate which output (assuming it is a multi-output case) + the term belongs to. + + Parameters + ---------- + narx : NARX model + The NARX model to be printed. + + term_space: int, default=20 + The space for the column of Term. + + coef_space: int, default=10 + The space for the column of Coef. + + float_precision: int, default=3 + The number of places after the decimal for Coef. + + Returns + ------- + table : str + The table of output index, terms, and coefficients of the NARX model. 
+ + Examples + -------- + >>> from sklearn.datasets import load_diabetes + >>> from fastcan.narx import print_narx, NARX + >>> X, y = load_diabetes(return_X_y=True) + >>> print_narx(NARX().fit(X, y), term_space=10, coef_space=5, float_precision=0) + | yid | Term |Coef | + ======================== + | 0 |Intercept | 152 | + | 0 | X[k,0] | -10 | + | 0 | X[k,1] |-240 | + | 0 | X[k,2] | 520 | + | 0 | X[k,3] | 324 | + | 0 | X[k,4] |-792 | + | 0 | X[k,5] | 477 | + | 0 | X[k,6] | 101 | + | 0 | X[k,7] | 177 | + | 0 | X[k,8] | 751 | + | 0 | X[k,9] | 68 | + """ + check_is_fitted(narx) + + def _get_term_str(term_feat_ids, term_delay_ids): + term_str = "" + for _, (feat_id, delay_id) in enumerate(zip(term_feat_ids, term_delay_ids)): + if -1 < feat_id < narx.n_features_in_: + if delay_id == 0: + term_str += f"*X[k,{feat_id}]" + else: + term_str += f"*X[k-{delay_id},{feat_id}]" + elif feat_id >= narx.n_features_in_: + term_str += f"*y_hat[k-{delay_id},{feat_id - narx.n_features_in_}]" + return term_str[1:] + + yid_space = 5 + print( + f"|{'yid':^{yid_space}}" + + f"|{'Term':^{term_space}}" + + f"|{'Coef':^{coef_space}}|" + ) + print("=" * (yid_space + term_space + coef_space + 4)) + for i in range(narx.n_outputs_): + print( + f"|{i:^{yid_space}}|" + + f"{'Intercept':^{term_space}}|" + + f"{narx.intercept_[i]:^{coef_space}.{float_precision}f}|" + ) + for i, term_id in enumerate(zip(narx.feat_ids_, narx.delay_ids_)): + print( + f"|{narx.output_ids_[i]:^{yid_space}}|" + + f"{_get_term_str(*term_id):^{term_space}}|" + + f"{narx.coef_[i]:^{coef_space}.{float_precision}f}|" + ) + + +@validate_params( + { + "X": ["array-like"], + "y": ["array-like"], + "n_terms_to_select": [ + Interval(Integral, 1, None, closed="left"), + "array-like", + ], + "max_delay": [ + Interval(Integral, 0, None, closed="left"), + ], + "poly_degree": [ + Interval(Integral, 1, None, closed="left"), + ], + "fit_intercept": ["boolean"], + "include_zero_delay": [None, "array-like"], + "static_indices": [None, "array-like"], + "refine_verbose": ["verbose"], + "refine_drop": [ + None, + Interval(Integral, 1, None, closed="left"), + StrOptions({"all"}), + ], + "refine_max_iter": [ + None, + Interval(Integral, 1, None, closed="left"), + ], + }, + prefer_skip_nested_validation=True, +) +def make_narx( + X, + y, + n_terms_to_select, + max_delay=1, + poly_degree=1, + *, + fit_intercept=True, + include_zero_delay=None, + static_indices=None, + refine_verbose=1, + refine_drop=None, + refine_max_iter=None, + **params, +): + """Find `time_shift_ids`, `poly_ids`, `output_ids` for a NARX model. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Feature matrix. + + y : array-like of shape (n_samples,) or (n_samples, n_outputs) + Target vector or matrix. + + n_terms_to_select : int or array-like of shape (n_outputs,) + The parameter is the absolute number of polynomial terms to select for + each output. If `n_terms_to_select` is an integer, it is the + same for all outputs. + + max_delay : int, default=1 + The maximum delay of time shift features. + + poly_degree : int, default=1 + The maximum degree of polynomial features. + + fit_intercept : bool, default=True + Whether to fit the intercept. If set to False, intercept will be zeros. + + include_zero_delay : {None, array-like} of shape (n_features,) default=None + Whether to include the original (zero-delay) features. + + static_indices : {None, array-like} of shape (n_static_features,) default=None + The indices of static features without time delay. + + .. 
note:: + If the corresponding include_zero_delay of the static features is False, the + static feature will be excluded from candidate features. + + refine_verbose : int, default=1 + The verbosity level of refine. + + refine_drop : int or "all", default=None + The number of the selected features dropped for the consequencing + reselection. If `drop` is None, no refining will be performed. + + refine_max_iter : int, default=None + The maximum number of valid iterations in the refining process. + + **params : dict + Keyword arguments passed to + `fastcan.FastCan`. + + Returns + ------- + narx : NARX + NARX instance. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.metrics import mean_squared_error + >>> from fastcan.narx import make_narx, print_narx + >>> rng = np.random.default_rng(12345) + >>> n_samples = 1000 + >>> max_delay = 3 + >>> e = rng.normal(0, 0.1, n_samples) + >>> u0 = rng.uniform(0, 1, n_samples+max_delay) # input + >>> u1 = rng.normal(0, 0.1, n_samples) # static input (i.e. no time shift) + >>> y = np.zeros(n_samples+max_delay) # output + >>> for i in range(max_delay, n_samples+max_delay): + ... y[i] = (0.5*y[i-1] + 0.3*u0[i]**2 + 2*u0[i-1]*u0[i-3] + + ... 1.5*u0[i-2]*u1[i-max_delay] + 1) + >>> y = y[max_delay:]+e + >>> X = np.c_[u0[max_delay:], u1] + >>> narx = make_narx(X=X, + ... y=y, + ... n_terms_to_select=4, + ... max_delay=3, + ... poly_degree=2, + ... static_indices=[1], + ... eta=True, + ... verbose=0, + ... refine_verbose=0, + ... refine_drop=1) + >>> print(f"{mean_squared_error(y, narx.fit(X, y).predict(X)):.4f}") + 0.0289 + >>> print_narx(narx) + | yid | Term | Coef | + ======================================= + | 0 | Intercept | 1.050 | + | 0 | y_hat[k-1,0] | 0.484 | + | 0 | X[k,0]*X[k,0] | 0.306 | + | 0 | X[k-1,0]*X[k-3,0] | 2.000 | + | 0 | X[k-2,0]*X[k,1] | 1.528 | + """ + X = check_array( + X, + dtype=float, + ensure_2d=True, + ensure_all_finite="allow-nan", + ensure_min_features=0, + ) + y = check_array(y, dtype=float, ensure_2d=False, ensure_all_finite="allow-nan") + check_consistent_length(X, y) + if y.ndim == 1: + y = y.reshape(-1, 1) + n_outputs = y.shape[1] + if isinstance(n_terms_to_select, Integral): + n_terms_to_select = np.full(n_outputs, n_terms_to_select, dtype=int) + else: + n_terms_to_select = column_or_1d(n_terms_to_select, dtype=int, warn=True) + if len(n_terms_to_select) != n_outputs: + raise ValueError( + "The length of `n_terms_to_select` should be equal to " + f"the number of outputs, {n_outputs}, but got " + f"{len(n_terms_to_select)}." 
+ ) + + xy_hstack = np.c_[X, y] + n_features = X.shape[1] + + if include_zero_delay is None: + _include_zero_delay = [True] * n_features + [False] * n_outputs + else: + _include_zero_delay = include_zero_delay + [False] * n_outputs + + time_shift_ids_all = make_time_shift_ids( + n_features=xy_hstack.shape[1], + max_delay=max_delay, + include_zero_delay=_include_zero_delay, + ) + + time_shift_ids_all = np.delete( + time_shift_ids_all, + ( + np.isin(time_shift_ids_all[:, 0], static_indices) + & (time_shift_ids_all[:, 1] > 0) + ), + 0, + ) + time_shift_vars = make_time_shift_features(xy_hstack, time_shift_ids_all) + + poly_ids_all = make_poly_ids( + time_shift_ids_all.shape[0], + poly_degree, + ) + poly_terms = make_poly_features(time_shift_vars, poly_ids_all) + + # Remove missing values + poly_terms_masked, y_masked = mask_missing_values(poly_terms, y) + + selected_poly_ids = [] + for i in range(n_outputs): + csf = FastCan( + n_terms_to_select[i], + **params, + ).fit(poly_terms_masked, y_masked[:, i]) + if refine_drop is not None: + indices, _ = refine( + csf, drop=refine_drop, max_iter=refine_max_iter, verbose=refine_verbose + ) + support = np.zeros(shape=poly_ids_all.shape[0], dtype=bool) + support[indices] = True + else: + support = csf.get_support() + selected_poly_ids.append(poly_ids_all[support]) + + selected_poly_ids = np.vstack(selected_poly_ids) + + time_shift_ids = time_shift_ids_all[ + np.unique(selected_poly_ids[selected_poly_ids.nonzero()]) - 1, : + ] + poly_ids = ( + rankdata( + np.r_[[[0] * poly_degree], selected_poly_ids], + method="dense", + ).reshape(-1, poly_degree)[1:] + - 1 + ) + + output_ids = [i for i in range(n_outputs) for _ in range(n_terms_to_select[i])] + feat_ids, delay_ids = tp2fd(time_shift_ids, poly_ids) + + return NARX( + feat_ids=feat_ids, + delay_ids=delay_ids, + output_ids=output_ids, + fit_intercept=fit_intercept, + ) diff --git a/fastcan/narx/meson.build b/fastcan/narx/meson.build new file mode 100644 index 0000000..204e4e8 --- /dev/null +++ b/fastcan/narx/meson.build @@ -0,0 +1,7 @@ +py.extension_module( + '_narx_fast', + '_narx_fast.pyx', + subdir: 'fastcan/narx', + cython_args: cython_args, + install: true +) \ No newline at end of file diff --git a/fastcan/narx/tests/__init__.py b/fastcan/narx/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_narx.py b/fastcan/narx/tests/test_narx.py similarity index 100% rename from tests/test_narx.py rename to fastcan/narx/tests/test_narx.py diff --git a/tests/test_narx_jac.py b/fastcan/narx/tests/test_narx_jac.py similarity index 96% rename from tests/test_narx_jac.py rename to fastcan/narx/tests/test_narx_jac.py index f049741..91c81d3 100644 --- a/tests/test_narx_jac.py +++ b/fastcan/narx/tests/test_narx_jac.py @@ -1,12 +1,12 @@ """Test Jacobian matrix of NARX""" import numpy as np -from fastcan._narx_fast import _predict_step # type: ignore[attr-defined] from numpy.testing import assert_allclose, assert_almost_equal, assert_array_equal from scipy.integrate import odeint from sklearn.metrics import r2_score from fastcan.narx import NARX, make_narx +from fastcan.narx._narx_fast import _predict_step # type: ignore[attr-defined] def test_simple(): @@ -318,16 +318,6 @@ def duffing_equation(y, t): dur = 10 n_samples = 1000 - y0 = None - if y0 is None: - n_init = 10 - x0 = np.linspace(0, 2, n_init) - y0_y = np.cos(np.pi * x0) - y0_x = np.sin(np.pi * x0) - y0 = np.c_[y0_x, y0_y] - else: - n_init = len(y0) - dur = 10 n_samples = 1000 rng = np.random.default_rng(12345) diff --git 
a/fastcan/utils.py b/fastcan/utils.py index 160d53e..17a2ece 100644 --- a/fastcan/utils.py +++ b/fastcan/utils.py @@ -10,6 +10,8 @@ from sklearn.utils import _safe_indexing, check_consistent_length, check_X_y from sklearn.utils._param_validation import Interval, validate_params +__all__ = ["mask_missing_values", "ols", "ssc"] + @validate_params( { diff --git a/pixi.lock b/pixi.lock index 29cd125..a933333 100644 --- a/pixi.lock +++ b/pixi.lock @@ -3610,7 +3610,7 @@ packages: - pypi: ./ name: fastcan version: 0.4.0 - sha256: d6ce8adf0fa75fd949496cb6cb7602278dae2a1029b5ff4803901e07ff4a7b47 + sha256: d77a867eb1089c1057661ebe9f1771dd1b610c1125f23e903ba25a005fede67c requires_dist: - scikit-learn>=1.7.0 - furo ; extra == 'docs' diff --git a/pyproject.toml b/pyproject.toml index faffea4..41d1f96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,8 +105,8 @@ profile-minibatch = { cmd = '''python -c "import cProfile; import numpy as np; f time-narx = '''python -m timeit -n 1 -s "import numpy as np; from fastcan.narx import make_narx; rng = np.random.default_rng(5); X = rng.random((1000, 10)); y = rng.random((1000, 2)); m = make_narx(X, y, 10, max_delay=2, poly_degree=2, verbose=0)" "m.fit(X, y, coef_init='one_step_ahead', verbose=1)"''' [tool.pixi.feature.test.tasks] -test = { cmd = "pytest", cwd = "tests" } -test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report $FMT --cov=$PACKAGE tests/", env = { PACKAGE = "fastcan", FMT = "html" } } +test = { cmd = "pytest", cwd = "." } +test-coverage = { cmd = "rm -rf .coverage && pytest --cov-report $FMT --cov=$PACKAGE .", env = { PACKAGE = "fastcan", FMT = "html" } } [tool.pixi.feature.build.tasks] build-wheel = "rm -rf dist && python -m build -wnx -Cinstall-args=--tags=runtime,python-runtime,devel" @@ -134,11 +134,14 @@ dev = ["docs", "test", "build", "jupyter"] static = { features = ["static"], no-default-feature = true } nogil = { features = ["nogil"], no-default-feature = true } +[tool.coverage.run] +omit = ["**/tests/*"] + [tool.codespell] skip = [ "./doc/_build", "./doc/auto_examples", - "./doc/modules/generated", + "./doc/generated", "./htmlcov", "./build", "./pixi.lock", @@ -208,7 +211,7 @@ notice-rgx = "\\#\\ Authors:\\ The\\ fastcan\\ developers\\\r?\\\n\\#\\ SPDX\\-L # folder. "examples/*" = ["E402"] "doc/conf.py" = ["E402"] -"tests/*" = ["CPY001"] +"**/tests/*" = ["CPY001"] "doc/*" = ["CPY001"] ".github/*"=["CPY001"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29
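
Usage sketch (not part of the patch): the fit() hunk above validates `feat_ids`, `delay_ids`, and `output_ids` together and warns when an output receives no terms. The snippet below wires those three arrays up for a hand-built two-input, two-output NARX; it assumes the constructor keywords shown in make_narx() and that `coef_init=None` (apparently the default) stops after the one-step-ahead linear fit, while `coef_init="one_step_ahead"` seeds the scipy.optimize.least_squares refinement, with any extra keyword arguments forwarded to least_squares.

import numpy as np
from fastcan.narx import NARX, print_narx

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 2))   # inputs u0, u1  -> feature ids 0, 1
y = rng.normal(size=(200, 2))   # outputs y0, y1 -> feature ids 2, 3 (toy data only)

# One row per polynomial term; -1 stands for the constant 1, so [-1, 2] with
# delays [-1, 1] encodes y0(k-1), and [3, 0] with delays [1, 0] encodes y1(k-1)*u0(k).
feat_ids = [[-1, 2], [-1, 0], [-1, 1], [3, 0]]
delay_ids = [[-1, 1], [-1, 1], [-1, 2], [1, 0]]
output_ids = [0, 0, 1, 1]       # every output gets at least one term, so no warning

narx = NARX(feat_ids=feat_ids, delay_ids=delay_ids, output_ids=output_ids)
narx.fit(X, y)                              # one-step-ahead coefficients only
narx.fit(X, y, coef_init="one_step_ahead")  # multi-step-ahead refit via least_squares
print_narx(narx)

# Free-run prediction: y_init fills the first max_delay_ steps of the recursion.
y_hat = narx.predict(X, y_init=y[: narx.max_delay_])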
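
The `_loss` and `_grad` staticmethods above follow the `scipy.optimize.least_squares` convention: the loss callable returns one weighted residual per valid sample (summed across outputs), and `jac` returns the matching Jacobian with one row per residual. A self-contained toy, using plain scipy rather than the fastcan internals, that shows the same residual/Jacobian pairing:

import numpy as np
from scipy.optimize import least_squares

t = np.linspace(0.0, 1.0, 50)
y_obs = 2.0 * np.exp(-1.5 * t) + np.random.default_rng(0).normal(0.0, 0.01, t.size)

def residuals(p, t, y_obs):
    a, b = p
    return a * np.exp(b * t) - y_obs          # shape (n_samples,)

def jac(p, t, y_obs):
    a, b = p
    e = np.exp(b * t)
    return np.c_[e, a * t * e]                # shape (n_samples, n_params)

res = least_squares(residuals, x0=[1.0, 0.0], jac=jac, args=(t, y_obs))
print(res.x)                                  # close to [2.0, -1.5]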
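
The `fd2tp` and `tp2fd` docstrings above describe the same polynomial terms in two encodings; for the doctest values they round-trip exactly, which makes a handy sanity check when building `feat_ids`/`delay_ids` by hand:

import numpy as np
from fastcan.narx import fd2tp, tp2fd

# x0(k-1) and x0(k-2)*x1(k-3), taken from the doctests above
feat_ids = [[-1, 0], [0, 1]]
delay_ids = [[-1, 1], [2, 3]]

time_shift_ids, poly_ids = fd2tp(feat_ids, delay_ids)
feat_back, delay_back = tp2fd(time_shift_ids, poly_ids)

assert np.array_equal(feat_back, feat_ids)
assert np.array_equal(delay_back, delay_ids)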
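
Several hunks above (`fit`, `_loss`, `_grad`, `make_narx`) lean on `fastcan.utils.mask_missing_values`, which this patch also adds to the module's `__all__`. A small sketch of the behaviour as it is used here, assuming that rows containing NaN in any passed array are dropped from all of them and that `return_mask=True` yields the boolean row mask of valid samples:

import numpy as np
from fastcan.utils import mask_missing_values

A = np.array([[1.0, 2.0], [np.nan, 3.0], [4.0, 5.0]])
b = np.array([10.0, 20.0, 30.0])

A_valid, b_valid = mask_missing_values(A, b)        # the NaN row is dropped from both
mask = mask_missing_values(A, b, return_mask=True)  # expected: [ True, False,  True]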