Merged. Changes from 7 commits.
53 changes: 30 additions & 23 deletions derivative/dglobal.py
@@ -7,40 +7,48 @@
from scipy import interpolate, sparse
from scipy.special import legendre
from sklearn.linear_model import Lasso
from specderiv import cheb_deriv, fourier_deriv


@register("spectral")
class Spectral(Derivative):
def __init__(self, **kwargs):
def __init__(self, order=1, axis=0, basis='fourier', filter=None):
"""
Compute the numerical derivative by first computing the FFT. In Fourier space, derivatives are multiplication
by i*phase; compute the IFFT after.

Args:
**kwargs: Optional keyword arguments.

Compute the numerical derivative by spectral methods. In Fourier space, derivatives are multiplication
by i*phase; compute the inverse transform after. Use either Fourier modes or Chebyshev polynomials as
the basis.

Keyword Args:
filter: Optional. A function that takes in frequencies and outputs weights to scale the coefficient at
the input frequency in Fourier space. Input frequencies are the discrete fourier transform sample
frequencies associated with the domain variable. Look into python signal processing resources in
scipy.signal for common filters.

order (int): order of the derivative, defaults to 1st order
axis (int): the dimension of the data along which to differentiate, defaults to first dimension
basis (str): 'fourier' or 'chebyshev', the set of basis functions to use for differentiation
Note `basis='fourier'` assumes your function is periodic and sampled over a period of its domain,
[a, b), and `basis='chebyshev'` assumes your function is sampled at cosine-spaced points on the
domain [a, b].
filter: Optional. A function that takes in wavenumbers and outputs weights to scale the
corresponding mode in frequency space. Input wavenumbers are the k used in the discrete Fourier
transform. Look into Python signal processing resources in scipy.signal for common filters.
"""
# Filter function. Default: Identity filter
self.filter = kwargs.get('filter', np.vectorize(lambda f: 1))
self._x_hat = None
self._freq = None

def _dglobal(self, t, x):
self._x_hat = np.fft.fft(x)
self._freq = np.fft.fftfreq(t.size, d=(t[1] - t[0]))
self.order = order
self.axis = axis
if basis not in ['chebyshev', 'fourier']:
raise ValueError("Only chebyshev and fourier bases are allowed.")
self.basis = basis
self.filter = filter

@_memoize_arrays(1) # the memoization is 1 deep, as in only remembers the most recent args
def _global(self, t, x):
print("HELLO!")
if self.basis == 'chebyshev':
return cheb_deriv(x, t, self.order, self.axis, self.filter)
Contributor Author commented:
It struck me just now that filtering by frequency on cosine-spaced points isn’t totally sound: high-frequency variations look lower frequency around the edges where sample density is higher.

pavelkomarov (Contributor Author) commented on Feb 20, 2025:

I've been pondering this more in pavelkomarov/spectral-derivatives#14. Basically what I'm currently doing is keeping filter(k) amount of the $k^{th}$ Chebyshev polynomial, versus in the Fourier case keeping filter(k) amount of the $k^{th}$ wavenumber. This may be the best thing to do, most spiritually aligned with "spectral". I so far haven't thought of a better alternative. But it's counter-intuitive: because the Chebyshev polynomials have steeper slope at the edges of the domain, we can fit high frequencies better there. So if the assumption is that noise is high frequency and needs to be separated from the data this way, we can't filter it as consistently using this basis.
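For concreteness, a minimal sketch of what such a filter argument could look like under the new interface; the cutoff of 10 and the test signal are illustrative assumptions, not part of this PR:

```python
import numpy as np
from derivative import dxdt

# Keep modes with |k| below a cutoff, zero out the rest. With
# basis='fourier' the k are DFT wavenumbers; with basis='chebyshev'
# they index Chebyshev polynomial degree, per the discussion above.
def lowpass(k):
    return (np.abs(k) < 10).astype(float)

t = np.linspace(0, 2 * np.pi, 100, endpoint=False)
x = np.sin(t) + 0.01 * np.random.randn(t.size)  # mildly noisy periodic signal

dx = dxdt(x, t, kind="spectral", axis=0, basis="fourier", filter=lowpass)
```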

pavelkomarov (Contributor Author) commented on Mar 10, 2025:

I've made a notebook to empirically compare spectral filtering in the Fourier and Chebyshev bases, and pulled in a little of Floris' PyNumDiff to compare with Savitzky-Golay filtering + Gaussian smoothing. Fourier spectral is the gold standard, but even it can be fragile. Chebyshev can work okay (not great but really not worse than savgoldiff) for 1st and 2nd derivatives in the presence of a little noise but starts to blow up at the edges of the domain beyond that.
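A rough way to reproduce one slice of that comparison with stock scipy; the window length and polynomial order below are arbitrary choices, and the fourier_deriv call follows the signature used in this diff:

```python
import numpy as np
from scipy.signal import savgol_filter
from specderiv import fourier_deriv

t = np.linspace(0, 2 * np.pi, 100, endpoint=False)
x = np.sin(t) + 0.01 * np.random.randn(t.size)  # mildly noisy periodic signal

# Fourier spectral derivative, called positionally as in this PR:
# (x, t, order, axis, filter)
dx_spectral = fourier_deriv(x, t, 1, 0, None)

# Savitzky-Golay smoothing + differentiation for comparison
dx_savgol = savgol_filter(x, window_length=15, polyorder=3, deriv=1,
                          delta=t[1] - t[0])

# Worst-case error against the true derivative cos(t)
for name, dx in [("spectral", dx_spectral), ("savgol", dx_savgol)]:
    print(name, np.max(np.abs(dx - np.cos(t))))
```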

else: # self.basis == 'fourier'
return fourier_deriv(x, t, self.order, self.axis, self.filter)

def compute(self, t, x, i):
return next(self.compute_for(t, x, [i]))

def compute_for(self, t, x, indices):
self._dglobal(t, x)
res = np.fft.ifft(1j * 2 * np.pi * self._freq * self.filter(self._freq) * self._x_hat).real
res = self._global(t, x) # cached
for i in indices:
yield res[i]

@@ -212,7 +220,6 @@ def __init__(self, alpha=None):
"""
self.alpha = alpha


@_memoize_arrays(1)
def _global(self, t, z, alpha):
if alpha is None:
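To make the reworked interface concrete, a hypothetical usage sketch based only on the signatures visible in this diff; the grids and functions are illustrative, and the cosine-spaced Chebyshev grid follows the docstring's sampling assumption:

```python
import numpy as np
from derivative import dxdt

# Fourier basis: periodic signal sampled on [a, b) without the endpoint
t = np.linspace(0, 2 * np.pi, 100, endpoint=False)
dx = dxdt(np.sin(t), t, kind="spectral", axis=0, order=1, basis="fourier")

# Chebyshev basis: sample at cosine-spaced points on [a, b]
n = 32
t_cheb = np.cos(np.pi * np.arange(n + 1) / n)  # runs 1 -> -1 on [-1, 1]
dx_cheb = dxdt(np.exp(t_cheb), t_cheb, kind="spectral", order=1,
               basis="chebyshev")
```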
20 changes: 11 additions & 9 deletions derivative/differentiation.py
@@ -21,7 +21,7 @@ def _gen_method(x, t, kind, axis, **kwargs):
return methods.get(kind)(**kwargs)


def dxdt(x, t, kind=None, axis=1, **kwargs):
def dxdt(x, t, kind=None, axis=0, **kwargs):
"""
Compute the derivative of x with respect to t along axis using the numerical derivative specified by "kind". This is
the functional interface of the Derivative class.
@@ -35,7 +35,7 @@ def dxdt(x, t, kind=None, axis=1, **kwargs):
x (:obj:`ndarray` of float): Ordered measurement values.
t (:obj:`ndarray` of float): Ordered measurement times.
kind (string): Derivative method name (see available kinds).
axis ({0,1}): Axis of x along which to differentiate. Default 1.
axis ({0,1}): Axis of x along which to differentiate. Default 0.
**kwargs: Keyword arguments for the derivative method "kind".

Available kinds
Expand All @@ -56,7 +56,7 @@ def dxdt(x, t, kind=None, axis=1, **kwargs):
return method.d(x, t, axis=axis)


def smooth_x(x, t, kind=None, axis=1, **kwargs):
def smooth_x(x, t, kind=None, axis=0, **kwargs):
"""
Compute the smoothed version of x given t along axis using the numerical
derivative specified by "kind". This is the functional interface of
@@ -71,7 +71,7 @@
x (:obj:`ndarray` of float): Ordered measurement values.
t (:obj:`ndarray` of float): Ordered measurement times.
kind (string): Derivative method name (see available kinds).
axis ({0,1}): Axis of x along which to differentiate. Default 1.
axis ({0,1}): Axis of x along which to differentiate. Default 0.
**kwargs: Keyword arguments for the derivative method "kind".

Available kinds
@@ -100,7 +100,7 @@ def compute(self, t, x, i):
"""
Compute the derivative of one-dimensional data x with respect to t at the index i of x, (dx/dt)[i].

Computation of a derivative should fail explicitely if the implementation is unable to compute a derivative at
Computation of a derivative should fail explicitly if the implementation is unable to compute a derivative at
the desired index. Used for global differentiation methods, for example.

This requires that x and t have equal lengths >= 2, and that the index i is a valid index.
@@ -174,7 +174,7 @@ def compute_x_for(self, t, x, indices):
for i in indices:
yield self.compute_x(t, x, i)

def d(self, X, t, axis=1):
def d(self, X, t, axis=0):
"""
Compute the derivative of measurements X taken at times t.

@@ -184,7 +184,7 @@ def d(self, X, t, axis=1):
Args:
X (:obj:`ndarray` of float): Ordered measurements values. Multiple measurements allowed.
t (:obj:`ndarray` of float): Ordered measurement times.
axis ({0,1}). axis of X along which to differentiate. default 1.
axis ({0,1}). axis of X along which to differentiate. default 0.

Returns:
:obj:`ndarray` of float: Returns dX/dt along axis.
@@ -202,7 +202,7 @@
return _restore_axes(dX, axis, flat)


def x(self, X, t, axis=1):
def x(self, X, t, axis=0):
"""
Compute the smoothed X values from measurements X taken at times t.

Expand All @@ -212,7 +212,7 @@ def x(self, X, t, axis=1):
Args:
X (:obj:`ndarray` of float): Ordered measurements values. Multiple measurements allowed.
t (:obj:`ndarray` of float): Ordered measurement times.
axis ({0,1}). axis of X along which to smooth. default 1.
axis ({0,1}). axis of X along which to smooth. default 0.

Returns:
:obj:`ndarray` of float: Returns the smoothed X along axis.
@@ -228,6 +228,8 @@


def _align_axes(X, t, axis) -> tuple[NDArray, tuple[int, ...]]:
"""Reshapes the data so the derivative always happens along axis 1. Do we need this?
Contributor Author commented:
Do we need this function? All this reshaping is a bit confusing. Can we just take the data as is and differentiate along whichever axis the user specifies? There are ways of indexing in numpy that make this fairly easy usually. For instance I have this line.

The code could probably be both more general and simplified if we do this right, but it's a separate PR for sure.
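The line referenced above isn't shown here, but one common numpy pattern for indexing along a caller-specified axis, as a hypothetical illustration:

```python
import numpy as np

def slice_along(x, i, axis):
    """Select index i along an arbitrary axis, leaving the others intact."""
    idx = [slice(None)] * x.ndim  # full slice on every axis...
    idx[axis] = i                 # ...except the one being indexed
    return x[tuple(idx)]

X = np.arange(12).reshape(3, 4)
print(slice_along(X, 2, axis=1))  # third column: [ 2  6 10]
```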

Collaborator commented:
Happy for this, but the challenge is that all methods internally have been allowed to expect one particular axis. So it probably means changing a fair amount.

"""
X = np.array(X)
orig_shape = X.shape
# By convention, differentiate axis 1
1 change: 1 addition & 0 deletions pyproject.toml
@@ -24,6 +24,7 @@ numpy = ">=1.18.3"
scipy = "^1.4.1"
scikit-learn = "^1"
importlib-metadata = ">=7.1.0"
spectral-derivatives = ">=0.7.1"

# docs
sphinx = {version = "7.2.6", optional = true}
69 changes: 38 additions & 31 deletions tests/test_examples.py
@@ -5,9 +5,12 @@
from derivative.differentiation import _gen_method


# Utilities for tests
# ===================
def default_args(kind):
""" The assumption is that the function will have dt = 1/100 over a range of 1 and not vary much. The goal is to
to set the parameters such that we obtain effective derivatives under these conditions.
""" The assumption is that the function will have dt = 1/100 or 2pi/100 over an interval of length 1 or 2pi
and not vary much. The goal is to set the parameters such that we obtain effective derivatives under these
conditions.
"""
if kind == 'spectral':
# frequencies should be guaranteed to be between 0 and 50 (a filter will reduce bad outliers)
@@ -26,8 +29,7 @@
return {"sigma": 1, "lmbd": .01, "kernel": "gaussian"}
else:
raise ValueError('Unimplemented default args for kind {}.'.format(kind))



class NumericalExperiment:
def __init__(self, fn, fn_str, t, kind, args):
self.fn = fn
@@ -40,7 +42,6 @@ def __init__(self, fn, fn_str, t, kind, args):
def run(self):
return dxdt(self.fn(self.t), self.t, self.kind, self.axis, **self.kwargs)


def compare(experiment, truth, rel_tol, abs_tol, shape_only=False):
""" Compare a numerical experiment to theoretical expectations. Issue warnings for derivative methods that fail,
use asserts for implementation requirements.
@@ -60,8 +61,8 @@ def mean_sq(x):
assert np.linalg.norm(residual, ord=np.inf) < max(abs_tol, np.linalg.norm(truth, ord=np.inf) * rel_tol)


# Check that numbers are returned
# ===============================
# Check that only numbers are returned
# ====================================
@pytest.mark.parametrize("m", methods)
def test_notnan(m):
t = np.linspace(0, 1, 100)
@@ -71,8 +72,8 @@ def test_notnan(m):
assert not np.any(np.isnan(values)), message


# Test some basic functions
# =========================
# Test that basic functions are differentiated correctly
# ======================================================
funcs_and_derivs = (
Contributor Author commented:
This is a good library of examples, well thought out.

(lambda t: np.ones_like(t), "f(t) = 1", lambda t: np.zeros_like(t), "const1"),
(lambda t: np.zeros_like(t), "f(t) = 0", lambda t: np.zeros_like(t), "const0"),
@@ -81,37 +82,31 @@ def test_notnan(m):
(lambda t: -t, "f(t) = -t", lambda t: -np.ones_like(t), "lin-neg"),
(lambda t: t ** 2 - t + np.ones_like(t), "f(t) = t^2-t+1", lambda t: 2 * t -np.ones_like(t), "polynomial"),
(lambda t: np.sin(t) + np.ones_like(t) / 2, "f(t) = sin(t)+1/2", lambda t: np.cos(t), "trig"),
(
lambda t: np.array([2 * t, - t]),
"f(t) = [2t, -t]",
lambda t: np.vstack((2 * np.ones_like(t), -np.ones_like(t))),
"2D linear",
),
(
lambda t: np.array([np.sin(t), np.cos(t)]),
"f(t) = [sin(t), cos(t)]",
lambda t: np.vstack((np.cos(t), -np.sin(t))),
"2D trig",
),
(lambda t: np.array([2 * t, - t]), "f(t) = [2t, -t]", lambda t: np.vstack((2 * np.ones_like(t), -np.ones_like(t))), "2D linear"),
(lambda t: np.array([np.sin(t), np.cos(t)]), "f(t) = [sin(t), cos(t)]", lambda t: np.vstack((np.cos(t), -np.sin(t))), "2D trig"),
)
@pytest.mark.filterwarnings('ignore::sklearn.exceptions.ConvergenceWarning')
@pytest.mark.parametrize("m", methods)
@pytest.mark.parametrize("func_spec", funcs_and_derivs, ids=(tup[-1] for tup in funcs_and_derivs))
@pytest.mark.parametrize("func_spec", funcs_and_derivs)
def test_fn(m, func_spec):
func, fname, deriv, f_id = func_spec
t = np.linspace(0, 2*np.pi, 100)
t = np.linspace(0, 2*np.pi, 100, endpoint=False) # For periodic functions, it's important the endpoint not be included
if m == 'trend_filtered':
Contributor Author commented:
f_mod stuff needn't be here to silence the warning. We can just filter warnings with pytest.

# Add noise to avoid all zeros non-convergence warning for sklearn lasso
f_mod = lambda t: func(t) + 1e-9 * np.random.randn(*t.shape) # rename to avoid infinite loop
else:
f_mod = func
nexp = NumericalExperiment(f_mod, fname, t, m, default_args(m))
bad_combo=False
# spectral is only accurate for periodic data. Ideally fixed in decorators
# spectral is only accurate for periodic data. Ideally fixed in decorators
if ("lin" in f_id or "poly" in f_id) and m == "spectral":
bad_combo=True
compare(nexp, deriv(t), 1e-1, 1e-1, bad_combo)


# Test smoothing for those that do it
# ===================================
@pytest.mark.parametrize("kind", ("kalman", "trend_filtered"))
def test_smoothing_x(kind):
t = np.linspace(0, 1, 100)
@@ -122,7 +117,6 @@ def test_smoothing_x(kind):
# MSE
assert np.linalg.norm(x_est - np.sin(t)) ** 2 / len(t) < 1e-1


@pytest.mark.parametrize("kind", ("kalman", "trend_filtered"))
def test_smoothing_functional(kind):
t = np.linspace(0, 1, 100)
@@ -133,13 +127,14 @@ def test_smoothing_functional(kind):
assert np.linalg.norm(x_est - np.sin(t)) ** 2 / len(t) < 1e-1


# Test caching of the expensive _gen_method using a dummy
# =======================================================
@pytest.fixture
def clean_gen_method_cache():
_gen_method.cache_clear()
yield
_gen_method.cache_clear()


def test_gen_method_caching(clean_gen_method_cache):
x = np.ones(3)
t = np.arange(3)
@@ -150,7 +145,6 @@ def test_gen_method_caching(clean_gen_method_cache):
assert _gen_method.cache_info().currsize == 1
assert id(expected) == id(result)


def test_gen_method_kwarg_caching(clean_gen_method_cache):
x = np.ones(3)
t = np.arange(3)
@@ -164,6 +158,8 @@ def test_gen_method_kwarg_caching(clean_gen_method_cache):
assert id(expected) != id(result)


# Test caching of the expensive private _global methods using a dummy
# ===================================================================
@pytest.fixture
def method_inst(request):
x = np.ones(3)
@@ -173,9 +169,9 @@ def method_inst(request):
yield x, t, method
method._global.cache_clear()


@pytest.mark.filterwarnings('ignore::sklearn.exceptions.ConvergenceWarning')
@pytest.mark.parametrize("method_inst", ["kalman", "trend_filtered"], indirect=True)
def test_dglobal_caching(method_inst):
def test_global_caching_xd(method_inst):
# make sure we're not recomputing expensive _global() method
x, t, method = method_inst
method.x(x, t)
@@ -184,11 +180,22 @@
assert method._global.cache_info().misses == 1
assert method._global.cache_info().currsize == 1

@pytest.mark.filterwarnings('ignore::sklearn.exceptions.ConvergenceWarning')
@pytest.mark.parametrize("method_inst", ["kalman", "trend_filtered", "spectral"], indirect=True)
def test_global_caching_dd(method_inst):
# make sure we're not recomputing expensive _global() method
x, t, method = method_inst
method.d(x, t)
method.d(x, t)
assert method._global.cache_info().hits == 1
assert method._global.cache_info().misses == 1
assert method._global.cache_info().currsize == 1

@pytest.mark.filterwarnings('ignore::sklearn.exceptions.ConvergenceWarning')
@pytest.mark.parametrize("method_inst", ["kalman", "trend_filtered"], indirect=True)
def test_cached_global_order(method_inst):
x, t, method = method_inst
x = np.vstack((x, -x))
first_result = method.x(x, t)
second_result = method.x(x, t)
first_result = method.x(x, t, axis=1)
second_result = method.x(x, t, axis=1)
np.testing.assert_equal(first_result, second_result)