diff --git a/libpysal/graph/_kernel.py b/libpysal/graph/_kernel.py index 344ec30c6..ce288e5bd 100644 --- a/libpysal/graph/_kernel.py +++ b/libpysal/graph/_kernel.py @@ -34,7 +34,7 @@ def _parabolic(distances, bandwidth): def _gaussian(distances, bandwidth): u = distances / bandwidth - return numpy.exp(-((u / 2) ** 2)) / (numpy.sqrt(2 * numpy.pi)) + return numpy.exp(-(u**2)/2) / (numpy.sqrt(2 * numpy.pi)) def _bisquare(distances, bandwidth): @@ -140,9 +140,9 @@ def _kernel( coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES ) else: - assert coordinates.shape[0] == coordinates.shape[1], ( - "coordinates should represent a distance matrix if metric='precomputed'" - ) + assert ( + coordinates.shape[0] == coordinates.shape[1] + ), "coordinates should represent a distance matrix if metric='precomputed'" if ids is None: ids = numpy.arange(coordinates.shape[0]) diff --git a/libpysal/graph/tests/test_builders.py b/libpysal/graph/tests/test_builders.py index c384d0dd5..a21cf3228 100644 --- a/libpysal/graph/tests/test_builders.py +++ b/libpysal/graph/tests/test_builders.py @@ -237,26 +237,26 @@ def test_kernel_precompute(self): g = graph.Graph.build_kernel(distmat, metric="precomputed") expected = np.array( [ - 0.126, - 0.266, - 0.174, - 0.071, - 0.126, - 0.329, - 0.311, - 0.291, - 0.266, - 0.329, - 0.31, - 0.205, - 0.174, - 0.311, - 0.31, - 0.339, - 0.071, - 0.291, - 0.205, - 0.339, + 0.04, + 0.177, + 0.076, + 0.013, + 0.04, + 0.271, + 0.242, + 0.212, + 0.177, + 0.271, + 0.241, + 0.105, + 0.076, + 0.242, + 0.241, + 0.288, + 0.013, + 0.212, + 0.105, + 0.288, ] ) diff --git a/libpysal/graph/tests/test_kernel.py b/libpysal/graph/tests/test_kernel.py index dfe0d41dc..cd089c796 100644 --- a/libpysal/graph/tests/test_kernel.py +++ b/libpysal/graph/tests/test_kernel.py @@ -210,8 +210,8 @@ def test_kernels(kernel, grocs): assert weight.mean() == pytest.approx(0.10312196315841769) assert weight.max() == pytest.approx(0.749881829575671) elif kernel == "gaussian": - assert weight.mean() == pytest.approx(0.19932294761630429) - assert weight.max() == pytest.approx(0.3989265663183409) + assert weight.mean() == pytest.approx(0.13787969156713978) + assert weight.max() == pytest.approx(0.39891085285421685) elif kernel == "bisquare": assert weight.mean() == pytest.approx(0.09084085210598618) assert weight.max() == pytest.approx(0.9372045972129259) diff --git a/libpysal/graph/tests/test_triangulation.py b/libpysal/graph/tests/test_triangulation.py index c60ef26aa..1d55df72a 100644 --- a/libpysal/graph/tests/test_triangulation.py +++ b/libpysal/graph/tests/test_triangulation.py @@ -257,24 +257,25 @@ def test_ids(ids, stores_unique): def test_kernel(): _, _, weight = _delaunay(cau_coords, kernel="gaussian") + expected = np.array( [ - 0.231415, - 0.307681, - 0.395484, - 0.231415, - 0.237447, - 0.057774, - 0.307681, - 0.237447, - 0.123151, - 0.319563, - 0.057774, - 0.123151, - 0.035525, - 0.395484, - 0.319563, - 0.035525, + 0.134237, + 0.237297, + 0.392056, + 0.134237, + 0.141327, + 0.008367, + 0.237297, + 0.141327, + 0.038016, + 0.255978, + 0.008367, + 0.038016, + 0.003163, + 0.392056, + 0.255978, + 0.003163, ] ) diff --git a/libpysal/kernels.py b/libpysal/kernels.py new file mode 100644 index 000000000..dbf7a64d1 --- /dev/null +++ b/libpysal/kernels.py @@ -0,0 +1,265 @@ +""" +kernels.py + +This module defines a collection of common kernel functions used for +distance-based weighting in spatial analysis, nonparametric regression, +and density estimation. + +Each kernel function takes as input an array of distances and a bandwidth +parameter and returns an array of weights according to the shape of the kernel. + +A general ``kernel()`` dispatcher is provided to apply a named kernel or a +user-supplied callable. + +Available kernels: + - ``triangular`` + - ``parabolic`` (Epanechnikov) + - ``gaussian`` + - ``bisquare`` (quartic) + - ``cosine`` + - ``exponential`` + - ``boxcar`` (uniform) + - ``identity`` (raw distances) + +Mathematical Formulation +------------------------ + +All kernels are evaluated as: + +.. math:: + + K(z), \\quad \\text{where} \\ z = \\frac{d}{h} + +- :math:`d` is the distance between points. +- :math:`h` is the kernel bandwidth. +- For :math:`z > 1`, all kernels return :math:`K(z) = 0`. + +""" + +import numpy + + +def _trim(d, bandwidth): + """ + Normalize and clip distances to the range [0, 1]. + + Parameters + ---------- + d : ndarray + Array of distances. + bandwidth : float + Bandwidth parameter. + + Returns + ------- + ndarray + Clipped and normalized distances. + """ + return numpy.clip(numpy.abs(d) / bandwidth, 0, 1) + + +def _triangular(distances, bandwidth): + """ + Triangular kernel function. + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + + Returns + ------- + ndarray + Triangular kernel weights. + """ + return 1 - _trim(distances, bandwidth) + + +def _parabolic(distances, bandwidth): + """ + Parabolic (Epanechnikov) kernel function. + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + + Returns + ------- + ndarray + Parabolic kernel weights. + """ + z = _trim(distances, bandwidth) + return 0.75 * (1 - z**2) + + +def _gaussian(distances, bandwidth): + """ + Gaussian kernel function (truncated at z=1). + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + + Returns + ------- + ndarray + Gaussian kernel weights. + """ + z = distances / bandwidth + exponent_term = -0.5 * (z**2) + c = 1 / (bandwidth * numpy.sqrt(2 * numpy.pi)) + k = c * numpy.exp(exponent_term) + return numpy.where(z <= 1, k, 0) + + +def _bisquare(distances, bandwidth): + """ + Bisquare (quartic) kernel function. + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + + Returns + ------- + ndarray + Bisquare kernel weights. + """ + z = numpy.clip(distances / bandwidth, 0, 1) + return (15 / 16) * (1 - z**2) ** 2 + + +def _cosine(distances, bandwidth): + """ + Cosine kernel function. + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + + Returns + ------- + ndarray + Cosine kernel weights. + """ + z = numpy.clip(distances / bandwidth, 0, 1) + return (numpy.pi / 4) * numpy.cos(numpy.pi / 2 * z) + + +def _exponential(distances, bandwidth): + """ + Exponential kernel function, truncated at z=1. + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + + Returns + ------- + ndarray + Exponential kernel weights. + """ + z = distances / bandwidth + k = numpy.exp(-z) + return numpy.where(z <= 1, k, 0) + + +def _boxcar(distances, bandwidth): + """ + Boxcar (uniform) kernel function. + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + + Returns + ------- + ndarray + Binary weights: 1 if distance < bandwidth, else 0. + """ + return (distances < bandwidth).astype(int) + + +def _identity(distances, _): + """ + Identity kernel (no weighting, returns raw distances). + + Parameters + ---------- + distances : ndarray + Array of distances. + _ : float + Unused bandwidth parameter. + + Returns + ------- + ndarray + The raw input distances. + """ + return distances + + +_kernel_functions = { + "triangular": _triangular, + "parabolic": _parabolic, + "gaussian": _gaussian, + "bisquare": _bisquare, + "cosine": _cosine, + "boxcar": _boxcar, + "discrete": _boxcar, + "exponential": _exponential, + "identity": _identity, + None: _identity, +} + + +def kernel(distances, bandwidth, kernel="gaussian"): + """ + Evaluate a kernel function over a distance array. + + Parameters + ---------- + distances : ndarray + Array of distances. + bandwidth : float + Kernel bandwidth. + kernel : str or callable, optional + The kernel function to use. If a string, must be one of the predefined + kernel names: 'triangular', 'parabolic', 'gaussian', 'bisquare', + 'cosine', 'boxcar', 'discrete', 'exponential', 'identity'. + If callable, it should have the signature `(distances, bandwidth)`. + If None, the 'identity' kernel is used. + + Returns + ------- + ndarray + Kernel weights. + """ + if callable(kernel): + return kernel(distances, bandwidth) + elif kernel is None: + func = _kernel_functions[None] + else: + func = _kernel_functions[kernel.lower()] + + return func(distances, bandwidth) diff --git a/libpysal/tests/test_kernels.py b/libpysal/tests/test_kernels.py new file mode 100644 index 000000000..72b4867be --- /dev/null +++ b/libpysal/tests/test_kernels.py @@ -0,0 +1,114 @@ +import numpy as np +import pytest + +import libpysal.kernels as kernels + + +@pytest.fixture +def distances(): + return np.array([0.0, 0.5, 1.0, 1.5]) + + +@pytest.fixture +def bandwidth(): + return 1.0 + + +# --- Individual kernel tests --- + +def test_trim_clipping(): + d = np.array([-2.0, -1.0, 0.0, 1.0, 2.0]) + bw = 1.0 + result = kernels._trim(d, bw) + expected = np.clip(np.abs(d) / bw, 0, 1) + np.testing.assert_array_almost_equal(result, expected) + + +def test_triangular(distances, bandwidth): + expected = 1 - np.clip(np.abs(distances) / bandwidth, 0, 1) + np.testing.assert_array_almost_equal( + kernels._triangular(distances, bandwidth), expected + ) + + +def test_parabolic(distances, bandwidth): + z = np.clip(np.abs(distances) / bandwidth, 0, 1) + expected = 0.75 * (1 - z**2) + np.testing.assert_array_almost_equal( + kernels._parabolic(distances, bandwidth), expected + ) + + +def test_gaussian(distances, bandwidth): + z = distances / bandwidth + exponent = -0.5 * z**2 + c = 1 / (bandwidth * np.sqrt(2 * np.pi)) + expected = np.where(z <= 1, c * np.exp(exponent), 0) + np.testing.assert_array_almost_equal( + kernels._gaussian(distances, bandwidth), expected + ) + + +def test_bisquare(distances, bandwidth): + z = np.clip(distances / bandwidth, 0, 1) + expected = (15 / 16) * (1 - z**2) ** 2 + np.testing.assert_array_almost_equal( + kernels._bisquare(distances, bandwidth), expected + ) + + +def test_cosine(distances, bandwidth): + z = np.clip(distances / bandwidth, 0, 1) + expected = (np.pi / 4) * np.cos(np.pi / 2 * z) + np.testing.assert_array_almost_equal( + kernels._cosine(distances, bandwidth), expected + ) + + +def test_exponential(distances, bandwidth): + z = distances / bandwidth + expected = np.where(z <= 1, np.exp(-z), 0) + np.testing.assert_array_almost_equal( + kernels._exponential(distances, bandwidth), expected + ) + + +def test_boxcar(distances, bandwidth): + expected = (distances < bandwidth).astype(int) + np.testing.assert_array_equal( + kernels._boxcar(distances, bandwidth), expected + ) + + +def test_identity(distances, bandwidth): + np.testing.assert_array_equal( + kernels._identity(distances, bandwidth), distances + ) + + +# --- Dispatcher tests --- + +@pytest.mark.parametrize("name", [ + "triangular", "parabolic", "gaussian", "bisquare", + "cosine", "boxcar", "discrete", "exponential", "identity", None +]) +def test_kernel_dispatcher_names(distances, bandwidth, name): + # Should not raise or error + result = kernels.kernel(distances, bandwidth, kernel=name) + assert isinstance(result, np.ndarray) + assert result.shape == distances.shape + + +def test_kernel_dispatcher_callable(distances, bandwidth): + def linear_kernel(d, bw): + return 1 - np.clip(d / bw, 0, 1) + + result = kernels.kernel(distances, bandwidth, kernel=linear_kernel) + expected = linear_kernel(distances, bandwidth) + np.testing.assert_array_almost_equal(result, expected) + + +def test_kernel_dispatcher_invalid_name(distances, bandwidth): + with pytest.raises(KeyError): + kernels.kernel(distances, bandwidth, kernel="not-a-kernel") + diff --git a/libpysal/weights/distance.py b/libpysal/weights/distance.py index a8384d322..3a4e97768 100644 --- a/libpysal/weights/distance.py +++ b/libpysal/weights/distance.py @@ -372,8 +372,8 @@ class Kernel(W): bandwidth : float or array-like (optional) the bandwidth :math:`h_i` for the kernel. - fixed : binary - If true then :math:`h_i=h \\forall i`. If false then + fixed : bool + If True then :math:`h_i=h \\forall i`. If False then bandwidth is adaptive across observations. k : int the number of nearest neighbors to use for determining @@ -424,6 +424,10 @@ class Kernel(W): eps : float adjustment to ensure knn distance range is closed on the knnth observations + normalize : bool + If True (default) Gaussian kernel is normalized to integrate to 1. + If False K(0)=1. + Attributes ---------- @@ -530,8 +534,10 @@ def __init__( diagonal=False, distance_metric="euclidean", radius=None, + normalize=True, **kwargs, ): + self._normalize = normalize if radius is not None: distance_metric = "arc" if isKDTree(data): @@ -685,7 +691,9 @@ def _eval_kernel(self): ) z.append(zi) zs = z - # functions follow Anselin and Rey (2010) table 5.4 + + # functions follow Anselin and Rey (2014) Modern Spatial Econometircs in Practice. Pg 78 + if self.function == "triangular": self.kernel = [1 - zi for zi in zs] elif self.function == "uniform": @@ -697,6 +705,8 @@ def _eval_kernel(self): elif self.function == "gaussian": c = np.pi * 2 c = c ** (-0.5) + if self._normalize is False: + c = 1 self.kernel = [c * np.exp(-(zi**2) / 2.0) for zi in zs] else: print(("Unsupported kernel function", self.function)) diff --git a/libpysal/weights/tests/test_distance.py b/libpysal/weights/tests/test_distance.py index e32948d03..3d659b49d 100644 --- a/libpysal/weights/tests/test_distance.py +++ b/libpysal/weights/tests/test_distance.py @@ -351,6 +351,18 @@ def test_from_geodataframe(self): for k, v in list(w[self.known_wi5 - 1].items()): np.testing.assert_allclose(v, self.known_w5[k + 1], rtol=RTOL) + def test_w_normalize(self): + wg = d.Kernel.from_array(self.points, function="gaussian") + np.testing.assert_allclose( + wg.weights[0], + [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], + rtol=RTOL, + ) + wgun = d.Kernel.from_array(self.points, function="gaussian", normalize=False) + np.testing.assert_allclose( + wgun.weights[0], [1.0, 0.8824969246470149, 0.8553453540369604], rtol=RTOL + ) + ########################## # Function/User tests # ##########################