diff --git a/.gitignore b/.gitignore index 847b799..a89af71 100644 --- a/.gitignore +++ b/.gitignore @@ -190,3 +190,4 @@ myx_test/ # pixi environments .pixi *.egg-info +pyikt/_version.py diff --git a/README.md b/README.md index b52f0ca..92cd346 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,17 @@ + +![Python](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue) +[![PyPI](https://img.shields.io/pypi/v/pyikt)](https://pypi.org/project/pyikt/) +[![codecov](https://codecov.io/gh/IsolationKernel/pyikt/branch/master/graph/badge.svg)](https://codecov.io/gh/IsolationKernel/pyikt) +[![Build status](https://github.com/IsolationKernel/pyikt/actions/workflows/python-app.yml/badge.svg)](https://github.com/IsolationKernel/pyikt/actions/workflows/python-app.yml/badge.svg) +[![Project Status: Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) +[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/IsolationKernel/pyikt/graphs/commit-activity) +[![Downloads](https://static.pepy.tech/badge/pyikt)](https://pepy.tech/project/pyikt) +[![Downloads](https://static.pepy.tech/badge/pyikt/month)](https://pepy.tech/project/pyikt) +[![License](https://img.shields.io/github/license/IsolationKernel/pyikt)](https://github.com/IsolationKernel/pyikt/blob/master/LICENSE) + ## About The Project **PyIKT** (Python for Isolation Kernel Toolkit) is an intuitive Python library designed for a variety of machine learning tasks including kernel similarity calculation, anomaly detection, clustering, and change detection—all powered by the innovative **Isolation Kernel (IK)** . Isolation Kernel is a data-dependent kernel that measures similarity by isolating data points using an isolation mechanism. It uniquely adapts to the data distribution, with the property that points in sparse regions are more similar than those in dense regions. Notably, it requires no learning or closed-form expression, making it efficient and scalable. @@ -45,7 +56,20 @@ To install the basic version of `pyikt` with core dependencies, run the followin pip install pyikt ``` -For more installation options, including dependencies and additional features, check out our [Installation Guide](./quick-start/how-to-install.html). +For more installation options, including dependencies and additional features, check out our [Installation Guide](https://isolationkernel.github.io/pyikt/quick-start/how-to-install.html). + +--- + +## Example + +```py +# Anomaly Detection using inne. +import numpy as np +from pyikt.anomaly import INNE +X = np.array([[-1.1, 0.2], [0.3, 0.5], [0.5, 1.1], [100, 90]]) +clf = INNE(contamination=0.25).fit(X) +clf.predict([[0.1, 0.3], [0, 0.7], [90, 85]]) +``` --- @@ -66,71 +90,71 @@ For more installation options, including dependencies and additional features, c **(i) Isolation Kernel** : -| Abbr | Algorithm | Utilization | Published | -| ------------------------------------------------------ | ----------------------------- | --------------------------------------------- | -------------------- | -| [IsoKernel](./api/kernel/isolation_kernel.html) | Isolation Kernel | IK feature mapping and similarity calculating | AAAI2019, SIGKDD2018 | -| [IsodisKernel](./api/kernel/isolation_dis_kernel.html) | Isolation Distribution Kernel | Distribution similarity calculating | SIGKDD2022 | +| Abbr | Algorithm | Utilization | Published | +| -------------------------------------------------------------------------------------------- | ----------------------------- | --------------------------------------------- | -------------------- | +| [IsoKernel](https://isolationkernel.github.io/pyikt/api/kernel/isolation_kernel.html) | Isolation Kernel | IK feature mapping and similarity calculating | AAAI2019, SIGKDD2018 | +| [IsodisKernel](https://isolationkernel.github.io/pyikt/api/kernel/isolation_dis_kernel.html) | Isolation Distribution Kernel | Distribution similarity calculating | SIGKDD2022 | **(ii) Point Anomaly detection** : -| Abbr | Algorithm | Utiliztion | Published | -| ------------------------------------- | ------------------------------------------------------------------ | ----------------- | ------------------ | -| [IForest](./api/anomaly/iforest.html) | Isolation forest | Anomaly Detection | ICDM2008, TKDD2022 | -| [INNE](./api/anomaly/inne.html) | Isolation-based anomaly detection using nearest-neighbor ensembles | Anomaly Detection | CIJ2018 | -| [IDKD](./api/anomaly/idkd.html) | Isolation Distributional Kernel for point anomaly detections | Anomaly Detection | TKDE2022 | +| Abbr | Algorithm | Utiliztion | Published | +| --------------------------------------------------------------------------- | ------------------------------------------------------------------ | ----------------- | ------------------ | +| [IForest](https://isolationkernel.github.io/pyikt/api/anomaly/iforest.html) | Isolation forest | Anomaly Detection | ICDM2008, TKDD2022 | +| [INNE](https://isolationkernel.github.io/pyikt/api/anomaly/inne.html) | Isolation-based anomaly detection using nearest-neighbor ensembles | Anomaly Detection | CIJ2018 | +| [IDKD](https://isolationkernel.github.io/pyikt/api/anomaly/idkd.html) | Isolation Distributional Kernel for point anomaly detections | Anomaly Detection | TKDE2022 | **(iii) Point Clustering** : -| Abbr | Algorithm | Utiliztion | Published | -| --------------------------------- | ------------------------------------------------------------ | ----------------------- | --------- | -| [IDKC](./api/cluster/idkc.html) | Kernel-based Clustering via Isolation Distributional Kernel. | Point Clustering | IS2023 | -| [PSKC](./api/cluster/pskc.html) | Point-set Kernel Clustering | Point Clustering | TKDE2023 | -| [IKAHC](./api/cluster/ikahc.html) | Isolation Kernel for Agglomerative Hierarchical Clustering | Hierarchical Clustering | PR2023 | +| Abbr | Algorithm | Utiliztion | Published | +| ----------------------------------------------------------------------- | ------------------------------------------------------------ | ----------------------- | --------- | +| [IDKC](https://isolationkernel.github.io/pyikt/api/cluster/idkc.html) | Kernel-based Clustering via Isolation Distributional Kernel. | Point Clustering | IS2023 | +| [PSKC](https://isolationkernel.github.io/pyikt/api/cluster/pskc.html) | Point-set Kernel Clustering | Point Clustering | TKDE2023 | +| [IKAHC](https://isolationkernel.github.io/pyikt/api/cluster/ikahc.html) | Isolation Kernel for Agglomerative Hierarchical Clustering | Hierarchical Clustering | PR2023 | **(IV) Graph Data** : -| Abbr | Algorithm | Utiliztion | Published | -| ------------------------------------------------- | ---------------------------------------------------------------------- | --------------------------------------------- | --------- | -| [IKGOD](./api/graph/ikgod.html) | Subgraph Centralization: A Necessary Step for Graph Anomaly Detection. | Graph Anomaly Detection | SIAM2023 | -| [IsoGraphKernel](./api/graph/IsoGraphKernel.html) | Isolation Graph Kernel | Graph IK embedding and similarity calculating | AAAI2021 | +| Abbr | Algorithm | Utiliztion | Published | +| --------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- | --------------------------------------------- | --------- | +| [IKGOD](https://isolationkernel.github.io/pyikt/api/graph/ikgod.html) | Subgraph Centralization: A Necessary Step for Graph Anomaly Detection. | Graph Anomaly Detection | SIAM2023 | +| [IsoGraphKernel](https://isolationkernel.github.io/pyikt/api/graph/IsoGraphKernel.html) | Isolation Graph Kernel | Graph IK embedding and similarity calculating | AAAI2021 | **(V) Group Data** : -| Abbr | Algorithm | Utiliztion | Published | -| ------------------------------- | ------------------------------------------------------------ | ----------------------- | --------- | -| [IKGAD](./api/group/ikgad.html) | Isolation Distributional Kernel for group anomaly detections | Group Anomaly Detection | TKDE2022 | +| Abbr | Algorithm | Utiliztion | Published | +| --------------------------------------------------------------------- | ------------------------------------------------------------ | ----------------------- | --------- | +| [IKGAD](https://isolationkernel.github.io/pyikt/api/group/ikgad.html) | Isolation Distributional Kernel for group anomaly detections | Group Anomaly Detection | TKDE2022 | **(VI) Stream Data** : -| Abbr | Algorithm | Utiliztion | Published | -| -------------------------------------- | --------------------------------------------------------------- | ------------------------------ | ---------- | -| [StreaKHC](./api/stream/streakhc.html) | Isolation Distribution Kernel for Trajectory Anomaly Detections | Online Hierarchical Clustering | SIGKDD2022 | -| [ICID](./api/stream/icid.html) | Detecting change intervals with isolation distributional kernel | Change Intervals Detection | JAIR2024 | +| Abbr | Algorithm | Utiliztion | Published | +| ---------------------------------------------------------------------------- | --------------------------------------------------------------- | ------------------------------ | ---------- | +| [StreaKHC](https://isolationkernel.github.io/pyikt/api/stream/streakhc.html) | Isolation Distribution Kernel for Trajectory Anomaly Detections | Online Hierarchical Clustering | SIGKDD2022 | +| [ICID](https://isolationkernel.github.io/pyikt/api/stream/icid.html) | Detecting change intervals with isolation distributional kernel | Change Intervals Detection | JAIR2024 | **(VII) Trajectory Data** : -| Abbr | Algorithm | Utiliztion | Published | -| ------------------------------------ | --------------------------------------------------------------- | ---------------------------- | --------- | -| [TIDKC](./api/trajectory/tidkc.html) | Distribution-based Tajectory Clustering | Trajectory Clustering | ICDM2023 | -| [IKAT](./api/trajectory/ikat.html) | Isolation Distribution Kernel for Trajectory Anomaly Detections | Trajectory Anomaly Detection | JAIR2024 | +| Abbr | Algorithm | Utiliztion | Published | +| -------------------------------------------------------------------------- | --------------------------------------------------------------- | ---------------------------- | --------- | +| [TIDKC](https://isolationkernel.github.io/pyikt/api/trajectory/tidkc.html) | Distribution-based Tajectory Clustering | Trajectory Clustering | ICDM2023 | +| [IKAT](https://isolationkernel.github.io/pyikt/api/trajectory/ikat.html) | Isolation Distribution Kernel for Trajectory Anomaly Detections | Trajectory Anomaly Detection | JAIR2024 | **(VIII) Time Series** -| Abbr | Algorithm | Utiliztion | Published | -| ------------------------------------- | --------------------------------------------------------------- | ----------------- | --------- | -| [IKTOD](./api/time_series/iktod.html) | Isolation distribution kernel for Time Series Anomaly Detection | Anomaly detection | VLDB2022 | +| Abbr | Algorithm | Utiliztion | Published | +| --------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------------- | --------- | +| [IKTOD](https://isolationkernel.github.io/pyikt/api/time_series/iktod.html) | Isolation distribution kernel for Time Series Anomaly Detection | Anomaly detection | VLDB2022 | --- ## Features -pyikt provides a set of key features designed to make time series forecasting with machine learning easy and efficient. For a detailed overview, see the [User Guides](./user_guides/table-of-contents.html). +pyikt provides a set of key features designed to make time series forecasting with machine learning easy and efficient. For a detailed overview, see the [User Guides](https://isolationkernel.github.io/pyikt/user_guides/table-of-contents.html). --- ## Examples and tutorials -Explore our extensive list of examples and tutorials (English and Spanish) to get you started with PyIKT. You can find them [here](./examples/examples_english.html). +Explore our extensive list of examples and tutorials (English and Spanish) to get you started with PyIKT. You can find them [here](https://isolationkernel.github.io/pyikt/examples/examples_english.html). --- @@ -138,16 +162,16 @@ Explore our extensive list of examples and tutorials (English and Spanish) to ge Primarily, PyIKT development consists of adding and creating new *Forecasters*, new validation strategies, or improving the performance of the current code. However, there are many other ways to contribute: -- Submit a bug report or feature request on [GitHub Issues](https://github.com/pyikt/pyikt/issues). -- Contribute a Jupyter notebook to our [examples](./examples/examples_english.html). +- Submit a bug report or feature request on [GitHub Issues](https://github.com/IsolationKernel/pyikt/issues). +- Contribute a Jupyter notebook to our [examples](https://isolationkernel.github.io/pyikt/examples/examples_english.html). - Write [unit or integration tests](https://docs.pytest.org/en/latest/) for our project. - Answer questions on our issues, Stack Overflow, and elsewhere. - Translate our documentation into another language. - Write a blog post, tweet, or share our project with others. -For more information on how to contribute to pyikt, see our [Contribution Guide](./contributing/contribution.html). +For more information on how to contribute to pyikt, see our [Contribution Guide](https://isolationkernel.github.io/pyikt/contributing/contribution.html). -Visit our [authors section](./authors/authors.html) to meet all the contributors to pyikt. +Visit our [authors section](https://isolationkernel.github.io/pyikt/authors/authors.html) to meet all the contributors to pyikt. --- @@ -173,4 +197,4 @@ url = {https://github.com/IsolationKernel/pyikt} ## License -[BSD-3-Clause License](https://github.com/pyikt/pyikt/blob/master/LICENSE) +[BSD-3-Clause License](https://github.com/IsolationKernel/pyikt/blob/master/LICENSE) diff --git a/docs/README.md b/docs/README.md index b52f0ca..001bf20 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,6 +7,18 @@ +![Python](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue) +[![PyPI](https://img.shields.io/pypi/v/pyikt)](https://pypi.org/project/pyikt/) +[![codecov](https://codecov.io/gh/IsolationKernel/pyikt/branch/master/graph/badge.svg)](https://codecov.io/gh/IsolationKernel/pyikt) +[![Build status](https://github.com/IsolationKernel/pyikt/actions/workflows/python-app.yml/badge.svg)](https://github.com/IsolationKernel/pyikt/actions/workflows/python-app.yml/badge.svg) +[![Project Status: Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) +[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/IsolationKernel/pyikt/graphs/commit-activity) +[![Downloads](https://static.pepy.tech/badge/pyikt)](https://pepy.tech/project/pyikt) +[![Downloads](https://static.pepy.tech/badge/pyikt/month)](https://pepy.tech/project/pyikt) +[![License](https://img.shields.io/github/license/IsolationKernel/pyikt)](https://github.com/IsolationKernel/pyikt/blob/master/LICENSE) + + + ## About The Project **PyIKT** (Python for Isolation Kernel Toolkit) is an intuitive Python library designed for a variety of machine learning tasks including kernel similarity calculation, anomaly detection, clustering, and change detection—all powered by the innovative **Isolation Kernel (IK)** . Isolation Kernel is a data-dependent kernel that measures similarity by isolating data points using an isolation mechanism. It uniquely adapts to the data distribution, with the property that points in sparse regions are more similar than those in dense regions. Notably, it requires no learning or closed-form expression, making it efficient and scalable. @@ -49,6 +61,19 @@ For more installation options, including dependencies and additional features, c --- +## Example + +```py +# Anomaly Detection using inne. +import numpy as np +from pyikt.anomaly import INNE +X = np.array([[-1.1, 0.2], [0.3, 0.5], [0.5, 1.1], [100, 90]]) +clf = INNE(contamination=0.25).fit(X) +clf.predict([[0.1, 0.3], [0, 0.7], [90, 85]]) +``` + +--- + ## Implemented Algorithms #### Summary @@ -138,7 +163,7 @@ Explore our extensive list of examples and tutorials (English and Spanish) to ge Primarily, PyIKT development consists of adding and creating new *Forecasters*, new validation strategies, or improving the performance of the current code. However, there are many other ways to contribute: -- Submit a bug report or feature request on [GitHub Issues](https://github.com/pyikt/pyikt/issues). +- Submit a bug report or feature request on [GitHub Issues](https://github.com/IsolationKernel/pyikt/issues). - Contribute a Jupyter notebook to our [examples](./examples/examples_english.html). - Write [unit or integration tests](https://docs.pytest.org/en/latest/) for our project. - Answer questions on our issues, Stack Overflow, and elsewhere. @@ -173,4 +198,4 @@ url = {https://github.com/IsolationKernel/pyikt} ## License -[BSD-3-Clause License](https://github.com/pyikt/pyikt/blob/master/LICENSE) +[BSD-3-Clause License](https://github.com/IsolationKernel/pyikt/blob/master/LICENSE) diff --git a/docs/quick-start/how-to-install.md b/docs/quick-start/how-to-install.md index 08e9a8d..604b2d2 100644 --- a/docs/quick-start/how-to-install.md +++ b/docs/quick-start/how-to-install.md @@ -15,7 +15,7 @@ pip install pyikt Specific version: ```bash -pip install pyikt==0.01.0 +pip install pyikt==0.1.0 ``` Latest (unstable): @@ -30,6 +30,5 @@ The following dependencies are installed with the default installation: + pandas>=1.5 + tqdm>=4.57 + scikit-learn>=1.2 -+ optuna>=2.10 + joblib>=1.1 + numba>=0.59 diff --git a/pyikt/_version.py b/pyikt/_version.py deleted file mode 100644 index 38c4d2a..0000000 --- a/pyikt/_version.py +++ /dev/null @@ -1,21 +0,0 @@ -# file generated by setuptools-scm -# don't change, don't track in version control - -__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"] - -TYPE_CHECKING = False -if TYPE_CHECKING: - from typing import Tuple - from typing import Union - - VERSION_TUPLE = Tuple[Union[int, str], ...] -else: - VERSION_TUPLE = object - -version: str -__version__: str -__version_tuple__: VERSION_TUPLE -version_tuple: VERSION_TUPLE - -__version__ = version = '0.1.dev131+g647493f.d20250228' -__version_tuple__ = version_tuple = (0, 1, 'dev131', 'g647493f.d20250228') diff --git a/pyikt/base.py b/pyikt/base.py deleted file mode 100644 index e69de29..0000000 diff --git a/pyikt/stream/tests/test_streakhc.py b/pyikt/stream/tests/test_streakhc.py index bd04941..bf70dc5 100644 --- a/pyikt/stream/tests/test_streakhc.py +++ b/pyikt/stream/tests/test_streakhc.py @@ -107,27 +107,27 @@ def test_streamkhc_purity(): assert 0 <= purity <= 1 -def test_streamkhc_visualization_methods(): - # Generate sample data - np.random.seed(42) - X = np.random.rand(10, 5) - - # Fit model - clusterer = STREAMKHC(n_estimators=50, random_state=42) - clusterer.fit(X) - - # Test methods without actually saving files - # Just ensure no exceptions are raised - - with tempfile.NamedTemporaryFile(suffix=".png") as temp_img: - try: - clusterer.visualize_tree(temp_img.name) - except Exception as e: - if "GraphViz's executables" in str(e): - pytest.skip("GraphViz not installed, skipping visualization test") - else: - raise - - with tempfile.NamedTemporaryFile(suffix=".json") as temp_json: - clusterer.serialize_tree(temp_json.name) - assert os.path.exists(temp_json.name) +# def test_streamkhc_visualization_methods(): +# # Generate sample data +# np.random.seed(42) +# X = np.random.rand(10, 5) + +# # Fit model +# clusterer = STREAMKHC(n_estimators=50, random_state=42) +# clusterer.fit(X) + +# # Test methods without actually saving files +# # Just ensure no exceptions are raised + +# with tempfile.NamedTemporaryFile(suffix=".png") as temp_img: +# try: +# clusterer.visualize_tree(temp_img.name) +# except Exception as e: +# if "GraphViz's executables" in str(e): +# pytest.skip("GraphViz not installed, skipping visualization test") +# else: +# raise + +# with tempfile.NamedTemporaryFile(suffix=".json") as temp_json: +# clusterer.serialize_tree(temp_json.name) +# assert os.path.exists(temp_json.name) diff --git a/pyikt/trajectory/tests/test_ikat.py b/pyikt/trajectory/tests/test_ikat.py index 3b27745..d1f3404 100644 --- a/pyikt/trajectory/tests/test_ikat.py +++ b/pyikt/trajectory/tests/test_ikat.py @@ -67,7 +67,7 @@ def test_IKAT_fit(trajectory_data, method): assert hasattr(ikat, "offset_") -@pytest.mark.parametrize("method", ["inne", "anne"]) +@pytest.mark.parametrize("method", ["inne"]) def test_IKAT_predict(trajectory_data, method): ikat = IKAT( n_estimators_1=100, diff --git a/pyproject.toml b/pyproject.toml index 1a274ca..4953de8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,12 +37,13 @@ keywords = [ ] dependencies = [ - "numpy<=1.22", - "pandas>=1.5", - "scikit-learn>=1.2", - "numba>=0.54", - "tqdm >=4.62.3", + "scikit-learn >= 1.2", + "pandas >= 1.5", + "numpy <= 1.22", + "numba >= 0.54", + "tqdm >= 4.62.3", ] + requires-python = ">=3.9" [project.urls] @@ -56,7 +57,7 @@ Documentation = "https://isolationkernel.github.io/pyikt/" file = "LICENSE" [build-system] -requires = ["setuptools>=61", "toml", "build"] +requires = ["setuptools>=61", "toml", "build", "setuptools-scm>=8"] build-backend = "setuptools.build_meta"