diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..1604f58
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,78 @@
+name: Tests
+
+on:
+  push:
+    branches: [ master, dev ]
+  pull_request:
+    branches: [ master, dev ]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Cache pip dependencies
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-${{ matrix.python-version }}-
+            ${{ runner.os }}-pip-
+
+      - name: Install system dependencies (Ubuntu)
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ffmpeg
+
+      - name: Install system dependencies (macOS)
+        if: matrix.os == 'macos-latest'
+        run: |
+          brew install ffmpeg
+
+      - name: Install system dependencies (Windows)
+        if: matrix.os == 'windows-latest'
+        run: |
+          choco install ffmpeg
+        continue-on-error: true
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest pytest-cov
+          pip install -r requirements.txt
+
+      - name: Install package in development mode
+        run: |
+          pip install -e .
+
+      - name: Run pytest
+        run: |
+          pytest -v --tb=short
+
+      - name: Run pytest with coverage (Ubuntu Python 3.12 only)
+        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
+        run: |
+          pytest --cov=hypertools --cov-report=xml --cov-report=term-missing
+
+      - name: Upload coverage to Codecov (Ubuntu Python 3.12 only)
+        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./coverage.xml
+          flags: unittests
+          name: codecov-umbrella
+          fail_ci_if_error: false
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..6dd3c18
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,96 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+HyperTools is a Python library for visualizing and manipulating high-dimensional data. It provides a unified interface for dimensionality reduction, data alignment, clustering, and visualization, built on top of matplotlib, scikit-learn, and seaborn.
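+
+A minimal usage sketch (toy data; the `reduce`, `ndims`, and `show` keyword names mirror the API summarized below, but exact defaults should be checked against the docstrings):
+
+```python
+import numpy as np
+import hypertools as hyp
+
+# three toy observations-by-features datasets
+data = [np.random.randn(100, 10) for _ in range(3)]
+
+# reduce to 3 dimensions and plot; the returned handle is a DataGeometry object
+geo = hyp.plot(data, reduce='PCA', ndims=3, show=False)
+```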
+
+## Key Commands
+
+### Testing
+- `pytest` - Run all tests from the hypertools/ directory
+- `pytest tests/test_<module>.py` - Run tests for a specific module
+- `pytest tests/test_<module>.py::test_<function>` - Run a specific test function
+
+### Development Setup
+- `pip install -e .` - Install in development mode
+- `pip install -r requirements.txt` - Install dependencies
+- `pip install -r docs/doc_requirements.txt` - Install documentation dependencies
+
+### Documentation
+- `cd docs && make html` - Build HTML documentation
+- `cd docs && make clean` - Clean documentation build files
+
+## Code Architecture
+
+### Core Components
+
+**DataGeometry Class** (`hypertools/datageometry.py`)
+- Central data container that holds raw data, transformed data, and transformation parameters
+- Stores matplotlib figure/axes handles and animation objects
+- Contains normalization, reduction, and alignment model parameters
+
+**Main API Functions** (`hypertools/__init__.py`)
+- `plot()` - Primary visualization function
+- `analyze()` - Data analysis and dimensionality reduction
+- `reduce()` - Dimensionality reduction utilities
+- `align()` - Data alignment across datasets
+- `normalize()` - Data normalization
+- `describe()` - Data description and summary
+- `cluster()` - Clustering functionality
+- `load()` - Data loading utilities
+
+**Tools Module** (`hypertools/tools/`)
+- `align.py` - Hyperalignment and Procrustes alignment
+- `reduce.py` - Dimensionality reduction (PCA, t-SNE, UMAP, etc.)
+- `normalize.py` - Data normalization methods
+- `cluster.py` - K-means and other clustering algorithms
+- `format_data.py` - Data preprocessing and formatting
+- `text2mat.py` - Text-to-matrix conversion
+- `df2mat.py` - DataFrame-to-matrix conversion
+- `load.py` - Data loading from various sources
+- `missing_inds.py` - Missing data handling
+- `procrustes.py` - Procrustes analysis
+
+**Plot Module** (`hypertools/plot/`)
+- `plot.py` - Main plotting interface and logic
+- `backend.py` - matplotlib backend configuration
+- `draw.py` - Low-level drawing functions
+
+**External Dependencies** (`hypertools/_externals/`)
+- `ppca.py` - Probabilistic Principal Component Analysis
+- `srm.py` - Shared Response Model
+
+### Data Flow
+
+1. **Input Processing**: Data is formatted and validated through `format_data()`
+2. **Normalization**: Optional data normalization via `normalize()`
+3. **Alignment**: Optional cross-dataset alignment via `align()`
+4. **Dimensionality Reduction**: Data is reduced via `reduce()`
+5. **Clustering**: Optional clustering via `cluster()`
+6. **Visualization**: Final plotting through `plot()` (see the sketch below)
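+
+The flow above can be sketched as explicit calls (keyword names such as `ndims` and the bare default invocations are illustrative assumptions; check each function's docstring):
+
+```python
+import numpy as np
+import hypertools as hyp
+
+data = [np.random.randn(50, 20) for _ in range(2)]
+
+normed = hyp.normalize(data)            # optional normalization
+aligned = hyp.align(normed)             # optional cross-dataset alignment
+reduced = hyp.reduce(aligned, ndims=3)  # dimensionality reduction
+labels = hyp.cluster(reduced)           # optional clustering
+geo = hyp.plot(reduced, show=False)     # final visualization
+```
+
+In practice `plot()` can drive this whole flow through its keyword arguments, so the chained form is mainly useful for inspecting intermediate results.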
+
+### Key Design Patterns
+
+- **Modular Architecture**: Each major operation (align, reduce, normalize, etc.) is in its own module
+- **Unified Interface**: All functions accept similar input formats (lists of arrays, DataFrames, etc.)
+- **Flexible Data Types**: Supports numpy arrays, pandas DataFrames, text data, and mixed inputs
+- **Matplotlib Integration**: Deep integration with matplotlib for customizable visualizations
+- **Animation Support**: Built-in support for animated visualizations
+
+## Development Notes
+
+- The package follows a functional programming style with separate modules for each operation
+- All major functions are designed to work with multiple input formats
+- The DataGeometry class serves as the central data container and state manager
+- Tests are located in the `tests/` directory and follow pytest conventions
+- Documentation is built with Sphinx and uses example galleries
+- The codebase maintains compatibility with Python 3.9+
+
+## Testing Strategy
+
+- Unit tests for individual tools and functions
+- Integration tests for end-to-end workflows
+- Example-based testing through documentation
+- Visual regression testing for plot outputs
\ No newline at end of file
diff --git a/hypertools/config.py b/hypertools/config.py
index 0c14087..03ebc94 100644
--- a/hypertools/config.py
+++ b/hypertools/config.py
@@ -1,4 +1,8 @@
-from pkg_resources import get_distribution
+try:
+    from importlib.metadata import version
+except ImportError:
+    # Fallback for Python < 3.8
+    from importlib_metadata import version
 
-__version__ = get_distribution('hypertools').version
+__version__ = version('hypertools')
 
diff --git a/hypertools/tools/describe.py b/hypertools/tools/describe.py
index ec577d3..52e52e0 100644
--- a/hypertools/tools/describe.py
+++ b/hypertools/tools/describe.py
@@ -2,7 +2,7 @@
 
 import warnings
 import numpy as np
-from scipy.stats.stats import pearsonr
+from scipy.stats import pearsonr
 from scipy.spatial.distance import cdist
 import matplotlib.pyplot as plt
 import seaborn as sns
diff --git a/notes/github_actions_info.md b/notes/github_actions_info.md
new file mode 100644
index 0000000..82d8532
--- /dev/null
+++ b/notes/github_actions_info.md
@@ -0,0 +1,43 @@
+# GitHub Actions CI/CD Setup
+
+## Test Matrix
+The GitHub Actions workflow (`/.github/workflows/test.yml`) runs comprehensive tests on:
+
+### Python Versions
+- Python 3.9
+- Python 3.10
+- Python 3.11
+- Python 3.12
+
+### Operating Systems
+- Ubuntu Latest (Linux)
+- Windows Latest
+- macOS Latest
+
+### Features
+- **Dependency caching**: Pip cache is used to speed up builds
+- **System dependencies**: FFmpeg is installed for animation support
+- **Coverage reporting**: Coverage is collected on Ubuntu Python 3.12 and uploaded to Codecov
+- **Matrix strategy**: Tests run in parallel across all combinations (12 total jobs)
+- **Fail-fast disabled**: All combinations run even if one fails
+
+## Triggers
+- Push to `master` or `dev` branches
+- Pull requests to `master` or `dev` branches
+
+## Badge
+Add this badge to README.md to show build status:
+```markdown
+[![Tests](https://github.com/ContextLab/hypertools/workflows/Tests/badge.svg)](https://github.com/ContextLab/hypertools/actions)
+```
+
+## Local Testing
+To run the same tests locally:
+```bash
+pytest -v --tb=short
+```
+
+For coverage:
+```bash
+pytest --cov=hypertools --cov-report=xml --cov-report=term-missing
+```
\ No newline at end of file
diff --git a/notes/numpy_pandas_compatibility_review.csv b/notes/numpy_pandas_compatibility_review.csv
new file mode 100644
index 0000000..f2989c8
--- /dev/null
+++ b/notes/numpy_pandas_compatibility_review.csv
@@ -0,0 +1,14 @@
+File,Issue Type,Description,Current Status,Priority,Notes
+hypertools/tools/reduce.py,NumPy Deprecated,np.string_ removed in NumPy 2.0,Fixed,High,Already fixed in line 116 comment
+hypertools/config.py,Package,pkg_resources deprecated,Fixed,High,Replaced with importlib.metadata with fallback
+hypertools/tools/describe.py,SciPy Deprecated,scipy.stats.stats.pearsonr deprecated,Fixed,High,Changed import to scipy.stats.pearsonr
+hypertools/_shared/helpers.py,NumPy Deprecated,Potential deprecated features,OK,High,No issues found - uses compatible numpy patterns
+hypertools/tools/format_data.py,NumPy Array,Array creation and dtype handling,OK,High,Uses np.float64 - compatible with numpy 2.0+
+hypertools/tools/align.py,NumPy Array,Matrix operations and dtypes,OK,High,Compatible numpy array operations
+hypertools/tools/normalize.py,NumPy Array,Array operations,OK,Medium,Compatible numpy array operations
+hypertools/tools/df2mat.py,Pandas,DataFrame to matrix conversion,OK,High,Compatible with pandas 2.0+ patterns
+hypertools/datageometry.py,Pandas,DataFrame handling,OK,High,Uses to_dict('list') - compatible with pandas 2.0+
+hypertools/tools/text2mat.py,Pandas,Text processing with pandas,OK,Medium,Compatible pandas operations
+hypertools/tools/load.py,NumPy/Pandas,Data loading operations,OK,Medium,Compatible with numpy 2.0+ and pandas 2.0+
+hypertools/_externals/srm.py,NumPy Random,Uses np.random.seed and np.random.random,OK,Low,These are still supported in numpy 2.0
+tests/,NumPy/Pandas,Test compatibility,OK,High,All 129 tests pass with numpy 2.0+ and pandas 2.0+
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index f707feb..6ea4a2c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ numpy>=2.0.0
 umap-learn>=0.5.5
 requests>=2.31.0
 ipympl>=0.9.3
+importlib_metadata>=1.0.0; python_version < "3.8"
diff --git a/tests/test_reduce.py b/tests/test_reduce.py
index 18b3f9e..aaea8ea 100644
--- a/tests/test_reduce.py
+++ b/tests/test_reduce.py
@@ -92,7 +92,7 @@ def test_reduce_MiniBatchDictionaryLearning():
 
 
 def test_reduce_TSNE():
-    reduced_data_3d = reducer(data, reduce='TSNE', ndims=3)
+    reduced_data_3d = reducer(data, reduce={'model': 'TSNE', 'params': {'perplexity': 5}}, ndims=3)
     assert reduced_data_3d[0].shape==(10,3)