diff --git a/.cursor/rules/conda-environment.mdc b/.cursor/rules/conda-environment.mdc
new file mode 100644
index 0000000..ac4252c
--- /dev/null
+++ b/.cursor/rules/conda-environment.mdc
@@ -0,0 +1,10 @@
+---
+description:
+globs:
+alwaysApply: true
+---
+
+To run anything in the terminal or console, ALWAYS:
+1. Run: conda activate confopt_env
+2. Run: pip install .
+3. Run your command
diff --git a/.cursor/rules/python-best-practice-instructions.mdc b/.cursor/rules/python-best-practice-instructions.mdc
new file mode 100644
index 0000000..ed8187a
--- /dev/null
+++ b/.cursor/rules/python-best-practice-instructions.mdc
@@ -0,0 +1,25 @@
+---
+description:
+globs:
+alwaysApply: true
+---
+# Coding Style Guidelines
+
+- Adopt the DRY principle. If code is repeated in multiple places, it should be functionalized and called in those places.
+- Make all inputs explicit. Avoid relying on state or shared context unless encapsulated.
+- Avoid implicit behavior (e.g., mutation of input lists, in place dataframe modification).
+- Variable names should be descriptive and reduced to the shortest possible length.
+- If a function returns multiple types, refactor. Don't return Union[str, dict, None].
+- Use pytest.mark.parametrize for testing functions with categorical input values.
+- If mocking in unit testing is required, mock external APIs and I/O only.
+- No print() statements anywhere in the code. Use logging instead.
+- No single-letter variable names unless in mathematical contexts or loops.
+- No hard coded values. Use constants or configuration files.
+- Don't use early returns in if else statements.
+- Don't create classes if functions are sufficient.
+- Keep modules small and focused (under 500 lines).
+- Comments should explain why, not what. Keep comments under 10% of code.
+- Don't write docstrings.
+- Don't rely on default values for function arguments.
+- Avoid *args or **kwargs unless absolutely necessary.
+- Use pydantic models for configuration values.
diff --git a/.cursor/rules/software-engineering-best-practices.mdc b/.cursor/rules/software-engineering-best-practices.mdc
new file mode 100644
index 0000000..df86087
--- /dev/null
+++ b/.cursor/rules/software-engineering-best-practices.mdc
@@ -0,0 +1,10 @@
+---
+description:
+globs:
+alwaysApply: true
+---
+- Always comply with DRY and SOLID principles.
+- Use as little code as is necessary to carry out the desired functionality, do not over-engineer or over-validate your code.
+- Write easily testable and maintainable code.
+- Maximize separation of concerns.
+- Consider how your changes will affect the wider codebase, think several dependencies ahead.
diff --git a/.cursor/rules/system-permissions.mdc b/.cursor/rules/system-permissions.mdc
new file mode 100644
index 0000000..56139fb
--- /dev/null
+++ b/.cursor/rules/system-permissions.mdc
@@ -0,0 +1,8 @@
+---
+description:
+globs:
+alwaysApply: true
+---
+- NEVER commit any changes.
+- NEVER revert commits or affect the commit history.
+- NEVER push commits or pull from remote, or interact at all with remote branches.
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..bc10dcb
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,31 @@
+# Coding Style Guidelines
+
+- Adopt the DRY principle. If code is repeated in multiple places, it should be functionalized and called in those places.
+- Make all inputs explicit. Avoid relying on state or shared context unless encapsulated.
+- Avoid implicit behavior (e.g., mutation of input lists, in place dataframe modification).
+- Variable names should be descriptive and reduced to the shortest possible length.
+- If a function returns multiple types, refactor. Don't return Union[str, dict, None].
+- Use pytest.mark.parametrize for testing functions with categorical input values.
+- If mocking in unit testing is required, mock external APIs and I/O only.
+- No print() statements anywhere in the code. Use logging instead.
+- No single-letter variable names unless in mathematical contexts or loops.
+- No hard coded values. Use constants or configuration files.
+- Don't use early returns in if else statements.
+- Don't create classes if functions are sufficient.
+- Keep modules small and focused (under 500 lines).
+- Comments should explain why, not what. Keep comments under 10% of code.
+- Don't write docstrings.
+- Don't rely on default values for function arguments.
+- Avoid *args or **kwargs unless absolutely necessary.
+- Use pydantic models for configuration values.
+- Always comply with DRY and SOLID principles.
+- Use as little code as is necessary to carry out the desired functionality, do not over-engineer or over-validate your code.
+- Write easily testable and maintainable code.
+- Maximize separation of concerns.
+- Consider how your changes will affect the wider codebase, think several dependencies ahead.
+
+
+To run anything in the terminal or console, ALWAYS:
+1. Run: conda activate confopt_env
+2. Run: pip install .
+3. Run your command
diff --git a/.github/documentation-instructions.md b/.github/documentation-instructions.md
new file mode 100644
index 0000000..b43b5a4
--- /dev/null
+++ b/.github/documentation-instructions.md
@@ -0,0 +1,95 @@
+# Quantile Estimation Module Documentation Template
+
+This template provides a step-by-step guide and example prompt for documenting any Python module (e.g., `quantile_estimation.py`) in a style consistent with best practices for technical and developer documentation.
+
+---
+
+## 1. Docstring Requirements
+
+Add or update detailed and informative Google-style docstrings following these guidelines:
+
+### Module-level docstring:
+- Brief description of the module's purpose and core functionality
+- Key methodological approaches or architectural patterns used
+- Integration context within the broader framework
+- Focus on salient aspects, avoid trivial descriptions
+- Do not add any type hints in the doc strings.
+
+---
+
+## 2. Documentation File Requirements
+
+Create a comprehensive `.rst` documentation file in `docs/developer/components/[module_name].rst` with:
+
+### Structure Example:
+
+```
+[Module Name] Module
+====================
+
+Overview
+--------
+[Brief description and key features]
+
+Key Features
+------------
+[Bullet points of main capabilities]
+
+Architecture
+------------
+[Class hierarchy, design patterns, architectural decisions]
+
+[Methodology/Algorithm Sections]
+--------------------------------
+[Detailed explanations of key approaches, mathematical foundations where relevant]
+
+Usage Examples
+--------------
+[Practical code examples showing common usage patterns]
+
+Performance Considerations
+--------------------------
+[Computational complexity, scaling considerations, best practices]
+
+Integration Points
+------------------
+[How this module connects with other framework components]
+
+Common Pitfalls
+---------------
+[Common mistakes and how to avoid them]
+
+See Also
+--------
+[Cross-references to related modules]
+```
+
+### Content requirements:
+- Technical depth appropriate for developers
+- Mathematical foundations with LaTeX equations where relevant (cite relevant papers if mainstream, do not hallucinate, if unsure do not cite any)
+- Practical usage examples with actual code
+- Performance and scalability guidance
+- Integration context within the framework
+- Best practices and common pitfalls
+- Cross-references to related components
+
+---
+
+## 3. Index Update
+
+Update `docs/developer/components/index.rst` to include the new module documentation in the appropriate section.
+
+---
+
+## 4. Best Practices
+- Documentation should be contextually relevant and technically accurate
+- Focus on methodology and implementation details that matter to developers
+- Provide both theoretical understanding and practical guidance
+- Ensure consistency with existing documentation style and organization
+- Make it easy for both newcomers and experienced developers to understand and use the module
+
+---
+
+## 5. Example Output (for quantile_estimation.py)
+
+See the current `quantile_estimation.py` for a fully documented example, and `docs/developer/components/quantile_estimation.rst` for a comprehensive documentation file.
diff --git a/.github/testing-instructions.md b/.github/testing-instructions.md
new file mode 100644
index 0000000..6dfdd50
--- /dev/null
+++ b/.github/testing-instructions.md
@@ -0,0 +1,22 @@
+# Coding Style Guidelines
+
+- Use pytest for all testing, use unittest for mocking.
+- Use pytest.mark.parametrize for testing functions with categorical input values:
+ - For literals, you should automatically cycle through all possible Literal values in your parametrization.
+ - For ordinal categories or discrete inputs (eg. n_observations, n_recommendations, etc.) pick some sensible ranges (eg. 0 if allowed, or minimum otherwise, then a sensible every day value, say 10, then a very large value, if it's not computationally expensive, say 1000).
+- Mock external APIs and I/O only, do not use mocking as a crutch to abstract away components whose behaviour you need to test.
+- Use fixtures to store toy data, mocked objects or any other object you plan to reference in the main tests, particularly if it will be used more than once. If the toy data is small and specific to the one test it's called in, it's ok to define it inside the test function.
+- Never define nested functions (function def is inside another function) unless explicitly required because of scope (eg. nested generator builders).
+- Avoid defining helper functions at the top of a test module, tests should be simple and mostly check existing methods' outputs. Very complex tests may require helper functions, but this should be limited.
+- ALL fixtures need to be defined in the tests/conftest.py file, NEVER define them directly in a test module.
+- Do not test initialization of classes. Do not use asserts that just check if an attribute of a class exists, or is equal to what you just defined it as, these are bloated tests that accomplish little, but add maintenance cost.
+- If you're testing a function or method that returns a shaped object, always check the shape (should it be the same as the input's? Should it be different? Should it be a specific size based on the inputs you passed to the function? etc. based on these questions formulate asserts that check those shape aspects)
+- Test the intent behind a function or method, not form or attributes. Read through the function or method carefully, understand its goals and approach, then write meaningful tests that check quality of outputs relative to intent.
+- Do not add strings after asserts, eg. do NOT do this:
+ assert len(final_alphas) == len(initial_alphas), "Alpha count should remain consistent"
+ after any assert statement, it should just be assert len(final_alphas) == len(initial_alphas)
+- Keep comments to a minimum, comments should just explain more obscure asserts or tests.
+- Each unit test should be a function, functions should not be grouped in testing classes and should not have self attributes.
+- When testing mathematical functions, understand the derivations and test assumptions and outputs given mathematical constraints and theory.
+- Do not write excessive amounts of tests, focus on the most important aspects of each function.
+- Avoid lengthy code repetition. If multiple tests share the same set ups or fixture processing but only differ in asserts, join them in a single test and add comments before each assert.
diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
new file mode 100644
index 0000000..7b1cf84
--- /dev/null
+++ b/.github/workflows/ci-cd.yml
@@ -0,0 +1,563 @@
+name: CI/CD Pipeline
+
+on:
+ push:
+ branches: [ '**' ]
+ pull_request:
+ types: [closed]
+ branches: [main]
+
+# Cancel in-progress workflows when a new commit is pushed
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+env:
+ PYTHON_VERSION: "3.11"
+
+jobs:
+ test:
+ name: Test Suite
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Cache pip dependencies
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/pip
+ key: pip-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml', '**/requirements*.txt') }}
+ restore-keys: |
+ pip-${{ runner.os }}-${{ matrix.python-version }}-
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -e ".[dev]"
+ pip install build twine
+
+ - name: Run tests
+ run: |
+ pytest tests/ -v --tb=short --junitxml=test-results-${{ matrix.python-version }}.xml -m "not slow"
+
+ - name: Upload test results
+ uses: actions/upload-artifact@v4
+ if: always()
+ with:
+ name: test-results-${{ matrix.python-version }}
+ path: test-results-${{ matrix.python-version }}.xml
+ retention-days: 2
+
+ lint:
+ name: Code Quality
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Cache pre-commit
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/pre-commit
+ key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}
+
+ - name: Install pre-commit
+ run: |
+ python -m pip install --upgrade pip
+ pip install pre-commit
+
+ - name: Run pre-commit hooks
+ run: pre-commit run --all-files
+
+ check-package-label:
+ name: Check Package Label
+ runs-on: ubuntu-latest
+ if: github.event_name == 'pull_request' && github.event.pull_request.merged == true
+ outputs:
+ has_package_label: ${{ steps.check_label.outputs.has_label }}
+ pr_number: ${{ github.event.pull_request.number }}
+
+ steps:
+ - name: Check for Package label
+ id: check_label
+ uses: actions/github-script@v7
+ with:
+ script: |
+ const labels = context.payload.pull_request.labels.map(label => label.name);
+ const has_package_label = labels.includes('package');
+
+ console.log('PR Labels:', labels);
+ console.log('Has package label:', has_package_label);
+
+ core.setOutput('has_label', has_package_label);
+
+ if (!has_package_label) {
+ console.log('⏭️ Skipping package deployment - no Package label found');
+ } else {
+ console.log('✅ Package label found - proceeding with deployment pipeline');
+ }
+
+ version-check:
+ name: Version Check
+ runs-on: ubuntu-latest
+ needs: [test, lint, check-package-label]
+ if: needs.check-package-label.outputs.has_package_label == 'true'
+ outputs:
+ version: ${{ steps.get_version.outputs.version }}
+ is_new_version: ${{ steps.check_pypi.outputs.is_new }}
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install dependencies for PyPI API
+ run: pip install requests packaging
+
+ - name: Get current version and compare with PyPI
+ id: version_check
+ run: |
+ python << 'EOF'
+ import re
+ import sys
+ import os
+ import requests
+ from packaging import version
+
+ def get_latest_pypi_version(package_name: str) -> str:
+ """Fetch the latest version of a PyPI package by name."""
+ url = f"https://pypi.org/pypi/{package_name}/json"
+ try:
+ response = requests.get(url, timeout=10)
+ if response.status_code == 200:
+ data = response.json()
+ return data["info"]["version"]
+ else:
+ raise Exception(f"Package '{package_name}' returned status {response.status_code}")
+ except requests.exceptions.RequestException as e:
+ raise Exception(f"Failed to fetch package info: {e}")
+
+ # Get current version from pyproject.toml
+ with open('pyproject.toml', 'r') as f:
+ content = f.read()
+ match = re.search(r'version = "([^"]+)"', content)
+
+ if not match:
+ print("❌ ERROR: Could not find version in pyproject.toml")
+ sys.exit(1)
+
+ current_version = match.group(1)
+ package_name = "confopt"
+
+ print(f"Current version from pyproject.toml: {current_version}")
+
+ # Get latest version from PyPI
+ try:
+ pypi_version = get_latest_pypi_version(package_name)
+ print(f"Latest version on PyPI: {pypi_version}")
+ except Exception as e:
+ print(f"❌ ERROR: Could not fetch PyPI version: {e}")
+ sys.exit(1)
+
+ # Compare versions
+ try:
+ current_ver = version.parse(current_version)
+ pypi_ver = version.parse(pypi_version)
+
+ if current_ver > pypi_ver:
+ print(f"✅ Version bump detected: {pypi_version} → {current_version}")
+ print("Proceeding with deployment")
+ is_new_version = True
+ elif current_ver == pypi_ver:
+ print(f"❌ No version bump: current version {current_version} equals PyPI version {pypi_version}")
+ print("Please bump the version in pyproject.toml before deploying")
+ is_new_version = False
+ sys.exit(1)
+ else:
+ print(f"❌ Version downgrade detected: {pypi_version} → {current_version}")
+ print("Current version is lower than PyPI version - this should not happen")
+ is_new_version = False
+ sys.exit(1)
+
+ except Exception as e:
+ print(f"❌ ERROR: Could not parse versions: {e}")
+ print(f"Current: {current_version}, PyPI: {pypi_version}")
+ sys.exit(1)
+
+ # Set outputs
+ with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+ f.write(f"version={current_version}\n")
+ f.write(f"pypi_version={pypi_version}\n")
+ f.write(f"is_new_version={'true' if is_new_version else 'false'}\n")
+ EOF
+
+ build:
+ name: Build Python Package
+ runs-on: ubuntu-latest
+ needs: [test, lint, version-check]
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install build dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build twine
+
+ - name: Build package (wheel and sdist)
+ run: python -m build
+
+ - name: Verify built packages
+ run: twine check dist/*
+
+ - name: Upload built packages
+ uses: actions/upload-artifact@v4
+ with:
+ name: python-package-distributions
+ path: dist/
+ retention-days: 2
+ verify_package:
+ name: Verify Package Installation
+ runs-on: ubuntu-latest
+ needs: [build]
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Download build artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: python-package-distributions
+ path: dist/
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: List built packages
+ run: |
+ echo "Built packages:"
+ ls -la dist/
+ echo ""
+ echo "Package summary:"
+ echo "Source distributions: $(ls dist/*.tar.gz 2>/dev/null | wc -l)"
+ echo "Wheels: $(ls dist/*.whl 2>/dev/null | wc -l)"
+
+ - name: Test wheel installation
+ run: |
+ # Test wheel installation
+ python -m venv test_wheel_env
+ source test_wheel_env/bin/activate
+ pip install --upgrade pip
+ pip install dist/*.whl
+
+ # Run minimal confopt test
+ python -c "
+ import numpy as np
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange
+
+ # Minimal synthetic test
+ def simple_objective(configuration):
+ # Simple quadratic function with noise
+ x, y = configuration['x'], configuration['y']
+ return (x - 2)**2 + (y - 3)**2 + np.random.normal(0, 0.1)
+
+ search_space = {
+ 'x': FloatRange(min_value=0.0, max_value=5.0),
+ 'y': FloatRange(min_value=0.0, max_value=5.0)
+ }
+
+ tuner = ConformalTuner(
+ objective_function=simple_objective,
+ search_space=search_space,
+ minimize=True
+ )
+
+ tuner.tune(max_searches=50, n_random_searches=15, verbose=False)
+ best_params = tuner.get_best_params()
+ best_value = tuner.get_best_value()
+
+ print(f'Wheel installation and basic functionality test successful!')
+ print(f'Best params: {best_params}')
+ print(f'Best value: {best_value:.4f}')
+ "
+
+ deactivate
+ rm -rf test_wheel_env
+
+ - name: Test source distribution installation
+ run: |
+ # Test source installation
+ python -m venv test_sdist_env
+ source test_sdist_env/bin/activate
+ pip install --upgrade pip
+ pip install dist/*.tar.gz
+
+ # Run minimal confopt test
+ python -c "
+ import numpy as np
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange
+
+ # Minimal synthetic test
+ def simple_objective(configuration):
+ # Simple quadratic function with noise
+ x, y = configuration['x'], configuration['y']
+ return (x - 2)**2 + (y - 3)**2 + np.random.normal(0, 0.1)
+
+ search_space = {
+ 'x': FloatRange(min_value=0.0, max_value=5.0),
+ 'y': FloatRange(min_value=0.0, max_value=5.0)
+ }
+
+ tuner = ConformalTuner(
+ objective_function=simple_objective,
+ search_space=search_space,
+ minimize=True
+ )
+
+ tuner.tune(max_searches=50, n_random_searches=15, verbose=False)
+ best_params = tuner.get_best_params()
+ best_value = tuner.get_best_value()
+
+ print(f'Source distribution installation and basic functionality test successful!')
+ print(f'Best params: {best_params}')
+ print(f'Best value: {best_value:.4f}')
+ "
+
+ deactivate
+ rm -rf test_sdist_env
+ test-publish:
+ name: Publish to TestPyPI
+ runs-on: ubuntu-latest
+ needs: [verify_package]
+
+ steps:
+ - name: Download build artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: python-package-distributions
+ path: dist/
+
+ - name: Publish to TestPyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ repository-url: https://test.pypi.org/legacy/
+ password: ${{ secrets.TEST_PYPI_API_TOKEN }}
+ skip-existing: true # Skip if version already exists
+
+ - name: Wait for TestPyPI propagation
+ run: sleep 30
+ verify-testpypi:
+ name: Verify TestPyPI Installation
+ runs-on: ubuntu-latest
+ needs: [test-publish]
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Get version
+ id: get_version
+ run: |
+ # Get version from pyproject.toml
+ python << 'EOF'
+ import re
+ import sys
+ import os
+
+ with open('pyproject.toml', 'r') as f:
+ content = f.read()
+ match = re.search(r'version = "([^"]+)"', content)
+
+ if not match:
+ print("ERROR: Could not find version in pyproject.toml")
+ sys.exit(1)
+
+ version = match.group(1)
+ with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+ f.write(f"version={version}\n")
+ print(f"Current version: {version}")
+ EOF
+
+ - name: Test installation scenarios from TestPyPI
+ run: |
+ VERSION=${{ steps.get_version.outputs.version }}
+
+ # Test wheel installation from TestPyPI
+ echo "Test: Wheel installation from TestPyPI..."
+ python -m venv test_wheel_env
+ source test_wheel_env/bin/activate
+ pip install --upgrade pip
+
+ pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ confopt==$VERSION
+
+ python -c "
+ import numpy as np
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange
+
+ # Minimal synthetic test
+ def simple_objective(configuration):
+ # Simple quadratic function with noise
+ x, y = configuration['x'], configuration['y']
+ return (x - 2)**2 + (y - 3)**2 + np.random.normal(0, 0.1)
+
+ search_space = {
+ 'x': FloatRange(min_value=0.0, max_value=5.0),
+ 'y': FloatRange(min_value=0.0, max_value=5.0)
+ }
+
+ tuner = ConformalTuner(
+ objective_function=simple_objective,
+ search_space=search_space,
+ minimize=True
+ )
+
+ tuner.tune(max_searches=50, n_random_searches=15, verbose=False)
+ best_params = tuner.get_best_params()
+ best_value = tuner.get_best_value()
+
+ print('TestPyPI wheel installation and functionality test successful!')
+ print(f'Best params: {best_params}')
+ print(f'Best value: {best_value:.4f}')
+ "
+
+ deactivate
+ rm -rf test_wheel_env
+
+
+
+ # Test source installation from TestPyPI
+ echo "Test: Source installation from TestPyPI..."
+ python -m venv test_source_env
+ source test_source_env/bin/activate
+ pip install --upgrade pip
+
+ pip install --no-binary=confopt --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ confopt==$VERSION
+
+ python -c "
+ import numpy as np
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange
+
+ # Minimal synthetic test
+ def simple_objective(configuration):
+ # Simple quadratic function with noise
+ x, y = configuration['x'], configuration['y']
+ return (x - 2)**2 + (y - 3)**2 + np.random.normal(0, 0.1)
+
+ search_space = {
+ 'x': FloatRange(min_value=0.0, max_value=5.0),
+ 'y': FloatRange(min_value=0.0, max_value=5.0)
+ }
+
+ tuner = ConformalTuner(
+ objective_function=simple_objective,
+ search_space=search_space,
+ minimize=True
+ )
+
+ tuner.tune(max_searches=50, n_random_searches=15, verbose=False)
+ best_params = tuner.get_best_params()
+ best_value = tuner.get_best_value()
+
+ print('TestPyPI source installation and functionality test successful!')
+ print(f'Best params: {best_params}')
+ print(f'Best value: {best_value:.4f}')
+ "
+
+ deactivate
+ rm -rf test_source_env
+
+ echo "All TestPyPI installation scenarios validated successfully!"
+ publish:
+ name: Publish to PyPI
+ runs-on: ubuntu-latest
+ needs: [verify-testpypi]
+
+ steps:
+ - name: Download build artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: python-package-distributions
+ path: dist/
+
+ - name: Publish to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ password: ${{ secrets.PYPI_API_TOKEN }}
+
+ release:
+ name: Create GitHub Release
+ runs-on: ubuntu-latest
+ needs: [publish]
+ permissions:
+ contents: write
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Download build artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: python-package-distributions
+ path: dist/
+
+ - name: Create GitHub Release Draft
+ uses: softprops/action-gh-release@v2
+ with:
+ tag_name: v${{ needs.version-check.outputs.version }}
+ name: Release v${{ needs.version-check.outputs.version }}
+ body: |
+ ## Package Information
+ - **Version**: ${{ needs.version-check.outputs.version }}
+ - **PyPI**: https://pypi.org/project/confopt/${{ needs.version-check.outputs.version }}/
+
+ ## Changes
+ *Please add release notes and changelog information here before publishing.*
+
+ ---
+
+ **Build Information:**
+ - Commit: ${{ github.sha }}
+ - PR: #${{ needs.check-package-label.outputs.pr_number }}
+ files: dist/*
+ draft: true
+ prerelease: false
diff --git a/.gitignore b/.gitignore
index f2c98bd..e0bb25d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,14 @@ var/
*.egg-info/
.installed.cfg
*.egg
+
+# Compiled extension modules
+*.pyd
+*.so
+*.c
+*.whl
+
+# Dev
+cache/
+_build/
+benchmarks/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0e904be..4bc081a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,6 +6,7 @@ repos:
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
+ exclude: \.c$
- id: debug-statements
- id: detect-private-key
- repo: https://github.com/psf/black
@@ -16,4 +17,9 @@ repos:
rev: 7.0.0
hooks:
- id: flake8
- args: ['--max-line-length=131', '--ignore=E203,W503']
+ args: ['--max-line-length=131', '--ignore=E203,W503,E501']
+- repo: https://github.com/PyCQA/autoflake
+  rev: v2.2.0 # Pinned version; bump manually or via `pre-commit autoupdate`
+ hooks:
+ - id: autoflake
+ args: ["--remove-all-unused-imports", "--remove-unused-variables", "--in-place"]
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 0000000..03690f0
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,32 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.11"
+ jobs:
+ post_install:
+ # Install the package with documentation dependencies
+ - pip install -e ".[docs]"
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+ configuration: docs/conf.py
+ fail_on_warning: true
+
+# Build formats
+formats:
+ - pdf
+ - epub
+
+# Declare the Python requirements required to build your documentation
+python:
+ install:
+ - method: pip
+ path: .
+ extra_requirements:
+ - docs
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..b876f29
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,35 @@
+# Include essential files
+include LICENSE
+include README.md
+include requirements.txt
+
+# Exclude build artifacts and temporary files
+prune build
+prune dist
+prune *.egg-info
+prune __pycache__
+global-exclude *.pyc
+global-exclude *.pyo
+
+global-exclude .DS_Store
+
+# Exclude development and testing directories
+prune tests
+prune examples
+prune misc
+prune benchmarks
+prune cache
+prune docs
+prune assets
+
+# Exclude result and profile directories
+prune benchmark_results
+prune code_optimization_results
+prune comparison_results
+prune method_profiles
+prune optimization_results
+
+# Exclude development files
+exclude requirements-dev.txt
+exclude .pre-commit-config.yaml
+exclude .readthedocs.yml
diff --git a/README.md b/README.md
index d145f0f..a9528b1 100644
--- a/README.md
+++ b/README.md
@@ -1,92 +1,165 @@
-# ConfOpt
+
-[](https://opensource.org/licenses/Apache-2.0)
-[](https://doi.org/10.48550/arXiv.2207.03017)
+

+
-ConfOpt is an inferential hyperparameter optimization package designed to
-speed up model hyperparameter tuning.
+
-The package currently implements Adaptive Conformal Hyperparameter Optimization (ACHO), as detailed
-in [the original paper](https://doi.org/10.48550/arXiv.2207.03017).
+
-## Installation
+[](https://pepy.tech/project/confopt)
+[](https://pepy.tech/project/confopt)
+[](https://badge.fury.io/py/confopt)
+[](https://confopt.readthedocs.io/)
+[](https://pypi.org/project/confopt/)
+
-You can install ConfOpt from [PyPI](https://pypi.org/project/confopt) using `pip`:
+
+
+---
+
+Built for machine learning practitioners requiring flexible and robust hyperparameter tuning, **ConfOpt** delivers superior optimization performance through conformal uncertainty quantification and a wide selection of surrogate models.
+
+## 📦 Installation
+
+Install ConfOpt from PyPI using pip:
```bash
pip install confopt
```
-## Getting Started
+For the latest development version:
+
+```bash
+git clone https://github.com/rick12000/confopt.git
+cd confopt
+pip install -e .
+```
+
+## 🎯 Getting Started
-As an example, we'll tune a Random Forest model with data from a regression task.
+The example below shows how to optimize hyperparameters for a RandomForest classifier. You can find more examples in the [documentation](https://confopt.readthedocs.io/).
-Start by setting up your training and validation data:
+### Step 1: Import Required Libraries
```python
-from sklearn.datasets import fetch_california_housing
-
-X, y = fetch_california_housing(return_X_y=True)
-split_idx = int(len(X) * 0.5)
-X_train, y_train = X[:split_idx, :], y[:split_idx]
-X_val, y_val = X[split_idx:, :], y[split_idx:]
+from confopt.tuning import ConformalTuner
+from confopt.wrapping import IntRange, FloatRange, CategoricalRange
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import load_wine
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
```
+We import the necessary libraries for tuning and model evaluation. The `load_wine` function is used to load the wine dataset, which serves as our example data for optimizing the hyperparameters of the RandomForest classifier (the dataset is trivial and we can easily reach 100% accuracy; this is for example purposes only).
-Then import the Random Forest model to tune and define a search space for
-its parameters (must be a dictionary mapping the model's parameter names to
-possible values of that parameter to search):
+### Step 2: Define the Objective Function
```python
-from sklearn.ensemble import RandomForestRegressor
-
-parameter_search_space = {
- "n_estimators": [10, 30, 50, 100, 150, 200, 300, 400],
- "min_samples_split": [0.005, 0.01, 0.1, 0.2, 0.3],
- "min_samples_leaf": [0.005, 0.01, 0.1, 0.2, 0.3],
- "max_features": [None, 0.8, 0.9, 1],
-}
+def objective_function(configuration):
+ X, y = load_wine(return_X_y=True)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y, test_size=0.3, random_state=42, stratify=y
+ )
+
+ model = RandomForestClassifier(
+ n_estimators=configuration['n_estimators'],
+ max_features=configuration['max_features'],
+ criterion=configuration['criterion'],
+ random_state=42
+ )
+ model.fit(X_train, y_train)
+ predictions = model.predict(X_test)
+
+ return accuracy_score(y_test, predictions)
```
+This function defines the objective we want to optimize. It loads the wine dataset, splits it into training and testing sets, and trains a RandomForest model using the provided configuration. The function returns test accuracy, which will be the objective value ConfOpt will optimize for.
-Now import the `ConformalSearcher` class and initialize it with:
+### Step 3: Define the Search Space
-- The model to tune.
-- The raw X and y data.
-- The parameter search space.
-- An extra variable clarifying whether this is a regression or classification problem.
+```python
+search_space = {
+ 'n_estimators': IntRange(min_value=50, max_value=200),
+ 'max_features': FloatRange(min_value=0.1, max_value=1.0),
+ 'criterion': CategoricalRange(choices=['gini', 'entropy', 'log_loss'])
+}
+```
+Here, we specify the search space for hyperparameters. In this Random Forest example, this includes defining the range for the number of estimators, the proportion of features to consider when looking for the best split, and the criterion for measuring the quality of a split.
-Hyperparameter tuning can be kicked off with the `search` method and a specification
-of how long the tuning should run for (in seconds):
+### Step 4: Create and Run the Tuner
```python
-from confopt.tuning import ConformalSearcher
-
-searcher = ConformalSearcher(
- model=RandomForestRegressor(),
- X_train=X_train,
- y_train=y_train,
- X_val=X_val,
- y_val=y_val,
- search_space=parameter_search_space,
- prediction_type="regression",
-)
-
-searcher.search(
- runtime_budget=120 # How many seconds to run the search for
+tuner = ConformalTuner(
+ objective_function=objective_function,
+ search_space=search_space,
+ minimize=False
)
+tuner.tune(max_searches=50, n_random_searches=10)
```
+We initialize the `ConformalTuner` with the objective function and search space. The `tune` method then kickstarts hyperparameter search and finds the hyperparameters that maximize test accuracy.
-Once done, you can retrieve the best parameters obtained during tuning using:
+### Step 5: Retrieve and Display Results
```python
-best_params = searcher.get_best_params()
+best_params = tuner.get_best_params()
+best_score = tuner.get_best_value()
+
+print(f"Best accuracy: {best_score:.4f}")
+print(f"Best parameters: {best_params}")
```
+Finally, we retrieve the best parameters and test accuracy score found during optimization and print them to the console for review.
-Or automatically retrain your model on full data and optimal parameters with:
+For detailed examples and explanations see the [documentation](https://confopt.readthedocs.io/).
-```python
-best_model = searcher.fit_best_model()
-```
+## 📚 Documentation
+
+### **User Guide**
+- **[Classification Example](https://confopt.readthedocs.io/en/latest/basic_usage/classification_example.html)**: RandomForest hyperparameter tuning on a classification task.
+- **[Regression Example](https://confopt.readthedocs.io/en/latest/basic_usage/regression_example.html)**: RandomForest hyperparameter tuning on a regression task.
+
+### **Developer Resources**
+- **[Architecture Overview](https://confopt.readthedocs.io/en/latest/architecture.html)**: System design and module interactions.
+- **[API Reference](https://confopt.readthedocs.io/en/latest/api_reference.html)**:
+Complete reference for main classes, methods, and parameters.
+
+## 📈 Benchmarks
+
+
+

+
+
+**ConfOpt** is significantly better than plain old random search, but it also beats established tools like **Optuna** or traditional **Gaussian Processes**!
+
+The above benchmark considers neural architecture search on complex image recognition datasets (JAHS-201) and neural network tuning on tabular classification datasets (LCBench-L).
+
+For a fuller analysis of caveats and benchmarking results, refer to the latest methodological paper.
+
+## 🔬 Theory
+
+ConfOpt implements surrogate models and acquisition functions from the following papers:
+
+> **Adaptive Conformal Hyperparameter Optimization**
+> [arXiv, 2022](https://doi.org/10.48550/arXiv.2207.03017)
+
+> **Optimizing Hyperparameters with Conformal Quantile Regression**
+> [PMLR, 2023](https://proceedings.mlr.press/v202/salinas23a/salinas23a.pdf)
+
+> **Enhancing Performance and Calibration in Quantile Hyperparameter Optimization**
+> [arXiv, 2025](https://www.arxiv.org/abs/2509.17051)
+
+## 🤝 Contributing
+
+If you'd like to contribute, please email [r.doyle.edu@gmail.com](mailto:r.doyle.edu@gmail.com) with a quick summary of the feature you'd like to add and we can discuss it before setting up a PR!
+
+If you want to contribute a fix for a new bug, first raise an [issue](https://github.com/rick12000/confopt/issues) on GitHub, then email [r.doyle.edu@gmail.com](mailto:r.doyle.edu@gmail.com) referencing the issue. Issues will be regularly monitored; only send an email if you want to contribute a fix.
+
+## 📄 License
+
+[Apache License 2.0](https://github.com/rick12000/confopt/blob/main/LICENSE)
+
+---
-More information on specific parameters and overrides not mentioned
-in this walk-through can be found in the docstrings or in the `examples`
-folder of the main repository.
+
+
Ready to take your hyperparameter optimization to the next level?
+
Get Started |
+
API Docs
+
diff --git a/assets/benchmark_results.png b/assets/benchmark_results.png
new file mode 100644
index 0000000..5e3f6d3
Binary files /dev/null and b/assets/benchmark_results.png differ
diff --git a/assets/logo.png b/assets/logo.png
new file mode 100644
index 0000000..9cdfa68
Binary files /dev/null and b/assets/logo.png differ
diff --git a/confopt/__init__.py b/confopt/__init__.py
index e69de29..1f2e48c 100644
--- a/confopt/__init__.py
+++ b/confopt/__init__.py
@@ -0,0 +1,18 @@
+"""confopt package initialization.
+
+Apply package-wide warnings filters here so that importing `confopt`
+silences known noisy warnings coming from optional dependencies
+like statsmodels (e.g., IterationLimitWarning from quantile regression).
+"""
+import warnings
+
+# Silence known noisy warning from statsmodels' quantile regression
+try:
+ from statsmodels.tools.sm_exceptions import IterationLimitWarning
+except Exception:
+ IterationLimitWarning = None
+
+if IterationLimitWarning is not None:
+ warnings.filterwarnings("ignore", category=IterationLimitWarning)
+
+__all__ = []
diff --git a/confopt/config.py b/confopt/config.py
deleted file mode 100644
index 447c592..0000000
--- a/confopt/config.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from typing import List, Dict
-
-# Reference names of search estimator architectures:
-QGBM_NAME: str = "qgbm"
-QRF_NAME: str = "qrf"
-KR_NAME: str = "kr"
-GP_NAME: str = "gp"
-GBM_NAME: str = "gbm"
-KNN_NAME: str = "knn"
-RF_NAME: str = "rf"
-DNN_NAME: str = "dnn"
-
-# Reference names of quantile regression estimators:
-QUANTILE_ESTIMATOR_ARCHITECTURES: List[str] = [QGBM_NAME, QRF_NAME]
-
-# Reference names of estimators that don't need their input data normalized:
-NON_NORMALIZING_ARCHITECTURES: List[str] = [RF_NAME, GBM_NAME, QRF_NAME, QGBM_NAME]
-
-# Lookup of metrics to their direction of optimization (direct
-# for performance metrics, inverse for loss or error metrics)
-METRIC_PROPORTIONALITY_LOOKUP: Dict[str, str] = {
- "accuracy_score": "direct",
- "log_loss": "inverse",
- "mean_squared_error": "inverse",
-}
diff --git a/confopt/estimation.py b/confopt/estimation.py
deleted file mode 100644
index b1de366..0000000
--- a/confopt/estimation.py
+++ /dev/null
@@ -1,800 +0,0 @@
-import logging
-from typing import Dict, Optional, List, Tuple
-
-import numpy as np
-from quantile_forest import RandomForestQuantileRegressor
-from sklearn import metrics
-from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import RationalQuadratic, RBF
-from sklearn.kernel_ridge import KernelRidge
-from sklearn.metrics import mean_pinball_loss
-from sklearn.model_selection import KFold
-from sklearn.neighbors import KNeighborsRegressor
-from sklearn.neural_network import MLPRegressor
-
-from confopt.config import (
- GBM_NAME,
- QRF_NAME,
- QGBM_NAME,
- DNN_NAME,
- GP_NAME,
- KNN_NAME,
- KR_NAME,
- RF_NAME,
- QUANTILE_ESTIMATOR_ARCHITECTURES,
-)
-from confopt.optimization import RuntimeTracker
-from confopt.quantile_wrappers import QuantileGBM
-from confopt.utils import get_tuning_configurations, get_perceptron_layers
-
-logger = logging.getLogger(__name__)
-
-SEARCH_MODEL_TUNING_SPACE: Dict[str, Dict] = {
- DNN_NAME: {
- "solver": ["adam", "sgd"],
- "learning_rate_init": [0.0001, 0.001, 0.01, 0.1],
- "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 3, 10],
- "hidden_layer_sizes": get_perceptron_layers(
- n_layers_grid=[2, 3, 4], layer_size_grid=[16, 32, 64, 128]
- ),
- },
- RF_NAME: {
- "n_estimators": [25, 50, 100, 150, 200],
- "max_features": [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
- "min_samples_split": [2, 3, 5],
- "min_samples_leaf": [1, 2, 3],
- },
- KNN_NAME: {"n_neighbors": [1, 2, 3]},
- GBM_NAME: {
- "learning_rate": [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.8],
- "n_estimators": [25, 50, 100, 200],
- "min_samples_split": [2, 3, 5],
- "min_samples_leaf": [1, 3, 5],
- "max_depth": [2, 3, 5, 10],
- },
- GP_NAME: {"kernel": [RBF(), RationalQuadratic()]},
- KR_NAME: {"alpha": [0.001, 0.1, 1, 10]},
- QRF_NAME: {"n_estimators": [25, 50, 100, 150, 200]},
- QGBM_NAME: {
- "learning_rate": [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.8],
- "n_estimators": [25, 50, 100, 200],
- "min_samples_split": [2, 3, 5],
- "min_samples_leaf": [1, 3, 5],
- "max_depth": [2, 3, 5, 10],
- },
-}
-
-SEARCH_MODEL_DEFAULT_CONFIGURATIONS: Dict[str, Dict] = {
- DNN_NAME: {
- "solver": "adam",
- "learning_rate_init": 0.001,
- "alpha": 0.1,
- "hidden_layer_sizes": (32, 16),
- },
- RF_NAME: {
- "n_estimators": 150,
- "max_features": 0.8,
- "min_samples_split": 2,
- "min_samples_leaf": 2,
- },
- KNN_NAME: {"n_neighbors": 2},
- GBM_NAME: {
- "learning_rate": 0.2,
- "n_estimators": 100,
- "min_samples_split": 2,
- "min_samples_leaf": 2,
- "max_depth": 3,
- },
- GP_NAME: {"kernel": RBF()},
- KR_NAME: {"alpha": 0.1},
- QRF_NAME: {"n_estimators": 100},
- QGBM_NAME: {
- "learning_rate": 0.2,
- "n_estimators": 100,
- "min_samples_split": 2,
- "min_samples_leaf": 2,
- "max_depth": 3,
- },
-}
-
-
-def initialize_point_estimator(
- estimator_architecture: str,
- initialization_params: Dict,
- random_state: Optional[int] = None,
-):
- """
- Initialize a point estimator from an input dictionary.
-
- Classes are usually scikit-learn estimators and dictionaries must
- contain all required inputs for the class, in addition to any
- optional inputs to be overridden.
-
- Parameters
- ----------
- estimator_architecture :
- String name for the type of estimator to initialize.
- initialization_params :
- Dictionary of initialization parameters, where each key and
- value pair corresponds to a variable name and variable value
- to pass to the estimator class to initialize.
- random_state :
- Random generation seed.
-
- Returns
- -------
- initialized_model :
- An initialized estimator class instance.
- """
- if estimator_architecture == DNN_NAME:
- initialized_model = MLPRegressor(
- **initialization_params, random_state=random_state
- )
- elif estimator_architecture == RF_NAME:
- initialized_model = RandomForestRegressor(
- **initialization_params, random_state=random_state
- )
- elif estimator_architecture == KNN_NAME:
- initialized_model = KNeighborsRegressor(**initialization_params)
- elif estimator_architecture == GBM_NAME:
- initialized_model = GradientBoostingRegressor(
- **initialization_params, random_state=random_state
- )
- elif estimator_architecture == GP_NAME:
- initialized_model = GaussianProcessRegressor(
- **initialization_params, random_state=random_state
- )
- elif estimator_architecture == KR_NAME:
- initialized_model = KernelRidge(**initialization_params)
- else:
- raise ValueError(
- f"{estimator_architecture} is not a valid point estimator architecture."
- )
-
- return initialized_model
-
-
-def initialize_quantile_estimator(
- estimator_architecture: str,
- initialization_params: Dict,
- pinball_loss_alpha: List[float],
- random_state: Optional[int] = None,
-):
- """
- Initialize a quantile estimator from an input dictionary.
-
- Classes are usually external dependancies or custom wrappers or
- scikit-learn estimator classes. Passed dictionaries must
- contain all required inputs for the class, in addition to any
- optional inputs to be overridden.
-
- Parameters
- ----------
- estimator_architecture :
- String name for the type of estimator to initialize.
- initialization_params :
- Dictionary of initialization parameters, where each key and
- value pair corresponds to a variable name and variable value
- to pass to the estimator class to initialize.
- pinball_loss_alpha :
- List of pinball loss alpha levels that will result in the
- estimator predicting the alpha-corresponding quantiles.
- For eg. passing [0.25, 0.75] will initialize a quantile
- estimator that predicts the 25th and 75th percentiles of
- the data.
- random_state :
- Random generation seed.
-
- Returns
- -------
- initialized_model :
- An initialized estimator class instance.
- """
- if estimator_architecture == QRF_NAME:
- initialized_model = RandomForestQuantileRegressor(
- **initialization_params,
- default_quantiles=pinball_loss_alpha,
- random_state=random_state,
- )
- elif estimator_architecture == QGBM_NAME:
- initialized_model = QuantileGBM(
- **initialization_params,
- quantiles=pinball_loss_alpha,
- random_state=random_state,
- )
- else:
- raise ValueError(
- f"{estimator_architecture} is not a valid estimator architecture."
- )
-
- return initialized_model
-
-
-def average_scores_across_folds(
- scored_configurations: List[Dict], scores: List[float]
-) -> Tuple[List[Dict], List[float]]:
- # TODO: Refactor so it's more efficient or contained.
- # This is a very convoluted function that does something
- # very simple.
- aggregated_scores = {}
- fold_counts = {}
-
- for configuration, score in zip(scored_configurations, scores):
- tuplified_configuration = tuple(configuration.items())
- if tuplified_configuration not in aggregated_scores:
- aggregated_scores[tuplified_configuration] = score
- fold_counts[tuplified_configuration] = 1
- else:
- aggregated_scores[tuplified_configuration] += score
- fold_counts[tuplified_configuration] += 1
-
- for tuplified_configuration in aggregated_scores:
- aggregated_scores[tuplified_configuration] /= fold_counts[
- tuplified_configuration
- ]
-
- aggregated_configurations = [
- dict(list(tuplified_configuration))
- for tuplified_configuration in list(aggregated_scores.keys())
- ]
- aggregated_scores = list(aggregated_scores.values())
-
- return aggregated_configurations, aggregated_scores
-
-
-def cross_validate_configurations(
- configurations: List[Dict],
- estimator_architecture: str,
- X: np.array,
- y: np.array,
- k_fold_splits: int = 3,
- quantiles: Optional[List[float]] = None,
- random_state: Optional[int] = None,
-) -> Tuple[List[Dict], List[float]]:
- """
- Cross validate a specified estimator on a passed X, y dataset.
-
- Cross validation loops through a list of passed hyperparameter
- configurations for the previously specified estimator and returns
- an average score across folds for each.
-
- Parameters
- ----------
- configurations :
- List of estimator parameter configurations, where each
- configuration contains all parameter values necessary
- to create an estimator instance.
- estimator_architecture :
- String name for the type of estimator to cross validate.
- X :
- Explanatory variables to train estimator on.
- y :
- Target variable to train estimator on.
- k_fold_splits :
- Number of cross validation data splits.
- quantiles :
- If the estimator to cross validate is a quantile estimator,
- specify the quantiles it should estimate as a list in this
- variable (eg. [0.25, 0.75] will cross validate an estimator
- predicting the 25th and 75th percentiles of the target variable).
- random_state :
- Random generation seed.
-
- Returns
- -------
- cross_fold_scored_configurations :
- List of cross validated configurations.
- cross_fold_scores :
- List of corresponding cross validation scores (averaged across
- folds).
- """
- scored_configurations, scores = [], []
- kf = KFold(n_splits=k_fold_splits, random_state=random_state, shuffle=True)
- for train_index, test_index in kf.split(X):
- X_train, X_val = X[train_index, :], X[test_index, :]
- Y_train, Y_val = y[train_index], y[test_index]
-
- for configuration in configurations:
- logger.debug(
- f"Evaluating search model parameter configuration: {configuration}"
- )
- if estimator_architecture in QUANTILE_ESTIMATOR_ARCHITECTURES:
- if quantiles is None:
- raise ValueError(
- "'quantiles' cannot be None if passing a quantile regression estimator."
- )
- else:
- model = initialize_quantile_estimator(
- estimator_architecture=estimator_architecture,
- initialization_params=configuration,
- pinball_loss_alpha=quantiles,
- random_state=random_state,
- )
- else:
- model = initialize_point_estimator(
- estimator_architecture=estimator_architecture,
- initialization_params=configuration,
- random_state=random_state,
- )
- model.fit(X_train, Y_train)
- y_pred = model.predict(X_val)
-
- try:
- if estimator_architecture in QUANTILE_ESTIMATOR_ARCHITECTURES:
- if quantiles is None:
- raise ValueError(
- "'quantiles' cannot be None if passing a quantile regression estimator."
- )
- else:
- # Then evaluate on pinball loss:
- lo_y_pred = model.predict(X_val)[:, 0]
- hi_y_pred = model.predict(X_val)[:, 1]
- lo_score = mean_pinball_loss(
- Y_val, lo_y_pred, alpha=quantiles[0]
- )
- hi_score = mean_pinball_loss(
- Y_val, hi_y_pred, alpha=quantiles[1]
- )
- score = (lo_score + hi_score) / 2
- else:
- # Then evaluate on MSE:
- score = metrics.mean_squared_error(Y_val, y_pred)
-
- scored_configurations.append(configuration)
- scores.append(score)
-
- except Exception as e:
- logger.warning(
- "Scoring failed and result was not appended."
- f"Caught exception: {e}"
- )
- continue
-
- cross_fold_scored_configurations, cross_fold_scores = average_scores_across_folds(
- scored_configurations=scored_configurations, scores=scores
- )
-
- return cross_fold_scored_configurations, cross_fold_scores
-
-
-class LocallyWeightedConformalRegression:
- """
- Locally weighted conformal regression.
-
- Fits sequential estimators on X and y data to form point and
- variability predictions for y.
-
- The class contains tuning, fitting and prediction methods.
- """
-
- def __init__(
- self,
- point_estimator_architecture: str,
- demeaning_estimator_architecture: str,
- variance_estimator_architecture: str,
- ):
- self.point_estimator_architecture = point_estimator_architecture
- self.demeaning_estimator_architecture = demeaning_estimator_architecture
- self.variance_estimator_architecture = variance_estimator_architecture
-
- self.training_time = None
-
- def _tune_component_estimator(
- self,
- X: np.array,
- y: np.array,
- estimator_architecture: str,
- n_searches: int,
- k_fold_splits: int = 3,
- random_state: Optional[int] = None,
- ) -> Dict:
- """
- Tune specified estimator's hyperparameters.
-
- Hyperparameters are selected randomly as part of the
- tuning process and a final optimal hyperparameter
- configuration is returned.
-
- Parameters
- ----------
- X :
- Explanatory variables.
- y :
- Target variable.
- estimator_architecture :
- String name for the type of estimator to tune.
- n_searches :
- Number of tuning searches to perform (eg. 5 means
- the model will randomly select 5 hyperparameter
- configurations for the estimator to evaluate).
- k_fold_splits :
- Number of cross validation data splits.
- random_state :
- Random generation seed.
-
- Returns
- -------
- best_configuration :
- Best performing hyperparameter configuration
- in tuning.
- """
- tuning_configurations = get_tuning_configurations(
- parameter_grid=SEARCH_MODEL_TUNING_SPACE[estimator_architecture],
- n_configurations=n_searches,
- random_state=random_state,
- )
- tuning_configurations.append(
- SEARCH_MODEL_DEFAULT_CONFIGURATIONS[estimator_architecture]
- )
-
- scored_configurations, scores = cross_validate_configurations(
- configurations=tuning_configurations,
- estimator_architecture=estimator_architecture,
- X=X,
- y=y,
- k_fold_splits=k_fold_splits,
- quantiles=None,
- random_state=random_state,
- )
- best_configuration = scored_configurations[scores.index(max(scores))]
-
- return best_configuration
-
- def _fit_component_estimator(
- self,
- X,
- y,
- estimator_architecture,
- tuning_iterations,
- random_state: Optional[int] = None,
- ):
- """
- Fit component estimator with option to tune.
-
- Component estimators are loosely defined, general use
- point estimators. Their final purpose is dependent on
- what X and y data is passed to the function (eg. if y is
- a target, a residual, etc.).
-
- Parameters
- ----------
- X :
- Explanatory variables.
- y :
- Target variable.
- estimator_architecture :
- String name for the type of estimator to tune.
- tuning_iterations :
- Number of tuning searches to perform (eg. 5 means
- the model will randomly select 5 hyperparameter
- configurations for the estimator to evaluate).
- To skip tuning during fitting, set this to 0.
- random_state :
- Random generation seed.
-
- Returns
- -------
- estimator :
- Fitted estimator object.
- """
- if tuning_iterations > 1:
- initialization_params = self._tune_component_estimator(
- X=X,
- y=y,
- estimator_architecture=estimator_architecture,
- n_searches=tuning_iterations,
- random_state=random_state,
- )
- else:
- initialization_params = SEARCH_MODEL_DEFAULT_CONFIGURATIONS[
- estimator_architecture
- ].copy()
- estimator = initialize_point_estimator(
- estimator_architecture=estimator_architecture,
- initialization_params=initialization_params,
- random_state=random_state,
- )
- self.training_time_tracker.resume_runtime()
- estimator.fit(X, y)
- self.training_time_tracker.pause_runtime()
-
- return estimator
-
- def fit(
- self,
- X_pe: np.array,
- y_pe: np.array,
- X_ve: np.array,
- y_ve: np.array,
- X_val: np.array,
- y_val: np.array,
- tuning_iterations: Optional[int] = 0,
- random_state: Optional[int] = None,
- ):
- """
- Fit conformal regression model on specified data.
-
- Fitting process involves the following sequential steps:
- 1. Fitting an estimator on a first portion of the
- data, training on X to predict y.
- 2. Obtaining residuals between the estimator and
- observed y's on a second portion of the data.
- 3. Fitting a conditional mean estimator on the
- residual data.
- 4. Using the mean estimator to de-mean the residual
- data.
- 5. Fitting an estimator to predict absolute, de-meaned
- residuals (residual spread around the local mean).
- 6. Using a third portion of the data as a conformal
- hold out set to calibrate intervals for the estimator.
-
- Parameters
- ----------
- X_pe :
- Explanatory variables used to train the point estimator.
- y_pe :
- Target variable used to train the point estimator.
- X_ve :
- Explanatory variables used to train the residual spread
- (variability) estimator.
- y_ve :
- Target variable used to train the residual spread
- (variability) estimator.
- X_val :
- Explanatory variables used to calibrate the point estimator.
- y_val :
- Target variable used to calibrate the point estimator.
- tuning_iterations :
- Number of tuning searches to perform (eg. 5 means
- the model will randomly select 5 hyperparameter
- configurations for the estimator to evaluate).
- To skip tuning during fitting, set this to 0.
- random_state :
- Random generation seed.
- """
- self.training_time_tracker = RuntimeTracker()
- self.training_time_tracker.pause_runtime()
-
- self.pe_estimator = self._fit_component_estimator(
- X=X_pe,
- y=y_pe,
- estimator_architecture=self.point_estimator_architecture,
- tuning_iterations=tuning_iterations,
- random_state=random_state,
- )
- pe_residuals = y_ve - self.pe_estimator.predict(X_ve)
-
- de_estimator = self._fit_component_estimator(
- X=X_ve,
- y=pe_residuals,
- estimator_architecture=self.demeaning_estimator_architecture,
- tuning_iterations=tuning_iterations,
- random_state=random_state,
- )
- demeaned_pe_residuals = abs(pe_residuals - de_estimator.predict(X_ve))
-
- self.ve_estimator = self._fit_component_estimator(
- X=X_ve,
- y=demeaned_pe_residuals,
- estimator_architecture=self.variance_estimator_architecture,
- tuning_iterations=tuning_iterations,
- random_state=random_state,
- )
-
- var_pred = self.ve_estimator.predict(X_val)
- var_pred = np.array([1 if x <= 0 else x for x in var_pred])
-
- self.nonconformity_scores = (
- abs(np.array(y_val) - self.pe_estimator.predict(X_val)) / var_pred
- )
- self.training_time = self.training_time_tracker.return_runtime()
-
- def predict(self, X: np.array, confidence_level: float):
- """
- Predict conformal interval bounds for specified X examples.
-
- Must be called after a relevant conformal estimator has
- been trained.
-
- Parameters
- ----------
- X :
- Explanatory variables to return targets for.
- confidence_level :
- Confidence level used to generate intervals.
-
- Returns
- -------
- lower_interval_bound :
- Lower bound(s) of conformal interval for specified
- X example(s).
- upper_interval_bound :
- Upper bound(s) of conformal interval for specified
- X example(s).
- """
- score_quantile = np.quantile(self.nonconformity_scores, confidence_level)
-
- y_pred = np.array(self.pe_estimator.predict(X))
-
- var_pred = self.ve_estimator.predict(X)
- var_pred = np.array([max(x, 0) for x in var_pred])
- scaled_score = score_quantile * var_pred
-
- lower_interval_bound = y_pred - scaled_score
- upper_interval_bound = y_pred + scaled_score
-
- return lower_interval_bound, upper_interval_bound
-
-
-class QuantileConformalRegression:
- """
- Quantile conformal regression.
-
- Fits quantile estimators on X and y data and applies non-conformity
- adjustments to validate quantile estimates.
-
- The class contains tuning, fitting and prediction methods.
- """
-
- def __init__(self, quantile_estimator_architecture: str):
- self.quantile_estimator_architecture = quantile_estimator_architecture
-
- self.training_time = None
-
- def _tune(
- self,
- X: np.array,
- y: np.array,
- estimator_architecture: str,
- n_searches: int,
- confidence_level: float,
- k_fold_splits: int = 3,
- random_state: Optional[int] = None,
- ) -> Dict:
- tuning_configurations = get_tuning_configurations(
- parameter_grid=SEARCH_MODEL_TUNING_SPACE[estimator_architecture],
- n_configurations=n_searches,
- random_state=random_state,
- )
- tuning_configurations.append(
- SEARCH_MODEL_DEFAULT_CONFIGURATIONS[estimator_architecture]
- )
-
- scored_configurations, scores = cross_validate_configurations(
- configurations=tuning_configurations,
- estimator_architecture=estimator_architecture,
- X=X,
- y=y,
- k_fold_splits=k_fold_splits,
- quantiles=[
- ((1 - confidence_level) / 2),
- confidence_level + ((1 - confidence_level) / 2),
- ],
- random_state=random_state,
- )
- best_configuration = scored_configurations[scores.index(max(scores))]
-
- return best_configuration
-
- def fit(
- self,
- X_train: np.array,
- y_train: np.array,
- X_val: np.array,
- y_val: np.array,
- confidence_level: float,
- tuning_iterations: Optional[int] = 0,
- random_state: Optional[int] = None,
- ):
- """
- Fit quantile estimator with option to tune.
-
- Quantile estimators are fitted based on a specified confidence
- level and return two quantile estimates for the symmetrical
- lower and upper bounds around that level.
-
- Parameters
- ----------
- X_train :
- Explanatory variables used to train the quantile estimator.
- y_train :
- Target variable used to train the quantile estimator.
- X_val :
- Explanatory variables used to calibrate conformal intervals.
- y_val :
- Target variable used to calibrate conformal intervals.
- confidence_level :
- Confidence level determining quantiles to be predicted
- by the quantile estimator. Quantiles are obtained symmetrically
- around the confidence level (eg. 0.5 confidence level would
- result in a quantile estimator for the 25th and 75th percentiles
- of the target variable).
- tuning_iterations :
- Number of tuning searches to perform (eg. 5 means
- the model will randomly select 5 hyperparameter
- configurations for the quantile estimator to evaluate).
- To skip tuning during fitting, set this to 0.
- random_state :
- Random generation seed.
-
- Returns
- -------
- estimator :
- Fitted estimator object.
- """
- if tuning_iterations > 1:
- initialization_params = self._tune(
- X=X_train,
- y=y_train,
- estimator_architecture=self.quantile_estimator_architecture,
- n_searches=tuning_iterations,
- confidence_level=confidence_level,
- random_state=random_state,
- )
- else:
- initialization_params = SEARCH_MODEL_DEFAULT_CONFIGURATIONS[
- self.quantile_estimator_architecture
- ].copy()
-
- self.quantile_estimator = initialize_quantile_estimator(
- estimator_architecture=self.quantile_estimator_architecture,
- initialization_params=initialization_params,
- pinball_loss_alpha=[
- ((1 - confidence_level) / 2),
- confidence_level + ((1 - confidence_level) / 2),
- ],
- random_state=random_state,
- )
- training_time_tracker = RuntimeTracker()
- self.quantile_estimator.fit(X_train, y_train)
- self.training_time = training_time_tracker.return_runtime()
-
- lower_conformal_deviations = list(
- self.quantile_estimator.predict(X_val)[:, 0] - y_val
- )
- upper_conformal_deviations = list(
- y_val - self.quantile_estimator.predict(X_val)[:, 1]
- )
- nonconformity_scores = []
- for lower_deviation, upper_deviation in zip(
- lower_conformal_deviations, upper_conformal_deviations
- ):
- nonconformity_scores.append(max(lower_deviation, upper_deviation))
- self.nonconformity_scores = np.array(nonconformity_scores)
-
- def predict(self, X: np.array, confidence_level: float):
- """
- Predict conformal interval bounds for specified X examples.
-
- Must be called after a relevant quantile estimator has
- been trained. Intervals will be generated based on a passed
- confidence level, which should ideally be the same confidence
- level specified in training, but may differ (though this is
- less desirable and there should rarely be a valid reason).
-
- Parameters
- ----------
- X :
- Explanatory variables to return targets for.
- confidence_level :
- Confidence level used to generate intervals.
-
- Returns
- -------
- lower_interval_bound :
- Lower bound(s) of conformal interval for specified
- X example(s).
- upper_interval_bound :
- Upper bound(s) of conformal interval for specified
- X example(s).
- """
- score_quantile = np.quantile(self.nonconformity_scores, confidence_level)
- lower_interval_bound = (
- np.array(self.quantile_estimator.predict(X)[:, 0]) - score_quantile
- )
- upper_interval_bound = (
- np.array(self.quantile_estimator.predict(X)[:, 1]) + score_quantile
- )
-
- return lower_interval_bound, upper_interval_bound
diff --git a/confopt/optimization.py b/confopt/optimization.py
deleted file mode 100644
index 077de1f..0000000
--- a/confopt/optimization.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import logging
-import time
-
-logger = logging.getLogger(__name__)
-
-
-class RuntimeTracker:
- def __init__(self):
- self.start_time = time.time()
- self.runtime = 0
-
- def _elapsed_runtime(self):
- take_time = time.time()
- return abs(take_time - self.start_time)
-
- def pause_runtime(self):
- self.runtime = self.runtime + self._elapsed_runtime()
-
- def resume_runtime(self):
- self.start_time = time.time()
-
- def return_runtime(self):
- self.pause_runtime()
- taken_runtime = self.runtime
- self.resume_runtime()
- return taken_runtime
-
-
-def derive_optimal_tuning_count(
- baseline_model_runtime: float,
- search_model_runtime: float,
- search_model_retraining_freq: int,
- search_to_baseline_runtime_ratio: float,
-) -> int:
- """
- Derives the optimal number of tuning evaluations to perform on a search model.
-
- The number of evaluations will satisfy a specified runtime ratio between
- the search model and the baseline model being optimized by it.
-
- Parameters
- ----------
- baseline_model_runtime :
- Baseline model training time (per training event).
- search_model_runtime :
- Search model training time (per training event).
- search_model_retraining_freq :
- Search model retraining frequency. Determines how often the
- search model will be retrained and thus re-tuned.
- search_to_baseline_runtime_ratio :
- Desired ratio between the total training time of the search
- model and the baseline model. A ratio > 1 indicates the search
- model is allowed to train for longer than the baseline model
- and vice versa. The number of tuning evaluations will be set
- to ensure the runtime ratio is met (or closely matched).
-
- Returns
- -------
- search_model_tuning_count :
- Optimal number of search model tuning evaluations, given runtime
- ratio constraint.
- """
- search_model_tuning_count = (
- baseline_model_runtime * search_model_retraining_freq
- ) / (search_model_runtime * (1 / search_to_baseline_runtime_ratio) ** 2)
-
- # Hard coded number of maximum useful evaluations (arbitrary):
- count_ceiling = 60
- search_model_tuning_count = min(
- count_ceiling, max(1, int(round(search_model_tuning_count)))
- )
-
- logger.debug(f"Optimal search model param evaluations: {search_model_tuning_count}")
-
- return search_model_tuning_count
diff --git a/confopt/quantile_wrappers.py b/confopt/quantile_wrappers.py
deleted file mode 100644
index d0fb88a..0000000
--- a/confopt/quantile_wrappers.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import abc
-from typing import List, Union
-
-import numpy as np
-from sklearn.base import BaseEstimator
-from sklearn.ensemble import GradientBoostingRegressor
-
-
-class BiQuantileEstimator:
- """
- Base class for bi-quantile estimators.
-
- Estimators fit on X features to predict two symmetrical conditional
- quantiles of some target Y variable.
- """
-
- def __init__(
- self,
- quantiles: List[float],
- random_state: int,
- ):
- self.quantiles = quantiles
- self.random_state = random_state
-
- @abc.abstractmethod
- def fit(self, X: np.array, y: np.array):
- return
-
- def _predict(
- self,
- lo_quantile_estimator: BaseEstimator,
- hi_quantile_estimator: BaseEstimator,
- X: np.array,
- ) -> np.array:
- """
- Make quantile predictions using features in X.
-
- Parameters
- ----------
- lo_quantile_estimator :
- Trained lower quantile estimator.
- hi_quantile_estimator :
- Trained upper quantile estimator.
- X :
- Features used to return predictions.
-
- Returns
- -------
- y_pred :
- Quantile predictions, organized in a len(X) by
- 2 array, where the first column contains lower
- quantile predictions, and the second contains
- higher quantile predictions.
- """
- lo_y_pred = lo_quantile_estimator.predict(X).reshape(len(X), 1)
- hi_y_pred = hi_quantile_estimator.predict(X).reshape(len(X), 1)
- y_pred = np.hstack([lo_y_pred, hi_y_pred])
-
- return y_pred
-
-
-class QuantileGBM(BiQuantileEstimator):
- """
- Quantile gradient boosted machine estimator.
- """
-
- def __init__(
- self,
- quantiles: List[float],
- learning_rate: float,
- n_estimators: int,
- min_samples_split: Union[float, int],
- min_samples_leaf: Union[float, int],
- max_depth: int,
- random_state: int,
- ):
- self.learning_rate = learning_rate
- self.n_estimators = n_estimators
- self.min_samples_split = min_samples_split
- self.min_samples_leaf = min_samples_leaf
- self.max_depth = max_depth
- super().__init__(quantiles, random_state)
-
- def __str__(self):
- return "QuantileGBM()"
-
- def __repr__(self):
- return "QuantileGBM()"
-
- def fit(self, X: np.array, y: np.array):
- """
- Trains a bi-quantile GBM model on X and y data.
-
- Two separate quantile estimators are trained, one predicting
- an upper quantile and one predicting a symmetrical lower quantile.
- The estimators are aggregated in a tuple, for later joint
- use in prediction.
-
- Parameters
- ----------
- X :
- Feature variables.
- y :
- Target variable.
- """
- trained_estimators = ()
- for quantile in self.quantiles:
- quantile_estimator = GradientBoostingRegressor(
- learning_rate=self.learning_rate,
- n_estimators=self.n_estimators,
- min_samples_split=self.min_samples_split,
- min_samples_leaf=self.min_samples_leaf,
- max_depth=self.max_depth,
- random_state=self.random_state,
- loss="quantile",
- alpha=quantile,
- )
- quantile_estimator.fit(X, y)
- trained_estimators = trained_estimators + (quantile_estimator,)
- self.lo_quantile_estimator, self.hi_quantile_estimator = trained_estimators
-
- def predict(self, X: np.array) -> np.array:
- return self._predict(
- lo_quantile_estimator=self.lo_quantile_estimator,
- hi_quantile_estimator=self.hi_quantile_estimator,
- X=X,
- )
diff --git a/confopt/selection/__init__.py b/confopt/selection/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/confopt/selection/acquisition.py b/confopt/selection/acquisition.py
new file mode 100644
index 0000000..d04e996
--- /dev/null
+++ b/confopt/selection/acquisition.py
@@ -0,0 +1,618 @@
+"""Conformal acquisition functions for Bayesian optimization.
+
+This module implements acquisition functions that combine conformal prediction with
+Bayesian optimization strategies. It provides uncertainty-aware point selection
+for hyperparameter optimization through two main approaches: locally weighted
+conformal prediction and quantile-based conformal prediction.
+
+The module bridges conformal prediction estimators with acquisition strategies,
+enabling adaptive optimization that adjusts exploration based on prediction
+uncertainty and coverage feedback. All acquisition functions provide finite-sample
+coverage guarantees while optimizing for different exploration-exploitation trade-offs.
+
+Key Components:
+ - BaseConformalSearcher: Abstract interface for conformal acquisition functions
+ - QuantileConformalSearcher: Quantile-based conformal acquisition
+
+Integration Context:
+ Serves as the primary interface between the conformal prediction framework
+ and optimization algorithms, supporting various acquisition strategies while
+ maintaining theoretical coverage guarantees throughout the optimization process.
+"""
+
+import logging
+from typing import Optional, Union, Literal, Tuple
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from abc import ABC, abstractmethod
+
+
+from confopt.selection.conformalization import (
+ QuantileConformalEstimator,
+)
+from confopt.selection.sampling.bound_samplers import (
+ LowerBoundSampler,
+ PessimisticLowerBoundSampler,
+)
+from confopt.selection.sampling.thompson_samplers import ThompsonSampler
+from confopt.selection.sampling.expected_improvement_samplers import (
+ ExpectedImprovementSampler,
+)
+
+from confopt.selection.estimation import initialize_estimator
+from confopt.selection.estimator_configuration import (
+ QUANTILE_TO_POINT_ESTIMATOR_MAPPING,
+)
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_IG_SAMPLER_RANDOM_STATE = 1234
+
+
+class BaseConformalSearcher(ABC):
+ """Abstract base class for conformal prediction-based acquisition functions.
+
+ Defines the common interface for acquisition functions that combine conformal
+ prediction with various sampling strategies for Bayesian optimization. Provides
+ unified handling of different acquisition strategies while maintaining coverage
+ guarantees through conformal prediction.
+
+ The class implements a strategy pattern where different samplers define the
+ acquisition behavior, while the searcher manages the conformal prediction
+ component and adaptive alpha updating based on coverage feedback.
+
+ Args:
+ sampler: Acquisition strategy implementation that defines point selection
+ behavior. Must implement the appropriate calculation methods for the
+ chosen acquisition function.
+
+ Attributes:
+ sampler: The acquisition strategy instance.
+ conformal_estimator: Fitted conformal prediction estimator (set by subclasses).
+ X_train: Current training features, updated through optimization process.
+ y_train: Current training targets, updated through optimization process.
+ X_val: Validation features for conformal calibration.
+ y_val: Validation targets for conformal calibration.
+ last_beta: Most recent coverage feedback for single-alpha samplers.
+ predictions_per_interval: Cached interval predictions from last predict() call.
+ point_estimator: Fitted point estimator for optimistic Thompson sampling.
+
+ Design Pattern:
+ Implements Template Method pattern with strategy injection, where the
+ acquisition strategy is delegated to the sampler while coverage tracking
+ and adaptive behavior are handled by the base searcher framework.
+ """
+
+ def __init__(
+ self,
+ sampler: Union[
+ LowerBoundSampler,
+ ThompsonSampler,
+ PessimisticLowerBoundSampler,
+ ExpectedImprovementSampler,
+ ],
+ ):
+ self.sampler = sampler
+ self.conformal_estimator: Optional[QuantileConformalEstimator] = None
+ self.X_train = None
+ self.y_train = None
+ self.last_beta = None
+ self.predictions_per_interval = None
+
+ def predict(self, X: np.array):
+ """Generate acquisition function values for candidate points.
+
+ Routes prediction requests to the appropriate sampler-specific method
+ based on the configured acquisition strategy. Handles the interface
+ between the generic acquisition API and strategy-specific implementations.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ Acquisition function values, shape (n_candidates,). Higher values
+ indicate more promising candidates for evaluation.
+
+ Raises:
+ ValueError: If sampler type is not supported or conformal estimator
+ is not fitted.
+
+ Implementation Notes:
+ Caches interval predictions in self.predictions_per_interval for
+ potential reuse by update() method. The specific acquisition behavior
+ depends on the sampler strategy:
+ - LowerBoundSampler: Upper confidence bound with exploration decay
+ - ThompsonSampler: Posterior sampling with optional optimistic bias
+ - PessimisticLowerBoundSampler: Conservative lower bound selection
+ - ExpectedImprovementSampler: Expected improvement over current best
+
+
+ """
+ if isinstance(self.sampler, LowerBoundSampler):
+ return self._predict_with_ucb(X)
+ elif isinstance(self.sampler, ThompsonSampler):
+ return self._predict_with_thompson(X)
+ elif isinstance(self.sampler, PessimisticLowerBoundSampler):
+ return self._predict_with_pessimistic_lower_bound(X)
+ elif isinstance(self.sampler, ExpectedImprovementSampler):
+ return self._predict_with_expected_improvement(X)
+
+ else:
+ raise ValueError(f"Unsupported sampler type: {type(self.sampler)}")
+
+ @abstractmethod
+ def _predict_with_ucb(self, X: np.array):
+ """Generate upper confidence bound acquisition values.
+
+ Subclasses must implement UCB acquisition using their
+ specific conformal prediction approach.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ UCB acquisition values, shape (n_candidates,).
+ """
+
+ @abstractmethod
+ def _predict_with_thompson(self, X: np.array):
+ """Generate Thompson sampling acquisition values.
+
+ Subclasses must implement Thompson sampling using their
+ specific conformal prediction approach.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ Thompson sampling acquisition values, shape (n_candidates,).
+ """
+
+ @abstractmethod
+ def _predict_with_pessimistic_lower_bound(self, X: np.array):
+ """Generate pessimistic lower bound acquisition values.
+
+ Subclasses must implement pessimistic lower bound acquisition
+ using their specific conformal prediction approach.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ Lower bound acquisition values, shape (n_candidates,).
+ """
+
+ @abstractmethod
+ def _predict_with_expected_improvement(self, X: np.array):
+ """Generate expected improvement acquisition values.
+
+ Subclasses must implement expected improvement acquisition
+ using their specific conformal prediction approach.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ Expected improvement acquisition values, shape (n_candidates,).
+ """
+
+ @abstractmethod
+ def _calculate_betas(self, X: np.array, y_true: float) -> list[float]:
+ """Calculate coverage feedback (beta values) for adaptive alpha updating.
+
+ Subclasses must implement beta calculation using their
+ specific conformal prediction approach.
+
+ Args:
+ X: Configuration where observation was made, shape (n_features,).
+ y_true: Observed performance value at the configuration.
+
+ Returns:
+ List of beta values, one per alpha level, representing coverage feedback.
+ """
+
+ def get_interval(self, X: np.array) -> Tuple[float, float]:
+ """Get prediction interval bounds for a given configuration.
+
+ Returns the lower and upper bounds of the prediction interval for
+ interval-based samplers. This method is specifically designed for
+ samplers that provide single coverage levels.
+
+ Args:
+ X: Input configuration, shape (n_features,).
+
+ Returns:
+ Tuple of (lower_bound, upper_bound) for the prediction interval.
+
+ Raises:
+ ValueError: If conformal estimator is not fitted or if sampler type
+ does not support interval retrieval.
+
+ Coverage Information:
+ Only works for LowerBoundSampler and PessimisticLowerBoundSampler as
+ these samplers use single intervals. Multi-alpha samplers require
+ more complex interval handling through the adaptive alpha mechanism.
+ """
+ if isinstance(self.sampler, (LowerBoundSampler, PessimisticLowerBoundSampler)):
+ if self.conformal_estimator is None:
+ raise ValueError(
+ "Conformal estimator not initialized. Call fit() before getting interval."
+ )
+
+ predictions_per_interval = self.conformal_estimator.predict_intervals(
+ X.reshape(1, -1)
+ )
+
+ # Grab first predictions per interval object, since these samplers have only one alpha/interval
+ # Then grab first index of upper and lower bound, since we're predicting for only one X configuration
+ interval = predictions_per_interval[0]
+ lower_bound = interval.lower_bounds[0]
+ upper_bound = interval.upper_bounds[0]
+
+ return lower_bound, upper_bound
+
+ else:
+ raise ValueError(
+ "Interval retrieval only supported for LowerBoundSampler and PessimisticLowerBoundSampler"
+ )
+
+ def update(self, X: np.array, y_true: float) -> None:
+ """Update searcher state with new observation and adapt coverage levels.
+
+ Incorporates new data point into the optimization process and updates
+ adaptive components based on observed coverage performance. Handles
+ sampler-specific updates and alpha adaptation for coverage control.
+
+ Args:
+ X: Newly evaluated configuration, shape (n_features,).
+ y_true: Observed performance for the configuration.
+
+ Adaptive Mechanisms:
+ - ExpectedImprovementSampler: Updates best observed value
+ - LowerBoundSampler: Updates exploration schedule and beta decay
+ - Adaptive samplers: Updates interval widths based on coverage feedback
+ - Conformal estimator: Updates alpha levels if adaptation is enabled
+
+ Coverage Adaptation Process:
+ 1. Calculate coverage feedback (betas) for the new observation
+ 2. Update sampler interval widths based on coverage performance
+ 3. Propagate updated alphas to conformal estimator
+ 4. Maintain coverage targets through adaptive alpha adjustment
+
+ Implementation Notes:
+ The update process varies by sampler type:
+ - Single-alpha samplers receive scalar beta values
+ - Multi-alpha samplers receive beta vectors for each coverage level
+ - Information-gain samplers may cache additional state for efficiency
+ """
+ if isinstance(self.sampler, ExpectedImprovementSampler):
+ self.sampler.update_best_value(y_true)
+ if isinstance(self.sampler, LowerBoundSampler):
+ self.sampler.update_exploration_step()
+ if self.conformal_estimator.fold_scores_per_alpha is not None:
+ uses_adaptation = (
+ hasattr(self.sampler, "adapter") and self.sampler.adapter is not None
+ ) or (
+ hasattr(self.sampler, "adapters") and self.sampler.adapters is not None
+ )
+ if uses_adaptation:
+ betas = self._calculate_betas(X, y_true)
+ if isinstance(
+ self.sampler,
+ (
+ ThompsonSampler,
+ ExpectedImprovementSampler,
+ ),
+ ):
+ self.sampler.update_interval_width(betas=betas)
+ elif isinstance(
+ self.sampler, (PessimisticLowerBoundSampler, LowerBoundSampler)
+ ):
+ if len(betas) == 1:
+ self.last_beta = betas[0]
+ self.sampler.update_interval_width(beta=betas[0])
+ else:
+ raise ValueError(
+ "Multiple betas returned for single beta sampler."
+ )
+ self.conformal_estimator.update_alphas(self.sampler.fetch_alphas())
+
+
+QuantileEstimatorArchitecture = Literal[
+ "qgbm", "qgp", "qrf", "qknn", "ql", "qleaf", "qens5"
+]
+
+
+class QuantileConformalSearcher(BaseConformalSearcher):
+ """Conformal acquisition function using quantile-based prediction intervals.
+
+ Implements acquisition functions based on quantile conformal prediction,
+ directly estimating prediction quantiles and applying conformal adjustments
+ when sufficient calibration data is available. Provides flexible acquisition
+ strategies while maintaining coverage guarantees.
+
+ This approach is particularly effective when the objective function exhibits
+ asymmetric uncertainty or when specific quantile behaviors are of interest.
+ Automatically switches between conformalized and non-conformalized modes
+ based on data availability.
+
+ Args:
+ quantile_estimator_architecture: Architecture identifier for the quantile
+ estimator. Must be registered in ESTIMATOR_REGISTRY and support
+ simultaneous multi-quantile estimation.
+ sampler: Acquisition strategy that defines point selection behavior.
+ n_pre_conformal_trials: Minimum total samples required for conformal mode.
+ Below this threshold, uses direct quantile predictions.
+
+ Attributes:
+ quantile_estimator_architecture: Quantile estimator configuration.
+ n_pre_conformal_trials: Threshold for conformal vs non-conformal mode.
+ conformal_estimator: Fitted QuantileConformalEstimator instance.
+ point_estimator: Optional point estimator for optimistic Thompson sampling.
+
+
+ Mathematical Foundation:
+ Uses quantile conformal prediction where intervals have the form:
+
+ Conformalized: [q̂_{α/2}(x) - C_α, q̂_{1-α/2}(x) + C_α]
+ Non-conformalized: [q̂_{α/2}(x), q̂_{1-α/2}(x)]
+
+ Where:
+ - q̂_τ(x): τ-quantile estimate at location x
+ - C_α: Conformal adjustment based on nonconformity scores
+ - Mode selection based on n_pre_conformal_trials threshold
+
+ Adaptive Behavior:
+ Supports sampler-specific adaptation mechanisms including upper quantile
+ capping for conservative samplers and point estimator integration for
+ optimistic Thompson sampling when enabled.
+ """
+
+ def __init__(
+ self,
+ quantile_estimator_architecture: QuantileEstimatorArchitecture,
+ sampler: Union[
+ LowerBoundSampler,
+ ThompsonSampler,
+ PessimisticLowerBoundSampler,
+ ExpectedImprovementSampler,
+ ],
+ n_pre_conformal_trials: int = 32,
+ n_calibration_folds: int = 3,
+ calibration_split_strategy: Literal[
+ "cv", "train_test_split", "adaptive"
+ ] = "adaptive",
+ ):
+ super().__init__(sampler)
+ self.quantile_estimator_architecture = quantile_estimator_architecture
+ self.n_pre_conformal_trials = n_pre_conformal_trials
+ self.n_calibration_folds = n_calibration_folds
+ self.calibration_split_strategy = calibration_split_strategy
+
+ self.scaler = StandardScaler()
+ self.conformal_estimator = QuantileConformalEstimator(
+ quantile_estimator_architecture=self.quantile_estimator_architecture,
+ alphas=self.sampler.fetch_alphas(),
+ n_pre_conformal_trials=self.n_pre_conformal_trials,
+ n_calibration_folds=self.n_calibration_folds,
+ calibration_split_strategy=self.calibration_split_strategy,
+ )
+
+ def fit(
+ self,
+ X: np.array,
+ y: np.array,
+ tuning_iterations: Optional[int] = 0,
+ random_state: Optional[int] = None,
+ ):
+ """Fit the quantile conformal estimator for acquisition.
+
+ Trains the quantile estimator and sets up conformal calibration,
+ with automatic mode selection based on data availability. Handles
+ sampler-specific configurations and point estimator setup for
+ optimistic Thompson sampling and median estimation for bound samplers.
+
+ Args:
+ X: Input features for estimator fitting, shape (n_samples, n_features).
+ y: Target values for estimator fitting, shape (n_samples,).
+ tuning_iterations: Number of hyperparameter tuning iterations (0 disables tuning).
+ random_state: Random seed for reproducible results.
+
+ Implementation Process:
+ 1. Store data for potential use by acquisition strategies
+ 2. Configure sampler-specific quantile estimation and point estimators
+ 3. Set default random state for Information Gain Sampler if not provided
+ 4. Fit QuantileConformalEstimator with internal data splitting
+ 5. Store estimator performance metrics for quality assessment
+
+ Sampler-Specific Setup:
+ - Bound samplers: Median (0.5 quantile) estimator for UCB point estimates
+ - Optimistic Thompson: Additional point estimator training
+ - Information-based: Full quantile range support
+ """
+ # Store data for potential use by samplers (though splitting is now internal)
+ self.X_train = X # For backwards compatibility
+ self.y_train = y
+ random_state = random_state
+
+ # Create median/mean estimator for bound samplers (UCB point estimates) and Optimistic Thompson sampling
+ if isinstance(
+ self.sampler, (LowerBoundSampler, PessimisticLowerBoundSampler)
+ ) or (
+ isinstance(self.sampler, ThompsonSampler)
+ and (
+ hasattr(self.sampler, "enable_optimistic_sampling")
+ and self.sampler.enable_optimistic_sampling
+ )
+ ):
+ # Fit scaler on training data and transform X for point estimator training
+ X_normalized = self.scaler.fit_transform(X)
+
+ if (
+ self.quantile_estimator_architecture
+ in QUANTILE_TO_POINT_ESTIMATOR_MAPPING
+ ):
+ point_estimator_architecture = QUANTILE_TO_POINT_ESTIMATOR_MAPPING[
+ self.quantile_estimator_architecture
+ ]
+ self.point_estimator = initialize_estimator(
+ estimator_architecture=point_estimator_architecture,
+ random_state=random_state,
+ )
+ self.point_estimator.fit(X=X_normalized, y=y)
+ # TODO: Temporary fallback to median as point estimator for architectures that
+ # don't yet have a point counterpart in the code:
+ else:
+ self.point_estimator = initialize_estimator(
+ estimator_architecture=self.quantile_estimator_architecture,
+ random_state=random_state,
+ )
+ self.point_estimator.fit(
+ X=X_normalized,
+ y=y,
+ quantiles=[0.5], # Only estimate the median
+ )
+
+ # NOTE: Scrappy wrapper to align predict calls between quantile and point estimators
+ # TODO: Remove in future
+ class PointWrapper:
+ def __init__(self, estimator: QuantileConformalEstimator):
+ self.estimator = estimator
+
+ def predict(self, X):
+ return self.estimator.predict(X)[:, 0]
+
+ self.point_estimator = PointWrapper(self.point_estimator)
+
+ self.conformal_estimator.fit(
+ X=X,
+ y=y,
+ tuning_iterations=tuning_iterations,
+ random_state=random_state,
+ )
+
+ def _predict_with_pessimistic_lower_bound(self, X: np.array):
+ """Generate pessimistic lower bound acquisition values.
+
+ Returns the lower bounds of quantile-based prediction intervals,
+ implementing conservative exploration using direct quantile predictions
+ or conformally adjusted intervals depending on data availability.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ Lower bounds of prediction intervals, shape (n_candidates,).
+
+ Quantile-Based Strategy:
+ Uses estimated quantiles directly for conservative point selection,
+ with automatic conformal adjustment when sufficient calibration
+ data is available.
+ """
+ self.predictions_per_interval = self.conformal_estimator.predict_intervals(X)
+ return self.predictions_per_interval[0].lower_bounds
+
+ def _predict_with_ucb(self, X: np.array):
+ """Generate upper confidence bound acquisition values.
+
+ Implements UCB acquisition using quantile-based intervals with
+ median estimator predictions as point estimates and symmetric variance assumption.
+ Adapts automatically to conformalized or non-conformalized mode.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ UCB acquisition values, shape (n_candidates,).
+
+ Mathematical Formulation:
+ UCB(x) = point_estimate(x) - β × (interval_width(x) / 2)
+ Where point_estimate comes from dedicated point estimator and
+ interval bounds come from quantile estimation with symmetric variance assumption.
+ """
+ self.predictions_per_interval = self.conformal_estimator.predict_intervals(X)
+ interval = self.predictions_per_interval[0]
+
+ # Use dedicated point estimator for point estimates (index 0 since we only fit quantile 0.5)
+ X_normalized = self.scaler.transform(X)
+ point_estimates = self.point_estimator.predict(X_normalized)
+
+ # Use half the interval width for symmetric variance assumption
+ half_width = np.abs(interval.upper_bounds - interval.lower_bounds) / 2
+ return self.sampler.calculate_ucb_predictions(
+ point_estimates=point_estimates,
+ half_width=half_width,
+ )
+
+ def _predict_with_thompson(self, X: np.array):
+ """Generate Thompson sampling acquisition values.
+
+ Implements Thompson sampling using quantile-based prediction intervals,
+ with optional point estimator integration for optimistic bias.
+ Automatically adapts to available conformal calibration.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ Thompson sampling acquisition values, shape (n_candidates,).
+
+ Sampling Strategy:
+ Draws random samples from quantile-based intervals, with optional
+ optimistic constraints from separately fitted point estimator
+ when enable_optimistic_sampling is True.
+ """
+ self.predictions_per_interval = self.conformal_estimator.predict_intervals(X)
+ point_predictions = None
+ if self.sampler.enable_optimistic_sampling:
+ X_normalized = self.scaler.transform(X)
+ point_predictions = self.point_estimator.predict(X_normalized)
+ return self.sampler.calculate_thompson_predictions(
+ predictions_per_interval=self.predictions_per_interval,
+ point_predictions=point_predictions,
+ )
+
+ def _predict_with_expected_improvement(self, X: np.array):
+ """Generate expected improvement acquisition values.
+
+ Calculates expected improvement using quantile-based prediction
+ intervals, automatically accounting for conformalized or
+ non-conformalized interval construction.
+
+ Args:
+ X: Candidate points for evaluation, shape (n_candidates, n_features).
+
+ Returns:
+ Expected improvement acquisition values, shape (n_candidates,).
+
+ Quantile-Based EI:
+ Integrates improvement probabilities over quantile-estimated
+ intervals, naturally handling asymmetric uncertainty patterns
+ in the objective function.
+ """
+ self.predictions_per_interval = self.conformal_estimator.predict_intervals(X)
+ return self.sampler.calculate_expected_improvement(
+ predictions_per_interval=self.predictions_per_interval
+ )
+
+ def _calculate_betas(self, X: np.array, y_true: float) -> list[float]:
+ """Calculate coverage feedback (beta values) for adaptive alpha updating.
+
+ Computes alpha-specific coverage feedback using quantile-based
+ nonconformity scores. Provides separate beta values for each
+ alpha level to enable granular coverage control.
+
+ Args:
+ X: Configuration where observation was made, shape (n_features,).
+ y_true: Observed performance value at the configuration.
+
+ Returns:
+ List of beta values, one per alpha level, representing coverage feedback.
+
+ Quantile-Based Beta Calculation:
+ For each alpha level, computes nonconformity as the maximum
+ deviation from the corresponding quantile interval, then
+ calculates the proportion of calibration scores at or below
+ this nonconformity for adaptive alpha adjustment.
+ """
+ return self.conformal_estimator.calculate_betas(X, y_true)
diff --git a/confopt/selection/adaptation.py b/confopt/selection/adaptation.py
new file mode 100644
index 0000000..3c0bf41
--- /dev/null
+++ b/confopt/selection/adaptation.py
@@ -0,0 +1,156 @@
+import numpy as np
+import logging
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+def pinball_loss(beta: float, theta: float, alpha: float) -> float:
+ """Calculate pinball loss for conformal prediction adaptation.
+
+ Args:
+ beta: Empirical coverage probability (proportion of calibration scores >= test score)
+ theta: Parameter (in DtACI context, this is α_t^i, the expert's alpha value)
+ alpha: Global target miscoverage level
+
+ Returns:
+ Pinball loss value
+
+ Mathematical Details:
+ From the paper: ℓ(β_t, θ) := α(β_t - θ) - min{0, β_t - θ}
+
+ This is the theoretical pinball loss used in the DtACI algorithm.
+ In the algorithm, θ = α_t^i (expert's alpha value) and α is the global target.
+
+ Beta represents the empirical coverage probability of the new observation.
+ High beta (> α) means the observation is "easy" (low nonconformity relative to
+ calibration) and intervals should be tightened. Low beta (< α) means the
+ observation is "hard" (high nonconformity) and intervals should be widened.
+ """
+ return alpha * (beta - theta) - min(0, beta - theta)
+
+
+class DtACI:
+ """Dynamically-tuned Adaptive Conformal Inference.
+
+ Implements the DtACI algorithm from Gibbs & Candès (2021) with K experts using
+ different learning rates γ_k. Each expert maintains its own miscoverage level α_t^k,
+ combined using exponential weighting based on pinball loss performance.
+
+ Mathematical Components from the Paper:
+ 1. Pinball loss: ℓ(β_t, α_t^i) := α(β_t - α_t^i) - min{0, β_t - α_t^i}
+ 2. Weight update: w_t+1^i ∝ w_t^i × exp(-η × ℓ(β_t, α_t^i))
+ 3. Expert update: α_t+1^i = α_t^i + γ_i × (α - err_t^i)
+ 4. Selection: α_t via weighted average or random sampling
+ 5. Regularization: w_t+1^i = (1-σ)w̄_t^i + σ/k
+ """
+
+ def __init__(
+ self,
+ alpha: float = 0.1,
+ gamma_values: Optional[list[float]] = None,
+ use_weighted_average: bool = True,
+ ):
+ """Initialize DtACI with theoretical parameters.
+
+ Args:
+ alpha: Target miscoverage level (α ∈ (0,1))
+ gamma_values: Learning rates for each expert. If single value provided,
+ functions as simple ACI. If None, uses conservative multi-expert defaults.
+ use_weighted_average: If True, uses deterministic weighted average (Algorithm 2).
+ If False, uses random sampling (Algorithm 1).
+ """
+ if not 0 < alpha < 1:
+ raise ValueError("alpha must be in (0, 1)")
+
+ self.alpha = alpha
+ self.alpha_t = alpha
+ self.use_weighted_average = use_weighted_average
+
+ if gamma_values is None:
+ gamma_values = [0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128]
+
+ if any(gamma <= 0 for gamma in gamma_values):
+ raise ValueError("All gamma values must be positive")
+
+ self.k = len(gamma_values)
+ self.gamma_values = np.asarray(gamma_values)
+ self.alpha_t_candidates = np.array([alpha] * self.k)
+
+ # Theoretical parameters from Algorithm 1 in the paper
+ self.interval = 500
+ self.sigma = 1 / (2 * self.interval)
+ self.eta = (
+ np.sqrt(3 / self.interval)
+ * np.sqrt(np.log(self.interval * self.k) + 2)
+ / ((1 - alpha) ** 2 * alpha**2)
+ )
+
+ self.weights = np.ones(self.k) / self.k
+ self.update_count = 0
+ self.beta_history = []
+ self.alpha_history = []
+ self.weight_history = []
+
+ def update(self, beta: float) -> float:
+ """Update alpha values based on empirical coverage feedback.
+
+ Implements Algorithm 1 from Gibbs & Candès (2021):
+ 1. Compute pinball losses for each expert
+ 2. Update expert weights using exponential weighting
+ 3. Update each expert's alpha using gradient step
+ 4. Sample final alpha from weight distribution
+
+ Args:
+ beta: Empirical coverage feedback (β_t ∈ [0,1])
+
+ Returns:
+ Updated miscoverage level α_t+1
+ """
+ if not 0 <= beta <= 1:
+ raise ValueError(f"beta must be in [0, 1], got {beta}")
+
+ self.update_count += 1
+ self.beta_history.append(beta)
+
+ # Compute pinball losses for each expert
+ # From paper: ℓ(β_t, α_t^i) where β_t is empirical coverage and α_t^i is expert's alpha
+ losses = np.array(
+ [
+ pinball_loss(beta=beta, theta=alpha_val, alpha=self.alpha)
+ for alpha_val in self.alpha_t_candidates
+ ]
+ )
+
+ updated_weights = self.weights * np.exp(-self.eta * losses)
+ sum_of_updated_weights = np.sum(updated_weights)
+ self.weights = (1 - self.sigma) * updated_weights + (
+ (self.sigma * sum_of_updated_weights) / self.k
+ )
+
+ # Update each expert's alpha using gradient step
+ # err_indicators = 1 if breach (beta < alpha), 0 if coverage (beta >= alpha)
+ err_indicators = (beta < self.alpha_t_candidates).astype(float)
+ self.alpha_t_candidates = self.alpha_t_candidates + self.gamma_values * (
+ self.alpha - err_indicators
+ )
+ self.alpha_t_candidates = np.clip(self.alpha_t_candidates, 0.001, 0.999)
+
+ if np.sum(self.weights) > 0:
+ normalized_weights = self.weights / np.sum(self.weights)
+ else:
+ normalized_weights = np.ones(self.k) / self.k
+ logger.warning("All expert weights became zero, reverting to uniform")
+
+ if self.use_weighted_average:
+ # Deterministic weighted average (Algorithm 2)
+ self.alpha_t = np.sum(normalized_weights * self.alpha_t_candidates)
+ else:
+ # Random sampling (Algorithm 1)
+ chosen_idx = np.random.choice(self.k, p=normalized_weights)
+ self.alpha_t = self.alpha_t_candidates[chosen_idx]
+
+ self.alpha_history.append(self.alpha_t)
+ self.weight_history.append(normalized_weights.copy())
+
+ return self.alpha_t
diff --git a/confopt/selection/conformalization.py b/confopt/selection/conformalization.py
new file mode 100644
index 0000000..573612a
--- /dev/null
+++ b/confopt/selection/conformalization.py
@@ -0,0 +1,639 @@
+import logging
+import numpy as np
+from typing import Optional, Tuple, List, Literal
+from sklearn.model_selection import KFold
+from sklearn.preprocessing import StandardScaler
+from confopt.wrapping import ConformalBounds
+from confopt.utils.preprocessing import train_val_split
+from confopt.selection.estimation import (
+ initialize_estimator,
+ QuantileTuner,
+)
+from confopt.selection.estimator_configuration import ESTIMATOR_REGISTRY
+from copy import deepcopy
+
+logger = logging.getLogger(__name__)
+
+
def set_calibration_split(n_observations: int) -> float:
    """Choose the fraction of data reserved for calibration.

    Defaults to a 20% holdout but grows the fraction so that at least
    4 observations land in the calibration set.

    Args:
        n_observations: Total number of observations in the dataset.

    Returns:
        Calibration split ratio between 0 and 1.
        NOTE(review): for n_observations < 4 the returned ratio exceeds 1;
        callers currently only reach this with larger datasets — confirm.
    """
    default_fraction = 0.2
    min_calibration_obs = 4
    if default_fraction * n_observations >= min_calibration_obs:
        return default_fraction
    return min_calibration_obs / n_observations
+
+
def alpha_to_quantiles(alpha: float) -> Tuple[float, float]:
    """Map a miscoverage level to symmetric lower/upper quantile levels.

    The two levels bracket a two-sided prediction interval with nominal
    coverage 1 - alpha.

    Args:
        alpha: Miscoverage level (e.g., 0.1 for 90% coverage intervals).

    Returns:
        Tuple of (lower_quantile, upper_quantile) values.
    """
    half_alpha = alpha / 2
    return half_alpha, 1 - half_alpha
+
+
class QuantileConformalEstimator:
    """Conformal quantile regression estimator with adaptive calibration strategies.

    Wraps a quantile regression architecture from ESTIMATOR_REGISTRY and, once
    enough observations are available, conformalizes its intervals using either
    cross-validation folds or a single train/calibration split.
    """

    def __init__(
        self,
        quantile_estimator_architecture: str,
        alphas: List[float],
        n_pre_conformal_trials: int = 32,
        n_calibration_folds: int = 3,
        calibration_split_strategy: Literal[
            "cv", "train_test_split", "adaptive"
        ] = "adaptive",
        adaptive_threshold: int = 50,
        normalize_features: bool = True,
    ):
        """Conformal quantile regression estimator with adaptive calibration strategies.

        Implements conformal prediction to create statistically valid prediction intervals
        using quantile regression models.

        Args:
            quantile_estimator_architecture: Architecture name from ESTIMATOR_REGISTRY (e.g., 'qgbm', 'qrf', 'qknn').
            alphas: List of miscoverage levels for prediction intervals (e.g., [0.1] for 90% coverage).
            n_pre_conformal_trials: Minimum observations needed before using conformal prediction.
            n_calibration_folds: Number of folds for cross-validation calibration.
            calibration_split_strategy: Strategy for data splitting during calibration.
            adaptive_threshold: Observation threshold for adaptive strategy switching.
            normalize_features: Whether to standardize input features using StandardScaler.
        """
        self.quantile_estimator_architecture = quantile_estimator_architecture
        self.alphas = alphas
        # updated_alphas can later be overwritten via update_alphas() without refitting.
        self.updated_alphas = self.alphas.copy()
        self.n_pre_conformal_trials = n_pre_conformal_trials
        self.n_calibration_folds = n_calibration_folds
        self.calibration_split_strategy = calibration_split_strategy
        self.adaptive_threshold = adaptive_threshold
        self.normalize_features = normalize_features

        # Fitted state, populated by fit():
        self.quantile_estimator = None  # single estimator used on the non-conformal path
        self.fold_scores_per_alpha = None  # nonconformity score arrays, one list per alpha
        # NOTE(review): flattened_quantiles is never assigned after __init__ —
        # fit() keeps the quantile list in a local variable. Confirm whether this
        # attribute is meant to be populated or removed.
        self.flattened_quantiles = None
        self.quantile_indices = None  # quantile level -> column index in predictions
        self.conformalize_predictions = False
        self.last_best_params = None  # warm-start params from the latest tuning run
        self.feature_scaler = None
        self.fold_estimators = []

    def _determine_splitting_strategy(self, n_observations: int) -> str:
        """Selects the optimal data splitting strategy based on dataset size.

        Uses cross-validation for small datasets and train-test split for larger ones when adaptive.

        Args:
            n_observations: Total number of observations in the training dataset.

        Returns:
            Strategy name: 'cv', 'train_test_split', or the fixed calibration_split_strategy.
        """
        if self.calibration_split_strategy == "adaptive":
            return (
                "cv" if n_observations < self.adaptive_threshold else "train_test_split"
            )
        return self.calibration_split_strategy

    def _fit_non_conformal(
        self,
        X: np.ndarray,
        y: np.ndarray,
        flattened_quantiles: List[float],
        tuning_iterations: int,
        min_obs_for_tuning: int,
        random_state: Optional[int],
        last_best_params: Optional[dict],
    ):
        """Fits a standard quantile estimator without conformal calibration.

        Used when dataset size is below n_pre_conformal_trials threshold.

        Args:
            X: Input feature matrix, shape (n_samples, n_features).
            y: Target values array, shape (n_samples,).
            flattened_quantiles: Sorted list of unique quantile levels derived from alphas.
            tuning_iterations: Number of hyperparameter optimization iterations using QuantileTuner.
            min_obs_for_tuning: Minimum observations required for hyperparameter tuning.
            random_state: Random seed for reproducible estimator initialization.
            last_best_params: Previously optimized parameters from estimator_configuration to warm-start.
        """
        # Warm-start candidates are tried in priority order: previous best, then registry defaults.
        forced_param_configurations = []

        if last_best_params is not None:
            forced_param_configurations.append(last_best_params)

        estimator_config = ESTIMATOR_REGISTRY[self.quantile_estimator_architecture]
        default_params = deepcopy(estimator_config.default_params)
        if default_params:
            forced_param_configurations.append(default_params)

        # Only tune when there is both a search budget and enough data to split.
        if tuning_iterations > 1 and len(X) > min_obs_for_tuning:
            tuner = QuantileTuner(
                random_state=random_state, quantiles=flattened_quantiles
            )
            initialization_params = tuner.tune(
                X=X,
                y=y,
                estimator_architecture=self.quantile_estimator_architecture,
                n_searches=tuning_iterations,
                forced_param_configurations=forced_param_configurations,
            )
            self.last_best_params = initialization_params
        else:
            # No tuning: fall back to the highest-priority forced configuration.
            initialization_params = (
                forced_param_configurations[0] if forced_param_configurations else None
            )
            self.last_best_params = last_best_params

        self.quantile_estimator = initialize_estimator(
            estimator_architecture=self.quantile_estimator_architecture,
            initialization_params=initialization_params,
            random_state=random_state,
        )
        self.quantile_estimator.fit(X, y, quantiles=flattened_quantiles)

        # Stored as a one-element list so predict_intervals can treat both paths uniformly.
        self.fold_estimators = [self.quantile_estimator]
        self.conformalize_predictions = False

    def _fit_cv_plus(
        self,
        X: np.ndarray,
        y: np.ndarray,
        flattened_quantiles: List[float],
        tuning_iterations: int,
        min_obs_for_tuning: int,
        random_state: Optional[int],
        last_best_params: Optional[dict],
    ):
        """Fits conformal estimator using cross-validation for calibration.

        Trains separate models on each fold and computes nonconformity scores for conformal adjustment.

        Args:
            X: Input feature matrix, shape (n_samples, n_features).
            y: Target values array, shape (n_samples,).
            flattened_quantiles: Sorted list of unique quantile levels derived from alphas.
            tuning_iterations: Number of hyperparameter optimization iterations per fold.
            min_obs_for_tuning: Minimum observations required for hyperparameter tuning per fold.
            random_state: Random seed for KFold splitting and estimator initialization.
            last_best_params: Previously optimized parameters to warm-start each fold.
        """
        kfold = KFold(
            n_splits=self.n_calibration_folds, shuffle=True, random_state=random_state
        )

        # One list of per-fold score arrays for each miscoverage level.
        fold_scores_per_alpha = [[] for _ in self.alphas]
        self.fold_estimators = []

        forced_param_configurations = []
        if last_best_params is not None:
            forced_param_configurations.append(last_best_params)

        estimator_config = ESTIMATOR_REGISTRY[self.quantile_estimator_architecture]
        default_params = deepcopy(estimator_config.default_params)
        if default_params:
            forced_param_configurations.append(default_params)

        # NOTE(review): enumerate index is unused here; a plain iteration would suffice.
        for _, (train_idx, val_idx) in enumerate(kfold.split(X)):
            X_fold_train, X_fold_val = X[train_idx], X[val_idx]
            y_fold_train, y_fold_val = y[train_idx], y[val_idx]

            if tuning_iterations > 1 and len(X_fold_train) > min_obs_for_tuning:
                tuner = QuantileTuner(
                    random_state=random_state if random_state else None,
                    quantiles=flattened_quantiles,
                )
                fold_initialization_params = tuner.tune(
                    X=X_fold_train,
                    y=y_fold_train,
                    estimator_architecture=self.quantile_estimator_architecture,
                    n_searches=tuning_iterations,
                    forced_param_configurations=forced_param_configurations,
                )
            else:
                fold_initialization_params = (
                    forced_param_configurations[0]
                    if forced_param_configurations
                    else None
                )

            fold_estimator = initialize_estimator(
                estimator_architecture=self.quantile_estimator_architecture,
                initialization_params=fold_initialization_params,
                random_state=random_state if random_state else None,
            )
            fold_estimator.fit(
                X_fold_train, y_fold_train, quantiles=flattened_quantiles
            )

            self.fold_estimators.append(fold_estimator)

            val_prediction = fold_estimator.predict(X_fold_val)

            for i, alpha in enumerate(self.alphas):
                lower_quantile, upper_quantile = alpha_to_quantiles(alpha)
                lower_idx = self.quantile_indices[lower_quantile]
                upper_idx = self.quantile_indices[upper_quantile]

                # CQR nonconformity score: largest signed excursion outside the
                # predicted [lower, upper] band (negative when y is inside).
                lower_deviations = val_prediction[:, lower_idx] - y_fold_val
                upper_deviations = y_fold_val - val_prediction[:, upper_idx]
                fold_scores = np.maximum(lower_deviations, upper_deviations)
                fold_scores_per_alpha[i].append(fold_scores)

        self.fold_scores_per_alpha = fold_scores_per_alpha

        # Per-fold tuned params are intentionally not persisted; the incoming
        # warm-start params are carried forward unchanged.
        self.last_best_params = last_best_params
        self.conformalize_predictions = True

    def _fit_train_test_split(
        self,
        X: np.ndarray,
        y: np.ndarray,
        flattened_quantiles: List[float],
        tuning_iterations: int,
        min_obs_for_tuning: int,
        random_state: Optional[int],
        last_best_params: Optional[dict],
    ):
        """Fits conformal estimator using train-test split for calibration.

        Trains on training portion and computes nonconformity scores on validation portion.

        Args:
            X: Input feature matrix, shape (n_samples, n_features).
            y: Target values array, shape (n_samples,).
            flattened_quantiles: Sorted list of unique quantile levels derived from alphas.
            tuning_iterations: Number of hyperparameter optimization iterations.
            min_obs_for_tuning: Minimum observations required for hyperparameter tuning.
            random_state: Random seed for train_val_split and estimator initialization.
            last_best_params: Previously optimized parameters to warm-start training.
        """
        X_train, y_train, X_val, y_val = train_val_split(
            X,
            y,
            train_split=(1 - set_calibration_split(len(X))),
            normalize=False,
            random_state=random_state,
        )

        forced_param_configurations = []

        if last_best_params is not None:
            forced_param_configurations.append(last_best_params)

        estimator_config = ESTIMATOR_REGISTRY[self.quantile_estimator_architecture]
        default_params = deepcopy(estimator_config.default_params)
        if default_params:
            forced_param_configurations.append(default_params)

        if tuning_iterations > 1 and len(X_train) > min_obs_for_tuning:
            tuner = QuantileTuner(
                random_state=random_state, quantiles=flattened_quantiles
            )
            initialization_params = tuner.tune(
                X=X_train,
                y=y_train,
                estimator_architecture=self.quantile_estimator_architecture,
                n_searches=tuning_iterations,
                forced_param_configurations=forced_param_configurations,
            )
            self.last_best_params = initialization_params
        else:
            initialization_params = (
                forced_param_configurations[0] if forced_param_configurations else None
            )
            self.last_best_params = last_best_params

        quantile_estimator = initialize_estimator(
            estimator_architecture=self.quantile_estimator_architecture,
            initialization_params=initialization_params,
            random_state=random_state,
        )
        quantile_estimator.fit(X_train, y_train, quantiles=flattened_quantiles)

        self.fold_estimators = [quantile_estimator]

        val_prediction = quantile_estimator.predict(X_val)
        # Same shape as the CV path: a (single-element) list of score arrays per alpha.
        fold_scores_per_alpha = [[] for _ in self.alphas]

        for i, alpha in enumerate(self.alphas):
            lower_quantile, upper_quantile = alpha_to_quantiles(alpha)
            lower_idx = self.quantile_indices[lower_quantile]
            upper_idx = self.quantile_indices[upper_quantile]

            lower_deviations = val_prediction[:, lower_idx] - y_val
            upper_deviations = y_val - val_prediction[:, upper_idx]
            fold_scores = np.maximum(lower_deviations, upper_deviations)
            fold_scores_per_alpha[i].append(fold_scores)

        self.fold_scores_per_alpha = fold_scores_per_alpha
        self.conformalize_predictions = True

    def fit(
        self,
        X: np.ndarray,
        y: np.ndarray,
        tuning_iterations: int = 0,
        min_obs_for_tuning: int = 50,
        random_state: Optional[int] = None,
        last_best_params: Optional[dict] = None,
    ):
        """Trains the conformal quantile estimator on the provided data.

        Automatically selects between conformal and non-conformal approaches based on dataset size.

        Args:
            X: Input feature matrix, shape (n_samples, n_features).
            y: Target values array, shape (n_samples,).
            tuning_iterations: Number of hyperparameter optimization iterations using
                QuantileTuner. Must be an int; passing None would break the `> 1` check.
            min_obs_for_tuning: Minimum observations required to enable hyperparameter tuning.
            random_state: Random seed for reproducible results across folds and estimators.
            last_best_params: Previously optimized parameters from ESTIMATOR_REGISTRY to warm-start tuning.
        """
        if self.normalize_features:
            self.feature_scaler = StandardScaler()
            X_scaled = self.feature_scaler.fit_transform(X)
        else:
            X_scaled = X
            self.feature_scaler = None

        # Deduplicated, sorted quantile levels shared by every alpha; their order
        # defines the column layout of estimator predictions.
        flattened_quantiles = []
        for alpha in self.alphas:
            lower_quantile, upper_quantile = alpha_to_quantiles(alpha)
            flattened_quantiles.append(lower_quantile)
            flattened_quantiles.append(upper_quantile)
        flattened_quantiles = sorted(list(set(flattened_quantiles)))

        self.quantile_indices = {q: i for i, q in enumerate(flattened_quantiles)}

        n_observations = len(X)
        use_conformal = n_observations > self.n_pre_conformal_trials

        if use_conformal:
            strategy = self._determine_splitting_strategy(n_observations)

            if strategy == "cv":
                self._fit_cv_plus(
                    X=X_scaled,
                    y=y,
                    flattened_quantiles=flattened_quantiles,
                    tuning_iterations=tuning_iterations,
                    min_obs_for_tuning=min_obs_for_tuning,
                    random_state=random_state,
                    last_best_params=last_best_params,
                )
            else:
                self._fit_train_test_split(
                    X=X_scaled,
                    y=y,
                    flattened_quantiles=flattened_quantiles,
                    tuning_iterations=tuning_iterations,
                    min_obs_for_tuning=min_obs_for_tuning,
                    random_state=random_state,
                    last_best_params=last_best_params,
                )

        else:
            self._fit_non_conformal(
                X=X_scaled,
                y=y,
                flattened_quantiles=flattened_quantiles,
                tuning_iterations=tuning_iterations,
                min_obs_for_tuning=min_obs_for_tuning,
                random_state=random_state,
                last_best_params=last_best_params,
            )

    def _preprocess_features(self, X: np.ndarray) -> np.ndarray:
        """Applies feature preprocessing transformations to input data.

        Normalizes features using fitted StandardScaler if enabled during initialization.

        Args:
            X: Raw input feature matrix, shape (n_samples, n_features).

        Returns:
            Preprocessed feature array with same shape, standardized if normalize_features=True.
        """
        # Copy first so the caller's array is never mutated.
        X_processed = X.copy()
        if self.normalize_features and self.feature_scaler is not None:
            X_processed = self.feature_scaler.transform(X=X_processed)

        return X_processed

    def _get_quantile_indices(self, alpha: float) -> Tuple[int, int]:
        """Retrieves array indices for lower and upper quantiles corresponding to alpha.

        Maps miscoverage level to quantile positions in the prediction array.

        Args:
            alpha: Miscoverage level for the prediction interval.

        Returns:
            Tuple of (lower_index, upper_index) for quantile array positions.
        """
        lower_quantile, upper_quantile = alpha_to_quantiles(alpha=alpha)

        return (
            self.quantile_indices[lower_quantile],
            self.quantile_indices[upper_quantile],
        )

    def _compute_conformal_bounds(
        self,
        X: np.ndarray,
        fold_nonconformity_scores: List[np.ndarray],
        alpha_adjusted: float,
        lower_idx: int,
        upper_idx: int,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Computes conformal prediction bounds using calibrated nonconformity scores.

        Combines predictions from multiple folds with nonconformity scores to create
        statistically valid prediction intervals.

        Args:
            X: Input features for prediction, shape (n_samples, n_features).
            fold_nonconformity_scores: List of nonconformity score arrays from each calibration fold.
            alpha_adjusted: Adjusted miscoverage level for the prediction interval from adaptive mechanisms.
            lower_idx: Index of lower quantile in flattened_quantiles prediction array.
            upper_idx: Index of upper quantile in flattened_quantiles prediction array.

        Returns:
            Tuple of (lower_bounds, upper_bounds) arrays for prediction intervals, shape (n_samples,).
        """
        fold_preds = [estimator.predict(X=X) for estimator in self.fold_estimators]

        # Broadcasting: pred[:, idx] has shape (n_samples,) and scores.reshape(-1, 1)
        # has shape (n_scores, 1), so each term is an (n_scores, n_samples) matrix of
        # score-shifted predictions; concatenation stacks all folds' score rows.
        flattened_lower_values = np.concatenate(
            [
                pred[:, lower_idx] - scores.reshape(-1, 1)
                for pred, scores in zip(fold_preds, fold_nonconformity_scores)
            ]
        )

        flattened_upper_values = np.concatenate(
            [
                pred[:, upper_idx] + scores.reshape(-1, 1)
                for pred, scores in zip(fold_preds, fold_nonconformity_scores)
            ]
        )

        flattened_scores = np.concatenate(fold_nonconformity_scores)
        n_scores = len(flattened_scores)
        # NOTE(review): this finite-sample correction divides the target level by
        # (1 + 1/n); CV+/CQR formulations typically multiply (1 - alpha) by
        # (1 + 1/n) instead — confirm the intended direction of the adjustment.
        lower_quantile = alpha_adjusted / (1 + 1 / n_scores)
        upper_quantile = (1 - alpha_adjusted) / (1 + 1 / n_scores)

        lower_bound = np.quantile(
            a=flattened_lower_values, q=lower_quantile, axis=0, method="linear"
        )
        upper_bound = np.quantile(
            a=flattened_upper_values, q=upper_quantile, axis=0, method="linear"
        )

        return lower_bound, upper_bound

    def _compute_nonconformal_bounds(
        self, X_processed: np.ndarray, lower_idx: int, upper_idx: int
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Computes standard quantile bounds without conformal calibration.

        Returns raw quantile predictions from the single trained estimator.

        Args:
            X_processed: Preprocessed input features, shape (n_samples, n_features).
            lower_idx: Index of lower quantile in flattened_quantiles prediction array.
            upper_idx: Index of upper quantile in flattened_quantiles prediction array.

        Returns:
            Tuple of (lower_bounds, upper_bounds) arrays from quantile predictions, shape (n_samples,).
        """
        prediction = self.fold_estimators[0].predict(X=X_processed)

        return prediction[:, lower_idx], prediction[:, upper_idx]

    def predict_intervals(self, X: np.ndarray) -> List[ConformalBounds]:
        """Generates prediction intervals for new input data.

        Creates statistically valid prediction intervals using either conformal
        or standard quantile bounds depending on the fitted model type.

        Args:
            X: Input feature matrix for prediction, shape (n_samples, n_features).

        Returns:
            List of ConformalBounds objects with lower_bounds and upper_bounds arrays, one per miscoverage level.

        Raises:
            ValueError: If the estimator has not been fitted yet.
        """
        if not self.fold_estimators:
            raise ValueError("Fold estimators must be fitted before prediction")

        X_processed = self._preprocess_features(X=X)
        intervals = []

        # Quantile columns are located via the original alphas; the (possibly
        # adapted) updated_alphas only shift the conformal correction level.
        for i, (alpha, alpha_adjusted) in enumerate(
            zip(self.alphas, self.updated_alphas)
        ):
            lower_idx, upper_idx = self._get_quantile_indices(alpha=alpha)

            if self.conformalize_predictions:
                fold_scores = self.fold_scores_per_alpha[i]
                lower_bound, upper_bound = self._compute_conformal_bounds(
                    X=X_processed,
                    fold_nonconformity_scores=fold_scores,
                    alpha_adjusted=alpha_adjusted,
                    lower_idx=lower_idx,
                    upper_idx=upper_idx,
                )
            else:
                lower_bound, upper_bound = self._compute_nonconformal_bounds(
                    X_processed=X_processed, lower_idx=lower_idx, upper_idx=upper_idx
                )

            intervals.append(
                ConformalBounds(lower_bounds=lower_bound, upper_bounds=upper_bound)
            )

        return intervals

    def calculate_betas(self, X: np.ndarray, y_true: float) -> List[float]:
        """Calculates beta values indicating empirical coverage probability.

        Computes the fraction of calibration nonconformity scores that exceed the
        nonconformity of the given observation, used for adaptive alpha adjustment.

        Args:
            X: Single observation features to evaluate, shape (n_features,).
            y_true: True target value for the observation.

        Returns:
            List of beta values (empirical coverage probabilities), one per miscoverage level.

        Raises:
            ValueError: If the estimator has not been fitted yet.
        """
        if self.fold_estimators == []:
            raise ValueError("Estimator must be fitted before calculating beta")

        # Without calibration scores there is no empirical signal; 0.5 is the
        # neutral feedback value for downstream alpha adaptation.
        if not self.conformalize_predictions:
            return [0.5] * len(self.alphas)

        X_processed = X.reshape(1, -1)
        if self.normalize_features and self.feature_scaler is not None:
            X_processed = self.feature_scaler.transform(X_processed)

        betas = []
        for i, alpha in enumerate(self.alphas):
            lower_quantile, upper_quantile = alpha_to_quantiles(alpha)
            lower_idx = self.quantile_indices[lower_quantile]
            upper_idx = self.quantile_indices[upper_quantile]

            # Average the fold estimators' predictions for a single consensus band.
            all_predictions = []
            for fold_estimator in self.fold_estimators:
                fold_pred = fold_estimator.predict(X_processed)
                all_predictions.append(fold_pred)

            avg_prediction = np.mean(all_predictions, axis=0)
            lower_bound = avg_prediction[0, lower_idx]
            upper_bound = avg_prediction[0, upper_idx]

            lower_deviation = lower_bound - y_true
            upper_deviation = y_true - upper_bound
            nonconformity = max(lower_deviation, upper_deviation)

            flattened_scores = []
            for fold_scores in self.fold_scores_per_alpha[i]:
                flattened_scores.extend(fold_scores)
            # beta = share of calibration scores at least as extreme as this observation.
            beta = np.mean(np.array(flattened_scores) >= nonconformity)

            betas.append(beta)

        return betas

    def update_alphas(self, new_alphas: List[float]):
        """Updates the miscoverage levels for prediction intervals.

        Allows dynamic adjustment of coverage levels without refitting the model.

        Args:
            new_alphas: New list of miscoverage levels to use for predictions.
        """
        self.updated_alphas = new_alphas.copy()
diff --git a/confopt/selection/estimation.py b/confopt/selection/estimation.py
new file mode 100644
index 0000000..75a8b58
--- /dev/null
+++ b/confopt/selection/estimation.py
@@ -0,0 +1,452 @@
+"""Hyperparameter tuning framework for quantile and point estimation models.
+
+This module provides automated hyperparameter optimization infrastructure for both
+quantile regression and standard point estimation models. It implements random search
+with cross-validation, supporting various split strategies and evaluation metrics.
+The framework integrates with the estimator registry system for unified model
+configuration and supports warm-start optimization with forced parameter configurations.
+"""
+
+import logging
+from typing import Dict, Optional, List, Union, Tuple, Any, Literal
+from copy import deepcopy
+import inspect
+
+from sklearn.base import BaseEstimator
+import numpy as np
+from sklearn.metrics import mean_pinball_loss, mean_squared_error
+from sklearn.model_selection import KFold
+
+from confopt.selection.estimator_configuration import (
+ ESTIMATOR_REGISTRY,
+ EstimatorConfig,
+)
+from confopt.selection.estimators.quantile_estimation import (
+ BaseSingleFitQuantileEstimator,
+ BaseMultiFitQuantileEstimator,
+)
+from confopt.selection.estimators.ensembling import QuantileEnsembleEstimator
+from confopt.utils.configurations.sampling import get_tuning_configurations
+
+logger = logging.getLogger(__name__)
+
+
def initialize_estimator(
    estimator_architecture: str,
    initialization_params: Dict = None,
    random_state: Optional[int] = None,
):
    """Build a ready-to-fit estimator instance from the registry.

    Looks up the architecture in ESTIMATOR_REGISTRY, merges caller-supplied
    overrides onto the registered defaults, propagates the random seed to any
    constructor that accepts one, and rebuilds fresh sub-estimators for
    ensemble architectures.

    Args:
        estimator_architecture: Registered estimator name from ESTIMATOR_REGISTRY.
        initialization_params: Parameter overrides for default configuration.
            Missing parameters use registry defaults.
        random_state: Seed for reproducible estimator initialization. Automatically
            propagated to estimators supporting random_state parameter.

    Returns:
        Initialized estimator instance ready for fitting.

    Raises:
        KeyError: If estimator_architecture not found in registry.
        TypeError: If initialization_params contain invalid parameters.
    """

    def _accepts_random_state(estimator_class) -> bool:
        # Inspect the constructor signature rather than guessing from conventions.
        return "random_state" in inspect.signature(estimator_class.__init__).parameters

    config = ESTIMATOR_REGISTRY[estimator_architecture]

    # Registry defaults are deep-copied so overrides never mutate the registry.
    merged_params = deepcopy(config.default_params)
    if initialization_params:
        merged_params.update(initialization_params)

    if random_state is not None and _accepts_random_state(config.estimator_class):
        merged_params["random_state"] = random_state

    if config.is_ensemble_estimator() and config.ensemble_components:
        # Ensembles get freshly constructed sub-estimators so no fitted state
        # is ever shared between instances.
        sub_estimators = []
        for component in config.ensemble_components:
            component_params = deepcopy(component["params"])
            if random_state is not None and _accepts_random_state(component["class"]):
                component_params["random_state"] = random_state
            sub_estimators.append(component["class"](**component_params))
        merged_params["estimators"] = sub_estimators

    return config.estimator_class(**merged_params)
+
+
def average_scores_across_folds(
    scored_configurations: List[Dict], scores: List[float]
) -> Tuple[List[Dict], List[float]]:
    """Aggregate cross-validation scores by averaging across identical configurations.

    Configurations that appear multiple times (once per fold) are collapsed to a
    single entry whose score is the mean of all their fold scores. First-seen
    order is preserved and the input lists are not mutated.

    Note: the parameter was previously annotated ``List[List[Dict]]``, which
    contradicted the implementation (elements are compared and returned as
    plain dicts); the annotation is corrected to ``List[Dict]``.

    Args:
        scored_configurations: Parameter dictionaries, one per fold evaluation.
        scores: Corresponding performance score for each configuration.

    Returns:
        Tuple of (unique_configurations, averaged_scores) with consolidated results.
    """
    unique_configurations: List[Dict] = []
    score_sums: List[float] = []
    fold_counts: List[int] = []

    for configuration, score in zip(scored_configurations, scores):
        # Dicts are unhashable, so membership is a linear scan by equality.
        if configuration in unique_configurations:
            position = unique_configurations.index(configuration)
            score_sums[position] += score
            fold_counts[position] += 1
        else:
            unique_configurations.append(configuration)
            score_sums.append(score)
            fold_counts.append(1)

    averaged_scores = [
        total / count for total, count in zip(score_sums, fold_counts)
    ]
    return unique_configurations, averaged_scores
+
+
+class RandomTuner:
+ """Base class for hyperparameter optimization using random search with cross-validation.
+
+ Implements random hyperparameter search with flexible cross-validation strategies
+ for model selection. Supports warm-start configurations, multiple split types,
+ and robust error handling during evaluation. Subclasses implement model-specific
+ fitting and evaluation logic for different learning tasks.
+
+ The tuning process randomly samples from parameter spaces defined in estimator
+ configurations, evaluates each configuration via cross-validation, and returns
+ the configuration with optimal performance.
+
+ Args:
+ random_state: Seed for reproducible parameter sampling and data splitting.
+ """
+
    def __init__(self, random_state: Optional[int] = None):
        # Single seed shared by parameter sampling and fold splitting for reproducibility.
        self.random_state = random_state
+
+ def tune(
+ self,
+ X: np.array,
+ y: np.array,
+ estimator_architecture: str,
+ n_searches: int,
+ train_split: float = 0.8,
+ split_type: Literal["k_fold", "ordinal_split"] = "k_fold",
+ forced_param_configurations: Optional[List[Dict]] = None,
+ ) -> Dict:
+ """Perform hyperparameter optimization via random search with cross-validation.
+
+ Randomly samples parameter configurations from the estimator's parameter space,
+ evaluates each via cross-validation, and returns the best-performing configuration.
+ Supports warm-start configurations that are evaluated before random sampling.
+
+ Args:
+ X: Feature matrix with shape (n_samples, n_features).
+ y: Target values with shape (n_samples,).
+ estimator_architecture: Registered estimator name for optimization.
+ n_searches: Total number of configurations to evaluate.
+ train_split: Fraction of data for training in ordinal splits, or determines
+ K-fold count via 1/(1-train_split) for k_fold splits.
+ split_type: Cross-validation strategy. "k_fold" for random splits,
+ "ordinal_split" for single time-ordered split.
+ forced_param_configurations: Pre-specified configurations evaluated first.
+ Remaining slots filled with random sampling.
+
+ Returns:
+ Best parameter configuration dictionary based on cross-validation performance.
+ """
+ estimator_config = ESTIMATOR_REGISTRY[estimator_architecture]
+
+ # Handle warm start configurations
+ forced_param_configurations = forced_param_configurations or []
+
+ # Determine configurations to evaluate
+ n_random_configs = max(0, n_searches - len(forced_param_configurations))
+ if len(forced_param_configurations) >= n_searches:
+ tuning_configurations = forced_param_configurations[:n_searches]
+ else:
+ # Generate random configurations for the remaining slots
+ random_configs = get_tuning_configurations(
+ parameter_grid=estimator_config.estimator_parameter_space,
+ n_configurations=n_random_configs,
+ random_state=self.random_state,
+ sampling_method="uniform",
+ )
+ # Combine warm start and random configurations
+ tuning_configurations = forced_param_configurations + random_configs
+
+ logger.info(f"Tuning configurations: {tuning_configurations}")
+
+ scored_configurations, scores = self._score_configurations(
+ configurations=tuning_configurations,
+ estimator_config=estimator_config,
+ X=X,
+ y=y,
+ train_split=train_split,
+ split_type=split_type,
+ )
+
+ # Find the configuration with the minimum score
+ best_idx = scores.index(min(scores))
+ best_configuration = scored_configurations[best_idx]
+
+ logger.info(f"Best configuration: {best_configuration}")
+ return best_configuration
+
+ def _create_fold_indices(
+ self,
+ X: np.array,
+ train_split: float,
+ split_type: Literal["k_fold", "ordinal_split"],
+ ) -> List[Tuple[np.array, np.array]]:
+ """Generate cross-validation fold indices based on split strategy.
+
+ Creates train/test index pairs for cross-validation. Supports K-fold random
+ splitting and ordinal time-series splits for temporal data.
+
+ Args:
+ X: Feature matrix to determine data size.
+ train_split: Training fraction for ordinal splits or K-fold determination.
+ split_type: Split strategy specification.
+
+ Returns:
+ List of (train_indices, test_indices) tuples for cross-validation.
+ """
+ if split_type == "ordinal_split":
+ # Single train-test split
+ split_index = int(len(X) * train_split)
+ train_indices = np.arange(split_index)
+ test_indices = np.arange(split_index, len(X))
+ return [(train_indices, test_indices)]
+ else: # "k_fold"
+ # Reverse-engineer the number of folds based on train_split
+ k_fold_splits = round(1 / (1 - train_split))
+ kf = KFold(
+ n_splits=k_fold_splits, random_state=self.random_state, shuffle=True
+ )
+ return list(kf.split(X))
+
    def _score_configurations(
        self,
        configurations: List[Dict],
        estimator_config: EstimatorConfig,
        X: np.array,
        y: np.array,
        train_split: float = 0.66,
        split_type: Literal["k_fold", "ordinal_split"] = "k_fold",
    ) -> Tuple[List[Dict], List[float]]:
        """Evaluate parameter configurations via cross-validation.

        Fits and evaluates each configuration across all cross-validation folds,
        computing average performance scores. Handles training failures gracefully
        by excluding failed configurations from results.

        Args:
            configurations: List of parameter dictionaries to evaluate.
            estimator_config: Configuration object containing estimator metadata.
            X: Feature matrix for model training and evaluation. Must be 2D
               (indexed as X[idx, :] below).
            y: Target values for model training and evaluation.
            train_split: Training data fraction for split generation.
            split_type: Cross-validation split strategy.

        Returns:
            Tuple of (valid_configurations, average_scores) for successful evaluations.
            Configurations that failed on every fold are dropped entirely.
        """
        # Per-configuration fold scores; np.nan marks a failed fold.
        config_scores = {i: [] for i in range(len(configurations))}
        fold_indices = self._create_fold_indices(X, train_split, split_type)

        # Same folds are reused for every configuration so scores are comparable.
        for config_idx, configuration in enumerate(configurations):
            for train_index, test_index in fold_indices:
                X_train, X_val = X[train_index, :], X[test_index, :]
                Y_train, Y_val = y[train_index], y[test_index]

                # Fresh estimator per fold so fitted state never leaks across folds.
                model = initialize_estimator(
                    estimator_architecture=estimator_config.estimator_name,
                    initialization_params=configuration,
                    random_state=self.random_state,
                )

                # _fit_model / _evaluate_model are subclass hooks (not defined on
                # this base class); any exception they raise downgrades the fold
                # to nan instead of aborting the whole search.
                try:
                    self._fit_model(model, X_train, Y_train)
                    score = self._evaluate_model(model, X_val, Y_val)
                    config_scores[config_idx].append(score)
                except Exception as e:
                    logger.warning(
                        f"Configuration {config_idx} failed on a fold. Error: {e}"
                    )
                    config_scores[config_idx].append(np.nan)

        # Average only the successful folds; all-nan configurations are skipped.
        scored_configurations = []
        scores = []
        for config_idx, configuration in enumerate(configurations):
            fold_scores = config_scores[config_idx]
            valid_scores = [s for s in fold_scores if not np.isnan(s)]
            if valid_scores:
                avg_score = sum(valid_scores) / len(valid_scores)
                scored_configurations.append(configuration)
                scores.append(avg_score)

        return scored_configurations, scores
+
+ def _fit_model(self, model, X_train: np.array, Y_train: np.array) -> None:
+ """Fit estimator to training data.
+
+ Args:
+ model: Estimator instance to train.
+ X_train: Training feature matrix.
+ Y_train: Training target values.
+
+ Raises:
+ NotImplementedError: Must be implemented by subclasses.
+ """
+ raise NotImplementedError("Subclasses must implement _fit_model")
+
+ def _evaluate_model(self, model, X_val: np.array, Y_val: np.array) -> float:
+ """Evaluate fitted model on validation data.
+
+ Args:
+ model: Fitted estimator instance.
+ X_val: Validation feature matrix.
+ Y_val: Validation target values.
+
+ Returns:
+ Performance score (lower is better).
+
+ Raises:
+ NotImplementedError: Must be implemented by subclasses.
+ """
+ raise NotImplementedError("Subclasses must implement _evaluate_model")
+
+
class PointTuner(RandomTuner):
    """Hyperparameter tuner for point-prediction regressors.

    Specializes RandomTuner for standard regression, where each model emits a
    single predicted value per sample. Configurations are ranked by mean
    squared error (lower is better).
    """

    def _fit_model(
        self, model: BaseEstimator, X_train: np.array, Y_train: np.array
    ) -> None:
        """Train a scikit-learn compatible point estimator.

        Args:
            model: Scikit-learn compatible estimator.
            X_train: Training feature matrix.
            Y_train: Training target values.
        """
        model.fit(X_train, Y_train)

    def _evaluate_model(self, model: Any, X_val: np.array, Y_val: np.array) -> float:
        """Score a fitted point estimator by mean squared error.

        Args:
            model: Fitted estimator instance.
            X_val: Validation feature matrix.
            Y_val: Validation target values.

        Returns:
            Mean squared error on the validation set (lower is better).
        """
        predictions = model.predict(X=X_val)
        return mean_squared_error(Y_val, predictions)
+
+
class QuantileTuner(RandomTuner):
    """Hyperparameter tuner for multi-quantile regression models.

    Specializes RandomTuner for estimators that predict several quantile
    levels at once. Configurations are ranked by the pinball loss averaged
    across all requested quantiles (lower is better).

    Args:
        quantiles: Quantile levels to predict (values in [0, 1]).
        random_state: Seed for reproducible optimization.
    """

    def __init__(self, quantiles: List[float], random_state: Optional[int] = None):
        super().__init__(random_state)
        self.quantiles = quantiles

    def _fit_model(
        self,
        model: Union[
            QuantileEnsembleEstimator,
            BaseMultiFitQuantileEstimator,
            BaseSingleFitQuantileEstimator,
        ],
        X_train: np.array,
        Y_train: np.array,
    ) -> None:
        """Train a quantile estimator on all configured quantile levels.

        Args:
            model: Quantile regression estimator supporting multi-quantile fitting.
            X_train: Training feature matrix.
            Y_train: Training target values.
        """
        model.fit(X_train, Y_train, quantiles=self.quantiles)

    def _evaluate_model(
        self,
        model: Union[
            QuantileEnsembleEstimator,
            BaseMultiFitQuantileEstimator,
            BaseSingleFitQuantileEstimator,
        ],
        X_val: np.array,
        Y_val: np.array,
    ) -> float:
        """Score a fitted quantile estimator by average pinball loss.

        One pinball loss is computed per quantile level (matching prediction
        columns to self.quantiles by position) and the mean is returned.

        Args:
            model: Fitted quantile regression estimator.
            X_val: Validation feature matrix.
            Y_val: Validation target values.

        Returns:
            Average pinball loss across all quantiles (lower is better).
        """
        predictions = model.predict(X_val)
        pinball_losses = [
            mean_pinball_loss(Y_val, predictions[:, idx], alpha=quantile)
            for idx, quantile in enumerate(self.quantiles)
        ]
        return sum(pinball_losses) / len(pinball_losses)
diff --git a/confopt/selection/estimator_configuration.py b/confopt/selection/estimator_configuration.py
new file mode 100644
index 0000000..c0f6ebb
--- /dev/null
+++ b/confopt/selection/estimator_configuration.py
@@ -0,0 +1,443 @@
+from typing import Dict, Any, Type, Optional, List
+from pydantic import BaseModel
+
+from confopt.wrapping import IntRange, FloatRange, CategoricalRange
+
+# Import estimator classes
+from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
+from sklearn.kernel_ridge import KernelRidge
+from sklearn.neighbors import KNeighborsRegressor
+from confopt.selection.estimators.quantile_estimation import (
+ BaseSingleFitQuantileEstimator,
+ BaseMultiFitQuantileEstimator,
+ QuantileGBM,
+ QuantileForest,
+ QuantileKNN,
+ QuantileLasso,
+ QuantileGP,
+ QuantileLeaf, # Added QuantileLeaf to imports
+)
+from confopt.wrapping import ParameterRange
+from confopt.selection.estimators.ensembling import (
+ BaseEnsembleEstimator,
+ QuantileEnsembleEstimator,
+)
+
+
class EstimatorConfig(BaseModel):
    """Registry entry describing one estimator architecture.

    Bundles the estimator class with its default construction parameters and
    its tunable hyperparameter search space. Ensemble entries additionally
    declare the component estimators they are built from.
    """

    # Short registry key for the architecture (e.g. "rf", "qgbm", "qens1").
    estimator_name: str
    # Class to instantiate for this architecture.
    estimator_class: Type
    # Constructor parameters used when no tuning has been performed.
    default_params: Dict[str, Any]
    # Tunable hyperparameters mapped to their sampling ranges.
    estimator_parameter_space: Dict[str, ParameterRange]
    # Only set for ensemble entries: per-component {"class": ..., "params": ...} specs.
    ensemble_components: Optional[
        List[Dict[str, Any]]
    ] = None  # New field for ensemble components

    class Config:
        # Required so pydantic accepts the raw `Type` / ParameterRange fields.
        arbitrary_types_allowed = True

    def is_ensemble_estimator(self) -> bool:
        """Return True if this entry's class is an ensemble estimator."""
        return issubclass(self.estimator_class, BaseEnsembleEstimator)

    def is_quantile_estimator(self) -> bool:
        """Return True if this entry's class produces quantile predictions."""
        return issubclass(
            self.estimator_class,
            (
                BaseSingleFitQuantileEstimator,
                BaseMultiFitQuantileEstimator,
                QuantileEnsembleEstimator,
            ),
        )
+
+
# Reference names of search estimator architectures:
QGBM_NAME: str = "qgbm"
QRF_NAME: str = "qrf"
KR_NAME: str = "kr"
GBM_NAME: str = "gbm"
KNN_NAME: str = "knn"
RF_NAME: str = "rf"
QKNN_NAME: str = "qknn"
QL_NAME: str = "ql"
QGP_NAME: str = "qgp"  # Gaussian Process Quantile Estimator
QLEAF_NAME: str = "qleaf"  # QuantileLeaf estimator

# Ensemble estimator names. Compositions below reflect the actual
# `ensemble_components` declared for each entry in ESTIMATOR_REGISTRY.
QENS1_NAME: str = "qens1"  # Ensemble of QL + QKNN + QGBM
QENS2_NAME: str = "qens2"  # Ensemble of QGBM + QRF
QENS3_NAME: str = "qens3"  # Ensemble of QGBM + QL
QENS4_NAME: str = "qens4"  # Ensemble of QGBM + QGP
QENS5_NAME: str = "qens5"  # Ensemble of QL + QGP + QGBM

# Presumably maps each quantile architecture to its point-estimation
# counterpart for the same model family — confirm against callers.
QUANTILE_TO_POINT_ESTIMATOR_MAPPING = {
    QRF_NAME: RF_NAME,
    QKNN_NAME: KNN_NAME,
    QLEAF_NAME: RF_NAME,
    QGBM_NAME: GBM_NAME,
}
+
+# Consolidated estimator configurations
# Consolidated estimator configurations: one EstimatorConfig per architecture,
# keyed by its short registry name.
ESTIMATOR_REGISTRY = {
    # Point estimators
    RF_NAME: EstimatorConfig(
        estimator_name=RF_NAME,
        estimator_class=RandomForestRegressor,
        default_params={
            "n_estimators": 50,
            "max_features": "sqrt",
            "min_samples_split": 2,
            "min_samples_leaf": 1,
            "max_depth": 3,
            "bootstrap": True,
            "random_state": None,  # added to allow seeding
        },
        estimator_parameter_space={
            "n_estimators": IntRange(min_value=25, max_value=200),
            "max_features": CategoricalRange(choices=[0.5, 0.7, "sqrt"]),
            "min_samples_split": IntRange(min_value=2, max_value=6),
            "min_samples_leaf": IntRange(min_value=1, max_value=4),
            "max_depth": IntRange(min_value=2, max_value=6),
            "bootstrap": CategoricalRange(choices=[True, False]),
        },
    ),
    KNN_NAME: EstimatorConfig(
        estimator_name=KNN_NAME,
        estimator_class=KNeighborsRegressor,
        default_params={
            "n_neighbors": 10,
            "weights": "distance",
        },
        estimator_parameter_space={
            "n_neighbors": IntRange(min_value=5, max_value=20),
            "weights": CategoricalRange(choices=["uniform", "distance"]),
            # p: Minkowski power parameter (1 = Manhattan, 2 = Euclidean).
            "p": CategoricalRange(choices=[1, 2]),
        },
    ),
    GBM_NAME: EstimatorConfig(
        estimator_name=GBM_NAME,
        estimator_class=GradientBoostingRegressor,
        default_params={
            "learning_rate": 0.05,
            "n_estimators": 100,
            "min_samples_split": 2,
            "min_samples_leaf": 1,
            "max_depth": 3,
            "subsample": 0.8,
            "random_state": None,  # added
        },
        estimator_parameter_space={
            "learning_rate": FloatRange(min_value=0.02, max_value=0.15),
            "n_estimators": IntRange(min_value=10, max_value=200),
            "min_samples_split": IntRange(min_value=4, max_value=10),
            "min_samples_leaf": IntRange(min_value=3, max_value=7),
            "max_depth": IntRange(min_value=2, max_value=4),
            "subsample": FloatRange(min_value=0.7, max_value=0.9),
        },
    ),
    KR_NAME: EstimatorConfig(
        estimator_name=KR_NAME,
        estimator_class=KernelRidge,
        default_params={
            "alpha": 5.0,
            "kernel": "rbf",
        },
        estimator_parameter_space={
            "alpha": FloatRange(min_value=1.0, max_value=20.0, log_scale=True),
            "kernel": CategoricalRange(choices=["linear", "rbf", "poly"]),
        },
    ),
    # Single-fit quantile estimators
    QRF_NAME: EstimatorConfig(
        estimator_name=QRF_NAME,
        estimator_class=QuantileForest,
        default_params={
            "n_estimators": 50,
            "max_depth": 4,
            "max_features": 0.7,
            "min_samples_split": 4,
            "bootstrap": True,
            "random_state": None,  # added
        },
        estimator_parameter_space={
            "n_estimators": IntRange(min_value=25, max_value=100),
            "max_depth": IntRange(min_value=2, max_value=6),
            "max_features": FloatRange(min_value=0.6, max_value=0.8),
            "min_samples_split": IntRange(min_value=2, max_value=6),
            "bootstrap": CategoricalRange(choices=[True, False]),
        },
    ),
    QKNN_NAME: EstimatorConfig(
        estimator_name=QKNN_NAME,
        estimator_class=QuantileKNN,
        default_params={
            "n_neighbors": 6,
        },
        estimator_parameter_space={
            "n_neighbors": IntRange(min_value=5, max_value=20),
        },
    ),
    QLEAF_NAME: EstimatorConfig(
        estimator_name=QLEAF_NAME,
        estimator_class=QuantileLeaf,
        default_params={
            "n_estimators": 50,
            "max_depth": 3,
            "max_features": 0.8,
            "min_samples_split": 2,
            "bootstrap": True,
            "random_state": None,
        },
        estimator_parameter_space={
            "n_estimators": IntRange(min_value=25, max_value=200),
            "max_depth": IntRange(min_value=2, max_value=6),
            "max_features": FloatRange(min_value=0.7, max_value=1.0),
            # NOTE(review): min_value=1 — sklearn-style tree models require
            # min_samples_split >= 2; confirm QuantileLeaf accepts 1 or raise
            # this bound to 2 to match every other tree entry in this registry.
            "min_samples_split": IntRange(min_value=1, max_value=8),
            "bootstrap": CategoricalRange(choices=[True, False]),
        },
    ),
    # Multi-fit quantile estimators
    QGBM_NAME: EstimatorConfig(
        estimator_name=QGBM_NAME,
        estimator_class=QuantileGBM,
        default_params={
            "learning_rate": 0.1,
            "n_estimators": 100,
            "min_samples_split": 6,
            "min_samples_leaf": 1,
            "max_depth": 2,
            "subsample": 0.7,
            "max_features": 0.7,
            "random_state": None,  # added
        },
        estimator_parameter_space={
            "learning_rate": FloatRange(min_value=0.05, max_value=0.2),
            "n_estimators": IntRange(min_value=25, max_value=200),
            "min_samples_split": IntRange(min_value=2, max_value=8),
            "min_samples_leaf": IntRange(min_value=1, max_value=3),
            "max_depth": IntRange(min_value=2, max_value=6),
            "subsample": FloatRange(min_value=0.6, max_value=0.8),
            "max_features": FloatRange(min_value=0.6, max_value=0.8),
        },
    ),
    QL_NAME: EstimatorConfig(
        estimator_name=QL_NAME,
        estimator_class=QuantileLasso,
        default_params={
            "max_iter": 300,
            "p_tol": 1e-4,
            "random_state": None,  # added
        },
        estimator_parameter_space={
            "max_iter": IntRange(min_value=200, max_value=800),
            "p_tol": FloatRange(min_value=1e-5, max_value=1e-3, log_scale=True),
        },
    ),
    # Ensemble estimators. All entries share the same tunable stacking
    # hyperparameters (weighting_strategy / alpha); only the component
    # estimators differ.
    QENS1_NAME: EstimatorConfig(
        estimator_name=QENS1_NAME,
        estimator_class=QuantileEnsembleEstimator,
        default_params={
            "weighting_strategy": "linear_stack",
            "cv": 5,
            "alpha": 0.001,
        },
        estimator_parameter_space={
            "weighting_strategy": CategoricalRange(choices=["uniform", "linear_stack"]),
            "alpha": FloatRange(min_value=0.001, max_value=0.1, log_scale=True),
        },
        # Components: QuantileLasso + QuantileKNN + QuantileGBM.
        ensemble_components=[
            {
                "class": QuantileLasso,
                # NOTE(review): unlike the standalone QL entry, no
                # "random_state" is passed here — confirm whether component
                # seeding is intentional or an omission.
                "params": {
                    "max_iter": 300,
                    "p_tol": 1e-4,
                },
            },
            {
                "class": QuantileKNN,
                "params": {
                    "n_neighbors": 6,
                },
            },
            {
                "class": QuantileGBM,
                "params": {
                    "learning_rate": 0.1,
                    "n_estimators": 100,
                    "min_samples_split": 6,
                    "min_samples_leaf": 1,
                    "max_depth": 2,
                    "subsample": 0.7,
                    "max_features": 0.7,
                    "random_state": None,
                },
            },
        ],
    ),
    QENS2_NAME: EstimatorConfig(
        estimator_name=QENS2_NAME,
        estimator_class=QuantileEnsembleEstimator,
        default_params={
            "weighting_strategy": "linear_stack",
            "cv": 5,
            "alpha": 0.001,
        },
        estimator_parameter_space={
            "weighting_strategy": CategoricalRange(choices=["uniform", "linear_stack"]),
            "alpha": FloatRange(min_value=0.001, max_value=0.1, log_scale=True),
        },
        # Components: QuantileGBM + QuantileForest.
        ensemble_components=[
            {
                "class": QuantileGBM,
                "params": {
                    "learning_rate": 0.1,
                    "n_estimators": 100,
                    "min_samples_split": 6,
                    "min_samples_leaf": 1,
                    "max_depth": 2,
                    "subsample": 0.7,
                    "max_features": 0.7,
                    "random_state": None,
                },
            },
            {
                "class": QuantileForest,
                "params": {
                    "n_estimators": 50,
                    "max_depth": 4,
                    "max_features": 0.7,
                    "min_samples_split": 4,
                    "bootstrap": True,
                    "random_state": None,
                },
            },
        ],
    ),
    QENS3_NAME: EstimatorConfig(
        estimator_name=QENS3_NAME,
        estimator_class=QuantileEnsembleEstimator,
        default_params={
            "weighting_strategy": "linear_stack",
            "cv": 5,
            "alpha": 0.001,
        },
        estimator_parameter_space={
            "weighting_strategy": CategoricalRange(choices=["uniform", "linear_stack"]),
            "alpha": FloatRange(min_value=0.001, max_value=0.1, log_scale=True),
        },
        # Components: QuantileGBM + QuantileLasso.
        ensemble_components=[
            {
                "class": QuantileGBM,
                "params": {
                    "learning_rate": 0.1,
                    "n_estimators": 100,
                    "min_samples_split": 6,
                    "min_samples_leaf": 1,
                    "max_depth": 2,
                    "subsample": 0.7,
                    "max_features": 0.7,
                    "random_state": None,
                },
            },
            {
                "class": QuantileLasso,
                "params": {
                    "max_iter": 300,
                    "p_tol": 1e-4,
                },
            },
        ],
    ),
    QENS4_NAME: EstimatorConfig(
        estimator_name=QENS4_NAME,
        estimator_class=QuantileEnsembleEstimator,
        default_params={
            "weighting_strategy": "linear_stack",
            "cv": 5,
            "alpha": 0.001,
        },
        estimator_parameter_space={
            "weighting_strategy": CategoricalRange(choices=["uniform", "linear_stack"]),
            "alpha": FloatRange(min_value=0.001, max_value=0.1, log_scale=True),
        },
        # Components: QuantileGBM + QuantileGP.
        ensemble_components=[
            {
                "class": QuantileGBM,
                "params": {
                    "learning_rate": 0.1,
                    "n_estimators": 100,
                    "min_samples_split": 6,
                    "min_samples_leaf": 1,
                    "max_depth": 2,
                    "subsample": 0.7,
                    "max_features": 0.7,
                    "random_state": None,
                },
            },
            {
                "class": QuantileGP,
                # NOTE(review): this QuantileGP omits "random_state" while the
                # QENS5 component below includes it — confirm which is intended.
                "params": {
                    "kernel": "matern",
                    "alpha": 1e-8,
                },
            },
        ],
    ),
    QENS5_NAME: EstimatorConfig(
        estimator_name=QENS5_NAME,
        estimator_class=QuantileEnsembleEstimator,
        default_params={
            "weighting_strategy": "linear_stack",
            "cv": 5,
            "alpha": 0.001,
        },
        estimator_parameter_space={
            "weighting_strategy": CategoricalRange(choices=["uniform", "linear_stack"]),
            "alpha": FloatRange(min_value=0.001, max_value=0.1, log_scale=True),
        },
        # Components: QuantileLasso + QuantileGP + QuantileGBM.
        ensemble_components=[
            {
                "class": QuantileLasso,
                "params": {
                    "max_iter": 300,
                    "p_tol": 1e-4,
                },
            },
            {
                "class": QuantileGP,
                "params": {
                    "kernel": "matern",
                    "alpha": 1e-8,
                    "random_state": None,
                },
            },
            {
                "class": QuantileGBM,
                "params": {
                    "learning_rate": 0.1,
                    "n_estimators": 100,
                    "min_samples_split": 6,
                    "min_samples_leaf": 1,
                    "max_depth": 2,
                    "subsample": 0.7,
                    "max_features": 0.7,
                    "random_state": None,
                },
            },
        ],
    ),
    # Gaussian-process quantile estimator
    QGP_NAME: EstimatorConfig(
        estimator_name=QGP_NAME,
        estimator_class=QuantileGP,
        default_params={
            "kernel": "matern",
            "alpha": 1e-8,
            "random_state": None,
        },
        estimator_parameter_space={
            "kernel": CategoricalRange(choices=["rbf", "matern", "rational_quadratic"]),
            "alpha": FloatRange(min_value=1e-10, max_value=1e-6, log_scale=True),
        },
    ),
}
diff --git a/confopt/selection/estimators/__init__.py b/confopt/selection/estimators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/confopt/selection/estimators/ensembling.py b/confopt/selection/estimators/ensembling.py
new file mode 100644
index 0000000..053d7d9
--- /dev/null
+++ b/confopt/selection/estimators/ensembling.py
@@ -0,0 +1,598 @@
+import logging
+from typing import List, Optional, Tuple, Literal, Union
+import numpy as np
+from copy import deepcopy
+from sklearn.base import BaseEstimator
+from sklearn.model_selection import KFold
+from confopt.selection.estimators.quantile_estimation import (
+ BaseMultiFitQuantileEstimator,
+ BaseSingleFitQuantileEstimator,
+)
+from abc import ABC, abstractmethod
+from sklearn.linear_model import Lasso
+from scipy.optimize import minimize
+
+logger = logging.getLogger(__name__)
+
+
def quantile_loss(y_true: np.ndarray, y_pred: np.ndarray, quantile: float) -> float:
    """Return the mean pinball loss of predictions at the given quantile level."""
    residuals = y_true - y_pred
    # Under-predictions are weighted by `quantile`, over-predictions by (1 - quantile).
    per_sample = np.where(
        residuals >= 0, quantile * residuals, (quantile - 1) * residuals
    )
    return np.mean(per_sample)
+
+
class QuantileLassoMeta:
    """Pinball-loss meta-learner with L1 regularization for stacking weights.

    Fits a convex combination of base-model predictions by directly minimizing
    quantile (pinball) loss plus an L1 penalty via scipy's SLSQP optimizer,
    rather than the squared-error objective of a standard Lasso.

    Args:
        alpha: L1 regularization strength; larger values promote sparsity.
        quantile: Quantile level in [0, 1] whose pinball loss is minimized.
        max_iter: Maximum optimizer iterations.
        tol: Optimizer convergence tolerance (ftol).
        positive: If True, weights are constrained to be non-negative.
    """

    def __init__(
        self,
        alpha: float = 0.0,
        quantile: float = 0.5,
        max_iter: int = 1000,
        tol: float = 1e-6,
        positive: bool = True,
    ):
        self.alpha = alpha
        self.quantile = quantile
        self.max_iter = max_iter
        self.tol = tol
        self.positive = positive
        # Populated by fit(); predict() refuses to run while this is None.
        self.coef_ = None

    def _quantile_loss_objective(
        self, weights: np.ndarray, X: np.ndarray, y: np.ndarray
    ) -> float:
        """Objective: mean pinball loss of X @ weights plus the L1 penalty."""
        residuals = y - X @ weights
        pinball = np.mean(
            np.maximum(self.quantile * residuals, (self.quantile - 1) * residuals)
        )
        return pinball + self.alpha * np.sum(np.abs(weights))

    def fit(self, X: np.ndarray, y: np.ndarray) -> "QuantileLassoMeta":
        """Fit ensemble weights via constrained SLSQP optimization.

        Weights start uniform, are constrained to sum to one, and (when
        `positive` is set) are bounded below by zero. On optimizer failure,
        or if all weights collapse to zero, uniform weights are used instead.

        Args:
            X: Feature matrix with shape (n_samples, n_features).
            y: Target values with shape (n_samples,).

        Returns:
            Self for method chaining.
        """
        n_features = X.shape[1]
        uniform_weights = np.ones(n_features) / n_features

        # Non-negativity bounds (when requested) and a sum-to-one constraint
        # keep the solution a proper convex combination of base models.
        if self.positive:
            bounds = [(0, None)] * n_features
        else:
            bounds = [(None, None)] * n_features
        constraints = [{"type": "eq", "fun": lambda w: np.sum(w) - 1.0}]

        solution = minimize(
            fun=self._quantile_loss_objective,
            x0=uniform_weights,
            args=(X, y),
            bounds=bounds,
            constraints=constraints,
            method="SLSQP",
            options={"maxiter": self.max_iter, "ftol": self.tol},
        )

        if solution.success:
            self.coef_ = solution.x
        else:
            logger.warning("Quantile Lasso optimization failed, using uniform weights")
            self.coef_ = np.ones(n_features) / n_features

        # Clamp tiny negative values the optimizer may leave behind, then
        # renormalize so the weights remain a valid convex combination.
        if self.positive:
            self.coef_ = np.maximum(self.coef_, 0)

        total_weight = np.sum(self.coef_)
        if total_weight > 0:
            self.coef_ = self.coef_ / total_weight
        else:
            self.coef_ = np.ones(n_features) / n_features

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the weighted combination X @ coef_ of base predictions.

        Args:
            X: Feature matrix with shape (n_samples, n_features).

        Returns:
            Predictions with shape (n_samples,).

        Raises:
            ValueError: If called before fit().
        """
        if self.coef_ is None:
            raise ValueError("Must call fit before predict")
        return X @ self.coef_
+
+
class BaseEnsembleEstimator(ABC):
    """Common interface for estimators that combine several base models."""

    @abstractmethod
    def fit(self, X: np.ndarray, y: np.ndarray, *args, **kwargs):
        """Train the ensemble's base models on the given data."""

    @abstractmethod
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return combined predictions for the given features."""
+
+
class QuantileEnsembleEstimator(BaseEnsembleEstimator):
    """Ensemble estimator for quantile regression combining multiple quantile predictors.

    Implements ensemble methods that combine predictions from multiple quantile estimators
    to improve uncertainty quantification and prediction accuracy. Uses separate weights
    for each quantile level, allowing different estimators to specialize in different
    quantile regions. Supports both uniform weighting and linear stacking strategies
    with cross-validation for optimal weight computation.

    Weighting Strategies:
        - Uniform: Equal weights for all base estimators, providing simple averaging
          that reduces variance through ensemble diversity without optimization overhead.
        - Linear Stack: Quantile Lasso-based weight optimization using cross-validation to
          minimize quantile loss (pinball loss). Automatically selects the best-performing
          estimators and handles multicollinearity through L1 regularization, with separate
          quantile-specific optimization for each quantile level.

    Args:
        estimators: List of quantile estimators to combine. Must be instances of
            BaseMultiFitQuantileEstimator or BaseSingleFitQuantileEstimator. Requires
            at least 2 estimators for meaningful ensemble benefits.
        cv: Number of cross-validation folds for weight computation in linear stacking.
            Higher values provide more robust weight estimates but increase computation.
            Typical range: 3-10 folds.
        weighting_strategy: Strategy for combining base estimator predictions.
            "uniform" uses equal weights, "linear_stack" optimizes weights via quantile Lasso.
        random_state: Seed for reproducible cross-validation splits and quantile Lasso fitting.
            Ensures deterministic ensemble behavior across runs.
        alpha: L1 regularization strength for quantile Lasso weight optimization. Higher values
            increase sparsity in ensemble weights. Range: [0.0, 1.0] with 0.0 being
            unregularized and higher values promoting sparser solutions.

    Attributes:
        quantiles: List of quantile levels fitted during training.
        quantile_weights: Learned weights for combining base estimator predictions.
            Shape (n_quantiles, n_estimators) with separate weights per quantile level.
        stacker: Fitted quantile Lasso models used for linear stacking weight computation.

    Raises:
        ValueError: If fewer than 2 estimators provided or invalid parameter values.

    Examples:
        Basic uniform ensemble:
        >>> estimators = [QuantileGBM(), QuantileForest(), QuantileKNN()]
        >>> ensemble = QuantileEnsembleEstimator(estimators)
        >>> ensemble.fit(X_train, y_train, quantiles=[0.1, 0.5, 0.9])
        >>> predictions = ensemble.predict(X_test)

        Linear stacking with regularization:
        >>> ensemble = QuantileEnsembleEstimator(
        ...     estimators, weighting_strategy="linear_stack", alpha=0.01
        ... )
        >>> ensemble.fit(X_train, y_train, quantiles=np.linspace(0.05, 0.95, 19))
    """

    def __init__(
        self,
        estimators: List[
            Union[BaseMultiFitQuantileEstimator, BaseSingleFitQuantileEstimator]
        ],
        cv: int = 5,
        weighting_strategy: Literal["uniform", "linear_stack"] = "uniform",
        random_state: Optional[int] = None,
        alpha: float = 0.0,
    ):
        # A one-model "ensemble" would degenerate to that model; reject it early.
        if len(estimators) < 2:
            raise ValueError("At least 2 estimators required for ensemble")

        self.estimators = estimators
        self.cv = cv
        self.weighting_strategy = weighting_strategy
        self.random_state = random_state
        self.alpha = alpha

        # All three are populated by fit(); predict() checks `quantiles`.
        self.quantiles = None
        self.quantile_weights = None
        self.stacker = None

    def _get_stacking_training_data(
        self, X: np.ndarray, y: np.ndarray, quantiles: List[float]
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Generate cross-validation training data for linear stacking weight optimization.

        Creates validation predictions using k-fold cross-validation to avoid overfitting
        in weight computation. Each base estimator is trained on k-1 folds and predicts
        on the held-out fold, generating unbiased predictions for Lasso weight fitting.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantiles: List of quantile levels to fit models for.

        Returns:
            Tuple containing:
                - val_indices: Validation sample indices with shape (n_validation_samples,).
                - val_targets: Validation targets with shape (n_validation_samples,).
                - val_predictions: Validation predictions with shape
                  (n_validation_samples, n_estimators * n_quantiles).
        """
        cv_strategy = KFold(
            n_splits=self.cv, shuffle=True, random_state=self.random_state
        )

        val_indices = []
        val_targets = []
        val_predictions = []

        for train_idx, val_idx in cv_strategy.split(X):
            X_train_fold, X_val_fold = X[train_idx], X[val_idx]
            y_train_fold, y_val_fold = y[train_idx], y[val_idx]

            fold_predictions = []

            for estimator in self.estimators:
                # Deep-copy so the held-out fold never leaks into the
                # estimators that fit() later trains on the full data.
                estimator_copy = deepcopy(estimator)
                estimator_copy.fit(X_train_fold, y_train_fold, quantiles)
                pred = estimator_copy.predict(X_val_fold)
                fold_predictions.append(pred)

            # NOTE(review): this loop copies each prediction array unchanged —
            # it looks like a leftover from a reshaping step (perhaps for 1-D
            # outputs); confirm whether it can be removed.
            fold_predictions_reshaped = []
            for pred in fold_predictions:
                fold_predictions_reshaped.append(pred)
            # Columns end up grouped per estimator: estimator 0's quantile
            # columns first, then estimator 1's, etc. (relied upon by
            # _compute_linear_stack_weights' col_idx arithmetic).
            fold_predictions = np.concatenate(fold_predictions_reshaped, axis=1)

            val_indices.extend(val_idx)
            val_targets.extend(y_val_fold)
            val_predictions.append(fold_predictions)

        val_indices = np.array(val_indices)
        val_targets = np.array(val_targets)
        val_predictions = np.vstack(val_predictions)

        return val_indices, val_targets, val_predictions

    def _compute_linear_stack_weights(
        self, X: np.ndarray, y: np.ndarray, quantiles: List[float]
    ) -> np.ndarray:
        """Compute optimal ensemble weights using quantile Lasso regression on validation predictions.

        Implements linear stacking by fitting separate quantile Lasso regression models for each
        quantile level to minimize quantile loss (pinball loss) on cross-validation predictions.
        L1 regularization promotes sparse solutions, automatically selecting the most
        relevant base estimators while handling multicollinearity. Uses custom quantile Lasso
        that optimizes pinball loss instead of mean squared error.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantiles: List of quantile levels for weight optimization.

        Returns:
            Optimal ensemble weights with shape (n_quantiles, n_estimators).
        """
        val_indices, val_targets, val_predictions = self._get_stacking_training_data(
            X, y, quantiles
        )

        # Restore original row order (CV shuffles the folds) so predictions
        # and targets stay aligned sample-by-sample.
        sorted_indices = np.argsort(val_indices)
        val_predictions_sorted = val_predictions[sorted_indices]
        val_targets_sorted = val_targets[sorted_indices]

        n_estimators = len(self.estimators)
        n_quantiles = len(quantiles)

        weights_per_quantile = []

        for q_idx in range(n_quantiles):
            # Pull each estimator's column for this quantile; assumes each base
            # estimator's predict returns (n_samples, n_quantiles) with columns
            # in `quantiles` order — TODO confirm for all estimator types.
            quantile_predictions = []
            for est_idx in range(n_estimators):
                col_idx = est_idx * n_quantiles + q_idx
                quantile_predictions.append(val_predictions_sorted[:, col_idx])

            quantile_pred_matrix = np.column_stack(quantile_predictions)

            quantile_stacker = QuantileLassoMeta(
                alpha=self.alpha, quantile=quantiles[q_idx], positive=True
            )
            quantile_stacker.fit(quantile_pred_matrix, val_targets_sorted)
            quantile_weights = quantile_stacker.coef_

            # Defensive fallback; QuantileLassoMeta.fit already substitutes
            # uniform weights when they collapse to zero.
            if np.sum(quantile_weights) == 0:
                logger.warning(
                    f"All QuantileLasso weights are zero for quantile {q_idx}, falling back to uniform weighting"
                )
                quantile_weights = np.ones(len(self.estimators))

            quantile_weights = quantile_weights / np.sum(quantile_weights)
            weights_per_quantile.append(quantile_weights)

        return np.array(weights_per_quantile)

    def _compute_quantile_weights(
        self, X: np.ndarray, y: np.ndarray, quantiles: List[float]
    ) -> np.ndarray:
        """Compute ensemble weights based on the specified weighting strategy.

        Dispatches to the appropriate weight computation method based on the weighting_strategy
        parameter. Supports uniform weighting for simple averaging and linear stacking for
        optimized weight computation via Lasso regression.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantiles: List of quantile levels for weight computation.

        Returns:
            Ensemble weights with shape (n_quantiles, n_estimators).

        Raises:
            ValueError: If unknown weighting strategy specified.
        """
        if self.weighting_strategy == "uniform":
            n_estimators = len(self.estimators)
            n_quantiles = len(quantiles)
            return np.ones((n_quantiles, n_estimators)) / n_estimators
        elif self.weighting_strategy == "linear_stack":
            return self._compute_linear_stack_weights(X, y, quantiles)
        else:
            raise ValueError(f"Unknown weighting strategy: {self.weighting_strategy}")

    def fit(
        self, X: np.ndarray, y: np.ndarray, quantiles: List[float]
    ) -> "QuantileEnsembleEstimator":
        """Fit the quantile ensemble to training data.

        Trains all base estimators on the provided data and computes separate ensemble
        weights for each quantile level according to the specified weighting strategy.
        For linear stacking, performs cross-validation to generate unbiased validation
        predictions for weight optimization.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantiles: List of quantile levels in [0, 1] to fit models for.

        Returns:
            Self for method chaining.
        """
        self.quantiles = quantiles

        # Final base models are trained on ALL data; CV copies used for
        # weight computation are discarded inside _get_stacking_training_data.
        for estimator in self.estimators:
            estimator.fit(X, y, quantiles)

        self.quantile_weights = self._compute_quantile_weights(X, y, quantiles)

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Generate ensemble quantile predictions by combining base estimator outputs.

        Combines predictions from all fitted base estimators using quantile-specific
        weights learned during training. Each quantile level uses its own set of weights
        for more flexible combination that allows estimators to specialize in different
        quantile regions.

        Args:
            X: Features for prediction with shape (n_samples, n_features).

        Returns:
            Ensemble quantile predictions with shape (n_samples, n_quantiles).
            Each column corresponds to one quantile level in the same order as
            specified during fitting.

        Raises:
            ValueError: If called before fitting the ensemble.
        """
        if self.quantiles is None:
            raise ValueError("Must call fit before predict")

        predictions = []
        for estimator in self.estimators:
            pred = estimator.predict(X)
            predictions.append(pred)

        predictions = np.array(
            predictions
        )  # Shape: (n_estimators, n_samples, n_quantiles)
        n_samples = predictions.shape[1]
        n_quantiles = len(self.quantiles)

        # Weighted average per quantile column, using that quantile's own weights.
        ensemble_predictions = np.zeros((n_samples, n_quantiles))
        for q_idx in range(n_quantiles):
            quantile_weights = self.quantile_weights[q_idx]  # Shape: (n_estimators,)
            quantile_preds = predictions[
                :, :, q_idx
            ]  # Shape: (n_estimators, n_samples)
            ensemble_predictions[:, q_idx] = np.dot(quantile_weights, quantile_preds)

        return ensemble_predictions
+
+
class PointEnsembleEstimator(BaseEnsembleEstimator):
    """Ensemble of regression models for point prediction.

    Combines the outputs of several base regressors into one point forecast.
    Two combination schemes are supported:

    - "uniform": every base estimator receives the same weight, giving a
      plain average that reduces variance through model diversity with no
      optimization overhead.
    - "linear_stack": weights are learned with a non-negative Lasso fit on
      out-of-fold predictions, which favours the strongest estimators and
      copes with multicollinearity via L1 shrinkage.

    Args:
        estimators: Scikit-learn compatible regressors (fit/predict methods)
            to combine. At least 2 are required for a meaningful ensemble.
        cv: Number of cross-validation folds used to build the stacking
            training set. More folds give steadier weights at extra cost;
            3-10 is typical.
        weighting_strategy: "uniform" for equal weights or "linear_stack"
            for Lasso-optimized weights.
        random_state: Seed controlling CV shuffling and Lasso fitting for
            reproducible ensemble behavior.
        alpha: L1 penalty strength for the stacking Lasso; larger values
            yield sparser weight vectors. 0.0 disables regularization.

    Attributes:
        weights: Learned combination weights, shape (n_estimators,),
            normalized to sum to 1.0.
        stacker: Fitted Lasso model when linear stacking is used.

    Raises:
        ValueError: If fewer than 2 estimators are supplied.

    Examples:
        >>> ensemble = PointEnsembleEstimator(estimators)
        >>> ensemble.fit(X_train, y_train)
        >>> predictions = ensemble.predict(X_test)
    """

    def __init__(
        self,
        estimators: List[BaseEstimator],
        cv: int = 5,
        weighting_strategy: Literal["uniform", "linear_stack"] = "uniform",
        random_state: Optional[int] = None,
        alpha: float = 0.0,
    ):
        if len(estimators) < 2:
            raise ValueError("At least 2 estimators required for ensemble")

        self.estimators = estimators
        self.cv = cv
        self.weighting_strategy = weighting_strategy
        self.random_state = random_state
        self.alpha = alpha

        self.weights = None
        self.stacker = None

    def _get_stacking_training_data(
        self, X: np.ndarray, y: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Collect out-of-fold predictions to train the stacking model."""
        splitter = KFold(
            n_splits=self.cv, shuffle=True, random_state=self.random_state
        )

        collected_indices = []
        collected_targets = []
        collected_predictions = []

        for train_idx, val_idx in splitter.split(X):
            fold_X_train, fold_X_val = X[train_idx], X[val_idx]
            fold_y_train, fold_y_val = y[train_idx], y[val_idx]

            # Refit deep copies so the ensemble's own estimators are untouched.
            fold_preds = []
            for estimator in self.estimators:
                trained = deepcopy(estimator)
                trained.fit(fold_X_train, fold_y_train)
                fold_preds.append(trained.predict(fold_X_val))

            collected_indices.extend(val_idx)
            collected_targets.extend(fold_y_val)
            collected_predictions.append(np.column_stack(fold_preds))

        return (
            np.array(collected_indices),
            np.array(collected_targets),
            np.vstack(collected_predictions),
        )

    def _compute_linear_stack_weights(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Fit a non-negative Lasso on out-of-fold predictions to get weights."""
        fold_indices, fold_targets, fold_predictions = self._get_stacking_training_data(
            X, y
        )

        # Restore original sample order before fitting the stacker.
        order = np.argsort(fold_indices)

        self.stacker = Lasso(alpha=self.alpha, fit_intercept=False, positive=True)
        self.stacker.fit(fold_predictions[order], fold_targets[order])
        weights = self.stacker.coef_

        # positive=True yields exact zeros, so this equality check is safe.
        if np.sum(weights) == 0:
            logger.warning(
                "All Lasso weights are zero, falling back to uniform weighting"
            )
            weights = np.ones(len(self.estimators))

        return weights / np.sum(weights)

    def _compute_point_weights(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Dispatch weight computation according to the weighting strategy."""
        if self.weighting_strategy == "uniform":
            n_estimators = len(self.estimators)
            return np.ones(n_estimators) / n_estimators
        elif self.weighting_strategy == "linear_stack":
            return self._compute_linear_stack_weights(X, y)
        else:
            raise ValueError(f"Unknown weighting strategy: {self.weighting_strategy}")

    def fit(self, X: np.ndarray, y: np.ndarray) -> "PointEnsembleEstimator":
        """Train every base estimator on the full data, then learn weights."""
        for estimator in self.estimators:
            estimator.fit(X, y)

        self.weights = self._compute_point_weights(X, y)
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the weighted average of the base estimators' predictions."""
        if self.weights is None:
            raise ValueError("Must call fit before predict")

        # Shape (n_estimators, n_samples); dot contracts the estimator axis.
        stacked = np.array([estimator.predict(X) for estimator in self.estimators])
        return np.dot(self.weights, stacked)
diff --git a/confopt/selection/estimators/quantile_estimation.py b/confopt/selection/estimators/quantile_estimation.py
new file mode 100644
index 0000000..3b09c04
--- /dev/null
+++ b/confopt/selection/estimators/quantile_estimation.py
@@ -0,0 +1,1113 @@
+"""Quantile regression estimators for distributional prediction.
+
+This module provides quantile regression implementations using different algorithmic
+approaches: multi-fit estimators that train separate models per quantile, and single-fit
+estimators that model the full conditional distribution. Includes gradient boosting,
+random forest, neural network, and Gaussian process variants optimized for uncertainty
+quantification in conformal prediction frameworks.
+"""
+
+from typing import List, Union, Optional
+import numpy as np
+from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
+from sklearn.neighbors import NearestNeighbors
+from statsmodels.regression.quantile_regression import QuantReg
+from sklearn.base import clone
+from abc import ABC, abstractmethod
+from scipy.stats import norm
+from scipy.linalg import solve_triangular, cholesky, LinAlgError
+from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
    RBF,
    ConstantKernel as C,
    ExpSineSquared,
    Kernel,
    Matern,
    RationalQuadratic,
    WhiteKernel,
)
+import warnings
+import copy
+import logging
+
+
class BaseMultiFitQuantileEstimator(ABC):
    """Abstract base for quantile estimators that train separate models per quantile.

    Multi-fit estimators train individual models for each requested quantile level,
    allowing algorithms like gradient boosting to directly optimize quantile-specific
    loss functions. This approach provides flexibility at the cost of increased
    computational overhead proportional to the number of quantiles.

    The base class handles the iteration over quantiles and result aggregation,
    while subclasses implement the quantile-specific model fitting logic.
    """

    def fit(self, X: np.array, y: np.array, quantiles: List[float]):
        """Fit separate models for each quantile level.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantiles: List of quantile levels in [0, 1] to fit models for.

        Returns:
            Self for method chaining.
        """
        self.trained_estimators = []
        for quantile in quantiles:
            quantile_estimator = self._fit_quantile_estimator(X, y, quantile)
            self.trained_estimators.append(quantile_estimator)
        return self

    @abstractmethod
    def _fit_quantile_estimator(self, X: np.array, y: np.array, quantile: float):
        """Fit a single model for the specified quantile level.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantile: Quantile level in [0, 1] to fit model for.

        Returns:
            Fitted estimator for the quantile level.
        """

    def predict(self, X: np.array) -> np.array:
        """Generate predictions for all fitted quantile levels.

        Args:
            X: Features for prediction with shape (n_samples, n_features).

        Returns:
            Quantile predictions with shape (n_samples, n_quantiles).

        Raises:
            RuntimeError: If called before fitting any models.
        """
        # getattr guards the unfitted case: before fit() the attribute does
        # not exist, so plain attribute access would raise AttributeError
        # instead of the documented RuntimeError.
        if not getattr(self, "trained_estimators", None):
            raise RuntimeError("Model must be fitted before prediction")

        y_pred = np.column_stack(
            [estimator.predict(X) for estimator in self.trained_estimators]
        )
        return y_pred
+
+
class BaseSingleFitQuantileEstimator(ABC):
    """Abstract base for quantile estimators that model the full conditional distribution.

    Single-fit estimators train one model that captures the complete conditional
    distribution of the target variable. Quantiles are then extracted from this
    distribution, either through sampling or analytical computation. This approach
    is computationally efficient and ensures monotonic quantile ordering.

    Subclasses must implement distribution modeling and quantile extraction logic.
    """

    def fit(self, X: np.ndarray, y: np.ndarray, quantiles: List[float]):
        """Fit a single model to capture the conditional distribution.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantiles: List of quantile levels in [0, 1] to extract later.

        Returns:
            Self for method chaining.
        """
        self.quantiles = quantiles
        self._fit_implementation(X, y)
        return self

    @abstractmethod
    def _fit_implementation(self, X: np.ndarray, y: np.ndarray):
        """Implement the model fitting logic for the conditional distribution.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
        """

    @abstractmethod
    def _get_candidate_local_distribution(self, X: np.ndarray) -> np.ndarray:
        """Extract candidate distribution samples for quantile computation.

        Args:
            X: Features with shape (n_samples, n_features).

        Returns:
            Distribution samples with shape (n_samples, n_candidates).
        """

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Generate quantile predictions from the fitted conditional distribution.

        Args:
            X: Features for prediction with shape (n_samples, n_features).

        Returns:
            Quantile predictions with shape (n_samples, n_quantiles).

        Raises:
            RuntimeError: If called before fitting the model.
        """
        # Guard the unfitted case explicitly: self.quantiles only exists after
        # fit(), so plain access would raise AttributeError. Mirrors the
        # RuntimeError contract of BaseMultiFitQuantileEstimator.predict.
        if getattr(self, "quantiles", None) is None:
            raise RuntimeError("Model must be fitted before prediction")

        candidate_distribution = self._get_candidate_local_distribution(X)
        quantile_preds = np.quantile(candidate_distribution, self.quantiles, axis=1).T
        return quantile_preds
+
+
class QuantRegWrapper:
    """Adapter giving statsmodels QuantReg results a scikit-learn interface.

    Wraps a fitted QuantReg results object so it exposes a ``predict`` method
    usable by the estimator framework, re-adding the intercept column when one
    was appended to the design matrix during fitting.

    Args:
        results: Fitted QuantReg results object from statsmodels.
        has_intercept: Whether an intercept term was added to the design matrix.
    """

    def __init__(self, results, has_intercept):
        self.results = results
        self.has_intercept = has_intercept

    def predict(self, X):
        """Predict targets via the stored quantile-regression coefficients.

        Args:
            X: Features for prediction with shape (n_samples, n_features).

        Returns:
            Predictions with shape (n_samples,).
        """
        design = (
            np.column_stack([np.ones(len(X)), X]) if self.has_intercept else X
        )
        return design @ self.results.params
+
+
class QuantileLasso(BaseMultiFitQuantileEstimator):
    """Linear quantile regression using L1 regularization (Lasso).

    Implements quantile regression with L1 penalty using statsmodels backend.
    Fits separate linear models for each quantile level using the pinball loss
    function. Automatically handles intercept terms and provides reproducible
    results through random state control.

    Args:
        max_iter: Maximum iterations for optimization convergence.
        p_tol: Convergence tolerance for parameter changes.
        random_state: Seed for reproducible optimization.
    """

    def __init__(
        self,
        max_iter: int = 1000,
        p_tol: float = 1e-6,
        random_state: Optional[int] = None,
    ):
        super().__init__()
        self.max_iter = max_iter
        self.p_tol = p_tol
        self.random_state = random_state

    def _fit_quantile_estimator(self, X: np.array, y: np.array, quantile: float):
        """Fit linear quantile regression for a specific quantile level.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantile: Quantile level in [0, 1] to fit model for.

        Returns:
            QuantRegWrapper containing fitted model for the quantile.
        """
        # Add an intercept column unless the design matrix already has one.
        has_added_intercept = not np.any(np.all(X == 1, axis=0))
        if has_added_intercept:
            X_with_intercept = np.column_stack([np.ones(len(X)), X])
        else:
            X_with_intercept = X

        # NOTE: a previous version built a regularized Gram matrix
        # (X^T X + eps*I) here and discarded the result — pure dead code with
        # an O(n_features^2) allocation; removed.

        if self.random_state is not None:
            np.random.seed(self.random_state)

        try:
            model = QuantReg(y, X_with_intercept)
            result = model.fit(q=quantile, max_iter=self.max_iter, p_tol=self.p_tol)
            return QuantRegWrapper(result, has_added_intercept)
        except np.linalg.LinAlgError:
            # Fallback to robust coordinate descent quantile regression
            warnings.warn(
                f"SVD convergence failed for quantile {quantile}. "
                "Using coordinate descent fallback solution."
            )

            params = self._coordinate_descent_quantile_regression(
                X_with_intercept, y, quantile
            )

            # Minimal stand-in exposing .params like a QuantReg result so the
            # wrapper stays agnostic about which solver produced the fit.
            class MockQuantRegResult:
                def __init__(self, params):
                    self.params = params

            mock_result = MockQuantRegResult(params)
            return QuantRegWrapper(mock_result, has_added_intercept)

    def _coordinate_descent_quantile_regression(
        self, X: np.ndarray, y: np.ndarray, quantile: float
    ) -> np.ndarray:
        """Coordinate descent algorithm for quantile regression with regularization.

        Robust fallback used when statsmodels' solver hits numerical trouble.
        Starts from a ridge-regularized least squares estimate and performs
        subgradient coordinate updates with adaptive step sizes and explicit
        convergence checking.

        Args:
            X: Design matrix with shape (n_samples, n_features).
            y: Target values with shape (n_samples,).
            quantile: Quantile level in [0, 1].

        Returns:
            Coefficient vector with shape (n_features,).
        """
        n_samples, n_features = X.shape

        # Initialize with a ridge-regularized least squares fit; fall back to
        # zeros if even that system is singular.
        try:
            XtX = X.T @ X + 1e-6 * np.eye(n_features)
            Xty = X.T @ y
            beta = np.linalg.solve(XtX, Xty)
        except np.linalg.LinAlgError:
            beta = np.zeros(n_features)

        max_iter = self.max_iter
        tolerance = self.p_tol
        lambda_reg = 1e-6  # small L2 term keeps updates stable

        # Pre-compute column norms used for the adaptive step sizes.
        X_norms_sq = np.sum(X**2, axis=0) + lambda_reg

        for iteration in range(max_iter):
            beta_old = beta.copy()

            for j in range(n_features):
                # Residual with the j-th feature's contribution removed.
                residual = y - X @ beta + X[:, j] * beta[j]

                r_pos = residual >= 0
                r_neg = ~r_pos

                # Subgradient of the pinball loss w.r.t. beta[j].
                grad_pos = -quantile * np.sum(X[r_pos, j])
                grad_neg = -(quantile - 1) * np.sum(X[r_neg, j])
                gradient = grad_pos + grad_neg

                # Add L2 regularization gradient.
                gradient += lambda_reg * beta[j]

                # Gradient step scaled by the column's squared norm.
                step_size = 1.0 / X_norms_sq[j]
                beta[j] -= step_size * gradient

                # Soft-threshold tiny coefficients to exact zero for stability.
                thresh = 1e-8
                if abs(beta[j]) < thresh:
                    beta[j] = 0.0

            # Stop once parameter movement falls below tolerance.
            param_change = np.linalg.norm(beta - beta_old)
            if param_change < tolerance:
                break

        return beta
+
+
class QuantileGBM(BaseMultiFitQuantileEstimator):
    """Gradient boosting quantile regression built on scikit-learn.

    Trains one GradientBoostingRegressor per quantile level, using the
    "quantile" loss with ``alpha`` set to the target level. Offers robust
    non-linear quantile estimation with implicit feature selection and
    interaction handling.

    Args:
        learning_rate: Step size for gradient descent updates.
        n_estimators: Number of boosting stages (trees) to fit.
        min_samples_split: Minimum samples required to split internal nodes.
        min_samples_leaf: Minimum samples required at leaf nodes.
        max_depth: Maximum depth of individual trees.
        subsample: Fraction of samples used for fitting individual trees.
        max_features: Number of features considered for best split.
        random_state: Seed for reproducible tree construction.
    """

    def __init__(
        self,
        learning_rate: float,
        n_estimators: int,
        min_samples_split: Union[float, int],
        min_samples_leaf: Union[float, int],
        max_depth: int,
        subsample: float = 1.0,
        max_features: Union[str, float, int] = None,
        random_state: int = None,
    ):
        super().__init__()
        # Template model; each quantile gets a clone with its own alpha.
        self.base_estimator = GradientBoostingRegressor(
            loss="quantile",
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            subsample=subsample,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            max_depth=max_depth,
            max_features=max_features,
            random_state=random_state,
        )

    def _fit_quantile_estimator(self, X: np.array, y: np.array, quantile: float):
        """Clone the template GBM, target one quantile level, and fit it.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).
            quantile: Quantile level in [0, 1] to fit model for.

        Returns:
            Fitted GradientBoostingRegressor for the quantile.
        """
        quantile_model = clone(self.base_estimator)
        quantile_model.set_params(alpha=quantile)
        quantile_model.fit(X, y)
        return quantile_model
+
+
class QuantileForest(BaseSingleFitQuantileEstimator):
    """Random forest quantile regression via the empirical tree distribution.

    Fits a single random forest and treats the spread of per-tree predictions
    as a local distribution from which quantiles are read off. Ensemble
    diversity captures epistemic uncertainty, and the empirical quantiles are
    naturally monotonic.

    Args:
        n_estimators: Number of trees in the forest.
        max_depth: Maximum depth of individual trees.
        max_features: Fraction of features considered for best split.
        min_samples_leaf: Minimum samples required at leaf nodes.
        min_samples_split: Minimum samples required to split internal nodes.
        bootstrap: Whether to use bootstrap sampling for tree training.
        random_state: Seed for reproducible tree construction.
    """

    def __init__(
        self,
        n_estimators: int = 25,
        max_depth: int = 5,
        max_features: float = 0.8,
        min_samples_leaf: int = 1,
        min_samples_split: int = 2,
        bootstrap: bool = True,
        random_state: Optional[int] = None,
    ):
        super().__init__()
        self.base_estimator = RandomForestRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            max_features=max_features,
            min_samples_leaf=min_samples_leaf,
            min_samples_split=min_samples_split,
            bootstrap=bootstrap,
            random_state=random_state,
        )

    def _fit_implementation(self, X: np.ndarray, y: np.ndarray):
        """Fit the underlying forest and keep a handle to the fitted model.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).

        Returns:
            Self for method chaining.
        """
        self.fitted_model = self.base_estimator
        self.fitted_model.fit(X, y)
        return self

    def _get_candidate_local_distribution(self, X: np.ndarray) -> np.ndarray:
        """Collect per-tree predictions as the local candidate distribution.

        Args:
            X: Features with shape (n_samples, n_features).

        Returns:
            Tree predictions with shape (n_samples, n_estimators).
        """
        return np.column_stack(
            [tree.predict(X) for tree in self.fitted_model.estimators_]
        )
+
+
class QuantileKNN(BaseSingleFitQuantileEstimator):
    """K-nearest neighbors quantile regression from local empirical samples.

    For each query point, the targets of its k nearest training neighbors form
    a local empirical distribution from which quantiles are computed. This
    non-parametric estimate adapts to local data density and naturally widens
    in sparse regions.

    Args:
        n_neighbors: Number of nearest neighbors to use for quantile estimation.
    """

    def __init__(self, n_neighbors: int = 5):
        super().__init__()
        self.n_neighbors = n_neighbors
        self.X_train = None
        self.y_train = None
        self.nn_model = NearestNeighbors(
            n_neighbors=n_neighbors, algorithm="ball_tree", leaf_size=40
        )

    def _fit_implementation(self, X: np.ndarray, y: np.ndarray):
        """Store the training targets and index the features for fast lookup.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).

        Returns:
            Self for method chaining.
        """
        self.X_train = X
        self.y_train = y
        self.nn_model.fit(X)
        return self

    def _get_candidate_local_distribution(self, X: np.ndarray) -> np.ndarray:
        """Gather neighbor targets as the local candidate distribution.

        Args:
            X: Features with shape (n_samples, n_features).

        Returns:
            Neighbor targets with shape (n_samples, n_neighbors).
        """
        _, neighbor_indices = self.nn_model.kneighbors(X)
        return self.y_train[neighbor_indices]
+
+
+class QuantileGP(BaseSingleFitQuantileEstimator):
+ """Gaussian process quantile regression with robust uncertainty quantification.
+
+ Implements quantile regression using Gaussian processes that model the complete
+ conditional distribution p(y|x). Provides analytical quantile computation from
+ Gaussian posteriors with proper noise handling and robust hyperparameter optimization.
+
+ All features are treated as continuous using kernels with Automatic Relevance
+ Determination (ARD). Categorical features should be one-hot encoded prior to
+ being passed to this class.
+
+ Key improvements over basic sklearn GP usage:
+ - Proper noise handling without post-hoc kernel modification
+ - Robust numerical implementation with Cholesky decomposition
+ - Analytical quantile computation for efficiency
+ - Batched prediction for memory efficiency
+ - Consistent kernel usage between training and prediction
+ - ARD kernels for automatic feature relevance determination
+
+ Args:
+ kernel: GP kernel specification. Accepts string names ("rbf", "matern",
+ "rational_quadratic", "exp_sine_squared") with sensible defaults, or
+ custom Kernel objects. Defaults to Matern(nu=1.5).
+ noise_variance: Explicit noise variance. If "optimize", will be learned.
+ If numeric, uses fixed value. Default is "optimize".
+ alpha: Regularization parameter for numerical stability. Range: [1e-12, 1e-6].
+ n_restarts_optimizer: Number of restarts for hyperparameter optimization.
+ random_state: Seed for reproducible optimization and prediction.
+ batch_size: Batch size for prediction to manage memory usage.
+ optimize_hyperparameters: Whether to optimize kernel hyperparameters.
+ If False, uses kernel as-is.
+ prior_lengthscale_concentration: For future custom optimization (unused).
+ prior_lengthscale_rate: For future custom optimization (unused).
+ prior_noise_concentration: For future custom optimization (unused).
+ prior_noise_rate: For future custom optimization (unused).
+
+ Attributes:
+ quantiles: List of quantile levels fitted during training.
+ X_train_: Training features.
+ y_train_: Training targets (normalized).
+ kernel_: Fitted kernel with optimized hyperparameters.
+ noise_variance_: Fitted noise variance.
+ chol_factor_: Cholesky decomposition of kernel matrix.
+ alpha_: Precomputed weights for prediction.
+ y_train_mean_: Mean of training targets.
+ y_train_std_: Standard deviation of training targets.
+ """
+
    def __init__(
        self,
        kernel: Optional[Union[str, Kernel]] = None,
        noise_variance: Optional[Union[str, float]] = "optimize",
        alpha: float = 1e-10,
        n_restarts_optimizer: int = 5,
        random_state: Optional[int] = None,
        batch_size: Optional[int] = None,
        optimize_hyperparameters: bool = True,
        prior_lengthscale_concentration: float = 2.0,
        prior_lengthscale_rate: float = 1.0,
        prior_noise_concentration: float = 1.1,
        prior_noise_rate: float = 30.0,
    ):
        """Store configuration and initialize fitted-state placeholders.

        See the class docstring for parameter semantics. No computation or
        validation happens here; all heavy lifting is deferred to fit.
        """
        super().__init__()
        self.kernel = kernel
        self.noise_variance = noise_variance
        self.alpha = alpha
        self.n_restarts_optimizer = n_restarts_optimizer
        self.random_state = random_state
        self.batch_size = batch_size
        self.optimize_hyperparameters = optimize_hyperparameters
        # Prior hyperparameters are stored but currently unused (reserved for
        # a future custom optimizer, per the class docstring).
        self.prior_lengthscale_concentration = prior_lengthscale_concentration
        self.prior_lengthscale_rate = prior_lengthscale_rate
        self.prior_noise_concentration = prior_noise_concentration
        self.prior_noise_rate = prior_noise_rate
        # NOTE(review): presumably caches norm.ppf results per quantile level
        # for analytical quantile computation — confirm against the predict
        # path, which is outside this view.
        self._ppf_cache = {}

        # Fitted attributes (populated by _fit_implementation / _fit_gp)
        self.X_train_ = None
        self.y_train_ = None
        self.kernel_ = None
        self.noise_variance_ = None
        self.chol_factor_ = None
        self.alpha_ = None
        self.y_train_mean_ = None
        self.y_train_std_ = None
        # Eigendecomposition fallback attributes (set only when Cholesky fails)
        self.eigenvals_ = None
        self.eigenvecs_ = None
+
+ def _get_kernel_object(
+ self,
+ kernel_spec: Optional[Union[str, Kernel]] = None,
+ n_features: Optional[int] = None,
+ ) -> Kernel:
+ """Convert kernel specification to scikit-learn kernel object with ARD support.
+
+ Creates kernels with per-feature length scales for Automatic Relevance
+ Determination (ARD). This allows the model to automatically learn the
+ importance of each feature by optimizing individual length scales.
+
+ Args:
+ kernel_spec: Kernel specification (string name, kernel object, or None).
+ n_features: Number of features for ARD initialization. If None, uses scalar length scale.
+
+ Returns:
+ Scikit-learn kernel object with proper ARD bounds for optimization.
+
+ Raises:
+ ValueError: If unknown kernel name provided or invalid kernel type.
+ """
+ # Initialize length scale for ARD
+ if n_features is not None and n_features > 1:
+ # ARD: one length scale per feature
+ length_scale = np.ones(n_features)
+ length_scale_bounds = (1e-2, 1e2)
+ else:
+ # Scalar length scale for single feature or unspecified
+ length_scale = 1.0
+ length_scale_bounds = (1e-2, 1e2)
+
+ # Default to Matern kernel with ARD
+ if kernel_spec is None:
+ return C(1.0, (1e-3, 1e3)) * Matern(
+ length_scale=length_scale,
+ length_scale_bounds=length_scale_bounds,
+ nu=2.5,
+ )
+
+ # String specifications with ARD support
+ elif isinstance(kernel_spec, str):
+ kernel_map = {
+ "rbf": C(1.0, (1e-3, 1e3))
+ * RBF(
+ length_scale=length_scale, length_scale_bounds=length_scale_bounds
+ ),
+ "matern": C(1.0, (1e-3, 1e3))
+ * Matern(
+ length_scale=length_scale,
+ length_scale_bounds=length_scale_bounds,
+ nu=2.5,
+ ),
+ "rational_quadratic": C(1.0, (1e-3, 1e3))
+ * RationalQuadratic(
+ length_scale=length_scale,
+ length_scale_bounds=length_scale_bounds,
+ alpha=1.0,
+ alpha_bounds=(1e-3, 1e3),
+ ),
+ "exp_sine_squared": C(1.0, (1e-3, 1e3))
+ * ExpSineSquared(
+ length_scale=length_scale,
+ length_scale_bounds=length_scale_bounds,
+ periodicity=1.0,
+ periodicity_bounds=(1e-2, 1e2),
+ ),
+ }
+
+ if kernel_spec not in kernel_map:
+ raise ValueError(f"Unknown kernel name: {kernel_spec}")
+ return kernel_map[kernel_spec]
+
+ # Kernel object - make a deep copy for safety
+ elif isinstance(kernel_spec, Kernel):
+ return copy.deepcopy(kernel_spec)
+
+ else:
+ raise ValueError(
+ f"Kernel must be a string name, Kernel object, or None. Got: {type(kernel_spec)}"
+ )
+
+ def _optimize_hyperparameters(self) -> None:
+ """Optimize kernel hyperparameters and noise variance using sklearn's optimization."""
+ if not self.optimize_hyperparameters:
+ return
+
+ # Determine alpha value for optimization
+ # If noise_variance is "optimize", use a small alpha and let GP optimize noise
+ # If noise_variance is fixed, use it as alpha
+ if self.noise_variance == "optimize":
+ alpha_for_opt = self.alpha # Small regularization only
+ else:
+ alpha_for_opt = self.noise_variance_ + self.alpha
+
+ # Use sklearn's GaussianProcessRegressor for hyperparameter optimization
+ # This provides robust optimization with proper parameter mapping
+ temp_gp = GaussianProcessRegressor(
+ kernel=self.kernel_,
+ alpha=alpha_for_opt,
+ n_restarts_optimizer=self.n_restarts_optimizer,
+ random_state=self.random_state,
+ normalize_y=False, # We handle normalization ourselves
+ )
+
+ try:
+ # Suppress sklearn GP convergence warnings about parameter bounds
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ message=".*close to the specified.*bound.*",
+ category=UserWarning,
+ module="sklearn.gaussian_process.kernels",
+ )
+ temp_gp.fit(self.X_train_, self.y_train_)
+ # Extract optimized kernel
+ self.kernel_ = temp_gp.kernel_
+
+ # Extract optimized noise variance if it was being optimized
+ if self.noise_variance == "optimize":
+ # sklearn's alpha includes both noise and regularization
+ # Extract the optimized noise component
+ self.noise_variance_ = max(temp_gp.alpha - self.alpha, 1e-10)
+
+ except Exception as e:
+ logging.warning(
+ f"Hyperparameter optimization failed: {e}, using default parameters"
+ )
+ # Keep the original kernel and noise variance if optimization fails
+
+ def _fit_implementation(self, X: np.ndarray, y: np.ndarray) -> "QuantileGP":
+ """Fit Gaussian process with proper hyperparameter optimization.
+
+ Implements robust GP fitting with:
+ - Custom hyperparameter optimization with principled priors
+ - Proper noise handling without post-hoc kernel modification
+ - Numerical stability through Cholesky decomposition
+
+ Args:
+ X: Training features with shape (n_samples, n_features).
+ y: Training targets with shape (n_samples,).
+
+ Returns:
+ Self for method chaining.
+ """
+ # Store training data
+ self.X_train_ = X.copy()
+
+ # Normalize targets
+ self.y_train_mean_ = np.mean(y)
+ self.y_train_std_ = np.std(y)
+ if self.y_train_std_ < 1e-12:
+ self.y_train_std_ = 1.0
+ self.y_train_ = (y - self.y_train_mean_) / self.y_train_std_
+
+ # Initialize kernel with ARD support
+ n_features = X.shape[1]
+ self.kernel_ = self._get_kernel_object(self.kernel, n_features)
+
+ # Set noise variance
+ if isinstance(self.noise_variance, (int, float)):
+ self.noise_variance_ = self.noise_variance
+ else:
+ self.noise_variance_ = 1e-6 # Default, will be optimized if needed
+
+ # Optimize hyperparameters
+ self._optimize_hyperparameters()
+
+ # Fit the model with optimized parameters
+ self._fit_gp()
+
+ return self
+
+ def _fit_gp(self) -> None:
+ """Fit GP with current hyperparameters using robust Cholesky decomposition."""
+ # Compute kernel matrix
+ K = self.kernel_(self.X_train_)
+
+ # Add noise and regularization
+ K += (self.noise_variance_ + self.alpha) * np.eye(len(self.X_train_))
+
+ # Robust Cholesky decomposition with progressive regularization
+ regularization_levels = [0, 1e-8, 1e-6, 1e-4, 1e-3]
+
+ for reg in regularization_levels:
+ try:
+ K_reg = K + reg * np.eye(len(self.X_train_)) if reg > 0 else K
+ self.chol_factor_ = cholesky(K_reg, lower=True)
+ if reg > 0:
+ logging.warning(
+ f"Added regularization {reg} for numerical stability"
+ )
+ break
+ except LinAlgError:
+ if reg == regularization_levels[-1]:
+ # Final fallback: use eigendecomposition for very ill-conditioned matrices
+ logging.warning(
+ "Cholesky failed, using eigendecomposition fallback"
+ )
+ self._fit_gp_eigendecomp(K)
+ return
+ continue
+
+ # Solve for alpha using Cholesky decomposition
+ self.alpha_ = solve_triangular(self.chol_factor_, self.y_train_, lower=True)
+
+ def _fit_gp_eigendecomp(self, K: np.ndarray) -> None:
+ """Fallback GP fitting using eigendecomposition for ill-conditioned matrices."""
+ # Eigendecomposition of kernel matrix
+ eigenvals, eigenvecs = np.linalg.eigh(K)
+
+ # Clip negative eigenvalues and add regularization
+ eigenvals = np.maximum(eigenvals, 1e-12)
+
+ # Reconstruct with regularized eigenvalues
+ eigenvecs @ np.diag(eigenvals) @ eigenvecs.T
+
+ # Use pseudo-inverse for fitting
+ try:
+ K_inv = eigenvecs @ np.diag(1.0 / eigenvals) @ eigenvecs.T
+ self.alpha_ = K_inv @ self.y_train_
+ # Store decomposition for prediction
+ self.eigenvals_ = eigenvals
+ self.eigenvecs_ = eigenvecs
+ self.chol_factor_ = None # Signal to use eigendecomp in prediction
+ except Exception as e:
+ raise RuntimeError(f"Both Cholesky and eigendecomposition failed: {e}")
+
+ def predict(self, X: np.ndarray) -> np.ndarray:
+ """Generate quantile predictions using analytical Gaussian distribution.
+
+ Uses the GP posterior mean and variance to compute quantiles analytically
+ as q_τ(x) = μ(x) + σ(x)Φ⁻¹(τ), ensuring monotonic quantile ordering.
+
+ Args:
+ X: Features for prediction with shape (n_samples, n_features).
+
+ Returns:
+ Quantile predictions with shape (n_samples, n_quantiles).
+ """
+ if self.batch_size is not None and len(X) > self.batch_size:
+ results = []
+ for i in range(0, len(X), self.batch_size):
+ batch_X = X[i : i + self.batch_size]
+ batch_result = self._predict_batch(batch_X)
+ results.append(batch_result)
+ return np.vstack(results)
+ else:
+ return self._predict_batch(X)
+
+ def _predict_batch(self, X: np.ndarray) -> np.ndarray:
+ """Compute quantiles analytically from GP posterior.
+
+ Args:
+ X: Features with shape (batch_size, n_features).
+
+ Returns:
+ Quantile predictions with shape (batch_size, n_quantiles).
+ """
+ # Get mean and variance from GP
+ y_mean, y_var = self._predict_mean_var(X)
+ y_std = np.sqrt(y_var).reshape(-1, 1)
+
+ # Get cached inverse normal CDF values
+ ppf_values = self._get_cached_ppf_values()
+
+ # Compute quantiles analytically
+ quantile_preds = y_mean.reshape(-1, 1) + y_std * ppf_values.reshape(1, -1)
+
+ return quantile_preds
+
+ def _predict_mean_var(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+ """Predict mean and variance using Cholesky or eigendecomposition.
+
+ Args:
+ X: Features with shape (n_samples, n_features).
+
+ Returns:
+ Tuple of (y_mean, y_var) with shapes (n_samples,) each.
+ """
+ # Compute kernel between test and training points
+ K_star = self.kernel_(X, self.X_train_)
+
+ if self.chol_factor_ is not None:
+ # Use Cholesky-based computation
+ chol_solve = solve_triangular(self.chol_factor_, K_star.T, lower=True)
+ y_mean = chol_solve.T @ self.alpha_
+
+ # Compute variance (in normalized space)
+ K_star_star = self.kernel_.diag(X)
+ y_var = K_star_star - np.sum(chol_solve**2, axis=0)
+
+ else:
+ # Use eigendecomposition fallback
+ y_mean = K_star @ self.alpha_
+
+ # Compute variance using eigendecomposition
+ K_star_star = self.kernel_.diag(X)
+ # K^{-1} = V * Λ^{-1} * V^T
+ K_inv_K_star = (
+ self.eigenvecs_
+ @ (K_star.T / self.eigenvals_.reshape(-1, 1))
+ @ self.eigenvecs_.T
+ )
+ y_var = K_star_star - np.sum(K_star * K_inv_K_star.T, axis=1)
+
+ # Denormalize mean
+ y_mean = y_mean * self.y_train_std_ + self.y_train_mean_
+
+ # Ensure non-negative variance before denormalization
+ y_var = np.maximum(y_var, 1e-12)
+
+ # Denormalize variance (transforms from normalized to original scale)
+ y_var *= self.y_train_std_**2
+
+ # Add noise variance in original scale for total predictive variance
+ y_var += self.noise_variance_ * self.y_train_std_**2
+
+ return y_mean, y_var
+
+ def _get_cached_ppf_values(self) -> np.ndarray:
+ """Cache inverse normal CDF values for efficiency.
+
+ Returns:
+ Cached inverse normal CDF values with shape (n_quantiles,).
+ """
+ quantiles_key = tuple(self.quantiles)
+ if quantiles_key not in self._ppf_cache:
+ self._ppf_cache[quantiles_key] = np.array(
+ [norm.ppf(q) for q in self.quantiles]
+ )
+ return self._ppf_cache[quantiles_key]
+
+ def _get_candidate_local_distribution(self, X: np.ndarray) -> np.ndarray:
+ """Generate posterior samples for Monte Carlo quantile estimation.
+
+ This method is required by the base class but not used by this implementation
+ since we use analytical quantile computation. Included for compatibility.
+
+ Args:
+ X: Features with shape (n_samples, n_features).
+
+ Returns:
+ Posterior samples with shape (n_samples, n_samples_per_point).
+ """
+ # Get mean and variance from GP
+ y_mean, y_var = self._predict_mean_var(X)
+ y_std = np.sqrt(y_var)
+
+ # Generate samples from the GP posterior for each test point
+ rng = np.random.RandomState(self.random_state)
+ n_samples = 1000 # Default number of samples
+ samples = np.array(
+ [rng.normal(y_mean[i], y_std[i], size=n_samples) for i in range(len(X))]
+ )
+ return samples
+
+
class QuantileLeaf(BaseSingleFitQuantileEstimator):
    """Quantile Regression Forest using raw Y values from leaf nodes (Meinshausen 2006).

    For a prediction point x, collects every training target Y_i whose
    training point X_i lands in the same leaf node as x, across all trees in
    the forest. Quantiles are then empirical percentiles of this pooled set
    of raw Y values rather than of per-tree predictions, which yields more
    faithful uncertainty estimates, especially under heteroscedastic noise.

    Args:
        n_estimators: Number of trees in the forest.
        max_depth: Maximum depth of individual trees.
        max_features: Fraction of features considered for the best split.
        min_samples_split: Minimum samples required to split internal nodes.
        min_samples_leaf: Minimum samples required at leaf nodes.
        bootstrap: Whether to use bootstrap sampling for tree training.
        random_state: Seed for reproducible tree construction.
    """

    def __init__(
        self,
        n_estimators: int = 100,
        max_depth: Optional[int] = None,
        max_features: float = 0.8,
        min_samples_split: int = 2,
        min_samples_leaf: int = 1,
        bootstrap: bool = True,
        random_state: Optional[int] = None,
    ):
        super().__init__()
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.bootstrap = bootstrap
        self.random_state = random_state
        # Populated by _fit_implementation; training data is retained because
        # leaf-level raw targets are needed again at prediction time.
        self.X_train = None
        self.y_train = None
        self.forest = None

    def _fit_implementation(self, X: np.ndarray, y: np.ndarray):
        """Fit the random forest and retain training data for leaf lookup.

        Args:
            X: Training features with shape (n_samples, n_features).
            y: Training targets with shape (n_samples,).

        Returns:
            Self for method chaining.
        """
        self.X_train = X.copy()
        self.y_train = y.copy()

        self.forest = RandomForestRegressor(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            max_features=self.max_features,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            random_state=self.random_state,
        )
        self.forest.fit(X, y)
        return self

    def _get_candidate_local_distribution(self, X: np.ndarray) -> np.ndarray:
        """Pool raw training targets from co-located leaf nodes.

        For each prediction point, gathers the training targets that share a
        leaf with it in each tree (Meinshausen 2006). Rows have different
        lengths, so the result is NaN-padded to a rectangular array; callers
        must ignore NaNs when computing quantiles.

        Args:
            X: Features with shape (n_samples, n_features).

        Returns:
            NaN-padded array of raw Y values with shape (n_samples, variable),
            where each row holds the training targets from leaf nodes that
            contain the corresponding prediction point.
        """
        train_leaf_indices = self.forest.apply(self.X_train)  # (n_train, n_trees)
        test_leaf_indices = self.forest.apply(X)  # (n_test, n_trees)

        # Take the tree count from the fitted forest's output rather than
        # n_estimators, so the loop stays correct if they ever differ.
        n_trees = train_leaf_indices.shape[1]

        candidate_distributions = []
        for i in range(len(X)):
            y_values_for_point = []
            for tree_idx in range(n_trees):
                # Training points sharing this test point's leaf in this tree.
                same_leaf_mask = (
                    train_leaf_indices[:, tree_idx] == test_leaf_indices[i, tree_idx]
                )
                y_values_for_point.extend(self.y_train[same_leaf_mask])
            candidate_distributions.append(np.array(y_values_for_point))

        # Guard the empty-input case: max() over an empty sequence raises.
        if not candidate_distributions:
            return np.empty((0, 0))

        # Pad rows with NaN to build a rectangular array.
        max_length = max(len(dist) for dist in candidate_distributions)
        padded_distributions = np.full((len(X), max_length), np.nan)
        for i, dist in enumerate(candidate_distributions):
            padded_distributions[i, : len(dist)] = dist

        return padded_distributions

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Compute empirical quantiles from pooled leaf targets.

        Overrides the base implementation to handle the variable-length,
        NaN-padded distributions produced by
        _get_candidate_local_distribution.

        Args:
            X: Features for prediction with shape (n_samples, n_features).

        Returns:
            Quantile predictions with shape (n_samples, n_quantiles).
        """
        candidate_distributions = self._get_candidate_local_distribution(X)

        quantile_preds = np.zeros((len(X), len(self.quantiles)))
        for i in range(len(X)):
            row = candidate_distributions[i]
            valid_values = row[~np.isnan(row)]

            if len(valid_values) > 0:
                quantile_preds[i] = np.quantile(valid_values, self.quantiles)
            else:
                # Degenerate case (rare with a sensible forest configuration):
                # fall back to the forest's mean prediction for this point.
                quantile_preds[i] = self.forest.predict(X[i : i + 1])[0]

        return quantile_preds
diff --git a/confopt/selection/sampling/__init__.py b/confopt/selection/sampling/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/confopt/selection/sampling/bound_samplers.py b/confopt/selection/sampling/bound_samplers.py
new file mode 100644
index 0000000..6d86f6c
--- /dev/null
+++ b/confopt/selection/sampling/bound_samplers.py
@@ -0,0 +1,211 @@
+"""
+Bound-based acquisition strategies for conformal prediction optimization.
+
+This module implements acquisition strategies that use prediction interval bounds
+for optimization decisions. The approaches focus on conservative uncertainty
+quantification through lower bound sampling and exploration-exploitation
+trade-offs through adaptive confidence bound strategies.
+
+Bound-based methodology:
+These samplers utilize specific bounds (typically lower bounds for minimization)
+from prediction intervals to make acquisition decisions. This approach provides
+direct interpretable acquisition values while maintaining proper uncertainty
+quantification through conformal prediction intervals.
+
+Key strategies:
+- Pessimistic Lower Bound: Conservative approach using only lower bounds
+- Lower Confidence Bound (LCB): UCB-style exploration with decay schedules
+- Adaptive interval width adjustment based on coverage feedback
+
+The module provides both simple bound-based acquisition and sophisticated
+exploration strategies with theoretical guarantees for convergence in
+optimization under uncertainty scenarios.
+"""
+
+from typing import Optional, List, Literal
+import numpy as np
+from confopt.selection.sampling.utils import (
+ initialize_single_adapter,
+ update_single_interval_width,
+)
+
+
class PessimisticLowerBoundSampler:
    """
    Risk-averse acquisition sampler built on prediction-interval lower bounds.

    Maintains a single confidence level and exposes its miscoverage rate
    (alpha) for interval construction. Downstream acquisition decisions use
    only the pessimistic (lower) side of the resulting intervals, which makes
    the strategy conservative by construction while keeping the computation
    of a single interval cheap and easy to interpret. An optional adapter
    adjusts the interval width over time from observed coverage feedback.
    """

    def __init__(
        self,
        interval_width: float = 0.8,
        adapter: Optional[Literal["DtACI", "ACI"]] = None,
    ):
        """
        Set up the sampler at a fixed confidence level.

        Args:
            interval_width: Target coverage of the prediction interval
                (e.g. 0.8 keeps 80% of true values inside). Wider intervals
                yield more conservative lower bounds. Typical values: 0.7-0.95.
            adapter: Width-adaptation scheme: "DtACI" (aggressive,
                multi-scale), "ACI" (conservative), or None to keep the width
                fixed.
        """
        self.interval_width = interval_width
        self.alpha = 1 - interval_width
        self.adapter = initialize_single_adapter(self.alpha, adapter)

    def fetch_alphas(self) -> List[float]:
        """
        Return the miscoverage rate(s) used for interval construction.

        Returns:
            One-element list holding the current alpha value.
        """
        return [self.alpha]

    def update_interval_width(self, beta: float) -> None:
        """
        Adapt alpha from an observed empirical coverage rate.

        Args:
            beta: Fraction of realized values that fell inside the interval.
        """
        self.alpha = update_single_interval_width(self.adapter, self.alpha, beta)
+
+
class LowerBoundSampler(PessimisticLowerBoundSampler):
    """
    Lower Confidence Bound acquisition strategy with adaptive exploration.

    Implements an LCB strategy for minimization problems:
        LCB(x) = mu(x) - beta(t) * sigma(x)
    where mu(x) is the point estimate, sigma(x) the interval half width, and
    beta(t) a time-decaying exploration parameter. The decay schedule trades
    exploration against exploitation and supports regret guarantees:

    - Inverse square root: beta(t) = sqrt(c / t) (aggressive decay)
    - Logarithmic: beta(t) = sqrt(c * log(t) / t) (balanced)

    Extends the pessimistic lower bound sampler with exploration control
    while retaining efficient single-interval computation and optional
    coverage-based interval adaptation.
    """

    def __init__(
        self,
        interval_width: float = 0.8,
        adapter: Optional[Literal["DtACI", "ACI"]] = None,
        beta_decay: Optional[
            Literal[
                "inverse_square_root_decay",
                "logarithmic_decay",
            ]
        ] = "logarithmic_decay",
        c: float = 1,
        beta_max: float = 10,
    ):
        """
        Initialize LCB sampler with an exploration decay schedule.

        Args:
            interval_width: Confidence level for prediction intervals
                (e.g. 0.8 for 80% intervals); wider intervals imply larger
                exploration bonuses.
            adapter: Interval width adaptation strategy for coverage
                maintenance.
            beta_decay: Exploration decay strategy; "logarithmic_decay" is
                balanced with theoretical guarantees,
                "inverse_square_root_decay" decays faster, None keeps
                beta fixed at 1.
            c: Exploration constant scaling the exploration bonus. Typical
                values: 0.1-10.
            beta_max: Upper cap on the exploration parameter, preventing
                excessive exploration in early iterations.
        """
        super().__init__(interval_width, adapter)
        self.beta_decay = beta_decay
        self.c = c
        self.t = 1  # Time step counter for decay computation
        self.beta = 1  # Current exploration parameter
        self.beta_max = beta_max
        self.mu_max = float("-inf")  # Tracking for potential future use

    def update_exploration_step(self) -> None:
        """
        Advance the time step and recompute the exploration parameter.

        Applies the configured decay schedule so exploration shrinks as
        confidence in the model grows, then enforces the beta_max cap that
        the constructor documents.

        Raises:
            ValueError: If beta_decay is not a recognized strategy.
        """
        self.t += 1
        if self.beta_decay == "inverse_square_root_decay":
            self.beta = np.sqrt(self.c / self.t)
        elif self.beta_decay == "logarithmic_decay":
            self.beta = np.sqrt((self.c * np.log(self.t)) / self.t)
        elif self.beta_decay is None:
            self.beta = 1
        else:
            raise ValueError(
                "beta_decay must be 'inverse_square_root_decay', 'logarithmic_decay', or None."
            )
        # Enforce the documented cap on exploration magnitude (only binds for
        # large exploration constants c).
        self.beta = min(self.beta, self.beta_max)

    def calculate_ucb_predictions(
        self,
        point_estimates: Optional[np.ndarray] = None,
        half_width: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """
        Calculate Lower Confidence Bound values for acquisition.

        Combines point estimates (exploitation) with a beta-scaled
        uncertainty bonus (exploration). Lower values indicate more
        attractive candidates for minimization.

        Args:
            point_estimates: Point predictions (e.g. posterior means) for
                each candidate. Required despite the None default, which
                exists only for signature compatibility.
            half_width: Uncertainty estimates (e.g. half interval widths)
                for each candidate. Required despite the None default.

        Returns:
            Array of LCB acquisition values.

        Raises:
            ValueError: If either argument is omitted.
        """
        # The None defaults would previously crash with an opaque TypeError;
        # fail fast with an explicit message instead.
        if point_estimates is None or half_width is None:
            raise ValueError(
                "point_estimates and half_width must both be provided."
            )
        return point_estimates - self.beta * half_width
diff --git a/confopt/selection/sampling/expected_improvement_samplers.py b/confopt/selection/sampling/expected_improvement_samplers.py
new file mode 100644
index 0000000..318e8e4
--- /dev/null
+++ b/confopt/selection/sampling/expected_improvement_samplers.py
@@ -0,0 +1,200 @@
+"""
+Expected Improvement acquisition strategy for conformal prediction optimization.
+
+This module implements Expected Improvement (EI) acquisition functions using
+conformal prediction intervals to quantify uncertainty. The approach extends
+classical Bayesian optimization's Expected Improvement to conformal prediction
+settings, enabling efficient acquisition function optimization without requiring
+explicit posterior distributions.
+
+Expected Improvement methodology:
+The acquisition function computes the expected value of improvement over the
+current best observation by sampling from prediction intervals. This provides
+a natural exploration-exploitation balance, with high values indicating either
+high predicted improvement (exploitation) or high uncertainty (exploration).
+
+Mathematical foundation:
+EI(x) = E[max(f_min - f(x), 0)] where f_min is the current best value and
+the expectation is computed by Monte Carlo sampling from prediction intervals.
+
+Key features:
+- Monte Carlo estimation of expected improvement using interval sampling
+- Adaptive current best value tracking for dynamic optimization
+- Quantile-based interval construction with symmetric pairing
+- Adaptive interval width adjustment using coverage feedback
+- Efficient vectorized computation for large candidate sets
+
+The module integrates with conformal prediction frameworks by accepting
+ConformalBounds objects and providing standardized interfaces for uncertainty
+quantification and acquisition function optimization.
+"""
+
+from typing import Optional, List, Literal
+import numpy as np
+from confopt.wrapping import ConformalBounds
+from confopt.selection.sampling.utils import (
+ initialize_quantile_alphas,
+ initialize_multi_adapters,
+ update_multi_interval_widths,
+ validate_even_quantiles,
+ flatten_conformal_bounds,
+)
+
+
class ExpectedImprovementSampler:
    """
    Expected Improvement acquisition strategy using conformal prediction intervals.

    Estimates EI(x) = E[max(f_min - f(x), 0)] by Monte Carlo sampling from
    nested conformal prediction intervals, balancing exploitation of
    promising low-value regions with exploration of uncertain ones without
    requiring an explicit posterior model.

    Methodological approach:
    - Constructs nested prediction intervals using symmetric quantile pairing
    - Estimates expected improvement via Monte Carlo sampling from intervals
    - Tracks the current best value for improvement computation
    - Adapts interval widths using empirical coverage feedback

    The sampling step is fully vectorized across observations, so cost scales
    as O(n_observations * num_ei_samples) per acquisition call.
    """

    def __init__(
        self,
        n_quantiles: int = 4,
        adapter: Optional[Literal["DtACI", "ACI"]] = None,
        current_best_value: float = float("inf"),
        num_ei_samples: int = 20,
    ):
        """
        Initialize Expected Improvement sampler with interval construction.

        Args:
            n_quantiles: Number of quantiles for interval construction. Must
                be even for symmetric pairing. Higher values give finer
                uncertainty granularity at more cost. Typical values: 4-8.
            adapter: Interval width adaptation strategy. "DtACI" provides
                aggressive multi-scale adaptation, "ACI" conservative
                adaptation, None disables adaptation.
            current_best_value: Initial best observed objective value used as
                the improvement baseline; updated via update_best_value().
            num_ei_samples: Number of Monte Carlo samples for EI estimation.
                Higher values are more accurate but costlier. Typical
                values: 10-50.
        """
        validate_even_quantiles(n_quantiles, "Expected Improvement")

        self.n_quantiles = n_quantiles
        self.current_best_value = current_best_value
        self.num_ei_samples = num_ei_samples

        # Symmetric quantile-based alphas and matching width adapters.
        self.alphas = initialize_quantile_alphas(n_quantiles)
        self.adapters = initialize_multi_adapters(self.alphas, adapter)

    def update_best_value(self, value: float):
        """
        Record a new observation, keeping the running minimum as the EI baseline.

        Call after each new observation so improvement calculations stay
        accurate.

        Args:
            value: Newly observed objective value to compare with the
                current best.
        """
        self.current_best_value = min(self.current_best_value, value)

    def fetch_alphas(self) -> List[float]:
        """
        Retrieve current alpha values for interval construction.

        Returns:
            List of miscoverage rates ordered from lowest to highest
            confidence (decreasing alpha values).
        """
        return self.alphas

    def update_interval_width(self, betas: List[float]):
        """
        Adapt each interval's alpha from observed coverage rates.

        Args:
            betas: Observed coverage rates per interval, in the same order as
                the alpha values, each in [0, 1].
        """
        self.alphas = update_multi_interval_widths(self.adapters, self.alphas, betas)

    def calculate_expected_improvement(
        self,
        predictions_per_interval: List[ConformalBounds],
    ) -> np.ndarray:
        """
        Estimate Expected Improvement per candidate via Monte Carlo sampling.

        Draws num_ei_samples values from each candidate's prediction
        intervals, computes improvements max(0, current_best - sample), and
        averages them.

        Args:
            predictions_per_interval: ConformalBounds per confidence level;
                all bounds must share the same number of observations.

        Returns:
            Negated expected improvements with shape (n_observations,). More
            negative values indicate more attractive candidates (minimization
            convention).
        """
        # Flatten intervals into an (n_observations, n_bounds) matrix.
        all_bounds = flatten_conformal_bounds(predictions_per_interval)

        n_observations = len(predictions_per_interval[0].lower_bounds)

        # Random column indices per observation for Monte Carlo estimation.
        idxs = np.random.randint(
            0, all_bounds.shape[1], size=(n_observations, self.num_ei_samples)
        )

        # Gather all samples in a single vectorized fancy-indexing step
        # instead of a Python loop over observations.
        row_index = np.arange(n_observations)[:, None]
        realizations_per_observation = all_bounds[row_index, idxs]

        # Improvements over the current best, clipped at zero.
        improvements_per_observation = np.maximum(
            0, self.current_best_value - realizations_per_observation
        )

        # Estimate expected improvement as the sample mean.
        expected_improvements = np.mean(improvements_per_observation, axis=1)

        # Negate for minimization compatibility.
        return -expected_improvements
diff --git a/confopt/selection/sampling/thompson_samplers.py b/confopt/selection/sampling/thompson_samplers.py
new file mode 100644
index 0000000..62328fa
--- /dev/null
+++ b/confopt/selection/sampling/thompson_samplers.py
@@ -0,0 +1,171 @@
+"""
+Thompson sampling strategy for conformal prediction acquisition.
+
+This module implements Thompson sampling for conformal prediction, providing
+a probabilistic approach to exploration-exploitation trade-offs in optimization
+under uncertainty. The implementation uses random sampling from prediction
+intervals to approximate posterior sampling, enabling efficient acquisition
+function optimization with proper uncertainty quantification.
+
+Thompson sampling methodology:
+The sampler randomly draws values from available prediction intervals to simulate
+sampling from posterior distributions over the objective function. This approach
+naturally balances exploration of uncertain regions with exploitation of
+promising areas, providing theoretical guarantees for regret minimization in
+bandit-style optimization problems.
+
+Key features:
+- Quantile-based interval construction with symmetric pairing
+- Adaptive interval width adjustment using coverage feedback
+- Optional optimistic sampling with point estimate integration
+- Efficient vectorized sampling across multiple intervals
+- Integration with conformal prediction uncertainty quantification
+
+The module integrates with the broader conformal optimization framework by
+accepting ConformalBounds objects and providing standardized interfaces for
+alpha value management and interval width adaptation.
+"""
+
+from typing import Optional, List, Literal
+import numpy as np
+from confopt.wrapping import ConformalBounds
+from confopt.selection.sampling.utils import (
+ initialize_quantile_alphas,
+ initialize_multi_adapters,
+ update_multi_interval_widths,
+ validate_even_quantiles,
+ flatten_conformal_bounds,
+)
+
+
+class ThompsonSampler:
+ """
+ Thompson sampling acquisition strategy for conformal prediction optimization.
+
+ This class implements Thompson sampling using conformal prediction intervals
+ as approximations to posterior distributions. The sampler randomly draws
+ values from prediction intervals to balance exploration and exploitation,
+ providing a principled approach to acquisition function optimization under
+ uncertainty.
+
+ The implementation supports multiple confidence levels through quantile-based
+ interval construction, adaptive interval width adjustment based on coverage
+ feedback, and optional optimistic sampling for enhanced exploration.
+
+ Methodological approach:
+ - Constructs nested prediction intervals using symmetric quantile pairing
+ - Samples randomly from flattened interval representations
+ - Optionally incorporates point estimates for optimistic exploration
+ - Adapts interval widths using empirical coverage rates
+
+ Performance characteristics:
+ - O(n_intervals * n_observations) sampling complexity
+ - Efficient vectorized operations for large candidate sets
+ - Minimal memory overhead through flattened representations
+ """
+
+ def __init__(
+ self,
+ n_quantiles: int = 4,
+ adapter: Optional[Literal["DtACI", "ACI"]] = None,
+ enable_optimistic_sampling: bool = False,
+ ):
+ """
+ Initialize Thompson sampler with quantile-based interval construction.
+
+ Args:
+ n_quantiles: Number of quantiles for interval construction. Must be even
+ to enable symmetric pairing. Higher values provide finer uncertainty
+ granularity but increase computational cost. Typical values: 4-8.
+ adapter: Interval width adaptation strategy. "DtACI" provides aggressive
+ multi-scale adaptation, "ACI" offers conservative single-scale
+ adaptation, None disables adaptation.
+ enable_optimistic_sampling: Whether to incorporate point estimates for
+ optimistic exploration. When enabled, sampled values are capped
+ by point predictions to encourage exploitation of promising regions.
+ """
+ validate_even_quantiles(n_quantiles, "Thompson")
+
+ self.n_quantiles = n_quantiles
+ self.enable_optimistic_sampling = enable_optimistic_sampling
+
+ # Initialize symmetric quantile-based alpha values
+ self.alphas = initialize_quantile_alphas(n_quantiles)
+ # Configure adapters for interval width adjustment
+ self.adapters = initialize_multi_adapters(self.alphas, adapter)
+
+ def fetch_alphas(self) -> List[float]:
+ """
+ Retrieve current alpha values for interval construction.
+
+ Returns:
+ List of alpha values (miscoverage rates) for each confidence level,
+ ordered from highest to lowest confidence (increasing alpha values).
+ """
+ return self.alphas
+
+ def update_interval_width(self, betas: List[float]):
+ """
+ Update interval widths using observed coverage rates.
+
+ This method applies adaptive interval width adjustment based on empirical
+ coverage feedback. Each interval's alpha parameter is updated independently
+ using its corresponding observed coverage rate, allowing for fine-grained
+ control over uncertainty quantification accuracy.
+
+ Args:
+ betas: Observed coverage rates for each interval, in the same order
+ as the alpha values. Values should be in [0, 1] representing
+ the fraction of true values falling within each interval.
+ """
+ self.alphas = update_multi_interval_widths(self.adapters, self.alphas, betas)
+
+ def calculate_thompson_predictions(
+ self,
+ predictions_per_interval: List[ConformalBounds],
+ point_predictions: Optional[np.ndarray] = None,
+ ) -> np.ndarray:
+ """
+ Generate Thompson sampling predictions through random interval sampling.
+
+ This method implements the core Thompson sampling logic by randomly
+ selecting values from the available prediction intervals. The sampling
+ process approximates drawing from posterior distributions over the
+ objective function, enabling principled exploration-exploitation
+ trade-offs.
+
+ Methodology:
+ 1. Flatten prediction intervals into efficient matrix representation
+ 2. Randomly sample column indices for each observation
+ 3. Extract corresponding interval bounds
+ 4. Optionally apply optimistic capping using point estimates
+
+ Args:
+ predictions_per_interval: List of ConformalBounds objects containing
+ lower and upper bounds for each confidence level. All bounds
+ must have the same number of observations.
+ point_predictions: Optional point estimates for optimistic sampling.
+ When provided and optimistic sampling is enabled, sampled values
+ are capped at point estimates to encourage exploitation.
+
+ Returns:
+ Array of sampled predictions with shape (n_observations,). Each value
+ represents a random draw from the corresponding observation's
+ prediction intervals, potentially capped by point estimates.
+ """
+ # Flatten intervals into efficient matrix representation
+ all_bounds = flatten_conformal_bounds(predictions_per_interval)
+ n_observations = len(predictions_per_interval[0].lower_bounds)
+ n_intervals = all_bounds.shape[1]
+
+ # Randomly sample interval bounds for each observation
+ idx = np.random.randint(0, n_intervals, size=n_observations)
+ sampled_bounds = np.array(
+ [all_bounds[i, idx[i]] for i in range(n_observations)]
+ )
+
+ # Apply optimistic capping if enabled and point predictions available
+ if self.enable_optimistic_sampling and point_predictions is not None:
+ sampled_bounds = np.minimum(sampled_bounds, point_predictions)
+
+ return sampled_bounds
diff --git a/confopt/selection/sampling/utils.py b/confopt/selection/sampling/utils.py
new file mode 100644
index 0000000..f62c7c6
--- /dev/null
+++ b/confopt/selection/sampling/utils.py
@@ -0,0 +1,284 @@
+"""
+Utility functions for sampling strategies in conformal prediction.
+
+This module provides shared functionality used across different sampler implementations,
+including alpha initialization strategies, adapter configuration for interval width
+adjustment, and common preprocessing utilities. The module implements quantile-based
+alpha initialization following symmetric quantile pairing methodology and provides
+standardized interfaces for interval width adaptation using coverage rate feedback.
+
+Key architectural components:
+- Quantile-based alpha value initialization using symmetric pairing
+- Multi-adapter configuration for complex sampling strategies
+- Interval width update mechanisms with coverage rate feedback
+- Validation utilities for sampling parameter constraints
+- Conformal bounds preprocessing for efficient computation
+
+Integration context:
+The utilities in this module are designed to be used by all sampling strategy
+implementations, providing consistent interfaces for common operations while
+allowing each sampler to implement its specific acquisition logic.
+"""
+
+from typing import Optional, List, Literal
+import warnings
+from confopt.selection.adaptation import DtACI
+from confopt.wrapping import ConformalBounds
+import numpy as np
+
+
+def initialize_quantile_alphas(n_quantiles: int) -> List[float]:
+ """
+ Initialize alpha values using symmetric quantile pairing methodology.
+
+ This function implements a symmetric quantile initialization strategy where
+ quantiles are paired symmetrically around the median, and alpha values are
+ computed as the complement of the quantile interval width. This approach
+ ensures balanced coverage across different uncertainty levels while maintaining
+ proper nesting of prediction intervals.
+
+ The methodology creates quantiles using equal spacing in the cumulative
+ distribution, then pairs them symmetrically to form nested intervals with
+ increasing alpha values (decreasing confidence levels).
+
+ Args:
+ n_quantiles: Number of quantiles to generate. Must be even to ensure
+ symmetric pairing. Typical values are 4, 6, or 8 depending on the
+ desired granularity of uncertainty quantification.
+
+ Returns:
+ List of alpha values in increasing order, corresponding to decreasing
+ confidence levels. Length is n_quantiles // 2.
+
+ Raises:
+ ValueError: If n_quantiles is not even, preventing symmetric pairing.
+
+ Example:
+ >>> alphas = initialize_quantile_alphas(4)
+ >>> print(alphas) # [0.4, 0.8] for 60% and 20% confidence intervals
+ """
+ if n_quantiles % 2 != 0:
+ raise ValueError("Number of quantiles must be even.")
+
+ starting_quantiles = [
+ round(i / (n_quantiles + 1), 2) for i in range(1, n_quantiles + 1)
+ ]
+ alphas = []
+ half_length = len(starting_quantiles) // 2
+
+ for i in range(half_length):
+ lower, upper = starting_quantiles[i], starting_quantiles[-(i + 1)]
+ alphas.append(1 - (upper - lower))
+ return alphas
+
+
+def initialize_multi_adapters(
+ alphas: List[float], adapter: Optional[Literal["DtACI", "ACI"]] = None
+) -> Optional[List[DtACI]]:
+ """
+ Initialize multiple adapters for dynamic interval width adjustment.
+
+ This function creates individual adapters for each alpha value in multi-interval
+ sampling strategies. Each adapter maintains its own coverage tracking and
+ adjustment mechanism, allowing for independent width optimization across
+ different confidence levels.
+
+ The DtACI adapter uses multiple gamma values for robust adaptation, while
+ ACI uses a single gamma value for simpler, more conservative adjustment.
+
+ Args:
+ alphas: List of alpha values, each requiring its own adapter instance.
+ Each alpha corresponds to a different confidence level in the
+ multi-interval sampling strategy.
+ adapter: Adaptation strategy type. "DtACI" provides aggressive adaptation
+ with multiple gamma parameters, while "ACI" provides conservative
+ adaptation with a single gamma parameter.
+
+ Returns:
+ List of initialized adapters corresponding to each alpha value, or None
+ if no adaptation is requested. Each adapter maintains independent state
+ for coverage tracking and interval adjustment.
+
+ Raises:
+ ValueError: If adapter type is not recognized or supported.
+ """
+ if adapter is None:
+ return None
+ elif adapter == "DtACI":
+ return [
+ DtACI(
+ alpha=alpha,
+ gamma_values=[0.001, 0.002, 0.004, 0.008, 0.0160, 0.032, 0.064, 0.128],
+ )
+ for alpha in alphas
+ ]
+ elif adapter == "ACI":
+ return [DtACI(alpha=alpha, gamma_values=[0.005]) for alpha in alphas]
+ else:
+ raise ValueError("adapter must be None, 'DtACI', or 'ACI'")
+
+
+def initialize_single_adapter(
+ alpha: float, adapter: Optional[Literal["DtACI", "ACI"]] = None
+) -> Optional[DtACI]:
+ """
+ Initialize a single adapter for interval width adjustment in single-alpha samplers.
+
+ This function creates a single adapter instance for samplers that operate with
+ a single confidence level. The adapter tracks coverage rates and adjusts the
+ alpha parameter to maintain target coverage while optimizing interval width.
+
+ Args:
+ alpha: The alpha value (miscoverage rate) for the prediction interval.
+ Typical values range from 0.05 to 0.2, corresponding to 95% to 80%
+ confidence levels.
+ adapter: Adaptation strategy type. "DtACI" uses multiple gamma values
+ for robust adaptation across different time scales, while "ACI"
+ uses conservative single-gamma adaptation.
+
+ Returns:
+ Initialized adapter instance for the specified alpha value, or None
+ if no adaptation is requested.
+
+ Raises:
+ ValueError: If adapter type is not recognized.
+ """
+ if adapter is None:
+ return None
+ elif adapter == "DtACI":
+ return DtACI(
+ alpha=alpha,
+ gamma_values=[0.001, 0.002, 0.004, 0.008, 0.0160, 0.032, 0.064, 0.128],
+ )
+ elif adapter == "ACI":
+ return DtACI(alpha=alpha, gamma_values=[0.005])
+ else:
+ raise ValueError("adapter must be None, 'DtACI', or 'ACI'")
+
+
+def update_multi_interval_widths(
+ adapters: Optional[List[DtACI]], alphas: List[float], betas: List[float]
+) -> List[float]:
+ """
+ Update multiple interval widths using coverage rate feedback.
+
+ This function applies adaptive interval width adjustment across multiple
+ confidence levels simultaneously. Each adapter receives its corresponding
+ observed coverage rate and updates its alpha parameter independently,
+ allowing for fine-grained control over interval widths at different
+ confidence levels.
+
+ The update mechanism uses empirical coverage rates to adjust miscoverage
+ parameters, tightening intervals when coverage exceeds targets and
+ widening them when coverage falls short.
+
+ Args:
+ adapters: List of adapter instances, one per interval. If None,
+ no adaptation is performed and original alphas are returned.
+ alphas: Current alpha values for each interval. These serve as
+ fallback values if no adapters are provided.
+ betas: Observed coverage rates for each interval, used to drive
+ the adaptation process. Should have same length as alphas.
+
+ Returns:
+ Updated alpha values after applying coverage-based adaptation.
+ If no adapters are provided, returns the original alpha values.
+ """
+ if adapters:
+ updated_alphas = []
+ for i, (adapter, beta) in enumerate(zip(adapters, betas)):
+ updated_alpha = adapter.update(beta=beta)
+ updated_alphas.append(updated_alpha)
+ return updated_alphas
+ else:
+ return alphas
+
+
+def update_single_interval_width(
+ adapter: Optional[DtACI], alpha: float, beta: float
+) -> float:
+ """
+ Update a single interval width using observed coverage rate feedback.
+
+ This function applies adaptive interval width adjustment for single-interval
+ samplers. The adapter uses the observed coverage rate to adjust the alpha
+ parameter, balancing between maintaining target coverage and optimizing
+ interval efficiency.
+
+ Args:
+ adapter: The adapter instance for interval width adjustment. If None,
+ a warning is issued and the original alpha is returned unchanged.
+ alpha: Current alpha value (miscoverage rate) for the interval.
+ beta: Observed coverage rate used to drive the adaptation process.
+
+ Returns:
+ Updated alpha value after applying coverage-based adaptation, or
+ the original alpha if no adapter is provided.
+
+ Warns:
+ UserWarning: If update is requested but no adapter was initialized.
+ """
+ if adapter is not None:
+ return adapter.update(beta=beta)
+ else:
+ warnings.warn(
+ "'update_interval_width()' method was called, but no adapter was initialized."
+ )
+ return alpha
+
+
+def validate_even_quantiles(n_quantiles: int, sampler_name: str = "sampler") -> None:
+ """
+ Validate quantile count constraints for symmetric sampling strategies.
+
+ This validation function ensures that sampling strategies requiring symmetric
+ quantile pairing receive appropriate input parameters. Many sampling methods
+ rely on symmetric interval construction, which requires even numbers of
+ quantiles for proper mathematical formulation.
+
+ Args:
+ n_quantiles: Number of quantiles to validate.
+ sampler_name: Name of the sampler for descriptive error messages.
+
+ Raises:
+ ValueError: If n_quantiles is not even, preventing symmetric pairing.
+ """
+ if n_quantiles % 2 != 0:
+ raise ValueError(f"Number of {sampler_name} quantiles must be even.")
+
+
+def flatten_conformal_bounds(
+ predictions_per_interval: List[ConformalBounds],
+) -> np.ndarray:
+ """
+ Flatten conformal prediction bounds into efficient matrix representation.
+
+ This preprocessing function transforms a list of ConformalBounds objects
+ into a 2D numpy array for efficient vectorized operations. The flattening
+ interleaves lower and upper bounds to maintain interval relationships
+ while enabling fast numerical computations across all intervals and
+ observations simultaneously.
+
+ The resulting matrix structure supports efficient sampling operations,
+ statistical computations, and vectorized interval manipulations required
+ by acquisition functions.
+
+ Args:
+ predictions_per_interval: List of ConformalBounds objects, each containing
+ lower_bounds and upper_bounds arrays. All bounds objects must have
+ the same number of observations.
+
+ Returns:
+ Flattened bounds array of shape (n_observations, n_intervals * 2) where
+ columns alternate between lower and upper bounds for each interval.
+
+ Example:
+ For 2 intervals and 3 observations:
+ Column order: [interval1_lower, interval1_upper, interval2_lower, interval2_upper]
+ """
+ n_points = len(predictions_per_interval[0].lower_bounds)
+ all_bounds = np.zeros((n_points, len(predictions_per_interval) * 2))
+ for i, interval in enumerate(predictions_per_interval):
+ all_bounds[:, i * 2] = interval.lower_bounds.flatten()
+ all_bounds[:, i * 2 + 1] = interval.upper_bounds.flatten()
+ return all_bounds
diff --git a/confopt/tuning.py b/confopt/tuning.py
index f69af5a..596d3bf 100644
--- a/confopt/tuning.py
+++ b/confopt/tuning.py
@@ -1,887 +1,741 @@
import logging
import random
-from copy import deepcopy
-from typing import Optional, Dict, Any, Tuple, List
+from typing import Optional, Dict, Tuple, get_type_hints, Literal, List
+from confopt.wrapping import ParameterRange
import numpy as np
-from sklearn.metrics import mean_squared_error, accuracy_score, log_loss
-from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from datetime import datetime
-
-from confopt.config import (
- NON_NORMALIZING_ARCHITECTURES,
- METRIC_PROPORTIONALITY_LOOKUP,
- QUANTILE_ESTIMATOR_ARCHITECTURES,
+import inspect
+from confopt.utils.tracking import (
+ Trial,
+ Study,
+ RuntimeTracker,
+ StaticConfigurationManager,
+ DynamicConfigurationManager,
+ ProgressBarManager,
)
-from confopt.estimation import (
- QuantileConformalRegression,
- LocallyWeightedConformalRegression,
+from confopt.utils.optimization import FixedSearcherOptimizer, DecayingSearcherOptimizer
+from confopt.selection.acquisition import (
+ QuantileConformalSearcher,
+ BaseConformalSearcher,
)
-from confopt.optimization import derive_optimal_tuning_count, RuntimeTracker
-from confopt.preprocessing import train_val_split, remove_iqr_outliers
-from confopt.utils import get_tuning_configurations, tabularize_configurations
-
-from confopt.wrapping import TunableModel
-from sklearn.base import BaseEstimator
+from confopt.selection.sampling.bound_samplers import (
+ LowerBoundSampler,
+ PessimisticLowerBoundSampler,
+)
+from confopt.selection.sampling.thompson_samplers import ThompsonSampler
logger = logging.getLogger(__name__)
-def update_model_parameters(
- model_instance: Any, configuration: Dict, random_state: int = None
-):
+def stop_search(
+ n_remaining_configurations: int,
+ current_iter: int,
+ current_runtime: float,
+ max_runtime: Optional[float] = None,
+ max_searches: Optional[int] = None,
+) -> bool:
+ """Determine whether to terminate the hyperparameter search process.
+
+ Evaluates multiple stopping criteria to determine if the optimization should halt.
+ The function implements a logical OR of termination conditions: exhausted search space,
+ runtime budget exceeded, or iteration limit reached.
+
+ Args:
+ n_remaining_configurations: Number of configurations still available for evaluation
+ current_iter: Current iteration count in the search process
+ current_runtime: Elapsed time since search initiation in seconds
+ max_runtime: Maximum allowed runtime in seconds, None for no limit
+ max_searches: Maximum allowed iterations, None for no limit
+
+ Returns:
+ True if any stopping criterion is met, False otherwise
"""
- Updates the attributes of an initialized model object.
-
- Only attributes which are specified in the 'configuration'
- dictionary input of this function will be overridden.
-
- Parameters
- ----------
- model_instance :
- An instance of a prediction model.
- configuration :
- A dictionary whose keys represent the attributes of
- the model instance that need to be overridden and whose
- values represent what they should be overridden to.
- Keys must match model instance attribute names.
- random_state :
- Random generation seed.
-
- Returns
- -------
- updated_model_instance :
- Model instance with updated attributes.
+ if n_remaining_configurations == 0:
+ return True
+
+ if max_runtime is not None:
+ if current_runtime >= max_runtime:
+ return True
+
+ if max_searches is not None:
+ if current_iter >= max_searches:
+ return True
+
+ return False
+
+
+class ConformalTuner:
+ """Conformal prediction-based hyperparameter optimization framework.
+
+ Implements a sophisticated hyperparameter optimization system that combines random search
+ initialization with conformal prediction-guided exploration. The tuner uses uncertainty
+ quantification to make statistically principled decisions about which configurations
+ to evaluate next, providing both efficiency improvements and theoretical guarantees.
+
+ The optimization process follows a two-phase strategy:
+ 1. Random search phase: Explores the search space randomly to establish baseline performance
+ 2. Conformal search phase: Uses conformal prediction models to guide configuration selection
+
+ The framework supports adaptive retraining of prediction models, dynamic configuration
+ sampling, and multi-armed bandit optimization for automatically tuning searcher parameters.
+ Statistical validity is maintained through proper conformal prediction procedures that
+ provide distribution-free coverage guarantees.
+
+ Args:
+ objective_function: Function to optimize, must accept 'configuration' dict parameter
+ search_space: Dictionary mapping parameter names to ParameterRange objects
+ minimize: Whether to minimize (True) or maximize (False) the objective function
+ n_candidates: Number of candidate configurations to sample from the search space at
+ each iteration of conformal search
+ warm_starts: Pre-evaluated (configuration, performance) pairs to seed the search
+ dynamic_sampling: Whether to dynamically resample configuration candidates at each
+ iteration of conformal search
+ random_state: Random seed for reproducible results. Default: None.
+
+ Attributes:
+ study: Container for storing trial results and optimization history
+ config_manager: Handles configuration sampling and tracking
+ search_timer: Tracks total optimization runtime
"""
- updated_model_instance = deepcopy(model_instance)
- for tuning_attr_name, tuning_attr in configuration.items():
- setattr(updated_model_instance, tuning_attr_name, tuning_attr)
- if hasattr(updated_model_instance, "random_state"):
- setattr(updated_model_instance, "random_state", random_state)
- return updated_model_instance
-
-
-def score_predictions(
- y_obs: np.array, y_pred: np.array, scoring_function: str
-) -> float:
- """
- Score a model's predictions against observed realizations.
-
- Parameters
- ----------
- y_obs :
- Observed target variable realizations.
- y_pred :
- Model predicted target variable values.
- scoring_function :
- Type of scoring function to use. Can be one of
- either:
- - 'accuracy_score'
- - 'log_loss'
- - 'mean_squared_error'
-
- Returns
- -------
- score :
- Scored model predictions.
- """
- if scoring_function == "accuracy_score":
- score = accuracy_score(y_true=y_obs, y_pred=y_pred)
- elif scoring_function == "log_loss":
- score = log_loss(y_true=y_obs, y_pred=y_pred)
- elif scoring_function == "mean_squared_error":
- score = mean_squared_error(y_true=y_obs, y_pred=y_pred)
- else:
- raise ValueError(f"{scoring_function} is not a recognized scoring function.")
-
- return score
-
-
-def process_and_split_estimation_data(
- searched_configurations: np.array,
- searched_performances: np.array,
- train_split: float,
- filter_outliers: bool = False,
- outlier_scope: str = "top_and_bottom",
- random_state: Optional[int] = None,
-) -> Tuple[np.array, np.array, np.array, np.array]:
- """
- Preprocess configuration data used to train conformal search estimators.
-
- Data is split into training and validation sets, with optional
- outlier filtering.
-
- Parameters
- ----------
- searched_configurations :
- Parameter configurations selected for search as part
- of conformal hyperparameter optimization framework.
- searched_performances :
- Validation performance of each parameter configuration.
- train_split :
- Proportion of overall configurations that should be allocated
- to the training set.
- filter_outliers :
- Whether to remove outliers from the input configuration
- data based on performance.
- outlier_scope :
- Determines which outliers are removed. Takes:
- - 'top_only': Only upper threshold outliers are removed.
- - 'bottom_only': Only lower threshold outliers are removed.
- - 'top_and_bottom': All outliers are removed.
- random_state :
- Random generation seed.
-
- Returns
- -------
- X_train :
- Training portion of configurations.
- y_train :
- Training portion of configuration performances.
- X_val :
- Validation portion of configurations.
- y_val :
- Validation portion of configuration performances.
- """
- X = searched_configurations.copy()
- y = searched_performances.copy()
- logger.debug(f"Minimum performance in searcher data: {y.min()}")
- logger.debug(f"Maximum performance in searcher data: {y.max()}")
-
- if filter_outliers:
- X, y = remove_iqr_outliers(X=X, y=y, scope=outlier_scope)
-
- X_train, y_train, X_val, y_val = train_val_split(
- X=X,
- y=y,
- train_split=train_split,
- normalize=False,
- ordinal=False,
- random_state=random_state,
- )
-
- return X_train, y_train, X_val, y_val
-
-
-def normalize_estimation_data(
- training_searched_configurations: np.array,
- validation_searched_configurations: np.array,
- searchable_configurations: np.array,
-):
- """
- Normalize configuration data used to train conformal search estimators.
-
- Parameters
- ----------
- training_searched_configurations :
- Training portion of parameter configurations selected for
- search as part of conformal optimization framework.
- validation_searched_configurations :
- Validation portion of parameter configurations selected for
- search as part of conformal optimization framework.
- searchable_configurations :
- Larger range of parameter configurations that remain
- un-searched (i.e. whose validation performance has not
- yet been evaluated).
-
- Returns
- -------
- normalized_training_searched_configurations :
- Normalized training portion of searched parameter
- configurations.
- normalized_validation_searched_configurations :
- Normalized validation portion of searched parameter
- configurations.
- normalized_searchable_configurations :
- Normalized un-searched parameter configurations.
- """
- scaler = StandardScaler()
- scaler.fit(training_searched_configurations)
- normalized_searchable_configurations = scaler.transform(searchable_configurations)
- normalized_training_searched_configurations = scaler.transform(
- training_searched_configurations
- )
- normalized_validation_searched_configurations = scaler.transform(
- validation_searched_configurations
- )
-
- return (
- normalized_training_searched_configurations,
- normalized_validation_searched_configurations,
- normalized_searchable_configurations,
- )
-
-
-def get_best_configuration_idx(
- configuration_performance_bounds: Tuple[np.array, np.array],
- optimization_direction: str,
-) -> int:
- """
- Get index of best performing parameter configuration.
-
- Parameters
- ----------
- configuration_performance_bounds :
- Tuple of upper and lower performance bound estimates
- for each available configuration.
- optimization_direction :
- Whether the best configuration is one that maximizes
- (direct) the upper bound or minimizes (inverse) the
- lower bound.
-
- Returns
- -------
- best_idx :
- Index of best performing configuration based on
- performance bounds.
- """
- (
- performance_lower_bounds,
- performance_higher_bounds,
- ) = configuration_performance_bounds
- if optimization_direction == "inverse":
- best_idx = np.argmin(performance_lower_bounds)
-
- elif optimization_direction == "direct":
- best_idx = np.argmax(performance_higher_bounds)
- else:
- raise ValueError(
- f"{optimization_direction} is not a valid loss direction instruction."
- )
-
- return best_idx
+ def __init__(
+ self,
+ objective_function: callable,
+ search_space: Dict[str, ParameterRange],
+ minimize: bool = True,
+ n_candidates: int = 5000,
+ warm_starts: Optional[List[Tuple[Dict, float]]] = None,
+ dynamic_sampling: bool = True,
+ ) -> None:
+ self.objective_function = objective_function
+ self.check_objective_function()
-def get_best_performance_idx(
- custom_loss_function: str, searched_performances: List[float]
-) -> int:
- if METRIC_PROPORTIONALITY_LOOKUP[custom_loss_function] == "direct":
- best_performance_idx = searched_performances.index(max(searched_performances))
- elif METRIC_PROPORTIONALITY_LOOKUP[custom_loss_function] == "inverse":
- best_performance_idx = searched_performances.index(min(searched_performances))
- else:
- raise ValueError()
+ self.search_space = search_space
+ self.minimize = minimize
+ self.metric_sign = 1 if minimize else -1
+ self.warm_starts = warm_starts
+ self.n_candidates = n_candidates
+ self.dynamic_sampling = dynamic_sampling
+ self.config_manager = None
+
+ def check_objective_function(self) -> None:
+ """Validate objective function signature and type annotations.
+
+ Ensures the objective function conforms to the required interface:
+ single parameter named 'configuration' of type Dict, returning numeric value.
+ This validation prevents runtime errors and ensures compatibility with
+ the optimization framework.
+
+ Raises:
+ ValueError: If function signature doesn't match requirements
+ TypeError: If type annotations are incorrect
+ """
+ signature = inspect.signature(self.objective_function)
+ args = list(signature.parameters.values())
- return best_performance_idx
+ if len(args) != 1:
+ raise ValueError("Objective function must take exactly one argument.")
+ first_arg = args[0]
+ if first_arg.name != "configuration":
+ raise ValueError(
+ "The objective function must take exactly one argument named 'configuration'."
+ )
-def update_adaptive_confidence_level(
- true_confidence_level: float,
- last_confidence_level: float,
- breach: bool,
- learning_rate: float,
-) -> float:
- """
- Update adaptive confidence level based on breach events.
-
- The confidence level is increased or decreased based on
- a specified learning rate and whether the last used interval
- was breached or not.
-
- Parameters
- ----------
- true_confidence_level :
- Global confidence level specified at the beginning of
- conformal hyperparameter search.
- last_confidence_level :
- Confidence level as of the last used interval.
- learning_rate :
- Learning rate dictating the magnitude of the confidence
- level update.
-
- Returns
- -------
- updated_confidence_level :
- Updated confidence level.
- """
- updated_confidence_level = 1 - (
- (1 - last_confidence_level)
- + learning_rate * ((1 - true_confidence_level) - breach)
- )
- updated_confidence_level = min(max(0.01, updated_confidence_level), 0.99)
- logger.debug(
- f"Updated confidence level of {last_confidence_level} to {updated_confidence_level}."
- )
+ type_hints = get_type_hints(self.objective_function)
+ if "configuration" in type_hints and type_hints["configuration"] is not Dict:
+ raise TypeError(
+ "The 'configuration' argument of the objective must be of type Dict."
+ )
+ if "return" in type_hints and type_hints["return"] not in [
+ int,
+ float,
+ np.number,
+ ]:
+ raise TypeError(
+ "The return type of the objective function must be numeric (int, float, or np.number)."
+ )
- return updated_confidence_level
+ def process_warm_starts(self) -> None:
+ """Initialize optimization with pre-evaluated configurations.
+ Processes warm start configurations by marking them as searched and creating
+ corresponding trial records. This allows the optimization to begin with
+ prior knowledge, potentially accelerating convergence by skipping known
+ poor configurations and leveraging good starting points.
-class ConformalSearcher:
- """
- Conformal hyperparameter searcher.
+ The warm start configurations are treated as iteration 0 data and assigned
+ the 'warm_start' acquisition source for tracking purposes.
+ """
+ for idx, (config, performance) in enumerate(self.warm_starts):
+ self.config_manager.mark_as_searched(config, performance)
+ trial = Trial(
+ iteration=idx,
+ timestamp=datetime.now(),
+ configuration=config.copy(),
+ tabularized_configuration=self.config_manager.listify_configs([config])[
+ 0
+ ],
+ performance=performance,
+ acquisition_source="warm_start",
+ )
+ self.study.append_trial(trial)
- Tunes a desired model by inferentially searching a
- specified hyperparameter space using conformal estimators.
- """
+ def initialize_tuning_resources(self) -> None:
+ """Initialize core optimization components and data structures.
- def __init__(
- self,
- model: BaseEstimator | TunableModel,
- X_train: np.array,
- y_train: np.array,
- X_val: np.array,
- y_val: np.array,
- search_space: Dict,
- prediction_type: str,
- custom_loss_function: Optional[str] = None,
- ):
- """
- Create a conformal searcher instance.
-
- Parameters
- ----------
- model :
- Model object to tune through conformal search. Must
- be an instance with a .fit() and .predict() method.
- X_train :
- Training portion of explanatory variable examples.
- y_train :
- Training portion of target variable examples.
- X_val :
- Validation portion of explanatory variable examples.
- y_val :
- Validation portion of target variable examples.
- search_space :
- Dictionary mapping parameter names to possible parameter
- values they can take.
- prediction_type :
- The type of prediction to perform on the X and y data.
- Can be one of either:
- - 'regression'
- - 'classification'
- custom_loss_function :
- Loss functions are inferred based on the type of prediction
- to perform (regression or classification), but if it's
- desirable to use a specific loss function one may be
- specified here. Current support is limited to:
- - 'mean_squared_error'
- - 'accuracy_score'
- - 'log_loss'
+ Sets up the study container for trial tracking, configuration manager for
+ handling search space sampling, and processes any warm start configurations.
+ The configuration manager uses the optimized incremental approach for
+ maximum performance.
"""
+ self.study = Study(
+ metric_optimization="minimize" if self.minimize else "maximize"
+ )
- if isinstance(model, BaseEstimator) or isinstance(model, TunableModel):
- self.model = model
+ # Instantiate appropriate configuration manager based on dynamic_sampling setting
+ if self.dynamic_sampling:
+ self.config_manager = DynamicConfigurationManager(
+ search_space=self.search_space,
+ n_candidate_configurations=self.n_candidates,
+ )
else:
- raise ValueError(
- "Model to tune must be a sklearn BaseEstimator model or wrapped as subclass of TunableModel abstract class."
+ self.config_manager = StaticConfigurationManager(
+ search_space=self.search_space,
+ n_candidate_configurations=self.n_candidates,
)
- self.X_train = X_train
- self.y_train = y_train
- self.X_val = X_val
- self.y_val = y_val
- self.search_space = search_space
- self.prediction_type = prediction_type
+ if self.warm_starts:
+ self.process_warm_starts()
- self.custom_loss_function = (
- self._set_default_evaluation_metric()
- if custom_loss_function is None
- else custom_loss_function
- )
- self.tuning_configurations = self._get_tuning_configurations()
+ def _evaluate_configuration(self, configuration: Dict) -> Tuple[float, float]:
+ """Evaluate a configuration and measure execution time.
- def _set_default_evaluation_metric(self) -> str:
- if self.prediction_type == "regression":
- custom_loss_function = "mean_squared_error"
- elif self.prediction_type == "classification":
- custom_loss_function = "accuracy_score"
- else:
- raise ValueError(
- f"Unable to auto-allocate evaluation metric for {self.prediction_type} prediction type."
- )
- return custom_loss_function
+ Executes the objective function with the given configuration while tracking
+ runtime. This method provides the core evaluation mechanism used throughout
+ both random and conformal search phases.
- def _get_tuning_configurations(self):
- logger.debug("Creating hyperparameter space...")
- tuning_configurations = get_tuning_configurations(
- parameter_grid=self.search_space, n_configurations=1000, random_state=1234
- )
- return tuning_configurations
+ Args:
+ configuration: Parameter configuration dictionary to evaluate
- def _evaluate_configuration_performance(
- self, configuration: Dict, random_state: Optional[int] = None
- ) -> float:
- """
- Evaluate the performance of a specified parameter configuration.
-
- Parameters
- ----------
- configuration :
- Parameter configuration for the base model being tuned using
- conformal search.
- random_state :
- Random generation seed.
-
- Returns
- -------
- performance :
- Specified configuration's validation performance.
+ Returns:
+ Tuple of (performance_value, evaluation_runtime)
"""
- logger.debug(f"Evaluating model with configuration: {configuration}")
+ runtime_tracker = RuntimeTracker()
+ performance = self.objective_function(configuration=configuration)
+ runtime = runtime_tracker.return_runtime()
+ return performance, runtime
- updated_model = update_model_parameters(
- model_instance=self.model,
- configuration=configuration,
- random_state=random_state,
- )
- updated_model.fit(X=self.X_train, y=self.y_train)
+ def random_search(
+ self,
+ max_random_iter: int,
+ max_runtime: Optional[int] = None,
+ max_searches: Optional[int] = None,
+ verbose: bool = True,
+ ) -> None:
+ """Execute random search phase to initialize optimization with baseline data.
+
+ Performs uniform random sampling of configurations to establish initial
+ performance landscape understanding. This phase is crucial for subsequent
+ conformal prediction model training, as it provides the foundational
+ dataset for uncertainty quantification.
+
+ Args:
+ max_random_iter: Maximum number of random configurations to evaluate
+ max_runtime: Optional runtime budget in seconds
+ max_searches: Optional total iteration limit
+ verbose: Whether to display progress information
+ """
- if self.custom_loss_function in ["log_loss"]:
- y_pred = updated_model.predict_proba(self.X_val)
- else:
- y_pred = updated_model.predict(self.X_val)
+ available_configs = self.config_manager.get_searchable_configurations()
+ adj_n_searches = min(max_random_iter, len(available_configs))
+ if adj_n_searches == 0:
+ logger.warning("No configurations available for random search")
- performance = score_predictions(
- y_obs=self.y_val, y_pred=y_pred, scoring_function=self.custom_loss_function
+ search_idxs = np.random.choice(
+ len(available_configs), size=adj_n_searches, replace=False
)
+ sampled_configs = [available_configs[idx] for idx in search_idxs]
- return performance
+ progress_iter = (
+ tqdm(sampled_configs, desc="Random search: ")
+ if verbose
+ else sampled_configs
+ )
- def _random_search(
- self,
- n_searches: int,
- max_runtime: int,
- verbose: bool = True,
- random_state: Optional[int] = None,
- ) -> Tuple[List, List, List, float]:
- """
- Randomly search a portion of the model's hyperparameter space.
-
- Parameters
- ----------
- n_searches :
- Number of random searches to perform.
- max_runtime :
- Maximum runtime after which search stops.
- verbose :
- Whether to print updates during code execution.
- random_state :
- Random generation seed.
-
- Returns
- -------
- searched_configurations :
- List of parameter configurations that were randomly
- selected and searched.
- searched_performances :
- Search performance of each searched configuration,
- consisting of out of sample, validation performance
- of a model trained using the searched configuration.
- searched_timestamps :
- List of timestamps corresponding to each searched
- hyperparameter configuration.
- runtime_per_search :
- Average time taken to train the model being tuned
- across configurations, in seconds.
- """
- random.seed(random_state)
- np.random.seed(random_state)
-
- searched_configurations = []
- searched_performances = []
- searched_timestamps = []
-
- skipped_configuration_counter = 0
- runtime_per_search = 0
-
- shuffled_tuning_configurations = self.tuning_configurations.copy()
- random.seed(random_state)
- random.shuffle(shuffled_tuning_configurations)
- randomly_sampled_configurations = shuffled_tuning_configurations[
- : min(n_searches, len(self.tuning_configurations))
- ]
-
- model_training_timer = RuntimeTracker()
- model_training_timer.pause_runtime()
- if verbose:
- randomly_sampled_configurations = tqdm(
- randomly_sampled_configurations, desc="Random search: "
- )
- for config_idx, hyperparameter_configuration in enumerate(
- randomly_sampled_configurations
- ):
- model_training_timer.resume_runtime()
- validation_performance = self._evaluate_configuration_performance(
- configuration=hyperparameter_configuration, random_state=random_state
- )
- model_training_timer.pause_runtime()
+ for config in progress_iter:
+ validation_performance, training_time = self._evaluate_configuration(config)
if np.isnan(validation_performance):
- skipped_configuration_counter += 1
logger.debug(
"Obtained non-numerical performance, skipping configuration."
)
+ self.config_manager.add_to_banned_configurations(config)
continue
- searched_configurations.append(hyperparameter_configuration.copy())
- searched_performances.append(validation_performance)
- searched_timestamps.append(datetime.now())
+ self.config_manager.mark_as_searched(config, validation_performance)
+
+ trial = Trial(
+ iteration=len(self.study.trials),
+ timestamp=datetime.now(),
+ configuration=config.copy(),
+ tabularized_configuration=self.config_manager.listify_configs([config])[
+ 0
+ ],
+ performance=validation_performance,
+ acquisition_source="rs",
+ target_model_runtime=training_time,
+ )
+ self.study.append_trial(trial)
- runtime_per_search = (
- runtime_per_search + model_training_timer.return_runtime()
- ) / (config_idx - skipped_configuration_counter + 1)
+ searchable_count = self.config_manager.get_searchable_configurations_count()
+ current_runtime = self.search_timer.return_runtime()
- logger.debug(
- f"Random search iter {config_idx} performance: {validation_performance}"
+ stop = stop_search(
+ n_remaining_configurations=searchable_count,
+ current_runtime=current_runtime,
+ max_runtime=max_runtime,
+ current_iter=len(self.study.trials),
+ max_searches=max_searches,
)
+ if stop:
+ break
- if self.search_timer.return_runtime() > max_runtime:
- raise RuntimeError(
- "confopt preliminary random search exceeded total runtime budget. "
- "Retry with larger runtime budget or set iteration-capped budget instead."
- )
+ def setup_conformal_search_resources(
+ self,
+ verbose: bool,
+ max_runtime: Optional[int],
+ max_searches: Optional[int],
+ ) -> Tuple[ProgressBarManager, float]:
+ """Initialize progress tracking and iteration limits for conformal search.
+
+ Sets up the progress bar manager for displaying search progress and calculates
+ the maximum number of conformal search iterations based on total limits and
+ already completed trials from previous phases.
+
+ Args:
+ verbose: Whether to display progress information
+ max_runtime: Optional maximum runtime in seconds
+ max_searches: Optional maximum total iterations
+
+ Returns:
+ Tuple of (progress_manager, conformal_max_searches)
+ """
+ progress_manager = ProgressBarManager(verbose=verbose)
+ progress_manager.create_progress_bar(
+ max_runtime=max_runtime,
+ max_searches=max_searches,
+ current_trials=len(self.study.trials),
+ description="Conformal search",
+ )
- return (
- searched_configurations,
- searched_performances,
- searched_timestamps,
- runtime_per_search,
+ conformal_max_searches = (
+ max_searches - len(self.study.trials)
+ if max_searches is not None
+        else 2**63 - 1  # range() requires an int upper bound; effectively unbounded
)
- @staticmethod
- def _set_conformal_validation_split(X: np.array) -> float:
- if len(X) <= 30:
- validation_split = 5 / len(X)
- else:
- validation_split = 0.33
- return validation_split
+ return progress_manager, conformal_max_searches
- def search(
+ def initialize_searcher_optimizer(
self,
- runtime_budget: int,
- confidence_level: float = 0.8,
- conformal_search_estimator: str = "qgbm",
- n_random_searches: int = 20,
- conformal_retraining_frequency: int = 1,
- enable_adaptive_intervals: bool = True,
- conformal_learning_rate: float = 0.1,
- verbose: bool = True,
- random_state: Optional[int] = None,
+ optimizer_framework: Optional[str],
):
+ """Initialize searcher parameter tuner.
+
+ Args:
+ optimizer_framework: Tuning strategy ('decaying', 'fixed', None)
+
+ Returns:
+ Configured optimizer instance
+ """
+ if optimizer_framework == "fixed":
+ optimizer = FixedSearcherOptimizer(
+ n_tuning_episodes=10,
+ tuning_interval=20,
+ )
+ elif optimizer_framework == "decaying":
+ optimizer = DecayingSearcherOptimizer(
+ n_tuning_episodes=10,
+ initial_tuning_interval=10,
+ decay_rate=0.1,
+ decay_type="linear",
+ max_tuning_interval=40,
+ )
+ elif optimizer_framework is None:
+ optimizer = FixedSearcherOptimizer(
+ n_tuning_episodes=0,
+ tuning_interval=1,
+ )
+ else:
+ raise ValueError(
+ "optimizer_framework must be either 'fixed', 'decaying', or None."
+ )
+ return optimizer
+
+ def retrain_searcher(
+ self,
+ searcher: BaseConformalSearcher,
+ X: np.array,
+ y: np.array,
+ tuning_count: int,
+ ) -> float:
+ """Train conformal prediction searcher on accumulated data.
+
+ Fits the conformal prediction model using the provided data,
+ tracking training time and model performance for adaptive parameter
+ optimization. The tuning_count parameter controls internal hyperparameter
+ optimization within the searcher.
+
+ Args:
+ searcher: Conformal searcher instance to train
+ X: Feature matrix (sign-adjusted)
+ y: Target values (sign-adjusted)
+ tuning_count: Number of internal tuning iterations
+
+ Returns:
+ Training runtime in seconds
"""
- Search model hyperparameter space using conformal estimators.
-
- Model and hyperparameter space are defined in the initialization
- of this class. This method takes as inputs a limit on the duration
- of search and several overrides for search behaviour.
-
- Search involves randomly evaluating an initial number of hyperparameter
- configurations, then training a conformal estimator on the relationship
- between configurations and performance to optimally select the next
- best configuration to sample at each subsequent sampling event.
- Upon exceeding the maximum search duration, search results are stored
- in the class instance and accessible via dedicated externalizing methods.
-
- Parameters
- ----------
- runtime_budget :
- Maximum time budget to allocate to hyperparameter search in seconds.
- After the budget is exceeded, search stops and results are stored in
- the instance for later access.
- An error will be raised if the budget is not sufficient to carry out
- conformal search, in which case it should be raised.
- confidence_level :
- Confidence level used during construction of conformal searchers'
- intervals. The confidence level controls the exploration/exploitation
- tradeoff, with smaller values making search greedier.
- Confidence level must be bound between [0, 1].
- conformal_search_estimator :
- String identifier specifying which type of estimator should be
- used to infer model hyperparameter performance.
- Supported estimators include:
- - 'qgbm' (default): quantile gradient boosted machine.
- - 'qrf': quantile random forest.
- - 'kr': kernel ridge.
- - 'gp': gaussian process.
- - 'gbm': gradient boosted machine.
- - 'knn': k-nearest neighbours.
- - 'rf': random forest.
- - 'dnn': dense neural network.
- n_random_searches :
- Number of initial random searches to perform before switching
- to inferential search. A larger number delays the beginning of
- conformal search, but provides the search estimator with more
- data and more robust patterns. The more parameters are being
- optimized during search, the more random search observations
- are needed before the conformal searcher can extrapolate
- effectively. This value defaults to 20, which is the minimum
- advisable number before the estimator will struggle to train.
- conformal_retraining_frequency :
- Sampling interval after which conformal search estimators should be
- retrained. Eg. an interval of 5, would mean conformal estimators
- are retrained after every 5th sampled/searched parameter configuration.
- A lower retraining frequency is always desirable, but may be increased
- to reduce runtime.
- enable_adaptive_intervals :
- Whether to allow conformal intervals used for configuration sampling
- to change after each sampling event. This allows for better interval
- coverage under covariate shift and is enabled by default.
- conformal_learning_rate :
- Learning rate dictating how rapidly adaptive intervals are updated.
- verbose :
- Whether to print updates during code execution.
- random_state :
- Random generation seed.
+ runtime_tracker = RuntimeTracker()
+ searcher.fit(
+ X=X,
+ y=y,
+ tuning_iterations=tuning_count,
+ )
+
+ training_runtime = runtime_tracker.return_runtime()
+ return training_runtime
+
+ def select_next_configuration(
+ self,
+ searcher: BaseConformalSearcher,
+ searchable_configs: List,
+ transformed_configs: np.array,
+ ) -> Dict:
+ """Select the most promising configuration using conformal predictions.
+
+ Uses the conformal searcher to predict lower bounds for all available
+ configurations and selects the one with the minimum predicted lower bound.
+ This implements a pessimistic acquisition strategy that favors configurations
+ with high confidence of good performance.
+
+ Args:
+ searcher: Trained conformal searcher for predictions
+ searchable_configs: List of available configuration dictionaries
+ transformed_configs: Scaled feature matrix for configurations
+
+ Returns:
+ Selected configuration dictionary
"""
+ bounds = searcher.predict(X=transformed_configs)
+ next_idx = np.argmin(bounds)
+ next_config = searchable_configs[next_idx]
+ return next_config
- self.random_state = random_state
- self.search_timer = RuntimeTracker()
+ def get_interval_if_applicable(
+ self,
+ searcher: BaseConformalSearcher,
+ transformed_config: np.array,
+ ) -> Tuple[Optional[float], Optional[float]]:
+ """Get prediction interval bounds if supported by searcher.
+
+ Returns the lower and upper bounds of the prediction interval for
+ configurations using lower bound samplers. This provides the raw
+ interval information for storage and analysis.
+
+ Args:
+ searcher: Conformal searcher instance
+ transformed_config: Scaled configuration features
+
+ Returns:
+ Tuple of (lower_bound, upper_bound) if applicable, (None, None) otherwise
+ """
+ if isinstance(
+ searcher.sampler, (LowerBoundSampler, PessimisticLowerBoundSampler)
+ ):
+ lower_bound, upper_bound = searcher.get_interval(X=transformed_config)
+ return lower_bound, upper_bound
+ else:
+ return None, None
+
+ def update_optimizer_parameters(
+ self,
+ optimizer,
+ search_iter: int,
+ ) -> Tuple[int, int]:
+ """Update multi-armed bandit optimizer and select new parameter values.
+
+ Updates the parameter optimizer with the current search iteration and
+ selects new parameter values for subsequent iterations.
+
+ Args:
+ optimizer: Multi-armed bandit optimizer instance
+ search_iter: Current search iteration number
+
+ Returns:
+ Tuple of (new_tuning_count, new_searcher_retuning_frequency)
+ """
+ optimizer.update(
+ search_iter=search_iter,
+ )
+
+ new_tuning_count, new_searcher_retuning_frequency = optimizer.select_arm()
+ return new_tuning_count, new_searcher_retuning_frequency
+ def conformal_search(
+ self,
+ searcher: BaseConformalSearcher,
+ verbose: bool,
+ max_searches: Optional[int],
+ max_runtime: Optional[int],
+ optimizer_framework: Optional[str] = None,
+ ) -> None:
+ """Execute conformal prediction-guided hyperparameter search.
+
+ Implements the main conformal search loop that iteratively trains conformal
+ prediction models, selects promising configurations based on uncertainty
+ quantification, and updates the models with new observations.
+
+ Args:
+ searcher: Conformal prediction searcher for configuration selection
+ verbose: Whether to display search progress
+ max_searches: Maximum total iterations including previous phases
+ max_runtime: Maximum total runtime budget in seconds
+ optimizer_framework: Parameter tuning strategy
+ """
(
- self.searched_configurations,
- self.searched_performances,
- self.searched_timestamps,
- runtime_per_search,
- ) = self._random_search(
- n_searches=n_random_searches,
- max_runtime=runtime_budget,
- verbose=verbose,
- random_state=random_state,
+ progress_manager,
+ conformal_max_searches,
+ ) = self.setup_conformal_search_resources(verbose, max_runtime, max_searches)
+ optimizer = self.initialize_searcher_optimizer(
+ optimizer_framework=optimizer_framework,
)
- search_model_tuning_count = 0
+ tuning_count = 0
+ searcher_retuning_frequency = 1
+ training_runtime = 0
- search_idx_range = range(len(self.tuning_configurations) - n_random_searches)
- search_progress_bar = tqdm(total=runtime_budget, desc="Conformal search: ")
- for config_idx in search_idx_range:
- if verbose:
- search_progress_bar.update(
- int(self.search_timer.return_runtime()) - search_progress_bar.n
- )
- searchable_configurations = [
- configuration
- for configuration in self.tuning_configurations
- if configuration not in self.searched_configurations
- ]
- tabularized_searchable_configurations = tabularize_configurations(
- configurations=searchable_configurations
- ).to_numpy()
- tabularized_searched_configurations = tabularize_configurations(
- configurations=self.searched_configurations.copy()
- ).to_numpy()
-
- validation_split = ConformalSearcher._set_conformal_validation_split(
- tabularized_searched_configurations
- )
- remove_outliers = (
- True
- if self.custom_loss_function == "log_loss"
- or self.prediction_type == "regression"
- else False
+ for search_iter in range(conformal_max_searches):
+ progress_manager.update_progress(
+ current_runtime=(
+ self.search_timer.return_runtime() if max_runtime else None
+ ),
+ iteration_count=1 if max_searches else 0,
)
- outlier_scope = "top_only"
- (
- X_train_conformal,
- y_train_conformal,
- X_val_conformal,
- y_val_conformal,
- ) = process_and_split_estimation_data(
- searched_configurations=tabularized_searched_configurations,
- searched_performances=np.array(self.searched_performances),
- train_split=(1 - validation_split),
- filter_outliers=remove_outliers,
- outlier_scope=outlier_scope,
- random_state=random_state,
+
+ X = self.config_manager.tabularize_configs(
+ self.config_manager.searched_configs
)
+ y = np.array(self.config_manager.searched_performances) * self.metric_sign
+
+ searchable_configs = self.config_manager.get_searchable_configurations()
+ X_searchable = self.config_manager.tabularize_configs(searchable_configs)
+
+        if search_iter == 0 or search_iter % searcher_retuning_frequency == 0:
+ training_runtime = self.retrain_searcher(searcher, X, y, tuning_count)
- if conformal_search_estimator.lower() not in NON_NORMALIZING_ARCHITECTURES:
(
- X_train_conformal,
- X_val_conformal,
- tabularized_searchable_configurations,
- ) = normalize_estimation_data(
- training_searched_configurations=X_train_conformal,
- validation_searched_configurations=X_val_conformal,
- searchable_configurations=tabularized_searchable_configurations,
+ tuning_count,
+ searcher_retuning_frequency,
+ ) = self.update_optimizer_parameters(
+ optimizer,
+ search_iter,
)
- hit_retraining_interval = config_idx % conformal_retraining_frequency == 0
- if config_idx == 0 or hit_retraining_interval:
- if config_idx == 0:
- latest_confidence_level = confidence_level
-
- if conformal_search_estimator in QUANTILE_ESTIMATOR_ARCHITECTURES:
- conformal_regressor = QuantileConformalRegression(
- quantile_estimator_architecture=conformal_search_estimator
- )
-
- conformal_regressor.fit(
- X_train=X_train_conformal,
- y_train=y_train_conformal,
- X_val=X_val_conformal,
- y_val=y_val_conformal,
- confidence_level=latest_confidence_level,
- tuning_iterations=search_model_tuning_count,
- random_state=random_state,
- )
-
- else:
- (
- HR_X_pe_fitting,
- HR_y_pe_fitting,
- HR_X_ve_fitting,
- HR_y_ve_fitting,
- ) = train_val_split(
- X_train_conformal,
- y_train_conformal,
- train_split=0.75,
- normalize=False,
- random_state=random_state,
- )
- logger.debug(
- f"Obtained sub training set of size {HR_X_pe_fitting.shape} "
- f"and sub validation set of size {HR_X_ve_fitting.shape}"
- )
-
- conformal_regressor = LocallyWeightedConformalRegression(
- point_estimator_architecture=conformal_search_estimator,
- demeaning_estimator_architecture=conformal_search_estimator,
- variance_estimator_architecture=conformal_search_estimator,
- )
-
- conformal_regressor.fit(
- X_pe=HR_X_pe_fitting,
- y_pe=HR_y_pe_fitting,
- X_ve=HR_X_ve_fitting,
- y_ve=HR_y_ve_fitting,
- X_val=X_val_conformal,
- y_val=y_val_conformal,
- tuning_iterations=search_model_tuning_count,
- random_state=random_state,
- )
-
- hyperreg_model_runtime_per_iter = conformal_regressor.training_time
- search_model_tuning_count = derive_optimal_tuning_count(
- baseline_model_runtime=runtime_per_search,
- search_model_runtime=hyperreg_model_runtime_per_iter,
- search_model_retraining_freq=conformal_retraining_frequency,
- search_to_baseline_runtime_ratio=0.3,
+ # Select next configuration
+ next_config = self.select_next_configuration(
+ searcher, searchable_configs, X_searchable
)
- (
- parameter_performance_lower_bounds,
- parameter_performance_higher_bounds,
- ) = conformal_regressor.predict(
- X=tabularized_searchable_configurations,
- confidence_level=latest_confidence_level,
- )
+ # Evaluate configuration
+ performance, _ = self._evaluate_configuration(next_config)
+ if np.isnan(performance):
+ self.config_manager.add_to_banned_configurations(next_config)
+ continue
- maximal_idx = get_best_configuration_idx(
- configuration_performance_bounds=(
- parameter_performance_lower_bounds,
- parameter_performance_higher_bounds,
- ),
- optimization_direction=METRIC_PROPORTIONALITY_LOOKUP[
- self.custom_loss_function
- ],
+ # Get interval bounds
+ transformed_config = self.config_manager.tabularize_configs([next_config])
+
+ lower_bound, upper_bound = self.get_interval_if_applicable(
+ searcher, transformed_config
)
- maximal_parameter = searchable_configurations[maximal_idx].copy()
- validation_performance = self._evaluate_configuration_performance(
- configuration=maximal_parameter, random_state=random_state
+ # Convert bounds back to original units and handle interval orientation
+ if lower_bound is not None and upper_bound is not None:
+ converted_lower = lower_bound * self.metric_sign
+ converted_upper = upper_bound * self.metric_sign
+ # For maximization (metric_sign = -1), swap bounds to maintain proper ordering
+ if not self.minimize:
+ signed_lower_bound = converted_upper # What was upper becomes lower
+ signed_upper_bound = converted_lower # What was lower becomes upper
+ else:
+ signed_lower_bound = converted_lower
+ signed_upper_bound = converted_upper
+ else:
+ signed_lower_bound = None
+ signed_upper_bound = None
+
+ signed_performance = self.metric_sign * performance
+ searcher.update(X=transformed_config.flatten(), y_true=signed_performance)
+
+ self.config_manager.mark_as_searched(next_config, performance)
+ trial = Trial(
+ iteration=len(self.study.trials),
+ timestamp=datetime.now(),
+ configuration=next_config.copy(),
+ tabularized_configuration=self.config_manager.listify_configs(
+ [next_config]
+ )[0],
+ performance=performance,
+ acquisition_source=str(searcher),
+ searcher_runtime=training_runtime,
+ lower_bound=signed_lower_bound,
+ upper_bound=signed_upper_bound,
)
- logger.debug(
- f"Conformal search iter {config_idx} performance: {validation_performance}"
+ self.study.append_trial(trial)
+
+ searchable_count = self.config_manager.get_searchable_configurations_count()
+ should_stop = stop_search(
+ n_remaining_configurations=searchable_count,
+ current_runtime=self.search_timer.return_runtime(),
+ max_runtime=max_runtime,
+ current_iter=len(self.study.trials),
+ max_searches=max_searches,
)
- if np.isnan(validation_performance):
- continue
+ if should_stop:
+ break
- if (
- validation_performance
- > parameter_performance_higher_bounds[maximal_idx]
- ) or (
- validation_performance < parameter_performance_lower_bounds[maximal_idx]
- ):
- is_last_interval_breached = True
- else:
- is_last_interval_breached = False
-
- if enable_adaptive_intervals:
- latest_confidence_level = update_adaptive_confidence_level(
- true_confidence_level=confidence_level,
- last_confidence_level=latest_confidence_level,
- breach=is_last_interval_breached,
- learning_rate=conformal_learning_rate,
- )
+ progress_manager.close_progress_bar()
- self.searched_configurations.append(maximal_parameter.copy())
- self.searched_performances.append(validation_performance)
- self.searched_timestamps.append(datetime.now())
+ def tune(
+ self,
+ max_searches: Optional[int] = 100,
+ max_runtime: Optional[int] = None,
+ searcher: Optional[QuantileConformalSearcher] = None,
+ n_random_searches: int = 15,
+ optimizer_framework: Optional[Literal["decaying", "fixed"]] = None,
+ random_state: Optional[int] = None,
+ verbose: bool = True,
+ ) -> None:
+ """Execute hyperparameter optimization using conformal prediction surrogate models.
+
+ Performs intelligent hyperparameter search by randomly sampling an initial number
+ of hyperparameter configurations, then activating surrogate based search according
+ to the specified searcher.
+
+ Args:
+ max_searches: Maximum total configurations to search (random + conformal searches).
+ Default: 100.
+ max_runtime: Maximum search time in seconds. Search will terminate after this time,
+ regardless of iterations. Default: None (no time limit).
+ searcher: Conformal searcher object responsible for the selection of candidate
+ hyperparameter configurations. When none is provided, the searcher defaults
+ to a QGBM surrogate with a Thompson Sampler.
+ Should you want to use a custom searcher, see confopt.selection.acquisition for
+ searcher instantiation and confopt.selection.acquisition.samplers to set the
+ searcher's sampler.
+ Default: None.
+ n_random_searches: Number of random configurations to evaluate before conformal search.
+ Provides initial training data for the surrogate model. Default: 15.
+ optimizer_framework: Controls how and when the surrogate model tunes its own parameters
+ (this is different from tuning your target model). Options are 'decaying' for
+ adaptive tuning with increasing intervals over time, 'fixed' for
+ deterministic tuning at fixed intervals, or None for no tuning. Surrogate tuning
+ adds computational cost and is recommended only if your target model takes more
+ than 5 minutes to train. Default: None.
+ random_state: Random seed for reproducible results. Default: None.
+ verbose: Whether to enable progress display. Default: True.
+
+ Example:
+ Basic usage::
+
+ import numpy as np
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import FloatRange
+
+ def objective(configuration):
+ x1 = configuration['x1']
+ x2 = configuration['x2']
+ A = 10
+ n = 2
+ return A * n + (x1**2 - A * np.cos(2 * np.pi * x1)) + (x2**2 - A * np.cos(2 * np.pi * x2))
+
+ search_space = {
+ 'x1': FloatRange(min_value=-5.12, max_value=5.12),
+ 'x2': FloatRange(min_value=-5.12, max_value=5.12)
+ }
+
+ tuner = ConformalTuner(
+ objective_function=objective,
+ search_space=search_space,
+ minimize=True
+ )
- if self.search_timer.return_runtime() > runtime_budget:
- if verbose:
- search_progress_bar.update(runtime_budget - search_progress_bar.n)
- search_progress_bar.close()
- break
+ tuner.tune(n_random_searches=10, max_searches=50)
- def get_best_params(self) -> Dict:
+ best_config = tuner.get_best_params()
+ best_score = tuner.get_best_value()
"""
- Extract hyperparameters from best performing parameter
- configuration identified during conformal search.
- Returns
- -------
- best_params :
- Best performing model hyperparameters.
- """
- best_performance_idx = get_best_performance_idx(
- custom_loss_function=self.custom_loss_function,
- searched_performances=self.searched_performances,
- )
- best_params = self.searched_configurations[best_performance_idx]
+ if random_state is not None:
+ random.seed(a=random_state)
+ np.random.seed(seed=random_state)
+
+ if searcher is None:
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="qgbm",
+ sampler=ThompsonSampler(
+ n_quantiles=4,
+ adapter="DtACI",
+ enable_optimistic_sampling=False,
+ ),
+ calibration_split_strategy="adaptive",
+ n_calibration_folds=5,
+ n_pre_conformal_trials=32,
+ )
- return best_params
+ self.initialize_tuning_resources()
+ self.search_timer = RuntimeTracker()
- def get_best_value(self) -> float:
- """
- Extract validation performance of best performing parameter
- configuration identified during conformal search.
+ n_warm_starts = len(self.warm_starts) if self.warm_starts else 0
+ remaining_random_searches = max(0, n_random_searches - n_warm_starts)
+ if remaining_random_searches > 0:
+ self.random_search(
+ max_random_iter=remaining_random_searches,
+ max_runtime=max_runtime,
+ max_searches=max_searches,
+ verbose=verbose,
+ )
- Returns
- -------
- best_performance :
- Best predictive performance achieved.
- """
- best_performance_idx = get_best_performance_idx(
- custom_loss_function=self.custom_loss_function,
- searched_performances=self.searched_performances,
+ self.conformal_search(
+ searcher=searcher,
+ verbose=verbose,
+ max_searches=max_searches,
+ max_runtime=max_runtime,
+ optimizer_framework=optimizer_framework,
)
- best_performance = self.searched_performances[best_performance_idx]
- return best_performance
+ def get_best_params(self) -> Dict:
+ """Retrieve the best configuration found during optimization.
- def configure_best_model(self):
- """
- Extract best initialized (but unfitted) model identified
- during conformal search.
+ Returns the parameter configuration that achieved the optimal objective
+ function value, according to the specified optimization direction.
- Returns
- -------
- best_model :
- Best model from search.
+ Returns:
+ Dictionary containing the optimal parameter configuration
"""
- best_model = update_model_parameters(
- model_instance=self.model,
- configuration=self.get_best_params(),
- random_state=self.random_state,
- )
- return best_model
+ return self.study.get_best_configuration()
- def fit_best_model(self):
- """
- Fit best model identified during conformal search.
-
- Returns
- -------
- best_fitted_model :
- Best model from search, fit on all available data.
- """
- best_fitted_model = self.configure_best_model()
- X_full = np.vstack((self.X_train, self.X_val))
- y_full = np.hstack((self.y_train, self.y_val))
+ def get_best_value(self) -> float:
+ """Retrieve the best objective function value achieved during optimization.
- best_fitted_model.fit(X=X_full, y=y_full)
+ Returns the optimal performance value found across all evaluated
+ configurations, according to the specified optimization direction.
- return best_fitted_model
+ Returns:
+ Best objective function value achieved
+ """
+ return self.study.get_best_performance()
diff --git a/confopt/utils.py b/confopt/utils.py
deleted file mode 100644
index 31f385b..0000000
--- a/confopt/utils.py
+++ /dev/null
@@ -1,216 +0,0 @@
-import logging
-import random
-from typing import Dict, List, Optional, Tuple
-
-import numpy as np
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-
-def get_perceptron_layers(
- n_layers_grid: List[int],
- layer_size_grid: List[int],
- random_seed: Optional[int] = None,
-) -> List[Tuple]:
- """
- Construct list of randomly sampled multilayer perceptron
- configuration tuples.
-
- Each tuple is randomly constructed given a grid of layer
- counts and a grid of layer sizes. A single tuple is just
- a sequence of layer sizes, eg. (10, 20, 60, 20, 10), for
- some diamond shaped perceptron.
-
- Parameters
- ----------
- n_layers_grid :
- List of potential layer counts determining how many
- perceptron layers there can be in a configuration tuple.
- layer_size_grid :
- List of potential perceptron layer sizes from which
- to construct a configuration tuple.
- random_seed :
- Random seed.
-
- Returns
- -------
- layer_tuples :
- Collection of tuples, each of which contains the layer sizes
- determining the architecture of a multilayer perceptron.
- """
- random.seed(random_seed)
- np.random.seed(random_seed)
-
- layer_tuples = []
- # Hard coded:
- discretization = 1000
- for _ in range(discretization):
- tuple_len = random.choice(n_layers_grid)
- layer_tuple = ()
- for _ in range(tuple_len):
- layer_tuple = layer_tuple + (random.choice(layer_size_grid),)
- layer_tuples.append(layer_tuple)
-
- return layer_tuples
-
-
-def get_tuning_configurations(
- parameter_grid: Dict, n_configurations: int, random_state: Optional[int] = None
-) -> List[Dict]:
- """
- Randomly sample list of unique hyperparameter configurations.
-
- Each configuration is constructed from a broader parameter grid of
- possible parameter values.
-
- Parameters
- ----------
- parameter_grid :
- Dictionary of parameter names to possible ranged parameter values.
- n_configurations :
- Number of desired configurations to randomly construct from the
- raw parameter grid.
- random_state :
- Random seed.
-
- Returns
- -------
- configurations :
- Unique randomly constructed hyperparameter configurations.
- """
- random.seed(random_state)
-
- configurations = []
- for _ in range(n_configurations):
- configuration = {}
- for parameter_name in parameter_grid:
- parameter_value = random.choice(parameter_grid[parameter_name])
- configuration[parameter_name] = parameter_value
- if configuration not in configurations:
- configurations.append(configuration)
-
- return configurations
-
-
-def tabularize_configurations(configurations: List[Dict]) -> pd.DataFrame:
- """
- Transform list of configuration dictionaries into tabular training data.
-
- Configurations are type transformed, one hot encoded and wrapped in a
- pandas dataframe to enable regression tasks.
-
- Parameters
- ----------
- configurations :
- List of hyperparameter configurations to tabularize.
-
- Returns
- -------
- tabularized_configurations :
- Tabularized hyperparameter configurations (hyperparameter names
- as columns and hyperparameter values as rows).
- """
- logger.debug(f"Received {len(configurations)} configurations to tabularize.")
-
- # Get maximum length of any list or tuple parameter in configuration (this is
- # important for configuration inputs where lists and tuples can be of variable
- # length depending on the parameter values passed):
- max_tuple_or_list_lens_per_parameter = {}
- for configuration in configurations:
- for parameter_name, parameter in configuration.items():
- if isinstance(parameter, (tuple, list)):
- if parameter_name not in max_tuple_or_list_lens_per_parameter:
- max_tuple_or_list_lens_per_parameter[parameter_name] = len(
- parameter
- )
- elif (
- len(parameter)
- > max_tuple_or_list_lens_per_parameter[parameter_name]
- ):
- max_tuple_or_list_lens_per_parameter[parameter_name] = len(
- parameter
- )
-
- # Create new configurations with flattened list/tuple parameter inputs:
- expanded_configurations = []
- for configuration in configurations:
- expanded_record = {}
- for parameter_name, parameter in configuration.items():
- if isinstance(parameter, (tuple, list)):
- for i in range(max_tuple_or_list_lens_per_parameter[parameter_name]):
- if i < len(parameter):
- expanded_record[f"{parameter_name}_{i}"] = parameter[i]
- else:
- # Below assumes that missing dimensions are equivalent to 0 entries
- # (This works for eg. for the tuple layer sizes of an MLPRegressor)
- expanded_record[f"{parameter_name}_{i}"] = 0
- else:
- expanded_record[parameter_name] = parameter
-
- expanded_configurations.append(expanded_record)
-
- logger.debug(
- f"Expanded configuration list's first element: {expanded_configurations[0]}"
- )
-
- # NOTE: None values are converted to np.nan during pandas ingestion.
- tabularized_configurations = pd.DataFrame(expanded_configurations).replace(
- {np.nan: None}
- )
-
- categorical_columns = []
- column_types = list(tabularized_configurations.dtypes)
- # Loop through each column type in the tabular data and wherever an
- # object column is present (due to None parameter values being mixed
- # in with other types) check whether the column is a None + str mix
- # or a None + float/int mix.
- # For inference purposes, the None values in an otherwise str filled
- # column should be considered another category, and are thus set to
- # "None", while in the None + numericals case they are assumed to mean
- # zero (this last conversion is not accurate for all parameters,
- # eg. the maximum number of leaves in a random forest algorithm,
- # TODO: consider turning the None + numerical columns to categoricals).
- for original_column_idx, column_type in enumerate(column_types):
- if str(column_type) == "object":
- types = []
- column_name = tabularized_configurations.columns[original_column_idx]
- for element in list(tabularized_configurations[column_name]):
- if type(element) not in types:
- types.append(type(element))
- if str in types:
- tabularized_configurations[column_name] = (
- tabularized_configurations[column_name]
- .infer_objects(copy=False)
- .fillna("None")
- )
- categorical_columns.append(column_name)
- elif float in types or int in types:
- tabularized_configurations[column_name] = (
- tabularized_configurations[column_name]
- .infer_objects(copy=False)
- .fillna(0)
- )
- else:
- raise ValueError(
- "Type other than 'str', 'int', 'float' was detected in 'None' handling."
- )
-
- # One hot encode categorical columns (parameters) in tabularized dataset:
- for column_name in categorical_columns:
- tabularized_configurations = pd.concat(
- [
- tabularized_configurations,
- pd.get_dummies(tabularized_configurations[column_name]),
- ],
- axis=1,
- )
- tabularized_configurations = tabularized_configurations.drop(
- [column_name], axis=1
- )
-
- logger.debug(
- f"Tabularized configuration dataframe shape: {tabularized_configurations.shape}"
- )
-
- return tabularized_configurations
diff --git a/confopt/utils/__init__.py b/confopt/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/confopt/utils/configurations/__init__.py b/confopt/utils/configurations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/confopt/utils/configurations/encoding.py b/confopt/utils/configurations/encoding.py
new file mode 100644
index 0000000..5bdf7b2
--- /dev/null
+++ b/confopt/utils/configurations/encoding.py
@@ -0,0 +1,134 @@
+import logging
+from typing import Dict, List
+
+import numpy as np
+import pandas as pd
+from confopt.wrapping import CategoricalRange, ParameterRange
+
+logger = logging.getLogger(__name__)
+
+
+class ConfigurationEncoder:
+ """
+ Encodes configuration dictionaries into numerical feature matrices.
+
+ The encoder supports both continuous and categorical parameters, using one-hot
+ encoding for categorical variables. The encoding schema is constructed from a
+ provided search space and is deterministic, ensuring reproducibility across runs.
+ Intended for use in hyperparameter optimization workflows where explicit and
+ consistent feature representation is required.
+
+ Args:
+ search_space (Dict[str, ParameterRange]):
+ Dictionary mapping parameter names to their respective ParameterRange objects.
+ Categorical parameters must use CategoricalRange.
+ """
+
+ def __init__(self, search_space: Dict[str, ParameterRange]):
+ """
+ Initialize the encoder and build the encoding schema from the search space.
+
+ Args:
+ search_space (Dict[str, ParameterRange]):
+ Parameter search space definition.
+ """
+ self.search_space = search_space
+ self.categorical_mappings = {}
+ self.column_names = []
+ self._build_encoding_schema()
+
+ def transform(self, configurations: List[Dict]) -> pd.DataFrame:
+ """
+ Transform a list of configuration dictionaries into a numerical DataFrame.
+
+ Args:
+ configurations (List[Dict]):
+ List of configuration dictionaries, each mapping parameter names to values.
+
+ Returns:
+ pd.DataFrame: Feature matrix with columns corresponding to the encoding schema.
+ """
+ feature_matrix = self._create_feature_matrix(configurations)
+ return pd.DataFrame(data=feature_matrix, columns=self.column_names)
+
+ def _build_encoding_schema(self) -> None:
+ """
+ Construct the encoding schema and categorical mappings from the search space.
+
+ Ensures deterministic column ordering and explicit one-hot encoding for
+ categorical parameters.
+ """
+ self.categorical_mappings = {}
+ self.column_names = []
+
+ for param_name in sorted(self.search_space.keys()):
+ param_range = self.search_space[param_name]
+
+ if isinstance(param_range, CategoricalRange):
+ self._add_categorical_columns(param_name, param_range.choices)
+ else:
+ self.column_names.append(param_name)
+
+ def _add_categorical_columns(self, param_name: str, choices: List) -> None:
+ """
+ Add one-hot encoded columns for a categorical parameter.
+
+ Args:
+ param_name (str): Name of the categorical parameter.
+ choices (List): List of possible categorical values.
+ """
+ sorted_values = sorted(choices, key=str)
+ param_mappings = {}
+
+ for value in sorted_values:
+ column_idx = len(self.column_names)
+ column_name = f"{param_name}_{value}"
+ param_mappings[value] = column_idx
+ self.column_names.append(column_name)
+
+ self.categorical_mappings[param_name] = param_mappings
+
+ def _create_feature_matrix(self, configurations: List[Dict]) -> np.ndarray:
+ """
+ Create a numerical feature matrix from a list of configurations.
+
+ Args:
+ configurations (List[Dict]):
+ List of configuration dictionaries.
+
+ Returns:
+ np.ndarray: 2D array of shape (n_samples, n_features) with encoded features.
+ """
+ n_samples = len(configurations)
+ n_features = len(self.column_names)
+ feature_matrix = np.zeros((n_samples, n_features))
+
+ for row_idx, config in enumerate(configurations):
+ self._encode_single_config(config, feature_matrix, row_idx)
+
+ return feature_matrix
+
+ def _encode_single_config(
+ self, config: Dict, feature_matrix: np.ndarray, row_idx: int
+ ) -> None:
+ """
+ Encode a single configuration into the feature matrix row.
+
+ Args:
+ config (Dict): Configuration dictionary for a single sample.
+ feature_matrix (np.ndarray): Feature matrix to populate.
+ row_idx (int): Row index for the current configuration.
+ """
+ column_idx = 0
+
+ for param_name in sorted(config.keys()):
+ param_value = config[param_name]
+
+ if param_name in self.categorical_mappings:
+ if param_value in self.categorical_mappings[param_name]:
+ one_hot_idx = self.categorical_mappings[param_name][param_value]
+ feature_matrix[row_idx, one_hot_idx] = 1
+ column_idx += len(self.categorical_mappings[param_name])
+ else:
+ feature_matrix[row_idx, column_idx] = param_value
+ column_idx += 1
diff --git a/confopt/utils/configurations/sampling.py b/confopt/utils/configurations/sampling.py
new file mode 100644
index 0000000..0884b5b
--- /dev/null
+++ b/confopt/utils/configurations/sampling.py
@@ -0,0 +1,242 @@
+from typing import Dict, List, Optional, Literal
+import math
+import logging
+import random
+import numpy as np
+from scipy.stats import qmc
+from confopt.wrapping import (
+ IntRange,
+ FloatRange,
+ CategoricalRange,
+ ParameterRange,
+)
+from confopt.utils.configurations.utils import create_config_hash
+
+logger = logging.getLogger(__name__)
+
+
+def get_tuning_configurations(
+ parameter_grid: Dict[str, ParameterRange],
+ n_configurations: int,
+ random_state: Optional[int] = None,
+ sampling_method: Literal["uniform", "sobol"] = "uniform",
+) -> List[Dict]:
+ """
+ Generate a list of unique parameter configurations for hyperparameter tuning.
+
+ This function delegates to either uniform or Sobol sampling based on the selected method.
+ Uniform sampling draws random values for each parameter independently, while Sobol sampling
+ generates low-discrepancy samples for numeric parameters and randomly assigns categorical values.
+ Ensures uniqueness of configurations by hashing.
+
+ Args:
+ parameter_grid: Dictionary mapping parameter names to their range objects.
+ n_configurations: Number of unique configurations to generate.
+ random_state: Seed for reproducibility.
+ sampling_method: Sampling strategy, either 'uniform' or 'sobol'.
+
+ Returns:
+ List of unique parameter configurations as dictionaries.
+ """
+ if sampling_method == "sobol":
+ samples = _sobol_sampling(
+ parameter_grid=parameter_grid,
+ n_configurations=n_configurations,
+ random_state=random_state,
+ )
+ elif sampling_method == "uniform":
+ samples = _uniform_sampling(
+ parameter_grid=parameter_grid,
+ n_configurations=n_configurations,
+ random_state=random_state,
+ )
+ else:
+ raise ValueError(
+ f"Invalid sampling method: {sampling_method}. Must be 'uniform' or 'sobol'."
+ )
+
+ return samples
+
+
+def _uniform_sampling(
+ parameter_grid: Dict[str, ParameterRange],
+ n_configurations: int,
+ random_state: Optional[int] = None,
+) -> List[Dict]:
+ """
+ Generate unique parameter configurations using uniform random sampling.
+
+ For each configuration, samples each parameter independently: integers and floats are drawn
+ uniformly from their respective ranges (log-scale supported for both), and categorical
+ parameters are chosen randomly from their choices. Ensures uniqueness by hashing each
+ configuration. Sampling stops when the requested number of unique configurations is reached
+ or a maximum attempt threshold is exceeded.
+
+ Args:
+ parameter_grid: Dictionary mapping parameter names to their range objects.
+ n_configurations: Number of unique configurations to generate.
+ random_state: Seed for reproducibility.
+
+ Returns:
+ List of unique parameter configurations as dictionaries.
+ """
+ configurations: List[Dict] = []
+ configurations_set = set()
+ if random_state is not None:
+ random.seed(a=random_state)
+ np.random.seed(seed=random_state)
+
+ param_names = sorted(parameter_grid.keys())
+ max_attempts = min(n_configurations * 3, 50000)
+ attempts = 0
+ while len(configurations) < n_configurations and attempts < max_attempts:
+ config = {}
+ for name in param_names:
+ param_range = parameter_grid[name]
+ if isinstance(param_range, IntRange):
+ if param_range.log_scale:
+ lmin = np.log(max(param_range.min_value, 1))
+ lmax = np.log(param_range.max_value)
+ config[name] = int(np.round(np.exp(random.uniform(lmin, lmax))))
+ # Ensure the value is within bounds
+ config[name] = max(
+ param_range.min_value, min(config[name], param_range.max_value)
+ )
+ else:
+ config[name] = random.randint(
+ param_range.min_value, param_range.max_value
+ )
+ elif isinstance(param_range, FloatRange):
+ if param_range.log_scale:
+ lmin = np.log(max(param_range.min_value, 1e-10))
+ lmax = np.log(param_range.max_value)
+ config[name] = float(np.exp(random.uniform(lmin, lmax)))
+ else:
+ config[name] = random.uniform(
+ param_range.min_value, param_range.max_value
+ )
+ elif isinstance(param_range, CategoricalRange):
+ value = random.choice(param_range.choices)
+ # Ensure bools don't get auto type cast to numpy.bool_ or int:
+ # Check if ALL choices are actually boolean types, not just equal to True/False
+ if all(isinstance(choice, bool) for choice in param_range.choices):
+ value = bool(value)
+ config[name] = value
+ config_hash = create_config_hash(config)
+ if config_hash not in configurations_set:
+ configurations_set.add(config_hash)
+ configurations.append(config)
+ attempts += 1
+
+ if len(configurations) < n_configurations:
+ logger.warning(
+ f"Could only generate {len(configurations)} unique configurations "
+ )
+ return configurations
+
+
+def _sobol_sampling(
+ parameter_grid: Dict[str, ParameterRange],
+ n_configurations: int,
+ random_state: Optional[int] = None,
+) -> List[Dict]:
+ """
+ Generate unique parameter configurations using Sobol sequence sampling.
+
+ Applies a low-discrepancy Sobol sequence to sample numeric parameters (int and float),
+ mapping each dimension to a parameter. Categorical parameters are assigned randomly.
+ Ensures uniqueness by hashing each configuration. At least one numeric parameter is required.
+ Sampling stops when the requested number of unique configurations is reached.
+
+ Args:
+ parameter_grid: Dictionary mapping parameter names to their range objects.
+ n_configurations: Number of unique configurations to generate.
+ random_state: Seed for reproducibility.
+
+ Returns:
+ List of unique parameter configurations as dictionaries.
+ """
+ configurations: List[Dict] = []
+ configurations_set = set()
+ # Seed random generators for reproducible categorical assignments
+ if random_state is not None:
+ random.seed(random_state)
+ np.random.seed(random_state)
+
+ param_names = sorted(parameter_grid.keys())
+ param_ranges = [parameter_grid[name] for name in param_names]
+ # Separate numeric and categorical parameters for Sobol and random sampling
+ numeric_params = [
+ (i, name, pr)
+ for i, (name, pr) in enumerate(zip(param_names, param_ranges))
+ if isinstance(pr, (IntRange, FloatRange))
+ ]
+ categorical_params = [
+ (i, name, pr)
+ for i, (name, pr) in enumerate(zip(param_names, param_ranges))
+ if isinstance(pr, CategoricalRange)
+ ]
+
+ if not numeric_params:
+ raise ValueError("Sobol sampling requires at least one numeric parameter.")
+
+ # Generate Sobol samples for numeric parameters.
+ # SciPy's Sobol implementation expects a power-of-two sample size for balance.
+ # Use `random_base2(m)` to generate 2**m samples (power of two) and then
+ # slice to the requested `n_configurations` to avoid the UserWarning.
+ if n_configurations <= 0:
+ raise ValueError(
+ "n_configurations must be a positive integer for Sobol sampling"
+ )
+ sobol_engine = qmc.Sobol(d=len(numeric_params), scramble=False, seed=random_state)
+ # Compute the smallest m such that 2**m >= n_configurations
+ m = math.ceil(math.log2(n_configurations))
+ samples_all = sobol_engine.random_base2(m)
+ samples = samples_all[:n_configurations]
+ for row in samples:
+ config = {}
+ # Map Sobol sample to each numeric parameter
+ for dim, (_, name, pr) in enumerate(numeric_params):
+ if isinstance(pr, IntRange):
+ if pr.log_scale:
+ lmin = np.log(max(pr.min_value, 1))
+ lmax = np.log(pr.max_value)
+ value = int(np.round(np.exp(lmin + row[dim] * (lmax - lmin))))
+ config[name] = max(pr.min_value, min(value, pr.max_value))
+ else:
+ # Use round instead of floor for more balanced integer sampling
+ value = int(
+ np.round(
+ row[dim] * (pr.max_value - pr.min_value) + pr.min_value
+ )
+ )
+ config[name] = max(pr.min_value, min(value, pr.max_value))
+ else:
+ if pr.log_scale:
+ lmin = np.log(max(pr.min_value, 1e-10))
+ lmax = np.log(pr.max_value)
+ config[name] = float(np.exp(lmin + row[dim] * (lmax - lmin)))
+ else:
+ config[name] = float(
+ pr.min_value + row[dim] * (pr.max_value - pr.min_value)
+ )
+ # Assign categorical parameters randomly
+ for _, name, pr in categorical_params:
+ value = random.choice(pr.choices)
+ # Ensure bools are Python bool, not numpy.bool_ or int
+ # Check if ALL choices are actually boolean types, not just equal to True/False
+ if all(isinstance(choice, bool) for choice in pr.choices):
+ value = bool(value)
+ config[name] = value
+ config_hash = create_config_hash(config)
+ # Ensure uniqueness of each configuration
+ if config_hash not in configurations_set:
+ configurations_set.add(config_hash)
+ configurations.append(config)
+ if len(configurations) >= n_configurations:
+ break
+ if len(configurations) < n_configurations:
+ logger.warning(
+ f"Could only generate {len(configurations)} unique configurations "
+ )
+ return configurations
diff --git a/confopt/utils/configurations/utils.py b/confopt/utils/configurations/utils.py
new file mode 100644
index 0000000..5850f01
--- /dev/null
+++ b/confopt/utils/configurations/utils.py
@@ -0,0 +1,10 @@
+def create_config_hash(config: dict) -> int:
+    """Create a fast integer hash of a configuration from its sorted key-value items."""
+ items = []
+ for k in sorted(config.keys()):
+ v = config[k]
+ if isinstance(v, (int, float, bool)):
+ items.append((k, v))
+ else:
+ items.append((k, str(v)))
+ return hash(tuple(items))
diff --git a/confopt/utils/optimization.py b/confopt/utils/optimization.py
new file mode 100644
index 0000000..298fa5a
--- /dev/null
+++ b/confopt/utils/optimization.py
@@ -0,0 +1,157 @@
+import logging
+import numpy as np
+from typing import Tuple, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class DecayingSearcherOptimizer:
+ """Searcher optimizer that increases tuning_interval as search progresses.
+
+ This optimizer implements a decaying strategy where the tuning interval
+ starts at an initial value and increases over time according to various
+ decay rate options. The n_tuning_episodes remains constant throughout
+ the search process. The conformal model retrains every iteration (frequency = 1).
+
+ Args:
+ n_tuning_episodes (int): Number of tuning episodes to perform at each
+ optimization step. Defaults to 10.
+ initial_tuning_interval (int): Initial tuning interval to decay from.
+ Must be a positive integer. Defaults to 1.
+ decay_rate (float): Rate of decay - higher values mean faster increase
+ in tuning interval. Defaults to 0.1.
+ decay_type (str): Type of decay function. Must be one of 'linear',
+ 'exponential', or 'logarithmic'. Defaults to 'linear'.
+ max_tuning_interval (int): Maximum tuning interval cap to prevent
+ excessive intervals. Defaults to 20.
+
+ Attributes:
+ current_iter (int): Current search iteration number.
+
+ Note:
+ The decay functions are:
+ - Linear: interval = initial + decay_rate * iter
+ - Exponential: interval = initial * (1 + decay_rate)^iter
+ - Logarithmic: interval = initial + decay_rate * log(1 + iter)
+
+ All intervals are rounded to integers.
+ """
+
+ def __init__(
+ self,
+ n_tuning_episodes: int = 10,
+ initial_tuning_interval: int = 1,
+ decay_rate: float = 0.1,
+ decay_type: str = "linear",
+ max_tuning_interval: int = 20,
+ ):
+ self.n_tuning_episodes = n_tuning_episodes
+ self.initial_tuning_interval = initial_tuning_interval
+ self.decay_rate = decay_rate
+ self.decay_type = decay_type
+ self.max_tuning_interval = max_tuning_interval
+ self.current_iter = 0
+
+ # Validate decay_type
+ if decay_type not in ["linear", "exponential", "logarithmic"]:
+ raise ValueError(
+ "decay_type must be one of 'linear', 'exponential', 'logarithmic'"
+ )
+
+ def _calculate_current_interval(self, search_iter: int) -> int:
+ """Calculate the current tuning interval based on search iteration.
+
+ Args:
+ search_iter (int): Current search iteration number.
+
+ Returns:
+ int: Calculated tuning interval, rounded to integer.
+ """
+ if self.decay_type == "linear":
+ # Linear increase: interval = initial + decay_rate * iter
+ interval = self.initial_tuning_interval + self.decay_rate * search_iter
+ elif self.decay_type == "exponential":
+ # Exponential increase: interval = initial * (1 + decay_rate)^iter
+ interval = self.initial_tuning_interval * (
+ (1 + self.decay_rate) ** search_iter
+ )
+ elif self.decay_type == "logarithmic":
+ # Logarithmic increase: interval = initial + decay_rate * log(1 + iter)
+ interval = self.initial_tuning_interval + self.decay_rate * np.log(
+ 1 + search_iter
+ )
+
+ # Cap at maximum interval
+ interval = min(interval, self.max_tuning_interval)
+
+ # Round to integer and ensure minimum interval
+ interval = max(int(round(interval)), 1)
+
+ return interval
+
+ def update(self, search_iter: Optional[int] = None) -> None:
+ """Update the optimizer with search iteration information.
+
+ Args:
+ search_iter (int, optional): Current search iteration number. If provided,
+ updates the internal iteration counter used for decay calculations.
+ """
+ if search_iter is not None:
+ self.current_iter = search_iter
+
+ def select_arm(self) -> Tuple[int, int]:
+ """Select the tuning count and interval based on current decay strategy.
+
+ Returns:
+ tuple[int, int]: Tuple containing (n_tuning_episodes, current_tuning_interval).
+ The tuning interval is calculated based on the current iteration
+ and decay parameters.
+ """
+ current_interval = self._calculate_current_interval(self.current_iter)
+ return (self.n_tuning_episodes, current_interval)
+
+
+class FixedSearcherOptimizer:
+ """Fixed searcher optimizer with constant tuning parameters.
+
+ This optimizer returns fixed tuning parameters regardless of search progress.
+ Useful as a baseline or when consistent tuning behavior is desired.
+ The conformal model retrains every iteration (frequency = 1).
+
+ Args:
+ n_tuning_episodes (int): Number of tuning episodes to perform at each
+ optimization step. Defaults to 10.
+ tuning_interval (int): Fixed tuning interval to use throughout optimization.
+ Defaults to 5.
+
+ Attributes:
+ fixed_count (int): Fixed number of tuning episodes.
+ fixed_interval (int): Fixed tuning interval.
+ """
+
+ def __init__(
+ self,
+ n_tuning_episodes: int = 10,
+ tuning_interval: int = 5,
+ ):
+ self.fixed_count = n_tuning_episodes
+ self.fixed_interval = tuning_interval
+
+ def select_arm(self) -> Tuple[int, int]:
+ """Select the fixed tuning count and interval.
+
+ Returns:
+ tuple[int, int]: Tuple containing (fixed_count, fixed_interval).
+ """
+ return self.fixed_count, self.fixed_interval
+
+ def update(self, search_iter: Optional[int] = None) -> None:
+ """Update method that accepts search_iter for API compatibility.
+
+ This method does nothing for the fixed optimizer but maintains
+ the same interface as other optimizers.
+
+ Args:
+ search_iter (int, optional): Current search iteration number.
+ Ignored by this optimizer.
+ """
diff --git a/confopt/preprocessing.py b/confopt/utils/preprocessing.py
similarity index 54%
rename from confopt/preprocessing.py
rename to confopt/utils/preprocessing.py
index 78659ba..e856f2c 100644
--- a/confopt/preprocessing.py
+++ b/confopt/utils/preprocessing.py
@@ -1,5 +1,5 @@
import random
-from typing import Tuple, Optional
+from typing import Tuple
import numpy as np
from sklearn.preprocessing import StandardScaler
@@ -74,59 +74,3 @@ def train_val_split(
X_val = scaler.transform(X_val)
return X_train, y_train, X_val, y_val
-
-
-def remove_iqr_outliers(
- X: np.array, y: np.array, scope: str, iqr_factor: Optional[float] = 1.5
-) -> Tuple[np.array, np.array]:
- """
- Remove data outliers via interquartile range filtering.
-
- Interquartile range is applied to target variable only.
-
- Parameters
- ----------
- X :
- Feature variables.
- y :
- Target variable.
- scope :
- Determines which outliers are removed. Takes:
- - 'top_only': Only upper threshold outliers are removed.
- - 'bottom_only': Only lower threshold outliers are removed.
- - 'top_and_bottom': All outliers are removed.
- iqr_factor :
- Factor by which to multiply the interquartile range when
- determining outlier thresholds.
-
- Returns
- -------
- X_retained :
- Outlier filtered X features variables.
- y_retained :
- Outlier filtered y target variable.
- """
- q1 = np.quantile(y, 0.25)
- q3 = np.quantile(y, 0.75)
- iqr = abs(q3 - q1)
-
- bottom_outlier_idxs = list(np.where(y < (q1 - iqr_factor * iqr))[0])
- top_outlier_idxs = list(np.where(y > (q3 + iqr_factor * iqr))[0])
-
- if scope == "top_only":
- outlier_idxs = top_outlier_idxs.copy()
- elif scope == "bottom_only":
- outlier_idxs = bottom_outlier_idxs.copy()
- elif scope == "top_and_bottom":
- outlier_idxs = top_outlier_idxs + bottom_outlier_idxs
- else:
- raise ValueError(
- "'scope' can only take one of 'top_only', 'bottom_only' or 'top_and_bottom', "
- f"but {scope} was passed."
- )
-
- retained_idxs = list(set(list(range(0, len(X)))) - set(outlier_idxs))
- X_retained = X[retained_idxs, :]
- y_retained = y[retained_idxs]
-
- return X_retained, y_retained
diff --git a/confopt/utils/tracking.py b/confopt/utils/tracking.py
new file mode 100644
index 0000000..cc5bd4d
--- /dev/null
+++ b/confopt/utils/tracking.py
@@ -0,0 +1,530 @@
+import logging
+import time
+from pydantic import BaseModel
+from datetime import datetime
+from typing import Optional, Literal
+from confopt.wrapping import ParameterRange
+import numpy as np
+from confopt.utils.configurations.encoding import ConfigurationEncoder
+from confopt.utils.configurations.sampling import get_tuning_configurations
+from tqdm import tqdm
+from confopt.utils.configurations.utils import create_config_hash
+
+
+logger = logging.getLogger(__name__)
+
+
class RuntimeTracker:
    """
    Wall-clock timer with pause/resume semantics.

    Accumulates elapsed wall time across the stages of an iterative search
    or training process so multi-stage workflows can be timed accurately.
    """

    def __init__(self):
        # Timestamp of the latest start/resume, plus total seconds banked so far.
        self.start_time = time.time()
        self.runtime = 0

    def _elapsed_runtime(self):
        """Return seconds elapsed since the last start or resume."""
        return abs(time.time() - self.start_time)

    def pause_runtime(self):
        """Bank the current interval's elapsed time into the runtime counter."""
        self.runtime += self._elapsed_runtime()

    def resume_runtime(self):
        """Restart interval timing from the current moment."""
        self.start_time = time.time()

    def return_runtime(self):
        """
        Return total accumulated runtime, including the interval in progress.

        Returns:
            Total runtime in seconds.
        """
        # Bank the open interval, read the total, then restart the clock so
        # subsequent calls keep accumulating correctly.
        self.pause_runtime()
        total_runtime = self.runtime
        self.resume_runtime()
        return total_runtime
+
+
class ProgressBarManager:
    """
    Handles creation, advancement, and teardown of tqdm progress bars.

    Progress is tracked against either a runtime budget or an iteration
    budget, whichever the caller supplies. All operations are no-ops when
    ``verbose`` is False or no bar has been created.
    """

    def __init__(self, verbose: bool = True):
        self.verbose = verbose
        self.progress_bar = None

    def create_progress_bar(
        self,
        max_runtime: Optional[int] = None,
        max_searches: Optional[int] = None,
        current_trials: int = 0,
        description: str = "Search progress",
    ) -> None:
        """
        Initialize a progress bar sized by a runtime or iteration budget.

        Args:
            max_runtime: Maximum allowed runtime in seconds; takes precedence
                over max_searches when both are given.
            max_searches: Maximum number of iterations.
            current_trials: Number of already-completed trials, used to offset
                the iteration budget.
            description: Label shown next to the bar.
        """
        if self.verbose:
            if max_runtime is not None:
                self.progress_bar = tqdm(total=max_runtime, desc=f"{description}: ")
            elif max_searches is not None:
                remaining_budget = max_searches - current_trials
                if remaining_budget > 0:
                    self.progress_bar = tqdm(
                        total=remaining_budget, desc=f"{description}: "
                    )

    def update_progress(
        self, current_runtime: Optional[float] = None, iteration_count: int = 1
    ) -> None:
        """
        Advance the bar by elapsed runtime or by a number of iterations.

        Args:
            current_runtime: Current elapsed runtime in seconds; when given,
                the bar is moved forward to this position (never backwards).
            iteration_count: Iterations to add when runtime is not supplied.
        """
        if self.progress_bar:
            if current_runtime is None:
                self.progress_bar.update(iteration_count)
            else:
                # Move the bar up to the integer runtime mark, never backwards.
                runtime_delta = int(current_runtime) - self.progress_bar.n
                if runtime_delta > 0:
                    self.progress_bar.update(runtime_delta)

    def close_progress_bar(self) -> None:
        """Close the bar, if any, and reset internal state."""
        if self.progress_bar:
            self.progress_bar.close()
            self.progress_bar = None
+
+
class Trial(BaseModel):
    """
    Represents a single experiment trial in a hyperparameter search.

    Captures configuration, performance, timing, and metadata for each evaluation.
    Used for experiment logging, analysis, and reproducibility.
    """

    # Search iteration at which this trial was evaluated.
    iteration: int
    # Wall-clock time the trial was recorded.
    timestamp: datetime
    # Raw hyperparameter name -> value mapping that was evaluated.
    configuration: dict
    # Numeric (encoded) representation of the configuration.
    tabularized_configuration: list[float]
    # Observed performance value for this configuration.
    performance: float
    # Which acquisition strategy proposed the configuration, if known.
    acquisition_source: Optional[str] = None
    # Interval bounds reported for the trial, when available
    # (presumably conformal bounds — confirm against the producer).
    lower_bound: Optional[float] = None
    upper_bound: Optional[float] = None
    # Seconds spent by the searcher / the target model, when tracked.
    searcher_runtime: Optional[float] = None
    target_model_runtime: Optional[float] = None
+
+
class Study:
    """
    Aggregates and manages a collection of experiment trials.

    Provides methods for appending, querying, and analyzing trials, including
    best configuration selection and runtime statistics. Used as the main
    experiment log in tuning workflows.
    """

    def __init__(
        self, metric_optimization: Literal["minimize", "maximize"] = "minimize"
    ):
        self.trials: list[Trial] = []
        self.metric_optimization = metric_optimization

    def append_trial(self, trial: Trial):
        """
        Append a single trial to the study log.

        Args:
            trial: Trial object to append.
        """
        self.trials.append(trial)

    def batch_append_trials(self, trials: list[Trial]):
        """
        Append multiple trials to the study log.

        Args:
            trials: List of Trial objects to append.
        """
        self.trials.extend(trials)

    def get_searched_configurations(self) -> list[dict]:
        """
        Return all configurations evaluated in the study.

        Returns:
            List of configuration dictionaries.
        """
        return [trial.configuration for trial in self.trials]

    def get_searched_performances(self) -> list[float]:
        """
        Return all performance values recorded in the study.

        Returns:
            List of performance values.
        """
        # Annotation fixed: this returns floats, not dicts.
        return [trial.performance for trial in self.trials]

    def _get_best_trial(self) -> Trial:
        # Shared best-trial selection: min for "minimize", max for "maximize".
        # Raises ValueError on an empty study, matching min()/max() semantics.
        if self.metric_optimization == "minimize":
            best_trial = min(self.trials, key=lambda trial: trial.performance)
        else:  # maximize
            best_trial = max(self.trials, key=lambda trial: trial.performance)
        return best_trial

    def get_best_configuration(self) -> dict:
        """
        Return the configuration with the best performance according to the
        optimization direction.

        Returns:
            Best configuration dictionary.
        """
        return self._get_best_trial().configuration

    def get_best_performance(self) -> float:
        """
        Return the best performance value according to the optimization
        direction.

        Returns:
            Best performance value.
        """
        return self._get_best_trial().performance

    def get_average_target_model_runtime(self) -> float:
        """
        Return the mean target-model runtime across trials that recorded one.

        Returns:
            Average runtime in seconds, or 0.0 when no trial carries a
            runtime (instead of raising ZeroDivisionError).
        """
        target_model_runtimes = [
            trial.target_model_runtime
            for trial in self.trials
            if trial.target_model_runtime is not None
        ]
        if target_model_runtimes:
            average_runtime = sum(target_model_runtimes) / len(target_model_runtimes)
        else:
            average_runtime = 0.0
        return average_runtime
+
+
class BaseConfigurationManager:
    """
    Base class for configuration management in search workflows.

    Tracks searched, banned, and candidate configurations, and converts
    configurations to tabular form for model input. Subclassed by static and
    dynamic configuration managers.
    """

    def __init__(
        self,
        search_space: dict[str, ParameterRange],
        n_candidate_configurations: int,
    ) -> None:
        self.search_space = search_space
        self.n_candidate_configurations = n_candidate_configurations
        self.searched_configs: list[dict] = []
        self.searched_performances: list[float] = []
        self.searched_config_hashes: set = set()
        # Created lazily so subclasses control when the setup happens.
        self.encoder = None
        self.banned_configurations: list[dict] = []

    def _setup_encoder(self) -> None:
        """Initialize the configuration encoder used for tabularization."""
        self.encoder = ConfigurationEncoder(search_space=self.search_space)

    def mark_as_searched(self, config: dict, performance: float) -> None:
        """
        Mark a configuration as searched and record its performance.

        Args:
            config: Configuration dictionary.
            performance: Observed performance value.
        """
        config_hash = create_config_hash(config)
        self.searched_configs.append(config)
        self.searched_performances.append(performance)
        self.searched_config_hashes.add(config_hash)

    def tabularize_configs(self, configs: list[dict]) -> np.array:
        """
        Convert configuration dictionaries to a tabular numpy array for
        model input.

        Args:
            configs: List of configuration dictionaries.
        Returns:
            Tabularized configuration array.
        """
        if not configs:
            return np.array([])
        # Fix: lazily set up the encoder, matching listify_configs; previously
        # this raised AttributeError when called before encoder setup.
        if self.encoder is None:
            self._setup_encoder()
        return self.encoder.transform(configs).to_numpy()

    def listify_configs(self, configs: list[dict]) -> list[list[float]]:
        """
        Convert configuration dictionaries to lists of numerical values.

        Args:
            configs: List of configuration dictionaries to convert.
        Returns:
            List of lists, where each inner list contains numerical values
            in the same order as DataFrame columns.
        """
        if not configs:
            return []
        if self.encoder is None:
            self._setup_encoder()
        tabularized = self.encoder.transform(configs).to_numpy()
        return [row.tolist() for row in tabularized]

    def add_to_banned_configurations(self, config: dict) -> None:
        """
        Add a configuration to the banned list if not already present.

        Args:
            config: Configuration dictionary to ban.
        """
        config_hash = create_config_hash(config)
        # any() short-circuits instead of hashing the whole list up front.
        already_banned = any(
            create_config_hash(banned) == config_hash
            for banned in self.banned_configurations
        )
        if not already_banned:
            self.banned_configurations.append(config)
+
+
class StaticConfigurationManager(BaseConfigurationManager):
    """
    Configuration manager over a fixed, precomputed candidate pool.

    All candidates are sampled once at construction time. Searched and banned
    entries are tracked by pool index in sets, giving O(1) membership checks,
    and the filtered pool is cached until the next state change.
    """

    def __init__(
        self,
        search_space: dict[str, ParameterRange],
        n_candidate_configurations: int,
    ) -> None:
        super().__init__(search_space, n_candidate_configurations)

        # Index-based tracking against the fixed pool (O(1) membership tests).
        self.searched_indices = set()
        self.banned_indices = set()

        self.all_candidate_configs = []
        self.config_to_index = {}  # config hash -> position in the pool

        # Cached result of get_searchable_configurations(); invalidated on
        # every search/ban event.
        self._searchable_configs_cache = None
        self._cache_valid = False

        self._initialize_static_configs_and_encoder()

    def _initialize_static_configs_and_encoder(self) -> None:
        """Sample the static candidate pool and prepare the encoder."""
        self.all_candidate_configs = get_tuning_configurations(
            parameter_grid=self.search_space,
            n_configurations=self.n_candidate_configurations,
            random_state=None,
            sampling_method="uniform",
        )

        self._setup_encoder()

        # Hash -> index map enables O(1) lookups into the fixed pool.
        self.config_to_index = {
            create_config_hash(candidate): position
            for position, candidate in enumerate(self.all_candidate_configs)
        }

    def mark_as_searched(self, config: dict, performance: float) -> None:
        """
        Record a configuration as searched and invalidate the cached pool.

        Args:
            config: Configuration dictionary.
            performance: Observed performance value.
        """
        config_hash = create_config_hash(config)
        if config_hash in self.config_to_index:
            self.searched_indices.add(self.config_to_index[config_hash])

        super().mark_as_searched(config, performance)
        self._cache_valid = False

    def add_to_banned_configurations(self, config: dict) -> None:
        """
        Ban a configuration and invalidate the cached pool.

        Args:
            config: Configuration dictionary to ban.
        """
        config_hash = create_config_hash(config)
        if config_hash in self.config_to_index:
            self.banned_indices.add(self.config_to_index[config_hash])

        super().add_to_banned_configurations(config)
        self._cache_valid = False

    def get_searchable_configurations(self) -> list[dict]:
        """
        Return pool candidates that are neither searched nor banned.

        Results are cached between state changes; callers receive a copy so
        external mutation cannot corrupt the cache.

        Returns:
            List of configuration dictionaries.
        """
        if not (self._cache_valid and self._searchable_configs_cache is not None):
            excluded_indices = self.searched_indices | self.banned_indices
            self._searchable_configs_cache = [
                candidate
                for position, candidate in enumerate(self.all_candidate_configs)
                if position not in excluded_indices
            ]
            self._cache_valid = True

        return self._searchable_configs_cache.copy()

    def get_searchable_configurations_count(self) -> int:
        """
        Return how many pool candidates remain searchable.

        Returns:
            Number of searchable configurations remaining.
        """
        excluded_indices = self.searched_indices | self.banned_indices
        return len(self.all_candidate_configs) - len(excluded_indices)
+
+
class DynamicConfigurationManager(BaseConfigurationManager):
    """
    Configuration manager that resamples its candidate pool on demand.

    Each call to get_searchable_configurations() draws a fresh batch of
    candidates, oversampled to compensate for already-searched entries, and
    filters out searched and banned configurations.
    """

    def __init__(
        self,
        search_space: dict[str, ParameterRange],
        n_candidate_configurations: int,
    ) -> None:
        super().__init__(search_space, n_candidate_configurations)
        self.current_searchable_configs = []
        self._setup_encoder()

    def get_searchable_configurations(self) -> list[dict]:
        """
        Sample and return up to n_candidate_configurations unseen candidates.

        Returns:
            List of configuration dictionaries.
        """
        # Oversample by the number of already-searched configs so filtering
        # still tends to leave a full batch.
        sampled_configurations = get_tuning_configurations(
            parameter_grid=self.search_space,
            n_configurations=self.n_candidate_configurations
            + len(self.searched_configs),
            random_state=None,
            sampling_method="uniform",
        )

        banned_hashes = {
            create_config_hash(banned) for banned in self.banned_configurations
        }

        selected_configs = []
        for candidate in sampled_configurations:
            candidate_hash = create_config_hash(candidate)
            is_unseen = (
                candidate_hash not in self.searched_config_hashes
                and candidate_hash not in banned_hashes
            )
            if is_unseen:
                selected_configs.append(candidate)
                if len(selected_configs) >= self.n_candidate_configurations:
                    break

        # Remembered so the count method reflects the latest batch.
        self.current_searchable_configs = selected_configs
        return selected_configs

    def get_searchable_configurations_count(self) -> int:
        """
        Return the size of the batch produced by the most recent call to
        get_searchable_configurations().

        Returns:
            Number of searchable configurations remaining.
        """
        return len(self.current_searchable_configs)
diff --git a/confopt/wrapping.py b/confopt/wrapping.py
index 3016ed9..5551429 100644
--- a/confopt/wrapping.py
+++ b/confopt/wrapping.py
@@ -1,11 +1,72 @@
-from abc import ABC, abstractmethod
+from typing import Union
+from pydantic import BaseModel, field_validator, ValidationInfo, ConfigDict
+import numpy as np
-class TunableModel(ABC):
- @abstractmethod
- def fit(self, X, y):
- pass
+class IntRange(BaseModel):
+ """Range of integer values for hyperparameter optimization."""
- @abstractmethod
- def predict(self, X):
- pass
+ min_value: int
+ max_value: int
+ log_scale: bool = False # Whether to sample on a logarithmic scale
+
+ @field_validator("max_value")
+ def max_gt_min(cls, v, info: ValidationInfo):
+ if (
+ hasattr(info, "data")
+ and "min_value" in info.data
+ and v <= info.data["min_value"]
+ ):
+ raise ValueError("max_value must be greater than min_value")
+ return v
+
+ @field_validator("log_scale")
+ def log_scale_positive_values(cls, v, info: ValidationInfo):
+ if (
+ v
+ and hasattr(info, "data")
+ and "min_value" in info.data
+ and info.data["min_value"] <= 0
+ ):
+ raise ValueError("log_scale=True requires min_value > 0")
+ return v
+
+
class FloatRange(BaseModel):
    """Range of float values for hyperparameter optimization."""

    min_value: float
    max_value: float
    log_scale: bool = False  # Whether to sample on a logarithmic scale

    @field_validator("max_value")
    def max_gt_min(cls, v, info: ValidationInfo):
        if (
            hasattr(info, "data")
            and "min_value" in info.data
            and v <= info.data["min_value"]
        ):
            raise ValueError("max_value must be greater than min_value")
        return v

    @field_validator("log_scale")
    def log_scale_positive_values(cls, v, info: ValidationInfo):
        # Consistency fix: mirrors IntRange's validator — log-scale sampling
        # requires a strictly positive lower bound (log(x) undefined for x <= 0).
        if (
            v
            and hasattr(info, "data")
            and "min_value" in info.data
            and info.data["min_value"] <= 0
        ):
            raise ValueError("log_scale=True requires min_value > 0")
        return v
+
+
class CategoricalRange(BaseModel):
    """Categorical values for hyperparameter optimization."""

    choices: list[Union[str, int, float, bool]]

    @field_validator("choices")
    def non_empty_choices(cls, v):
        # An empty choice list would make sampling impossible.
        if not v:
            raise ValueError("choices must not be empty")
        return v
+
+
+ParameterRange = Union[IntRange, FloatRange, CategoricalRange]
+
+
class ConformalBounds(BaseModel):
    """Container pairing arrays of lower and upper conformal interval bounds."""

    lower_bounds: np.ndarray
    upper_bounds: np.ndarray

    # numpy arrays are not natively validated by pydantic, so arbitrary
    # (non-pydantic) types must be explicitly allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)
diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep
new file mode 100644
index 0000000..cd65377
--- /dev/null
+++ b/docs/_static/.gitkeep
@@ -0,0 +1,2 @@
+# This file ensures the _static directory is tracked by git
+# even when it's empty. Sphinx needs this directory for static assets.
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
new file mode 100644
index 0000000..4a6600b
--- /dev/null
+++ b/docs/_static/custom.css
@@ -0,0 +1,550 @@
+/* ===================================================================
+ ConfOpt Documentation - Simplified Modern Pink/Red Theme
+ Compatible with sphinx_rtd_theme
+ ================================================================== */
+
+/* CSS Custom Properties - Simplified Design System */
+:root {
+ /* Primary Pink/Red Palette */
+ --primary-50: #fdf2f8;
+ --primary-100: #fce7f3;
+ --primary-200: #fbcfe8;
+ --primary-300: #f9a8d4;
+ --primary-400: #f472b6;
+ --primary-500: #ec4899;
+ --primary-600: #db2777;
+ --primary-700: #be185d;
+ --primary-800: #9d174d;
+
+ /* Accent Colors */
+ --accent-green: #059669;
+ --accent-orange: #ea580c;
+ --accent-red: #dc2626;
+ --accent-purple: #8b5cf6;
+
+ /* Neutral Palette */
+ --gray-50: #f9fafb;
+ --gray-100: #f3f4f6;
+ --gray-200: #e5e7eb;
+ --gray-300: #d1d5db;
+ --gray-400: #9ca3af;
+ --gray-500: #6b7280;
+ --gray-600: #4b5563;
+ --gray-700: #374151;
+ --gray-800: #1f2937;
+ --gray-900: #111827;
+
+ /* Typography */
+ --font-mono: 'SF Mono', 'Monaco', 'Roboto Mono', 'Courier New', monospace;
+ --font-sans: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+
+ /* Shadows */
+ --shadow-sm: 0 1px 2px 0 rgba(219, 39, 119, 0.05);
+ --shadow-md: 0 4px 6px -1px rgba(219, 39, 119, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.05);
+ --shadow-lg: 0 10px 15px -3px rgba(219, 39, 119, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
+
+ /* Transitions */
+ --transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
+}
+
+/* ===================================================================
+ Header Customization
+ ================================================================== */
+
+/* RTD Header with Pink Gradient */
+.wy-side-nav-search {
+ background: linear-gradient(135deg, var(--primary-600), var(--primary-200)) !important;
+}
+
+.wy-side-nav-search > a {
+ color: white !important;
+ font-weight: 700;
+ text-decoration: none;
+}
+
+.wy-side-nav-search > a:hover {
+ color: white !important;
+ background: none !important;
+}
+
+/* Logo styling */
+.wy-side-nav-search img.logo {
+ max-height: 60px;
+ width: auto;
+ transition: var(--transition);
+}
+
+.wy-side-nav-search img.logo:hover {
+ transform: scale(1.05);
+}
+
+/* Search input styling */
+.wy-side-nav-search input[type="text"] {
+ border: none;
+ border-radius: 6px;
+ background: rgba(255, 255, 255, 0.95);
+ color: var(--gray-700);
+ font-size: 14px;
+ transition: var(--transition);
+}
+
+.wy-side-nav-search input[type="text"]:focus {
+ outline: none;
+ background: white;
+ box-shadow: 0 0 0 3px rgba(219, 39, 119, 0.2);
+}
+
+/* ===================================================================
+ Navigation Styling
+ ================================================================== */
+
+/* Navigation background */
+.wy-nav-side {
+ background: var(--gray-50) !important;
+ border-right: 1px solid var(--gray-200);
+}
+
+/* Navigation items */
+.wy-menu-vertical a {
+ color: var(--gray-600) !important;
+ border-left: 3px solid transparent;
+ transition: var(--transition);
+ font-weight: 500;
+}
+
+.wy-menu-vertical a:hover {
+ background: var(--primary-50) !important;
+ color: var(--primary-700) !important;
+ border-left-color: var(--primary-300);
+ transform: translateX(2px);
+}
+
+/* Active navigation items */
+.wy-menu-vertical li.current > a,
+.wy-menu-vertical li.current a {
+ background: var(--primary-100) !important;
+ color: var(--primary-800) !important;
+ border-left-color: var(--primary-600) !important;
+ font-weight: 600;
+}
+
+/* Sub-navigation */
+.wy-menu-vertical li ul li a {
+ color: var(--gray-500) !important;
+ font-size: 14px;
+ font-weight: 400;
+}
+
+.wy-menu-vertical li ul li a:hover {
+ background: var(--primary-50) !important;
+ color: var(--primary-600) !important;
+}
+
+.wy-menu-vertical li ul li.current a {
+ background: var(--primary-50) !important;
+ color: var(--primary-700) !important;
+ border-left-color: var(--primary-500) !important;
+ font-weight: 500;
+}
+
+/* ===================================================================
+ Content Area
+ ================================================================== */
+
+/* Main content background */
+.wy-nav-content {
+ background: #ffffff;
+}
+
+/* ===================================================================
+ Typography
+ ================================================================== */
+
+/* Headings */
+h1, h2, h3, h4, h5, h6 {
+ color: var(--gray-900);
+ font-weight: 700;
+}
+
+h1 {
+ color: var(--primary-800);
+ border-bottom: 3px solid var(--primary-200);
+ padding-bottom: 0.5rem;
+}
+
+h2 {
+ color: var(--gray-800);
+ border-bottom: 2px solid var(--primary-100);
+ padding-bottom: 0.25rem;
+}
+
+h3 {
+ color: var(--gray-700);
+}
+
+/* Hide header anchor links */
+.headerlink {
+ display: none !important;
+}
+
+/* Links */
+a {
+ color: var(--primary-600);
+ text-decoration: none;
+}
+
+a:hover {
+ color: var(--primary-800);
+ text-decoration: underline;
+ text-decoration-color: var(--primary-600);
+}
+
+/* Toctree captions */
+.caption,
+.toctree-caption,
+.rst-content .toctree-wrapper p.caption,
+.rst-content p.caption {
+ color: var(--primary-800) !important;
+ font-weight: 600;
+}
+
+/* ===================================================================
+ Code Blocks & Syntax Highlighting
+ ================================================================== */
+
+/* Code block container */
+.highlight {
+ border-radius: 8px;
+ border: 1px solid var(--primary-200);
+ background: linear-gradient(135deg, #fdeff8, #fff9ff, #ffffff) !important;
+ margin: 1.5rem 0;
+ overflow: hidden;
+ box-shadow: var(--shadow-md);
+ position: relative;
+}
+
+/* Code block accent line */
+.highlight::before {
+ content: '';
+ position: absolute;
+ top: 0;
+ left: 0;
+ right: 0;
+ height: 3px;
+ background: linear-gradient(90deg, var(--primary-200), var(--primary-400));
+}
+
+/* Code content */
+.highlight pre {
+ padding: 1.5rem;
+ line-height: 1.6;
+ font-family: var(--font-mono);
+ font-size: 14px;
+ margin: 0;
+ background: transparent !important;
+ border: none;
+ overflow-x: auto;
+ color: var(--gray-800);
+}
+
/* Inline code */
code {
  background: linear-gradient(135deg, var(--primary-50), #fef7f7) !important;
  /* Darkened from --primary-300 (#f9a8d4): light pink text on the pale
     gradient background was illegible / failed WCAG contrast. */
  color: var(--primary-700) !important;
  padding: 3px 8px;
  border-radius: 4px;
  font-family: var(--font-mono);
  font-size: 0.875em;
  font-weight: 600;
  border: 1px solid var(--primary-200);
  box-shadow: var(--shadow-sm);
}
+
+/* Syntax highlighting - Pink/Red theme */
+.highlight .k, .highlight .kw { color: #d946ef; font-weight: 700; } /* Keywords */
+.highlight .kn, .highlight .kd, .highlight .kc { color: #c026d3; font-weight: 600; } /* Import, declarations */
+.highlight .s, .highlight .s1, .highlight .s2 { color: var(--primary-500); font-weight: 500; } /* Strings */
+.highlight .c, .highlight .c1, .highlight .cm { color: var(--gray-500); font-style: italic; opacity: 0.8; } /* Comments */
+.highlight .n, .highlight .na { color: var(--gray-800); } /* Names */
+.highlight .nb { color: var(--accent-red); font-weight: 600; } /* Built-ins */
+.highlight .nf { color: var(--primary-700); font-weight: 700; } /* Functions */
+.highlight .nc { color: var(--primary-600); font-weight: 700; } /* Classes */
+.highlight .mi, .highlight .mf { color: var(--primary-400); font-weight: 600; } /* Numbers */
+.highlight .o, .highlight .ow { color: var(--gray-600); font-weight: 500; } /* Operators */
+.highlight .p { color: var(--gray-500); } /* Punctuation */
+
+/* ===================================================================
+ Admonitions
+ ================================================================== */
+
+.admonition {
+ border-radius: 8px;
+ border: none;
+ margin: 1.5rem 0;
+ overflow: hidden;
+ box-shadow: var(--shadow-md);
+ background: white;
+}
+
+.admonition-title {
+ padding: 1rem 1.25rem;
+ margin: 0;
+ font-weight: 700;
+ font-size: 14px;
+ text-transform: uppercase;
+ letter-spacing: 0.05em;
+}
+
+.admonition p {
+ padding: 1.25rem;
+ margin: 0;
+ line-height: 1.6;
+}
+
+/* Admonition types */
+.admonition.note {
+ border-left: 4px solid var(--primary-600);
+}
+.admonition.note .admonition-title {
+ background: linear-gradient(135deg, var(--primary-100), var(--primary-50));
+ color: var(--primary-800);
+}
+
+.admonition.warning {
+ border-left: 4px solid var(--accent-orange);
+}
+.admonition.warning .admonition-title {
+ background: linear-gradient(135deg, #fef3c7, #fde68a);
+ color: #92400e;
+}
+
+.admonition.important {
+ border-left: 4px solid var(--accent-green);
+}
+.admonition.important .admonition-title {
+ background: linear-gradient(135deg, #d1fae5, #a7f3d0);
+ color: #065f46;
+}
+
+.admonition.tip {
+ border-left: 4px solid var(--accent-purple);
+}
+.admonition.tip .admonition-title {
+ background: linear-gradient(135deg, #ede9fe, #f3e8ff);
+ color: #5b21b6;
+}
+
+/* ===================================================================
+ Tables
+ ================================================================== */
+
+.wy-table-responsive table {
+ border-collapse: collapse;
+ width: 100%;
+ margin: 1.5rem 0;
+ background: white;
+ border-radius: 8px;
+ overflow: hidden;
+ box-shadow: var(--shadow-sm);
+ border: 1px solid var(--gray-200);
+}
+
+.wy-table-responsive table th {
+ background: linear-gradient(135deg, var(--primary-50), #ffffff);
+ padding: 1rem 1.25rem;
+ text-align: left;
+ font-weight: 700;
+ color: var(--gray-800);
+ border-bottom: 2px solid var(--primary-600);
+ font-size: 14px;
+ text-transform: uppercase;
+ letter-spacing: 0.05em;
+}
+
+.wy-table-responsive table td {
+ padding: 1rem 1.25rem;
+ border-bottom: 1px solid var(--gray-100);
+ color: var(--gray-700);
+}
+
+.wy-table-responsive table tr:hover {
+ background: linear-gradient(90deg, var(--primary-50) 0%, transparent 100%);
+}
+
+/* ===================================================================
+ API Documentation
+ ================================================================== */
+
+.class > dt,
+.function > dt,
+.method > dt {
+ background: var(--gray-50);
+ border: 1px solid var(--gray-200);
+ border-radius: 6px;
+ padding: 1rem 1.25rem;
+ margin-bottom: 0.5rem;
+ font-family: var(--font-mono);
+ font-size: 14px;
+ color: var(--gray-800);
+ box-shadow: var(--shadow-sm);
+}
+
+.sig-name {
+ color: var(--primary-700);
+ font-weight: 700;
+}
+
+.sig-param {
+ color: var(--accent-red);
+ font-style: italic;
+}
+
+/* ===================================================================
+ Responsive Design
+ ================================================================== */
+
+@media screen and (max-width: 768px) {
+ .wy-menu-vertical a {
+ font-size: 14px;
+ padding: 12px 16px;
+ }
+
+ h1 {
+ font-size: 2rem;
+ }
+
+ h2 {
+ font-size: 1.5rem;
+ }
+
+ .highlight pre {
+ padding: 1rem;
+ font-size: 13px;
+ }
+
+ code {
+ font-size: 0.8em;
+ padding: 2px 6px;
+ }
+}
+
+/* ===================================================================
+ RTD Theme Compatibility
+ ================================================================== */
+
+/* Ensure RTD mobile menu works properly */
+.wy-nav-top {
+ background: var(--primary-600) !important;
+}
+
+.wy-nav-top a {
+ color: white !important;
+}
+
+/* RTD version selector styling */
+.rst-versions {
+ border-top: 2px solid var(--primary-600);
+}
+
+.rst-versions .rst-current-version {
+ background: var(--primary-700);
+}
+
+.rst-versions .rst-current-version .fa {
+ color: var(--primary-200);
+}
+
+/* RTD search results */
+.wy-side-nav-search .wy-dropdown > a:hover {
+ background: rgba(255, 255, 255, 0.1) !important;
+}
+
+/* ===================================================================
+ Loading States & Performance
+ ================================================================== */
+
+/* Loading state */
+body.loading {
+ opacity: 0.9;
+}
+
+body.loaded {
+ opacity: 1;
+ transition: opacity 0.3s ease;
+}
+
+/* Search focus enhancement */
+.wy-side-nav-search.search-focused {
+ box-shadow: 0 0 20px rgba(219, 39, 119, 0.3);
+}
+
+/* Copy button styles (fallback if sphinx-copybutton not available) */
+.copy-btn:hover {
+ background: var(--primary-700) !important;
+ transform: scale(1.05);
+}
+
+/* ===================================================================
+ Accessibility & Performance
+ ================================================================== */
+
+/* Focus states */
+*:focus {
+ outline: 2px solid var(--primary-600);
+ outline-offset: 2px;
+}
+
+/* Skip link for screen readers */
+.skip-link {
+ position: absolute;
+ top: -40px;
+ left: 6px;
+ background: var(--primary-600);
+ color: white;
+ padding: 8px;
+ text-decoration: none;
+ border-radius: 0 0 4px 4px;
+ z-index: 1000;
+}
+
+.skip-link:focus {
+ top: 0;
+}
+
+/* High contrast mode support */
+@media (prefers-contrast: high) {
+ :root {
+ --primary-600: #000;
+ --primary-700: #333;
+ --gray-600: #000;
+ --gray-700: #000;
+ }
+}
+
+/* Reduced motion */
+@media (prefers-reduced-motion: reduce) {
+ * {
+ animation-duration: 0.01ms !important;
+ transition-duration: 0.01ms !important;
+ }
+
+ .copy-btn:hover {
+ transform: none !important;
+ }
+}
+
+/* Print styles */
+@media print {
+ .highlight {
+ border: 1px solid #ccc;
+ background: #f5f5f5 !important;
+ }
+
+ .wy-nav-side {
+ display: none !important;
+ }
+
+ .copy-btn {
+ display: none !important;
+ }
+}
diff --git a/docs/_static/layout-manager.js b/docs/_static/layout-manager.js
new file mode 100644
index 0000000..eabcac5
--- /dev/null
+++ b/docs/_static/layout-manager.js
@@ -0,0 +1,191 @@
+/**
+ * ConfOpt Documentation - Simplified Layout Manager
+ * Minimal JavaScript for enhanced UX without breaking RTD functionality
+ */
+
+(function() {
+ 'use strict';
+
+ // Simple debounce utility
+ function debounce(func, wait) {
+ let timeout;
+ return function executedFunction(...args) {
+ const later = () => {
+ clearTimeout(timeout);
+ func(...args);
+ };
+ clearTimeout(timeout);
+ timeout = setTimeout(later, wait);
+ };
+ }
+
+ // Enhance search input with better UX
+ function enhanceSearchInput() {
+ const searchInput = document.querySelector('.wy-side-nav-search input[type="text"]');
+ if (!searchInput) return;
+
+ // Add placeholder text if not already set
+ if (!searchInput.placeholder) {
+ searchInput.placeholder = 'Search documentation...';
+ }
+
+ // Add smooth focus/blur animations
+ searchInput.addEventListener('focus', function() {
+ this.parentElement.classList.add('search-focused');
+ });
+
+ searchInput.addEventListener('blur', function() {
+ this.parentElement.classList.remove('search-focused');
+ });
+ }
+
+ // Add smooth scroll behavior for navigation links
+ function enhanceNavigation() {
+ const navLinks = document.querySelectorAll('.wy-menu-vertical a[href^="#"]');
+
+ navLinks.forEach(link => {
+ link.addEventListener('click', function(e) {
+ const href = this.getAttribute('href');
+ const target = document.querySelector(href);
+
+ if (target) {
+ e.preventDefault();
+ target.scrollIntoView({
+ behavior: 'smooth',
+ block: 'start'
+ });
+
+ // Update URL without jumping
+ history.pushState(null, null, href);
+ }
+ });
+ });
+ }
+
+ // Add copy button functionality for code blocks (if sphinx-copybutton is not available)
+ function addCopyButtons() {
+ // Only add if sphinx-copybutton is not already present
+ if (document.querySelector('.copybtn')) return;
+
+ const codeBlocks = document.querySelectorAll('.highlight pre');
+
+ codeBlocks.forEach(block => {
+ const button = document.createElement('button');
+ button.className = 'copy-btn';
+ button.innerHTML = '📋';
+ button.title = 'Copy to clipboard';
+ button.style.cssText = `
+ position: absolute;
+ top: 8px;
+ right: 8px;
+ background: var(--primary-600);
+ color: white;
+ border: none;
+ border-radius: 4px;
+ padding: 4px 8px;
+ font-size: 12px;
+ cursor: pointer;
+ opacity: 0.7;
+ transition: opacity 0.2s;
+ `;
+
+ button.addEventListener('click', async function() {
+ try {
+ await navigator.clipboard.writeText(block.textContent);
+ button.innerHTML = '✅';
+ button.title = 'Copied!';
+ setTimeout(() => {
+ button.innerHTML = '📋';
+ button.title = 'Copy to clipboard';
+ }, 2000);
+ } catch (err) {
+ console.warn('Could not copy text: ', err);
+ }
+ });
+
+ button.addEventListener('mouseenter', function() {
+ this.style.opacity = '1';
+ });
+
+ button.addEventListener('mouseleave', function() {
+ this.style.opacity = '0.7';
+ });
+
+ // Add button to code block container
+ const container = block.parentElement;
+ container.style.position = 'relative';
+ container.appendChild(button);
+ });
+ }
+
+ // Add keyboard navigation enhancement
+ function enhanceKeyboardNavigation() {
+ document.addEventListener('keydown', function(e) {
+ // Alt + S to focus search
+ if (e.altKey && e.key === 's') {
+ e.preventDefault();
+ const searchInput = document.querySelector('.wy-side-nav-search input[type="text"]');
+ if (searchInput) {
+ searchInput.focus();
+ searchInput.select();
+ }
+ }
+
+ // Escape to blur search
+ if (e.key === 'Escape') {
+ const searchInput = document.querySelector('.wy-side-nav-search input[type="text"]:focus');
+ if (searchInput) {
+ searchInput.blur();
+ }
+ }
+ });
+ }
+
+ // Add loading state for better perceived performance
+ function addLoadingStates() {
+ // Add loading class to body initially
+ document.body.classList.add('loading');
+
+ // Remove loading class when everything is ready
+ window.addEventListener('load', function() {
+ setTimeout(() => {
+ document.body.classList.remove('loading');
+ document.body.classList.add('loaded');
+ }, 100);
+ });
+ }
+
+ // Main initialization function
+ function init() {
+ try {
+ enhanceSearchInput();
+ enhanceNavigation();
+ addCopyButtons();
+ enhanceKeyboardNavigation();
+ addLoadingStates();
+ } catch (error) {
+ console.warn('ConfOpt Layout Manager: Some enhancements failed to initialize:', error);
+ }
+ }
+
+ // Initialize when DOM is ready
+ if (document.readyState === 'loading') {
+ document.addEventListener('DOMContentLoaded', init);
+ } else {
+ init();
+ }
+
+ // Handle page changes for single-page applications
+ window.addEventListener('popstate', debounce(init, 100));
+
+ // Export for debugging (optional)
+ if (typeof window !== 'undefined') {
+ window.ConfOptLayoutManager = {
+ init,
+ enhanceSearchInput,
+ enhanceNavigation,
+ addCopyButtons
+ };
+ }
+
+})();
diff --git a/docs/_templates/.gitkeep b/docs/_templates/.gitkeep
new file mode 100644
index 0000000..4c8c53e
--- /dev/null
+++ b/docs/_templates/.gitkeep
@@ -0,0 +1,2 @@
+# This file ensures the _templates directory is tracked by git
+# even when it's empty. Sphinx uses this directory for custom templates.
diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst
new file mode 100644
index 0000000..028ed68
--- /dev/null
+++ b/docs/advanced_usage.rst
@@ -0,0 +1,134 @@
+Advanced Usage
+==============
+
+This guide shows how to use ConfOpt's advanced features to customize and accelerate your optimization process. Each section builds on the basics, with clear code and explanations.
+
+Custom Searchers
+----------------
+
+ConfOpt lets you define custom searchers to control how new configurations are selected.
+A searcher is made up of a quantile estimator (surrogate model) and a sampler (acquisition function).
+
+A searcher can be instantiated via the ``QuantileConformalSearcher`` class.
+
+To create a custom searcher with a custom estimator architecture and sampler, select from the following:
+
+**Estimator Architectures**
+
+Estimator architectures determine the framework used to build the surrogate model.
+
+You can choose from the following architectures:
+
+* ``"qrf"``: Quantile Random Forest
+* ``"qgbm"``: Quantile Gradient Boosting Machine
+* ``"qknn"``: Quantile K-Nearest Neighbors
+* ``"qgp"``: Quantile Gaussian Process
+* ``"ql"``: Quantile Lasso
+* ``"qens5"``: Quantile Ensemble of 3 models (QGBM, QGP, QL)
+
+**Samplers**
+
+Samplers dictate which configuration to try next, driven by some base acquisition function.
+
+You can use the following samplers:
+
+* ``LowerBoundSampler``: Lower confidence bounds with exploration decay (good for fast convergence on simple problems)
+* ``ThompsonSampler``: Posterior sampling for exploration (good for balancing exploration and exploitation)
+* ``ExpectedImprovementSampler``: Expected improvement over current best (good for both fast convergence and exploration)
+
+**Example:**
+
+Let's use a ``QuantileConformalSearcher`` with a ``LowerBoundSampler`` and a Quantile Random Forest surrogate (``"qrf"``) estimator:
+
+.. code-block:: python
+
+ from confopt.selection.acquisition import QuantileConformalSearcher
+ from confopt.selection.sampling.bound_samplers import LowerBoundSampler
+
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="qrf",
+ sampler=LowerBoundSampler(
+ interval_width=0.8, # Width of the confidence interval to use as the lower bound,
+ adapter="DtACI", # Conformal adapter to use for calibration
+ beta_decay="logarithmic_decay", # Lower Bound Sampling decay function
+ c=1.0 # Lower Bound Sampling Decay rate
+ )
+ )
+
+And pass our custom searcher to the tuner to use it:
+
+.. code-block:: python
+
+ from confopt.tuning import ConformalTuner
+
+ tuner = ConformalTuner(
+ objective_function=objective_function,
+ search_space=search_space,
+ minimize=False,
+ )
+
+ tuner.tune(
+ searcher=searcher,
+ max_searches=100,
+ n_random_searches=20,
+ verbose=True
+ )
+
+Warm Starting
+-------------
+
+Warm starting lets you begin optimization with configurations you've already evaluated. This can speed up convergence by using prior knowledge.
+
+**How It Works**
+
+* Warm start configurations are ingested before random search.
+* They count toward the ``n_random_searches`` budget.
+* They help train the initial surrogate model.
+
+**Example:**
+
+.. code-block:: python
+
+ warm_start_configs = [
+ ({'n_estimators': 100, 'max_depth': 8}, 0.95), # (hyperparameter configuration, objective value)
+ ({'n_estimators': 150, 'max_depth': 6}, 0.93),
+ ({'n_estimators': 80, 'max_depth': 10}, 0.91)
+ ]
+
+ tuner = ConformalTuner(
+ objective_function=objective_function,
+ search_space=search_space,
+ minimize=False,
+ warm_starts=warm_start_configs
+ )
+
+ tuner.tune(n_random_searches=10, max_searches=50)
+
+Optimizers
+----------
+
+Optimizers control how the surrogate models tune their own hyperparameters.
+
+**Optimizer Frameworks**
+
+* ``None``: No tuning.
+* ``'decaying'``: Tune parameters with increasing intervals over time, using configurable decay functions (linear, exponential, or logarithmic).
+* ``'fixed'``: Tune parameters after each sampling episode, with a fixed number (10) of hyperparameter combinations.
+
+**Which Should I Use?**
+
+* Use ``None`` if the model you want to tune (not the surrogate model) trains very quickly (less than 10 seconds) or on little data.
+* Use ``'decaying'`` if you want adaptive tuning that starts intensive and becomes less frequent over time.
+* Use ``'fixed'`` if you want consistent tuning behavior throughout the optimization process.
+
+If your optimization is taking unexpectedly long on the ``'decaying'`` or ``'fixed'`` optimizers, try switching to ``None``.
+
+**Example:**
+
+.. code-block:: python
+
+ tuner.tune(
+ optimizer_framework='decaying',
+ max_searches=200,
+ verbose=True
+ )
diff --git a/docs/api_reference.rst b/docs/api_reference.rst
new file mode 100644
index 0000000..0492e64
--- /dev/null
+++ b/docs/api_reference.rst
@@ -0,0 +1,91 @@
+API Reference
+-------------
+
+ConformalTuner
+==============
+
+.. currentmodule:: confopt.tuning
+
+.. _conformaltuner:
+
+.. autoclass:: ConformalTuner
+ :members:
+ :exclude-members: __init__
+ :noindex:
+
+Parameter Ranges
+================
+
+.. currentmodule:: confopt.wrapping
+
+.. _intrange:
+
+IntRange
+~~~~~~~~
+.. autoclass:: IntRange
+ :members:
+ :noindex:
+
+.. _floatrange:
+
+FloatRange
+~~~~~~~~~~
+.. autoclass:: FloatRange
+ :members:
+ :noindex:
+
+.. _categoricalrange:
+
+CategoricalRange
+~~~~~~~~~~~~~~~~
+.. autoclass:: CategoricalRange
+ :members:
+ :noindex:
+
+QuantileConformalSearcher
+=========================
+
+.. currentmodule:: confopt.selection.acquisition
+
+.. autoclass:: QuantileConformalSearcher
+ :members:
+ :exclude-members: __init__
+ :noindex:
+
+Samplers
+========
+
+.. currentmodule:: confopt.selection.sampling.bound_samplers
+
+PessimisticLowerBoundSampler
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autoclass:: PessimisticLowerBoundSampler
+ :members:
+ :exclude-members: __init__
+ :noindex:
+
+LowerBoundSampler
+~~~~~~~~~~~~~~~~~
+.. autoclass:: LowerBoundSampler
+ :members:
+ :exclude-members: __init__
+ :noindex:
+
+.. currentmodule:: confopt.selection.sampling.thompson_samplers
+
+ThompsonSampler
+~~~~~~~~~~~~~~~
+.. autoclass:: ThompsonSampler
+ :members:
+ :exclude-members: __init__
+ :noindex:
+
+ExpectedImprovementSampler
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. currentmodule:: confopt.selection.sampling.expected_improvement_samplers
+
+.. autoclass:: ExpectedImprovementSampler
+ :members:
+ :exclude-members: __init__
+ :noindex:
diff --git a/docs/architecture.rst b/docs/architecture.rst
new file mode 100644
index 0000000..9c527e5
--- /dev/null
+++ b/docs/architecture.rst
@@ -0,0 +1,485 @@
+Architecture
+============
+
+Module Dependency Structure
+---------------------------
+
+The following diagram shows the directional module dependencies within the confopt package.
+Module paths are shown without the ``confopt.`` prefix for clarity.
+
+.. mermaid::
+
+ graph TD
+ subgraph "Core Layer"
+ tuning["tuning"]
+ wrapping["wrapping"]
+ end
+
+ subgraph "Utils Layer"
+ utils_preprocessing["utils.preprocessing"]
+ utils_tracking["utils.tracking"]
+ utils_optimization["utils.optimization"]
+
+ subgraph "Configuration Utilities"
+ utils_configurations_encoding["utils.configurations.encoding"]
+ utils_configurations_sampling["utils.configurations.sampling"]
+ utils_configurations_utils["utils.configurations.utils"]
+ end
+ end
+
+ subgraph "Selection Layer"
+ selection_acquisition["selection.acquisition"]
+ selection_conformalization["selection.conformalization"]
+ selection_estimation["selection.estimation"]
+ selection_estimator_configuration["selection.estimator_configuration"]
+ selection_adaptation["selection.adaptation"]
+
+ subgraph "Estimator Implementations"
+ selection_estimators_quantile_estimation["selection.estimators.quantile_estimation"]
+ selection_estimators_ensembling["selection.estimators.ensembling"]
+ end
+
+ subgraph "Sampling Strategies"
+ selection_sampling_bound_samplers["selection.sampling.bound_samplers"]
+ selection_sampling_thompson_samplers["selection.sampling.thompson_samplers"]
+ selection_sampling_expected_improvement_samplers["selection.sampling.expected_improvement_samplers"]
+
+ selection_sampling_utils["selection.sampling.utils"]
+ end
+ end
+
+ %% Core Dependencies
+ tuning --> wrapping
+ tuning --> utils_preprocessing
+ tuning --> utils_tracking
+ tuning --> utils_optimization
+ tuning --> selection_acquisition
+
+ %% Utils Dependencies
+ utils_tracking --> wrapping
+ utils_tracking --> utils_configurations_encoding
+ utils_tracking --> utils_configurations_sampling
+ utils_tracking --> utils_configurations_utils
+
+ utils_configurations_sampling --> wrapping
+ utils_configurations_sampling --> utils_configurations_utils
+ utils_configurations_encoding --> wrapping
+
+ %% Selection Layer Dependencies
+ selection_acquisition --> selection_conformalization
+ selection_acquisition --> selection_sampling_bound_samplers
+ selection_acquisition --> selection_sampling_thompson_samplers
+ selection_acquisition --> selection_sampling_expected_improvement_samplers
+
+ selection_acquisition --> selection_estimation
+
+ selection_conformalization --> wrapping
+ selection_conformalization --> utils_preprocessing
+ selection_conformalization --> selection_estimation
+ selection_conformalization --> selection_estimator_configuration
+
+ selection_estimation --> selection_estimator_configuration
+ selection_estimation --> selection_estimators_quantile_estimation
+ selection_estimation --> selection_estimators_ensembling
+ selection_estimation --> utils_configurations_sampling
+
+ selection_estimator_configuration --> wrapping
+ selection_estimator_configuration --> selection_estimators_quantile_estimation
+ selection_estimator_configuration --> selection_estimators_ensembling
+
+ selection_estimators_ensembling --> selection_estimators_quantile_estimation
+
+ %% Sampling Dependencies
+ selection_sampling_bound_samplers --> selection_sampling_utils
+ selection_sampling_thompson_samplers --> wrapping
+ selection_sampling_thompson_samplers --> selection_sampling_utils
+ selection_sampling_expected_improvement_samplers --> wrapping
+ selection_sampling_expected_improvement_samplers --> selection_sampling_utils
+
+
+ selection_sampling_utils --> selection_adaptation
+ selection_sampling_utils --> wrapping
+
+ %% Styling
+ style tuning fill:#ff6b6b
+ style wrapping fill:#4ecdc4
+ style utils_preprocessing fill:#45b7d1
+ style utils_tracking fill:#45b7d1
+ style utils_optimization fill:#45b7d1
+ style selection_acquisition fill:#96ceb4
+ style selection_conformalization fill:#96ceb4
+ style selection_estimation fill:#96ceb4
+
+Module Organization and Flow
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Core Orchestration**
+ The ``tuning`` module contains ``ConformalTuner`` which orchestrates the entire optimization process. It depends on data structures from ``wrapping`` and coordinates all other layers.
+
+**Utilities Layer**
+ * ``utils.preprocessing``: Data splitting utilities
+ * ``utils.tracking``: Experiment management and progress monitoring
+ * ``utils.optimization``: Searcher optimization algorithms
+ * ``utils.configurations.*``: Parameter encoding, sampling, and hashing utilities
+
+**Selection Layer**
+ * ``selection.acquisition``: Main acquisition function interface and implementations
+ * ``selection.conformalization``: Conformal prediction estimators and calibration
+ * ``selection.estimation``: Hyperparameter tuning and model selection
+ * ``selection.estimator_configuration``: Registry and configuration for all estimators
+ * ``selection.estimators.*``: Quantile regression and ensemble implementations
+ * ``selection.sampling.*``: Acquisition sampling strategies and utilities
+ * ``selection.adaptation``: Adaptive alpha adjustment mechanisms
+
+**Dependency Flow Patterns**
+ Data flows from ``tuning`` through ``utils`` to ``selection`` layers. The ``wrapping`` module provides shared data structures used across all layers. Configuration utilities support both experiment tracking and model selection processes.
+
+Detailed Dependency Structure
+-----------------------------
+
+The following diagram shows the complete end-to-end flow with class and method interactions:
+
+.. mermaid::
+
+ graph TD
+ subgraph "Main Orchestration"
+ CT["ConformalTuner
tune()
random_search()
conformal_search()
_evaluate_configuration()"]
+ STOP["stop_search()
check_objective_function()"]
+ end
+
+ subgraph "Experiment Management"
+ STUDY["Study
append_trial()
batch_append_trials()
get_best_configuration()
get_best_performance()
get_searched_configurations()
get_searched_performances()
get_average_target_model_runtime()"]
+ TRIAL["Trial
iteration
timestamp
configuration
performance
acquisition_source
lower_bound
upper_bound
searcher_runtime
target_model_runtime"]
+ RT["RuntimeTracker
pause_runtime()
resume_runtime()
return_runtime()"]
+ PBM["ProgressBarManager
create_progress_bar()
update_progress()
close_progress_bar()"]
+ end
+
+ subgraph "Configuration Management"
+ BCM["BaseConfigurationManager
mark_as_searched()
tabularize_configs()
listify_configs()
add_to_banned_configurations()"]
+ SCM["StaticConfigurationManager
get_searchable_configurations()
get_searchable_configurations_count()"]
+ DCM["DynamicConfigurationManager
get_searchable_configurations()
get_searchable_configurations_count()"]
+ CE["ConfigurationEncoder
transform()
_build_encoding_schema()
_create_feature_matrix()"]
+ GTC["get_tuning_configurations()
_uniform_sampling()
_sobol_sampling()"]
+ CCH["create_config_hash()
hash_generation()"]
+ end
+
+ subgraph "Acquisition Layer"
+ BCS["BaseConformalSearcher
predict()
update()
get_interval()
_calculate_betas()"]
+ QCS["QuantileConformalSearcher
fit()
_predict_with_ucb()
_predict_with_thompson()
_predict_with_pessimistic_lower_bound()
_predict_with_expected_improvement()"]
+ end
+
+ subgraph "Conformal Prediction"
+ QCE["QuantileConformalEstimator
fit()
predict_intervals()
calculate_betas()
update_alphas()
_fit_non_conformal()
_fit_cv_plus()
_fit_train_test_split()"]
+ DTACI["DtACI
update()
pinball_loss()"]
+ SACS["set_calibration_split()
alpha_to_quantiles()"]
+ end
+
+ subgraph "Hyperparameter Tuning"
+ RT_TUNER["RandomTuner
tune()
_create_fold_indices()
_score_configurations()
_fit_model()
_evaluate_model()"]
+ PT["PointTuner
tune()
_fit_model()
_evaluate_model()"]
+ QT["QuantileTuner
_fit_model()
_evaluate_model()"]
+ IE["initialize_estimator()
estimator_creation()"]
+ ASCF["average_scores_across_folds()
score_aggregation()"]
+ end
+
+ subgraph "Estimator Registry"
+ ER["ESTIMATOR_REGISTRY
rf, gbm, kr, knn
qgbm, qrf, qknn, ql, qgp, qleaf
qens1, qens2, qens3, qens4, qens5"]
+ EC["EstimatorConfig
estimator_name
estimator_class
default_params
estimator_parameter_space
ensemble_components
is_ensemble_estimator()
is_quantile_estimator()"]
+ end
+
+ subgraph "Quantile Estimators"
+ BMFQE["BaseMultiFitQuantileEstimator
fit()
_fit_quantile_estimator()"]
+ BSFQE["BaseSingleFitQuantileEstimator
fit()
_fit_implementation()"]
+ QL["QuantileLasso
fit()
predict_quantiles()"]
+ QG["QuantileGBM
fit()
predict_quantiles()"]
+ QF["QuantileForest
fit()
predict_quantiles()"]
+ QK["QuantileKNN
fit()
predict_quantiles()"]
+ QGP["QuantileGP
fit()
predict_quantiles()"]
+ QLeaf["QuantileLeaf
fit()
predict_quantiles()"]
+ end
+
+ subgraph "Ensemble Methods"
+ BEE["BaseEnsembleEstimator
fit()
predict()"]
+ PEE["PointEnsembleEstimator
fit()
predict()
_compute_point_weights()
_compute_linear_stack_weights()
_get_stacking_training_data()"]
+ QEE["QuantileEnsembleEstimator
fit()
predict()
_compute_quantile_weights()
_compute_linear_stack_weights()
_get_stacking_training_data()"]
+ QLM["QuantileLassoMeta
fit()
predict()
_quantile_loss_objective()"]
+ end
+
+ subgraph "Sampling Strategies"
+ LBS["LowerBoundSampler
calculate_ucb_predictions()
update_exploration_step()
fetch_alphas()
update_interval_width()"]
+ PLBS["PessimisticLowerBoundSampler
fetch_alphas()
update_interval_width()"]
+ TS["ThompsonSampler
calculate_thompson_predictions()
fetch_alphas()
update_interval_width()"]
+ EIS["ExpectedImprovementSampler
calculate_expected_improvement()
update_best_value()
fetch_alphas()
update_interval_width()"]
+ end
+
+ subgraph "Sampling Utilities"
+ IQA["initialize_quantile_alphas()
alpha_generation()"]
+ IMA["initialize_multi_adapters()
adapter_creation()"]
+ ISA["initialize_single_adapter()
single_adapter_setup()"]
+ UMIW["update_multi_interval_widths()
width_updates()"]
+ USIW["update_single_interval_width()
single_width_update()"]
+ FCB["flatten_conformal_bounds()
bounds_flattening()"]
+ VEQ["validate_even_quantiles()
quantile_validation()"]
+ end
+
+ subgraph "Data Processing"
+ TVS["train_val_split()
data_splitting()"]
+ end
+
+ subgraph "Searcher Optimization"
+ DSO["DecayingSearcherOptimizer
select_arm()
update()
_calculate_current_interval()"]
+ FSO["FixedSearcherOptimizer
select_arm()
update()"]
+ end
+
+ subgraph "Parameter Structures"
+ PR["ParameterRange
IntRange
FloatRange
CategoricalRange"]
+ CB["ConformalBounds
lower_bounds
upper_bounds"]
+ end
+
+ %% Main Flow Connections
+ CT --> STUDY
+ CT --> RT
+ CT --> PBM
+ CT --> SCM
+ CT --> DCM
+ CT --> QCS
+ CT --> TVS
+ CT --> DSO
+ CT --> FSO
+ CT --> STOP
+
+ %% Configuration Management Flow
+ STUDY --> TRIAL
+ STUDY --> CE
+ STUDY --> GTC
+ STUDY --> CCH
+ BCM --> SCM
+ BCM --> DCM
+ SCM --> GTC
+ DCM --> GTC
+ DCM --> DSO
+
+ %% Acquisition Flow
+ QCS --> QCE
+ BCS --> LBS
+ BCS --> PLBS
+ BCS --> TS
+ BCS --> EIS
+
+ %% Conformal Prediction Flow
+ QCE --> QT
+ QCE --> IE
+ QCE --> DTACI
+ QCE --> SACS
+ QCS --> SACS
+ DTACI --> SACS
+
+ %% Hyperparameter Tuning Flow
+ RT_TUNER --> IE
+ RT_TUNER --> ASCF
+ PT --> RT_TUNER
+ PT --> ER
+ QT --> RT_TUNER
+ QT --> ER
+ IE --> ER
+ IE --> EC
+
+ %% Estimator Flow
+ ER --> EC
+ EC --> BMFQE
+ EC --> BSFQE
+ BMFQE --> QL
+ BMFQE --> QG
+ BSFQE --> QF
+ BSFQE --> QK
+ BSFQE --> QGP
+ BSFQE --> QLeaf
+ EC --> BEE
+ BEE --> PEE
+ BEE --> QEE
+
+ %% Ensemble Flow
+ PEE --> BMFQE
+ PEE --> BSFQE
+ QEE --> BMFQE
+ QEE --> BSFQE
+ QEE --> QLM
+
+ %% Sampling Utilities Flow
+ LBS --> IQA
+ LBS --> VEQ
+ PLBS --> IQA
+ PLBS --> VEQ
+ TS --> IQA
+ TS --> IMA
+ TS --> ISA
+ TS --> VEQ
+ EIS --> IQA
+ EIS --> UMIW
+ EIS --> USIW
+ EIS --> VEQ
+
+ %% Adaptive Flow
+ IMA --> DTACI
+ ISA --> DTACI
+ UMIW --> DTACI
+ USIW --> DTACI
+
+ %% Data Structure Flow
+ CT --> PR
+ QCE --> CB
+ LBS --> CB
+ PLBS --> CB
+ TS --> CB
+ EIS --> CB
+
+ %% Styling
+ style CT fill:#ff6b6b
+ style QCS fill:#4ecdc4
+ style QCE fill:#45b7d1
+ style DSO fill:#96ceb4
+ style STUDY fill:#feca57
+
+End-to-End Execution Flow
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Step 1: Initialization and Setup**
+
+When ``ConformalTuner.tune()`` starts, it creates a ``Study`` object to track all trials and results. The study initializes a ``RuntimeTracker`` for timing and ``ProgressBarManager`` for user feedback. Parameter spaces are defined using ``ParameterRange`` objects (``IntRange``, ``FloatRange``, ``CategoricalRange``) which specify search bounds and types.
+
+Configuration management happens through either ``StaticConfigurationManager`` (for predefined configurations) or ``DynamicConfigurationManager`` (for adaptive suggestions). The ``ConfigurationEncoder`` handles conversion between different parameter representations, while ``get_tuning_configurations()`` generates initial parameter samples using uniform or Sobol sequences.
+
+**Step 2: Acquisition Function Setup**
+
+The system uses quantile-based conformal prediction for acquisition:
+
+* ``QuantileConformalSearcher`` - uses direct quantile estimation
+
+This inherits from ``BaseConformalSearcher`` which provides the common interface for ``predict()``, ``update()``, and ``get_interval()`` methods.
+
+**Conformal Estimator Initialization:**
+
+``QuantileConformalEstimator`` implements quantile-based conformal prediction using direct quantile estimation with conformal adjustment for coverage guarantees.
+
+**Step 3: Data Processing Pipeline**
+
+Raw input data flows through ``train_val_split()`` which creates training, validation, and calibration sets. This split data structure maintains proper separation required for conformal prediction coverage guarantees.
+
+For ``QuantileConformalEstimator``, the training data gets processed as:
+
+* Quantile estimation → trains quantile regression models for prediction intervals
+* Validation set → generates nonconformity scores for conformal calibration
+
+**Step 4: Hyperparameter Tuning Layer**
+
+The tuning hierarchy works as follows:
+
+.. code-block:: text
+
+ RandomTuner (base class)
+ ├── PointTuner (for point estimation)
+ └── QuantileTuner (for quantile estimation)
+
+``tune()`` handles the optimization process:
+
+1. Creates cross-validation folds through ``_create_fold_indices()``
+2. Scores configurations using ``_score_configurations()``
+3. Uses ``initialize_estimator()`` to create estimator instances from ``ESTIMATOR_REGISTRY``
+4. Performs cross-validation through ``_fit_model()`` and ``_evaluate_model()``
+5. Aggregates results using ``average_scores_across_folds()``
+6. Returns fitted estimator and best hyperparameters
+
+The ``ESTIMATOR_REGISTRY`` contains ``EstimatorConfig`` objects that define:
+
+* Architecture identifiers
+* Parameter ranges for hyperparameter search
+* Default parameter values
+* Estimator class references
+
+**Step 5: Estimator Implementation Layer**
+
+The system supports multiple quantile estimator types:
+
+**Individual Quantile Estimators:**
+
+* ``QuantileLasso`` - L1-regularized quantile regression
+* ``QuantileGBM`` - Gradient boosting for quantile estimation
+* ``QuantileForest`` - Random forest with quantile prediction
+* ``QuantileKNN`` - K-nearest neighbors for quantile estimation
+* ``QuantileGP`` - Gaussian process with quantile likelihood
+* ``QuantileLeaf`` - Leaf-based quantile estimation
+
+**Ensemble Estimators:**
+
+* ``BaseEnsembleEstimator`` - abstract base class for ensemble methods
+* ``PointEnsembleEstimator`` - combines multiple point estimators using weighted averaging with uniform or linear stacking strategies
+* ``QuantileEnsembleEstimator`` - combines multiple quantile estimators using uniform or linear stacking approaches
+* ``QuantileLassoMeta`` - specialized meta-learner for quantile ensemble optimization using Lasso regression
+
+Ensemble implementations support multiple weighting strategies:
+- Uniform weighting for simple averaging
+- Linear stacking with cross-validation optimization
+- Lasso-based meta-learning for optimal weight computation
+
+**Step 6: Acquisition Strategy Execution**
+
+The ``BaseConformalSearcher.predict()`` method routes to strategy-specific implementations:
+
+**Acquisition Function Hierarchy:**
+
+.. code-block:: text
+
+ Acquisition Strategies
+ ├── LowerBoundSampler (Upper Confidence Bound)
+ ├── PessimisticLowerBoundSampler (Conservative Lower Bound)
+ ├── ThompsonSampler (Posterior Sampling)
+ └── ExpectedImprovementSampler (Expected Improvement)
+
+Each strategy calls specific methods:
+
+* ``LowerBoundSampler`` → ``calculate_ucb_predictions()``
+* ``ThompsonSampler`` → ``calculate_thompson_predictions()``
+* ``ExpectedImprovementSampler`` → ``calculate_expected_improvement()``
+
+
+All strategies use shared utilities from ``selection.sampling.utils``:
+
+* ``initialize_quantile_alphas()`` - sets up alpha levels
+* ``initialize_multi_adapters()`` / ``initialize_single_adapter()`` - configures adaptive mechanisms
+* ``update_multi_interval_widths()`` / ``update_single_interval_width()`` - adjusts interval sizes
+* ``flatten_conformal_bounds()`` - converts bounds to usable format
+
+**Step 7: Conformal Prediction and Interval Generation**
+
+The conformal estimators generate prediction intervals:
+
+1. ``fit()`` method trains on calibration data
+2. ``predict_intervals()`` generates ``ConformalBounds`` objects containing lower_bound, upper_bound, and alpha values
+3. ``calculate_betas()`` computes coverage feedback for adaptive adjustment
+
+**Step 8: Adaptive Feedback Loop**
+
+After each evaluation, the system updates:
+
+1. ``get_interval()`` retrieves prediction interval bounds for storage and analysis
+2. ``_calculate_betas()`` computes coverage statistics
+3. ``DtACI.update()`` adjusts significance levels based on coverage feedback
+4. ``pinball_loss()`` provides loss-based adaptation signals
+
+**Step 9: Trial Management and Optimization**
+
+Results flow back through the trial management system:
+
+1. ``_evaluate_configuration()`` executes the objective function
+2. ``append_trial()`` records results in the study
+3. ``get_best_configuration()`` retrieves current optimal configuration
+4. ``conformal_search()`` continues the optimization loop
+
+**Conformal Searcher Optimization**
+
+All conformal searchers require training on the accumulated configuration-to-performance pairs during search. The system provides different optimization strategies for determining when and how frequently to retrain the searchers:
+
+* ``DecayingSearcherOptimizer`` - increases tuning intervals over time using linear, exponential, or logarithmic decay functions
+* ``FixedSearcherOptimizer`` - maintains constant retraining intervals and tuning trial counts
+
+The system also supports disabling searcher optimization entirely for simpler use cases.
diff --git a/docs/basic_usage/classification_example.rst b/docs/basic_usage/classification_example.rst
new file mode 100644
index 0000000..cda1d98
--- /dev/null
+++ b/docs/basic_usage/classification_example.rst
@@ -0,0 +1,202 @@
+Classification Example
+=======================
+
+This example will show you how to use ConfOpt to optimize hyperparameters for a classification task.
+
+If you have already used hyperparameter tuning packages, the "Code Example" section below will give you a quick run-through of how to use ConfOpt. If not, don't worry, the "Detailed Walkthrough" section will explain everything step-by-step.
+
+Code Example
+------------
+
+1. Set up search space and objective function:
+
+.. code-block:: python
+
+
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange, CategoricalRange
+
+ from sklearn.ensemble import RandomForestClassifier
+
+ from sklearn.datasets import load_wine
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import accuracy_score
+
+ search_space = {
+ 'n_estimators': IntRange(min_value=50, max_value=200),
+ 'max_features': FloatRange(min_value=0.1, max_value=1.0),
+ 'criterion': CategoricalRange(choices=['gini', 'entropy', 'log_loss'])
+ }
+
+ def objective_function(configuration):
+ X, y = load_wine(return_X_y=True)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y, test_size=0.3, random_state=42, stratify=y
+ )
+
+ model = RandomForestClassifier(
+ n_estimators=configuration['n_estimators'],
+ max_features=configuration['max_features'],
+ criterion=configuration['criterion'],
+ random_state=42
+ )
+
+ model.fit(X_train, y_train)
+ predictions = model.predict(X_test)
+ score = accuracy_score(y_test, predictions)
+
+ return score
+
+2. Call ConfOpt to tune hyperparameters:
+
+.. code-block:: python
+
+ tuner = ConformalTuner(
+ objective_function=objective_function,
+ search_space=search_space,
+ minimize=False
+ )
+
+ tuner.tune(
+ max_searches=50,
+ n_random_searches=10,
+ verbose=True
+ )
+
+3. Extract results:
+
+.. code-block:: python
+
+ best_params = tuner.get_best_params()
+ best_accuracy = tuner.get_best_value()
+
+ tuned_model = RandomForestClassifier(**best_params, random_state=42)
+
+
+Detailed Walkthrough
+--------------------
+
+Imports
+~~~~~~~
+
+First, let's import everything we'll be needing:
+
+.. code-block:: python
+
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange, CategoricalRange
+
+ from sklearn.ensemble import RandomForestClassifier
+
+ from sklearn.datasets import load_wine
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import accuracy_score
+
+For this tutorial, we'll be using the sklearn Wine dataset and trying to tune the hyperparameters of a ``RandomForestClassifier``.
+
+Search Space
+~~~~~~~~~~~~
+
+Next, we need to define the hyperparameter space we want ``confopt`` to optimize over.
+
+This is done using the :ref:`IntRange `, :ref:`FloatRange `, and :ref:`CategoricalRange ` classes, which specify the ranges for each hyperparameter.
+Below let's define a simple example with one of each type of hyperparameter:
+
+.. code-block:: python
+
+ search_space = {
+ 'n_estimators': IntRange(min_value=50, max_value=200),
+ 'max_features': FloatRange(min_value=0.1, max_value=1.0),
+ 'criterion': CategoricalRange(choices=['gini', 'entropy', 'log_loss'])
+ }
+
+
+This tells ``confopt`` to explore the following hyperparameter ranges:
+
+* ``n_estimators``: Number of trees in the forest (all integer values from 50 to 200)
+* ``max_features``: Fraction of features to consider at each split (any float between 0.1 and 1.0)
+* ``criterion``: Function to measure the quality of a split (choose from 'gini', 'entropy', or 'log_loss')
+
+
+Objective Function
+~~~~~~~~~~~~~~~~~~
+
+The objective function defines how the model trains and what metric you want to optimize for during hyperparameter search:
+
+.. code-block:: python
+
+ def objective_function(configuration):
+ X, y = load_wine(return_X_y=True)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y, test_size=0.3, random_state=42, stratify=y
+ )
+
+ model = RandomForestClassifier(
+ n_estimators=configuration['n_estimators'],
+ max_features=configuration['max_features'],
+ criterion=configuration['criterion'],
+ random_state=42
+ )
+
+ model.fit(X_train, y_train)
+ predictions = model.predict(X_test)
+ score = accuracy_score(y_test, predictions)
+
+ return score
+
+The objective function must take a single argument called ``configuration``, which is a dictionary containing a hyperparameter value for each hyperparameter name specified in your ``search_space``. The values will be chosen automatically by the tuner during optimization.
+
+The ``score`` can be any metric of your choosing (e.g., accuracy, log loss, F1 score, etc.). This is the value that ``confopt`` will try to optimize for.
+
+In this example, the data is loaded and split inside the objective function for simplicity, but you may prefer to load the data outside (to avoid reloading it for each configuration) and
+either pass the training and test sets as arguments using ``partial`` from the ``functools`` library, or reference them from the global scope.
+
+Running the Optimization
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+To start optimizing, first instantiate a :ref:`ConformalTuner ` by providing your objective function, search space, and the optimization direction:
+
+.. code-block:: python
+
+ tuner = ConformalTuner(
+ objective_function=objective_function,
+ search_space=search_space,
+ minimize=False # Use True for metrics like log loss
+ )
+
+The ``minimize`` parameter should be set to ``False`` if you want to maximize your metric (e.g., accuracy), or ``True`` if you want to minimize it (e.g., log loss).
+
+To actually kickstart the hyperparameter search, call:
+
+.. code-block:: python
+
+ tuner.tune(
+ max_searches=50,
+ n_random_searches=10,
+ verbose=True
+ )
+
+Where:
+
+* ``max_searches`` controls how many different hyperparameter configurations will be tried in total.
+* ``n_random_searches`` sets how many of those will be chosen randomly before the tuner switches to using smart optimization (e.g., ``max_searches=50`` and ``n_random_searches=10`` means the tuner will sample 10 random configurations, then 40 smart configurations).
+
+
+Getting the Results
+~~~~~~~~~~~~~~~~~~~
+
+
+After that runs, you can retrieve the best hyperparameters or the best score found respectively using ``get_best_params()`` and ``get_best_value()``:
+
+.. code-block:: python
+
+ best_params = tuner.get_best_params()
+ best_accuracy = tuner.get_best_value()
+
+Which you can use to instantiate a tuned version of your model:
+
+.. code-block:: python
+
+
+ tuned_model = RandomForestClassifier(**best_params, random_state=42)
diff --git a/docs/basic_usage/regression_example.rst b/docs/basic_usage/regression_example.rst
new file mode 100644
index 0000000..71ab574
--- /dev/null
+++ b/docs/basic_usage/regression_example.rst
@@ -0,0 +1,194 @@
+Regression Example
+==================
+
+This example will show you how to use ConfOpt to optimize hyperparameters for a regression task.
+
+If you have already used hyperparameter tuning packages, the "Code Example" section below will give you a quick run-through of how to use ConfOpt. If not, don't worry, the "Detailed Walkthrough" section will explain everything step-by-step.
+
+Code Example
+------------
+
+1. Set up search space and objective function:
+
+.. code-block:: python
+
+
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange, CategoricalRange
+ from sklearn.ensemble import RandomForestRegressor
+ from sklearn.datasets import load_diabetes
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error, r2_score
+
+ search_space = {
+ 'n_estimators': IntRange(min_value=50, max_value=200),
+ 'max_depth': IntRange(min_value=3, max_value=15),
+ 'min_samples_split': IntRange(min_value=2, max_value=10)
+ }
+
+ def objective_function(configuration):
+ X, y = load_diabetes(return_X_y=True)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y, test_size=0.3, random_state=42
+ )
+
+ model = RandomForestRegressor(
+ n_estimators=configuration['n_estimators'],
+ max_depth=configuration['max_depth'],
+ min_samples_split=configuration['min_samples_split'],
+ random_state=42
+ )
+
+ model.fit(X_train, y_train)
+ predictions = model.predict(X_test)
+ mse = mean_squared_error(y_test, predictions)
+ return mse # Lower is better (minimize MSE)
+
+2. Call ConfOpt to tune hyperparameters:
+
+.. code-block:: python
+
+ tuner = ConformalTuner(
+ objective_function=objective_function,
+ search_space=search_space,
+ minimize=True # Minimizing MSE
+ )
+
+ tuner.tune(
+ max_searches=50,
+ n_random_searches=10,
+ verbose=True
+ )
+
+3. Extract results:
+
+.. code-block:: python
+
+ best_params = tuner.get_best_params()
+ best_mse = tuner.get_best_value()
+
+ tuned_model = RandomForestRegressor(**best_params, random_state=42)
+
+Detailed Walkthrough
+--------------------
+
+Imports
+~~~~~~~
+
+First, let's import everything we'll be needing:
+
+.. code-block:: python
+
+ from confopt.tuning import ConformalTuner
+ from confopt.wrapping import IntRange, FloatRange, CategoricalRange
+
+ from sklearn.ensemble import RandomForestRegressor
+
+ from sklearn.datasets import load_diabetes
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error
+
+For this tutorial, we'll be using the sklearn Diabetes dataset and trying to tune the hyperparameters of a ``RandomForestRegressor``.
+
+Search Space
+~~~~~~~~~~~~
+
+Next, we need to define the hyperparameter space we want ``confopt`` to optimize over.
+
+This is done using the :ref:`IntRange `, :ref:`FloatRange `, and :ref:`CategoricalRange ` classes, which specify the ranges for each hyperparameter.
+
+Below let's define a simple example with a few typical hyperparameters for regression:
+
+.. code-block:: python
+
+ search_space = {
+ 'n_estimators': IntRange(min_value=50, max_value=200),
+ 'max_depth': IntRange(min_value=3, max_value=15),
+ 'min_samples_split': IntRange(min_value=2, max_value=10)
+ }
+
+This tells ``confopt`` to explore the following hyperparameter ranges:
+
+* ``n_estimators``: Number of trees in the forest (all integer values from 50 to 200)
+* ``max_depth``: Maximum tree depth (all integer values from 3 to 15)
+* ``min_samples_split``: Minimum samples to split a node (all integer values from 2 to 10)
+
+Objective Function
+~~~~~~~~~~~~~~~~~~
+
+The objective function defines how the model trains and what metric you want to optimize for during hyperparameter search:
+
+.. code-block:: python
+
+ def objective_function(configuration):
+ X, y = load_diabetes(return_X_y=True)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y, test_size=0.3, random_state=42
+ )
+
+ model = RandomForestRegressor(
+ n_estimators=configuration['n_estimators'],
+ max_depth=configuration['max_depth'],
+ min_samples_split=configuration['min_samples_split'],
+ random_state=42
+ )
+
+ model.fit(X_train, y_train)
+ predictions = model.predict(X_test)
+ mse = mean_squared_error(y_test, predictions)
+ return mse # Lower is better (minimize MSE)
+
+
+The objective function must take a single argument called ``configuration``, which is a dictionary containing a value for each hyperparameter name specified in your ``search_space``. The values will be chosen automatically by the tuner during optimization.
+
+The returned value can be any metric of your choosing (e.g., MSE, R², MAE, etc.). This is the value that ``confopt`` will try to optimize for. For MSE, lower is better, so we minimize it.
+
+In this example, the data is loaded and split inside the objective function for simplicity, but you may prefer to load the data outside (to avoid reloading it for each configuration) and either pass the training and test sets as arguments using ``partial`` from the ``functools`` library, or reference them from the global scope.
+
+Running the Optimization
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+To start optimizing, first instantiate a :ref:`ConformalTuner ` by providing your objective function, search space, and the optimization direction:
+
+.. code-block:: python
+
+ tuner = ConformalTuner(
+ objective_function=objective_function,
+ search_space=search_space,
+ minimize=True # Minimizing MSE
+ )
+
+The ``minimize`` parameter should be set to ``True`` to minimize metrics where lower is better (e.g., MSE, MAE), or ``False`` to maximize metrics where higher is better (e.g., R²).
+
+To actually kickstart the hyperparameter search, call:
+
+.. code-block:: python
+
+ tuner.tune(
+ max_searches=50,
+ n_random_searches=10,
+ verbose=True
+ )
+
+Where:
+
+* ``max_searches`` controls how many different hyperparameter configurations will be tried in total.
+* ``n_random_searches`` sets how many of those will be chosen randomly before the tuner switches to using smart optimization (e.g., ``max_searches=50`` and ``n_random_searches=10`` means the tuner will sample 10 random configurations, then 40 smart configurations).
+
+Getting the Results
+~~~~~~~~~~~~~~~~~~~
+
+
+After that runs, you can retrieve the best hyperparameters or the best score found respectively using :meth:`~confopt.tuning.ConformalTuner.get_best_params` and :meth:`~confopt.tuning.ConformalTuner.get_best_value`:
+
+.. code-block:: python
+
+ best_params = tuner.get_best_params()
+ best_mse = tuner.get_best_value()
+
+Which you can use to instantiate a tuned version of your model:
+
+.. code-block:: python
+
+ tuned_model = RandomForestRegressor(**best_params, random_state=42)
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..084bc03
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,214 @@
+# Configuration file for the Sphinx documentation builder.
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath(".."))
+
+# RTD environment detection (optional, for any future customizations)
+on_rtd = os.environ.get("READTHEDOCS", None) == "True"
+rtd_version = os.environ.get("READTHEDOCS_VERSION", "latest")
+
+# -- Project information -----------------------------------------------------
+
+project = "ConfOpt"
+copyright = "2025, Riccardo Doyle"
+author = "Riccardo Doyle"
+release = "2.0.0"
+version = "2.0.0"
+
+# -- General configuration ---------------------------------------------------
+
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.napoleon",
+ "sphinx.ext.viewcode",
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.githubpages",
+ "myst_parser",
+ "sphinx_copybutton",
+ "sphinxcontrib.mermaid",
+]
+
+# MyST parser configuration
+myst_enable_extensions = [
+ "colon_fence",
+ "deflist",
+ "html_admonition",
+ "html_image",
+ "linkify",
+ "replacements",
+ "smartquotes",
+ "tasklist",
+]
+
+# Napoleon settings for Google-style docstrings
+napoleon_google_docstring = True
+napoleon_numpy_docstring = False
+napoleon_include_init_with_doc = False
+napoleon_include_private_with_doc = False
+napoleon_include_special_with_doc = True
+napoleon_use_admonition_for_examples = False
+napoleon_use_admonition_for_notes = False
+napoleon_use_admonition_for_references = False
+napoleon_use_ivar = False
+napoleon_use_param = True
+napoleon_use_rtype = True
+
+# Autodoc settings
+
+autodoc_default_options = {
+ "members": True,
+ "member-order": "bysource",
+ "special-members": "__init__",
+ "undoc-members": True,
+ "exclude-members": "__weakref__",
+}
+autodoc_typehints = "description"
+autodoc_class_attributes = False
+
+# Autosummary settings
+autosummary_generate = True
+autosummary_generate_overwrite = True
+
+# Intersphinx mapping
+intersphinx_mapping = {
+ "python": ("https://docs.python.org/3", None),
+ "numpy": ("https://numpy.org/doc/stable/", None),
+ "scipy": ("https://docs.scipy.org/doc/scipy/", None),
+ "sklearn": ("https://scikit-learn.org/stable/", None),
+}
+
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+# -- Options for HTML output -------------------------------------------------
+
+html_theme = "sphinx_rtd_theme"
+html_theme_options = {
+ "canonical_url": "https://confopt.readthedocs.io/",
+    "logo_only": True,  # Show only the logo in the sidebar header (hides the project title text)
+ "prev_next_buttons_location": "bottom",
+ "style_external_links": True,
+ "style_nav_header_background": "#db2777", # Match our pink theme
+ # Navigation options - optimized for usability
+ "collapse_navigation": False,
+ "sticky_navigation": True,
+ "navigation_depth": 3,
+ "includehidden": True,
+ "titles_only": False,
+ # Additional RTD theme options
+ "vcs_pageview_mode": "blob",
+ "navigation_with_keys": True,
+}
+
+html_static_path = ["_static"]
+html_css_files = ["custom.css"]
+html_js_files = ["layout-manager.js"]
+
+# GitHub integration
+html_context = {
+ "display_github": True,
+ "github_user": "rick12000",
+ "github_repo": "confopt",
+ "github_version": "main",
+ "conf_py_path": "/docs/",
+}
+
+# Custom logo and favicon
+html_logo = "../assets/logo.png"
+html_favicon = None # RTD will handle this
+
+# The root toctree document (updated from deprecated master_doc)
+root_doc = "index"
+
+# The name of the Pygments (syntax highlighting) style to use
+pygments_style = "sphinx"
+
+# If true, `todo` and `todoList` produce output, else they produce nothing
+todo_include_todos = False
+
+# Security and performance improvements
+tls_verify = True
+tls_cacerts = ""
+
+# Suppress warnings for external references that may not always be available
+suppress_warnings = [
+ "ref.doc",
+ "ref.ref",
+ "epub.unknown_project_files",
+]
+
+# Enable nitpicky mode for better link validation (but suppress known issues)
+nitpicky = True
+nitpick_ignore = [
+ ("py:class", "type"),
+ ("py:class", "object"),
+ ("py:class", "callable"),
+ ("py:class", "default=100"),
+ ("py:class", "default=None"),
+ ("py:class", "default=15"),
+ ("py:class", "default=1"),
+ ("py:class", "default=True"),
+ ("py:class", "confopt.wrapping.IntRange"),
+ ("py:class", "confopt.wrapping.FloatRange"),
+ ("py:class", "confopt.wrapping.CategoricalRange"),
+ ("py:class", "BaseConformalSearcher"),
+ ("py:class", "numpy.array"),
+ ("py:class", "sklearn.preprocessing._data.StandardScaler"),
+ ("py:class", "confopt.selection.acquisition.BaseConformalSearcher"),
+ ("py:class", "confopt.utils.tracking.ProgressBarManager"),
+ ("py:class", "confopt.selection.acquisition.QuantileConformalSearcher"),
+ ("py:class", "confopt.selection.sampling.bound_samplers.LowerBoundSampler"),
+ ("py:class", "confopt.selection.sampling.thompson_samplers.ThompsonSampler"),
+ (
+ "py:class",
+ "confopt.selection.sampling.bound_samplers.PessimisticLowerBoundSampler",
+ ),
+ (
+ "py:class",
+ "confopt.selection.sampling.expected_improvement_samplers.ExpectedImprovementSampler",
+ ),
+ ("py:class", "confopt.wrapping.ConformalBounds"),
+ ("py:class", "pydantic_core.core_schema.ValidationInfo"),
+ ("py:class", "ConfigDict"),
+ ("py:meth", "confopt.tuning.ConformalTuner.get_best_params"),
+ ("py:meth", "confopt.tuning.ConformalTuner.get_best_value"),
+]
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements = {
+ "papersize": "letterpaper",
+ "pointsize": "10pt",
+}
+
+latex_documents = [
+ (root_doc, "confopt.tex", "ConfOpt Documentation", author, "manual"),
+]
+
+# -- Options for manual page output ------------------------------------------
+
+man_pages = [(root_doc, "confopt", "ConfOpt Documentation", [author], 1)]
+
+# -- Options for Texinfo output ----------------------------------------------
+
+texinfo_documents = [
+ (
+ root_doc,
+ "confopt",
+ "ConfOpt Documentation",
+ author,
+ "confopt",
+        "Conformal hyperparameter optimization tool.",
+ "Miscellaneous",
+ ),
+]
+
+# -- Options for Epub output -------------------------------------------------
+
+epub_title = project
+epub_exclude_files = ["search.html", ".nojekyll", ".doctrees", "environment.pickle"]
diff --git a/docs/contact.rst b/docs/contact.rst
new file mode 100644
index 0000000..22c0761
--- /dev/null
+++ b/docs/contact.rst
@@ -0,0 +1,8 @@
+Contact
+=======
+
+🌟 **GitHub:** https://github.com/rick12000/confopt
+
+🛠️ **Support:** https://github.com/rick12000/confopt/issues
+
+📧 **Contribution Requests or Feedback:** r.doyle.edu@gmail.com
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
new file mode 100644
index 0000000..906fd7e
--- /dev/null
+++ b/docs/getting_started.rst
@@ -0,0 +1,13 @@
+Getting Started
+===============
+
+This section provides practical examples of using ConfOpt for different types of machine learning tasks.
+
+Each example provides a full code example, followed by a step-by-step explanation.
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Examples
+
+ basic_usage/classification_example
+ basic_usage/regression_example
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..5815ccc
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,57 @@
+.. image:: ../assets/logo.png
+ :align: center
+ :width: 250px
+
+
+`ConfOpt `_ is a flexible hyperparameter optimization library, blending the strengths of quantile regression with the calibration of conformal prediction.
+
+Find out how to **include it in your ML workflow** below! 👇
+
+.. toctree::
+ :maxdepth: 1
+ :caption: User Guide
+
+ installation
+ getting_started
+ advanced_usage
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Developer Guide
+ :hidden:
+
+ api_reference
+ architecture
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Other
+ :hidden:
+
+ roadmap
+ contact
+
+📈 Benchmarks
+=============
+
+.. image:: ../assets/benchmark_results.png
+ :align: center
+ :width: 450px
+ :alt: Benchmark Results
+
+**ConfOpt** is significantly better than plain old random search, but it also beats established tools like **Optuna** or traditional **Gaussian Processes**!
+
+The above benchmark considers neural architecture search on complex image recognition datasets (JAHS-201) and neural network tuning on tabular classification datasets (LCBench-L).
+
+For a fuller analysis of caveats and benchmarking results, refer to the latest methodological paper.
+
+🔬 Theory
+==========
+
+ConfOpt implements surrogate models and acquisition functions from the following papers:
+
+- **Adaptive Conformal Hyperparameter Optimization**: `arXiv, 2022 `_
+
+- **Optimizing Hyperparameters with Conformal Quantile Regression**: `PMLR, 2023 `_
+
+- **Enhancing Performance and Calibration in Quantile Hyperparameter Optimization**: `arXiv, 2025 `_
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 0000000..abb007e
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,16 @@
+Installation
+============
+
+Install `ConfOpt `_ using pip:
+
+.. code-block:: bash
+
+ pip install confopt
+
+Alternatively, for the latest development version, clone the repository and install it in editable mode:
+
+.. code-block:: bash
+
+ git clone https://github.com/rick12000/confopt.git
+ cd confopt
+ pip install -e .
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..2f00427
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,68 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.https://sphinx-doc.org/
+ exit /b 1
+)
+
+if "%1" == "" goto help
+if "%1" == "livehtml" goto livehtml
+if "%1" == "cleanhtml" goto cleanhtml
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+echo.
+echo.Additional targets:
+echo. livehtml Start live rebuild server using sphinx-autobuild
+echo. cleanhtml Clean build cache and rebuild HTML documentation
+goto end
+
+:livehtml
+echo Starting live documentation server...
+echo Clearing build cache...
+if exist "%BUILDDIR%" rd /s /q "%BUILDDIR%" 2>nul
+echo Performing initial clean build...
+%SPHINXBUILD% -E -a %SOURCEDIR% %BUILDDIR%\html %SPHINXOPTS% %O%
+echo Open http://localhost:8000 in your browser
+echo Press Ctrl+C to stop the server
+sphinx-autobuild %SOURCEDIR% %BUILDDIR%\html %SPHINXOPTS% %O% --host 0.0.0.0 --port 8000 --ignore "*.tmp" --ignore "*.swp" --ignore "*~" --watch %SOURCEDIR%
+if errorlevel 1 (
+ echo.
+ echo.sphinx-autobuild not found. Install with: pip install sphinx-autobuild
+ echo.Or use: build_docs.bat live
+ exit /b 1
+)
+goto end
+
+:cleanhtml
+echo Clearing build cache...
+if exist "%BUILDDIR%" rd /s /q "%BUILDDIR%" 2>nul
+echo Building HTML documentation with clean cache...
+%SPHINXBUILD% -E -a %SOURCEDIR% %BUILDDIR%\html %SPHINXOPTS% %O%
+echo.
+echo.Build finished. The HTML pages are in %BUILDDIR%\html.
+goto end
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..44090df
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,7 @@
+sphinx>=8.1.0
+sphinx-rtd-theme>=2.0.0
+myst-parser>=3.0.0
+sphinx-copybutton>=0.5.2
+sphinxcontrib-mermaid>=0.9.2
+linkify-it-py>=2.0.0
+sphinx-autobuild>=2024.2.4
diff --git a/docs/roadmap.rst b/docs/roadmap.rst
new file mode 100644
index 0000000..2aa17bd
--- /dev/null
+++ b/docs/roadmap.rst
@@ -0,0 +1,21 @@
+========
+Roadmap
+========
+
+Upcoming Features
+=================
+
+Functionality
+------------------------
+
+* **Multi Fidelity Support**: Enable single fidelity conformal searchers to adapt to multi-fidelity settings, allowing them to be competitive in settings where models can be partially trained and lower fidelities are predictive of full fidelity performance.
+* **Multi Objective Support**: Allow searchers to optimize for more than one objective (eg. accuracy and runtime).
+* **Transfer Learning Support**: Allow searchers to use a pretrained model or an observation matcher as a starting point for tuning.
+* **Local Search**: Expected Improvement sampler currently only performs one off configuration scoring. Local search (where a local neighbourhood around the initial EI optimum is explored as a second pass refinement) can significantly improve performance.
+* **Hierarchical Hyperparameters**: Improved handling for hierarchical hyperparameter spaces (currently supported, via flattening of the hyperparameters, but potentially suboptimal for surrogate learning)
+
+Resource Management
+---------------------
+
+* **Parallel Search Support**: Allow searchers to evaluate multiple configurations in parallel if compute allows.
+* **Smart Resource Usage**: Auto detect best amount of parallelism based on available resources and expected load.
diff --git a/examples/tabular_tuning.py b/examples/tabular_tuning.py
deleted file mode 100644
index e40490e..0000000
--- a/examples/tabular_tuning.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from sklearn.datasets import fetch_california_housing
-from sklearn.ensemble import RandomForestRegressor
-from confopt.tuning import ConformalSearcher
-
-# Set up toy data:
-X, y = fetch_california_housing(return_X_y=True)
-split_idx = int(len(X) * 0.5)
-X_train, y_train = X[:split_idx, :], y[:split_idx]
-X_val, y_val = X[split_idx:, :], y[split_idx:]
-
-# Define parameter search space:
-parameter_search_space = {
- "n_estimators": [10, 30, 50, 100, 150, 200, 300, 400],
- "min_samples_split": [0.005, 0.01, 0.1, 0.2, 0.3],
- "min_samples_leaf": [0.005, 0.01, 0.1, 0.2, 0.3],
- "max_features": [None, 0.8, 0.9, 1],
-}
-
-# Set up conformal searcher instance:
-searcher = ConformalSearcher(
- model=RandomForestRegressor(),
- X_train=X_train,
- y_train=y_train,
- X_val=X_val,
- y_val=y_val,
- search_space=parameter_search_space,
- prediction_type="regression",
-)
-
-# Carry out hyperparameter search:
-searcher.search(
- runtime_budget=120,
-)
-
-# Extract results, in the form of either:
-
-# 1. The best hyperparamter configuration found during search
-best_params = searcher.get_best_params()
-
-# 2. An initialized (but not trained) model object with the
-# best hyperparameter configuration found during search
-model_init = searcher.configure_best_model()
-
-# 3. A trained model with the best hyperparameter configuration
-# found during search
-model = searcher.fit_best_model()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..7222e74
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,54 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "confopt"
+version = "2.0.0"
+description = "Conformal hyperparameter optimization tool"
+readme = "README.md"
+authors = [
+ {name = "Riccardo Doyle", email = "r.doyle.edu@gmail.com"}
+]
+requires-python = ">=3.9"
+classifiers = [
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+ "numpy>=1.20.0",
+ "scikit-learn>=1.0.0",
+ "scipy>=1.7.0",
+ "pandas>=1.3.0",
+ "tqdm>=4.60.0",
+ "pydantic>=2.0.0",
+ "joblib>=1.0.0",
+ "statsmodels>=0.13.0"
+]
+
+[project.urls]
+Source = "https://github.com/rick12000/confopt"
+Documentation = "https://confopt.readthedocs.io"
+Changelog = "https://github.com/rick12000/confopt/releases"
+
+[project.optional-dependencies]
+dev = [
+ "pytest>=7.4.0",
+ "pytest-xdist>=3.0.0",
+ "pre-commit>=3.4.0",
+ "autoflake>=2.0.0",
+]
+docs = [
+ "sphinx>=5.0.0",
+ "sphinx-rtd-theme>=1.3.0",
+ "myst-parser>=2.0.0",
+ "sphinx-copybutton>=0.5.0",
+ "sphinxcontrib-mermaid>=0.8.0",
+ "sphinx-autobuild>=2024.10.3"
+]
+
+[tool.setuptools]
+packages = { find = { where = ["."] , include = ["confopt*"] } }
+include-package-data = true
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..7617853
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+ slow: marks tests as slow (deselect with '-m "not slow"')
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 8c19c2a..98353cf 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,2 +1,4 @@
-pytest==7.4.2
-pre-commit==3.4.0
+pytest>=7.4.0
+pytest-xdist>=3.0.0
+pre-commit>=3.4.0
+autoflake>=2.0.0
diff --git a/requirements.txt b/requirements.txt
index b30be02..de42639 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,8 @@
-numpy>=1.24.4
-scikit-learn>=1.3.2
-quantile-forest>=1.2.4
-tqdm>=4.66.1
-pandas>=2.0.3
+numpy>=1.20.0
+scikit-learn>=1.0.0
+scipy>=1.7.0
+pandas>=1.3.0
+tqdm>=4.60.0
+pydantic>=2.0.0
+joblib>=1.0.0
+statsmodels>=0.13.0
diff --git a/setup.py b/setup.py
deleted file mode 100644
index fe08e4c..0000000
--- a/setup.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from setuptools import setup, find_packages
-
-with open("README.md", "r") as f:
- long_description = f.read()
-
-setup(
- name="confopt",
- description="Conformal hyperparameter optimization tool",
- long_description=long_description,
- long_description_content_type="text/markdown",
- url="https://github.com/rick12000/confopt",
- author="Riccardo Doyle",
- author_email="r.doyle.edu@gmail.com",
- packages=find_packages(),
- version="1.0.2",
- license="Apache License 2.0",
- install_requires=[line.strip() for line in open("requirements.txt").readlines()],
- # TODO: Replace this with explicits
- classifiers=[
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
- "Programming Language :: Python :: 3.12",
- ],
-)
diff --git a/tests/conftest.py b/tests/conftest.py
index 28c11ee..0630043 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,164 +1,923 @@
import random
-from typing import Dict
import numpy as np
import pytest
-from sklearn.ensemble import GradientBoostingRegressor
-
-from confopt.estimation import (
- QuantileConformalRegression,
- LocallyWeightedConformalRegression,
+from typing import Dict
+from confopt.tuning import (
+ ConformalTuner,
+)
+from confopt.utils.configurations.sampling import get_tuning_configurations
+from confopt.selection.acquisition import QuantileConformalSearcher
+from confopt.selection.sampling.thompson_samplers import ThompsonSampler
+from confopt.wrapping import FloatRange, IntRange, CategoricalRange, ConformalBounds
+from sklearn.base import BaseEstimator
+from confopt.selection.estimator_configuration import (
+ ESTIMATOR_REGISTRY,
)
-from confopt.tuning import ConformalSearcher
-from confopt.utils import get_tuning_configurations
+from confopt.selection.estimators.quantile_estimation import (
+ BaseSingleFitQuantileEstimator,
+ BaseMultiFitQuantileEstimator,
+)
+from confopt.selection.estimators.ensembling import (
+ QuantileEnsembleEstimator,
+ PointEnsembleEstimator,
+)
+from unittest.mock import Mock
+from confopt.selection.adaptation import DtACI
DEFAULT_SEED = 1234
-# Dummy made up search space:
-DUMMY_PARAMETER_GRID: Dict = {
- "int_parameter": [1, 2, 3, 4, 5],
- "float_parameter": [1.1, 2.2, 3.3, 4.4],
-}
-# Dummy search space for a GBM model:
-DUMMY_GBM_PARAMETER_GRID: Dict = {
- "n_estimators": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
- "learning_rate": [0.1, 0.2, 0.3, 0.4, 0.5],
-}
+def build_estimator_architectures(amended: bool = False):
+ """Build estimator architecture lists from ESTIMATOR_REGISTRY.
+
+ Args:
+        amended: If True, creates modified versions with n_estimators=15 (and skips slow gp/qgp/qens estimators) for faster testing.
+ If False, creates standard architecture lists.
+
+ Returns:
+ Tuple containing:
+ - point_estimator_architectures: List of point estimator names
+ - single_fit_quantile_estimator_architectures: List of single-fit quantile estimator names
+ - multi_fit_quantile_estimator_architectures: List of multi-fit quantile estimator names
+ - quantile_estimator_architectures: List of all quantile estimator names
+ - estimator_registry: Registry of estimator configurations (amended if requested)
+ """
+ from copy import deepcopy
+
+ point_estimator_architectures = []
+ single_fit_quantile_estimator_architectures = []
+ multi_fit_quantile_estimator_architectures = []
+ quantile_estimator_architectures = []
+
+ # Create registry (amended if requested)
+ if amended:
+ estimator_registry = {}
+ for estimator_name, estimator_config in ESTIMATOR_REGISTRY.items():
+ amended_config = deepcopy(estimator_config)
+
+ # Check if the estimator has n_estimators parameter
+ if (
+ hasattr(amended_config, "default_params")
+ and "n_estimators" in amended_config.default_params
+ ):
+ amended_config.default_params["n_estimators"] = 15
+
+ # Also check ensemble components if it's an ensemble estimator
+ if (
+ hasattr(amended_config, "ensemble_components")
+ and amended_config.ensemble_components
+ ):
+ for component in amended_config.ensemble_components:
+ if "params" in component and "n_estimators" in component["params"]:
+ component["params"]["n_estimators"] = 15
+
+ if estimator_name in ["gp", "qgp"]:
+ continue
+
+ if "qens" in estimator_name:
+ continue
+
+ estimator_registry[estimator_name] = amended_config
+ else:
+ estimator_registry = ESTIMATOR_REGISTRY
+
+ # Build architecture lists
+ for estimator_name, estimator_config in estimator_registry.items():
+ if issubclass(
+ estimator_config.estimator_class,
+ (
+ BaseMultiFitQuantileEstimator,
+ BaseSingleFitQuantileEstimator,
+ QuantileEnsembleEstimator,
+ ),
+ ):
+ quantile_estimator_architectures.append(estimator_name)
+ if issubclass(
+ estimator_config.estimator_class,
+ (BaseMultiFitQuantileEstimator),
+ ):
+ multi_fit_quantile_estimator_architectures.append(estimator_name)
+ elif issubclass(
+ estimator_config.estimator_class,
+ (BaseSingleFitQuantileEstimator),
+ ):
+ single_fit_quantile_estimator_architectures.append(estimator_name)
+ elif issubclass(
+ estimator_config.estimator_class, (BaseEstimator, PointEnsembleEstimator)
+ ):
+ point_estimator_architectures.append(estimator_name)
+
+ return (
+ point_estimator_architectures,
+ single_fit_quantile_estimator_architectures,
+ multi_fit_quantile_estimator_architectures,
+ quantile_estimator_architectures,
+ estimator_registry,
+ )
+
+
+# Create original architecture lists
+(
+ POINT_ESTIMATOR_ARCHITECTURES,
+ SINGLE_FIT_QUANTILE_ESTIMATOR_ARCHITECTURES,
+ MULTI_FIT_QUANTILE_ESTIMATOR_ARCHITECTURES,
+ QUANTILE_ESTIMATOR_ARCHITECTURES,
+ _,
+) = build_estimator_architectures(amended=False)
+
+# Create amended architecture lists for faster testing
+(
+ AMENDED_POINT_ESTIMATOR_ARCHITECTURES,
+ AMENDED_SINGLE_FIT_QUANTILE_ESTIMATOR_ARCHITECTURES,
+ AMENDED_MULTI_FIT_QUANTILE_ESTIMATOR_ARCHITECTURES,
+ AMENDED_QUANTILE_ESTIMATOR_ARCHITECTURES,
+ AMENDED_ESTIMATOR_REGISTRY,
+) = build_estimator_architectures(amended=True)
+
+
+def simple_quadratic_minimization(x):
+ """Simple quadratic function for minimization testing.
+
+ Global minimum at x = [2, -1] with value 0.
+ This creates a clear, smooth objective surface that conformal prediction
+ can easily learn and exploit, unlike random search.
+ """
+ x = np.asarray(x)
+ # Shifted quadratic with minimum at [2, -1]
+ return (x[0] - 2) ** 2 + (x[1] + 1) ** 2
+
+
+def simple_quadratic_maximization(x):
+ """Simple negative quadratic function for maximization testing.
+
+ Global maximum at x = [1, 0.5] with value 0.
+ This creates a clear, smooth objective surface that conformal prediction
+ can easily learn and exploit, unlike random search.
+ """
+ x = np.asarray(x)
+ # Negative shifted quadratic with maximum at [1, 0.5]
+ return -((x[0] - 1) ** 2 + (x[1] - 0.5) ** 2)
+
+
+def rastrigin(x, A=20):
+ n = len(x)
+ rastrigin_value = A * n + np.sum(x**2 - A * np.cos(2 * np.pi * x))
+ return rastrigin_value
+
+
+def ackley(x, a=20, b=0.2, c=2 * np.pi):
+ """Ackley function - commonly used maximization benchmark.
+
+ Global minimum is at x = [0, 0, ..., 0] with value 0.
+ For maximization, we negate this so global maximum is 0 at origin.
+ """
+ x = np.asarray(x)
+ n = len(x)
+ sum1 = np.sum(x**2)
+ sum2 = np.sum(np.cos(c * x))
+ ackley_value = (
+ -a * np.exp(-b * np.sqrt(sum1 / n)) - np.exp(sum2 / n) + a + np.exp(1)
+ )
+ return -ackley_value # Negate for maximization
+
+
+class ObjectiveSurfaceGenerator:
+ def __init__(self, generator: str):
+ self.generator = generator
+
+ def predict(self, params):
+ x = np.array(list(params.values()), dtype=float)
+
+        if self.generator == "rastrigin":
+            return rastrigin(x=x)
+
+        raise ValueError(f"Unsupported generator: {self.generator}")
@pytest.fixture
-def dummy_stationary_gaussian_dataset():
- np.random.seed(DEFAULT_SEED)
- random.seed(DEFAULT_SEED)
+def mock_constant_objective_function():
+ def objective(configuration: Dict):
+ return 2
- X, y = [], []
- for x_observation in range(1, 11):
- for _ in range(0, 1000):
- X.append(x_observation)
- y.append(np.random.normal(0, 101))
- dataset = np.column_stack([X, y])
- np.random.shuffle(dataset)
- return dataset
+ return objective
@pytest.fixture
-def dummy_fixed_quantile_dataset():
+def toy_dataset():
+ # Create a small toy dataset with deterministic values
+ X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+ y = np.array([2, 4, 6, 8])
+ return X, y
+
+
+@pytest.fixture
+def big_toy_dataset():
+    np.random.seed(DEFAULT_SEED)  # larger toy dataset: 80 observations, 2 features
+    X = np.linspace(0, 10, 80).reshape(-1, 1)
+ X = np.hstack([X, X + np.random.normal(0, 1, 80).reshape(-1, 1)]) # Capped at 80
+ # Make y always negative by using negative coefficients and subtracting a constant
+ y = -5 * X[:, 0] - 3 * X[:, 1] - 10 + np.random.normal(0, 1, 80) # Capped at 80
+ return X, y
+
+
+@pytest.fixture
+def quantiles():
+ return [0.1, 0.5, 0.9]
+
+
+@pytest.fixture
+def dummy_expanding_quantile_gaussian_dataset():
np.random.seed(DEFAULT_SEED)
random.seed(DEFAULT_SEED)
X, y = [], []
- for x_observation in range(1, 11):
- for _ in range(0, 1000):
+    # 100 total observations (20 per x_observation value)
+    for x_observation in range(1, 6):
+        for _ in range(0, 20):
X.append(x_observation)
- y.append(random.choice(range(1, 101)))
- dataset = np.column_stack([X, y])
- np.random.shuffle(dataset)
- return dataset
+ y.append(x_observation * np.random.normal(0, 10))
+
+ X_array = np.array(X).reshape(-1, 1)
+ # Normalize X to have zero mean and unit variance
+ X_normalized = (X_array - np.mean(X_array)) / np.std(X_array)
+ return X_normalized, np.array(y)
@pytest.fixture
-def dummy_init_quantile_regression():
- qcr = QuantileConformalRegression(quantile_estimator_architecture="qgbm")
- return qcr
+def dummy_parameter_grid():
+ return {
+ "param_1": FloatRange(min_value=0.01, max_value=100, log_scale=True),
+ "param_2": IntRange(min_value=1, max_value=100),
+ "param_3": CategoricalRange(choices=["option1", "option2", "option3"]),
+ }
@pytest.fixture
-def dummy_init_locally_weighted_regression():
- lwr = LocallyWeightedConformalRegression(
- point_estimator_architecture="gbm",
- demeaning_estimator_architecture="gbm",
- variance_estimator_architecture="gbm",
- )
- return lwr
+def simple_minimization_parameter_grid():
+ """Parameter grid for simple quadratic minimization function.
+
+ Optimum is at x1=2, x2=-1. This grid covers the optimum with reasonable bounds
+ that allow the conformal prediction algorithm to learn the pattern efficiently.
+ """
+ return {
+ "x1": FloatRange(min_value=-2.0, max_value=6.0),
+ "x2": FloatRange(min_value=-5.0, max_value=3.0),
+ }
@pytest.fixture
-def dummy_configuration_performance_bounds():
- """
- Dummy performance bounds, where each set of
- bounds is meant to represent upper and lower
- expectations of a certain parameter configuration's
- performance.
+def simple_maximization_parameter_grid():
+ """Parameter grid for simple quadratic maximization function.
+
+ Optimum is at x1=1, x2=0.5. This grid covers the optimum with reasonable bounds
+ that allow the conformal prediction algorithm to learn the pattern efficiently.
"""
- performance_lower_bounds = np.arange(0, 100, 0.5)
- performance_upper_bounds = performance_lower_bounds + 10
- return performance_lower_bounds, performance_upper_bounds
+ return {
+ "x1": FloatRange(min_value=-2.0, max_value=4.0),
+ "x2": FloatRange(min_value=-2.5, max_value=3.5),
+ }
@pytest.fixture
-def dummy_parameter_grid():
- return DUMMY_PARAMETER_GRID
+def rastrigin_parameter_grid():
+ """Parameter grid for 6-dimensional Rastrigin function optimization."""
+ return {
+ "x1": FloatRange(min_value=-5.12, max_value=5.12),
+ "x2": FloatRange(min_value=-5.12, max_value=5.12),
+ "x3": FloatRange(min_value=-5.12, max_value=5.12),
+ "x4": FloatRange(min_value=-5.12, max_value=5.12),
+ "x5": FloatRange(min_value=-5.12, max_value=5.12),
+ "x6": FloatRange(min_value=-5.12, max_value=5.12),
+ }
@pytest.fixture
-def dummy_configurations(dummy_parameter_grid):
- """
- Samples unique configurations from broader
- possible values in dummy hyperparameter search space.
- """
- max_configurations = 100
- tuning_configurations = get_tuning_configurations(
- parameter_grid=dummy_parameter_grid,
- n_configurations=max_configurations,
- random_state=DEFAULT_SEED,
+def ackley_parameter_grid():
+ """Parameter grid for 6-dimensional Ackley function optimization."""
+ return {
+ "x1": FloatRange(min_value=-32.768, max_value=32.768),
+ "x2": FloatRange(min_value=-32.768, max_value=32.768),
+ "x3": FloatRange(min_value=-32.768, max_value=32.768),
+ "x4": FloatRange(min_value=-32.768, max_value=32.768),
+ "x5": FloatRange(min_value=-32.768, max_value=32.768),
+ "x6": FloatRange(min_value=-32.768, max_value=32.768),
+ }
+
+
+@pytest.fixture
+def linear_data_drift():
+ np.random.seed(42)
+ n = 500
+ X = np.linspace(0, 10, n).reshape(-1, 1)
+
+ noise_level = np.linspace(0.5, 3, n)
+ noise = np.random.normal(0, 1, n) * noise_level
+
+ y = np.zeros(n)
+
+ first_segment = int(0.3 * n)
+ y[:first_segment] = 2 * X[:first_segment].flatten() + 5 + noise[:first_segment]
+
+ second_segment = int(0.6 * n)
+ y[first_segment:second_segment] = (
+ 3 * X[first_segment:second_segment].flatten()
+ + 2
+ + noise[first_segment:second_segment]
)
- return tuning_configurations
+
+ y[second_segment:] = 2.5 * X[second_segment:].flatten() + 8 + noise[second_segment:]
+
+ return X, y
+
+
+@pytest.fixture
+def simple_conformal_bounds():
+ lower_bounds1 = np.array([0.1, 0.3, 0.5])
+ upper_bounds1 = np.array([0.4, 0.6, 0.8])
+
+ lower_bounds2 = np.array([0.2, 0.4, 0.6])
+ upper_bounds2 = np.array([0.5, 0.7, 0.9])
+
+ return [
+ ConformalBounds(lower_bounds=lower_bounds1, upper_bounds=upper_bounds1),
+ ConformalBounds(lower_bounds=lower_bounds2, upper_bounds=upper_bounds2),
+ ]
+
+
+@pytest.fixture
+def estimator1():
+ """Mock point estimator that returns deterministic values scaled to input size."""
+ mock = Mock()
+
+ def scaled_predict(X):
+ # Return values that scale based on input length
+ n_samples = len(X)
+ return np.arange(1, n_samples + 1) * 2 # [2, 4, 6, 8, ...] based on input size
+
+ mock.predict = Mock(side_effect=scaled_predict)
+ mock.fit = Mock(return_value=mock)
+ return mock
+
+
+@pytest.fixture
+def estimator2():
+ """Mock point estimator that returns different deterministic values scaled to input size."""
+ mock = Mock()
+
+ def scaled_predict(X):
+ # Return values that scale based on input length
+ n_samples = len(X)
+ return np.arange(2, n_samples + 2) * 2 # [4, 6, 8, 10, ...] based on input size
+
+ mock.predict = Mock(side_effect=scaled_predict)
+ mock.fit = Mock(return_value=mock)
+ return mock
+
+
+@pytest.fixture
+def quantile_estimator1(quantiles):
+ """Mock quantile estimator that returns deterministic quantile predictions for any input size."""
+ mock = Mock()
+
+ def scaled_predict(X):
+ # Return values for any size of X
+ n_samples = len(X)
+ result = np.zeros((n_samples, len(quantiles)))
+        for i in range(len(quantiles)):
+ result[:, i] = (i + 1) * 2 # Values 2, 4, 6 for quantiles
+ return result
+
+ mock.fit = Mock(return_value=mock)
+ mock.predict = Mock(side_effect=scaled_predict)
+ return mock
@pytest.fixture
-def dummy_gbm_parameter_grid():
- return DUMMY_GBM_PARAMETER_GRID
+def quantile_estimator2(quantiles):
+ """Mock quantile estimator that returns constant values across quantiles."""
+ mock = Mock()
+
+ def scaled_predict(X):
+ # Return values for any size of X
+ n_samples = len(X)
+ return np.ones((n_samples, len(quantiles))) * 4
+
+ mock.fit = Mock(return_value=mock)
+ mock.predict = Mock(side_effect=scaled_predict)
+ return mock
+
+
+@pytest.fixture
+def competing_estimator():
+ """Mock point estimator with different performance characteristics."""
+ mock = Mock()
+
+ def scaled_predict(X):
+ # Return values that scale based on input length
+ n_samples = len(X)
+ return (
+ np.arange(0.5, n_samples + 0.5) * 2
+ ) # [1, 3, 5, 7, ...] based on input size
+
+ mock.predict = Mock(side_effect=scaled_predict)
+ mock.fit = Mock(return_value=mock)
+ return mock
@pytest.fixture
-def dummy_gbm_configurations(dummy_gbm_parameter_grid):
- max_configurations = 60
- gbm_tuning_configurations = get_tuning_configurations(
- parameter_grid=dummy_gbm_parameter_grid,
- n_configurations=max_configurations,
- random_state=DEFAULT_SEED,
+def tuner(mock_constant_objective_function, dummy_parameter_grid):
+ # Create a standard tuner instance that can be reused across tests
+ return ConformalTuner(
+ objective_function=mock_constant_objective_function,
+ search_space=dummy_parameter_grid,
+ minimize=True,
+ n_candidates=100,
)
- return gbm_tuning_configurations
@pytest.fixture
-def dummy_initialized_conformal_searcher__gbm_mse(
- dummy_stationary_gaussian_dataset, dummy_gbm_parameter_grid
-):
- """
- Creates a conformal searcher instance from dummy raw X, y data
- and a dummy parameter grid.
+def small_parameter_grid():
+ """Small parameter grid for focused configuration testing"""
+ return {
+ "x": FloatRange(min_value=0.0, max_value=1.0),
+ "y": IntRange(min_value=1, max_value=3),
+ "z": CategoricalRange(choices=["A", "B"]),
+ }
- This particular fixture is set to optimize a GBM base model on
- regression data, using an MSE objective. The model architecture
- and type of data are arbitrarily pinned; more fixtures could
- be created to test other model or data types.
- """
- custom_loss_function = "mean_squared_error"
- prediction_type = "regression"
- model = GradientBoostingRegressor()
- X, y = (
- dummy_stationary_gaussian_dataset[:, 0].reshape(-1, 1),
- dummy_stationary_gaussian_dataset[:, 1],
+@pytest.fixture
+def dynamic_tuner(mock_constant_objective_function, small_parameter_grid):
+ """Tuner configured for dynamic sampling with small candidate count"""
+ return ConformalTuner(
+ objective_function=mock_constant_objective_function,
+ search_space=small_parameter_grid,
+ minimize=True,
+ n_candidates=5,
+ dynamic_sampling=True,
)
- train_split = 0.5
- X_train, y_train = (
- X[: round(len(X) * train_split), :],
- y[: round(len(y) * train_split)],
+
+
+@pytest.fixture
+def static_tuner(mock_constant_objective_function, small_parameter_grid):
+ """Tuner configured for static sampling with small candidate count"""
+ return ConformalTuner(
+ objective_function=mock_constant_objective_function,
+ search_space=small_parameter_grid,
+ minimize=True,
+ n_candidates=10,
+ dynamic_sampling=False,
)
- X_val, y_val = X[round(len(X) * train_split) :, :], y[round(len(y) * train_split) :]
-
- searcher = ConformalSearcher(
- model=model,
- X_train=X_train,
- y_train=y_train,
- X_val=X_val,
- y_val=y_val,
- search_space=dummy_gbm_parameter_grid,
- prediction_type=prediction_type,
- custom_loss_function=custom_loss_function,
+
+
+# Fixtures for quantile estimation testing
+
+
+@pytest.fixture
+def uniform_regression_data():
+ """Generate uniform regression data for quantile testing."""
+ np.random.seed(42)
+ n_samples = 300
+ n_features = 3
+
+ X = np.random.uniform(-1, 1, size=(n_samples, n_features))
+ y = np.random.uniform(0, 1, size=n_samples)
+
+ return X, y
+
+
+@pytest.fixture
+def heteroscedastic_regression_data():
+ """Generate heteroscedastic regression data where variance changes with X."""
+ np.random.seed(42)
+ n_samples = 200
+ X = np.linspace(-3, 3, n_samples).reshape(-1, 1)
+
+ # Heteroscedastic noise: variance increases with |X|
+ noise_std = 0.5 + 1.5 * np.abs(X.flatten())
+ noise = np.random.normal(0, 1, n_samples) * noise_std
+
+ # True function: quadratic with heteroscedastic noise
+ y = 2 * X.flatten() ** 2 + 1.5 * X.flatten() + noise
+
+ return X, y
+
+
+@pytest.fixture
+def multimodal_regression_data():
+ """Generate multimodal regression data with multiple peaks and valleys."""
+ np.random.seed(42)
+ n_samples = 300
+ X = np.linspace(-4, 4, n_samples).reshape(-1, 1)
+
+ # Multimodal function: mixture of Gaussians
+ y = (
+ 2 * np.exp(-0.5 * (X.flatten() + 2) ** 2)
+ + 1.5 * np.exp(-0.5 * (X.flatten() - 1) ** 2)
+ + np.exp(-0.5 * (X.flatten() - 3) ** 2)
+ + np.random.normal(0, 0.3, n_samples)
)
- return searcher
+ return X, y
+
+
+@pytest.fixture
+def skewed_regression_data():
+ """Generate regression data with skewed noise distribution."""
+ np.random.seed(42)
+ n_samples = 250
+ X = np.linspace(0, 5, n_samples).reshape(-1, 1)
+
+ # Skewed noise using exponential distribution
+ skewed_noise = np.random.exponential(0.5, n_samples) - 0.5
+
+ # True function with skewed residuals
+ y = np.sin(X.flatten()) + 0.5 * X.flatten() + skewed_noise
+
+ return X, y
+
+
+@pytest.fixture
+def high_dimensional_regression_data():
+ """Generate high-dimensional regression data for testing scalability."""
+ np.random.seed(42)
+ n_samples = 150
+ n_features = 8
+ X = np.random.randn(n_samples, n_features)
+
+ # Linear combination with interaction terms
+ true_coef = np.array([2, -1, 0.5, -0.5, 1, 0, -0.3, 0.8])
+ y = X @ true_coef + 0.5 * X[:, 0] * X[:, 1] + np.random.normal(0, 0.5, n_samples)
+
+ return X, y
+
+
+@pytest.fixture
+def sparse_regression_data():
+ """Generate sparse regression data with few informative features."""
+ np.random.seed(42)
+ n_samples = 100
+ n_features = 10
+ X = np.random.randn(n_samples, n_features)
+
+ # Only first 3 features are informative
+ true_coef = np.zeros(n_features)
+ true_coef[:3] = [3, -2, 1.5]
+ y = X @ true_coef + np.random.normal(0, 0.3, n_samples)
+
+ return X, y
+
+
+@pytest.fixture
+def toy_regression_data():
+ """Generate simple toy regression data for basic testing."""
+
+ def _generate_data(n_samples=100, n_features=2, noise_std=0.1, random_state=42):
+ np.random.seed(random_state)
+ X = np.random.randn(n_samples, n_features)
+ true_coef = np.ones(n_features)
+ y = X @ true_coef + np.random.normal(0, noise_std, n_samples)
+ return X, y
+
+ return _generate_data
+
+
+@pytest.fixture
+def quantile_test_data():
+ """Generate data with known quantile structure for validation."""
+ np.random.seed(42)
+ n_samples = 500
+ X = np.linspace(-3, 3, n_samples).reshape(-1, 1)
+
+ # Create data where we know the true quantiles
+ # Use a location-scale model: Y = μ(X) + σ(X) * ε
+ mu = 2 * X.flatten() # Mean function
+ sigma = 0.5 + 0.3 * np.abs(X.flatten()) # Scale function
+ epsilon = np.random.normal(0, 1, n_samples) # Standard normal noise
+
+ y = mu + sigma * epsilon
+
+ # Store true quantiles for validation
+ true_quantiles = {}
+ for q in [0.1, 0.25, 0.5, 0.75, 0.9]:
+ from scipy.stats import norm
+
+ true_quantiles[q] = mu + sigma * norm.ppf(q)
+
+ return X, y, true_quantiles
+
+
+@pytest.fixture
+def monotonicity_test_quantiles():
+ """Standard quantiles for monotonicity testing."""
+ return [0.1, 0.25, 0.5, 0.75, 0.9]
+
+
+@pytest.fixture
+def alpha_levels_for_conformalization():
+ """Standard alpha levels for conformalization testing."""
+ return [0.1, 0.2, 0.3] # Corresponding to 90%, 80%, 70% coverage
+
+
+@pytest.fixture
+def estimation_test_data():
+ """Generate test data for estimation module tests."""
+ np.random.seed(42)
+ X = np.random.rand(50, 5)
+ y = X.sum(axis=1) + np.random.normal(0, 0.1, 50)
+ from sklearn.model_selection import train_test_split
+
+ return train_test_split(X, y, test_size=0.25, random_state=42)
+
+
+@pytest.fixture
+def point_tuner():
+ """Create a PointTuner instance for testing."""
+ from confopt.selection.estimation import PointTuner
+
+ return PointTuner(random_state=42)
+
+
+@pytest.fixture
+def quantile_tuner_with_quantiles():
+ """Create a QuantileTuner instance with quantiles for testing."""
+ from confopt.selection.estimation import QuantileTuner
+
+ quantiles = [0.1, 0.9]
+ return QuantileTuner(quantiles=quantiles, random_state=42), quantiles
+
+
+@pytest.fixture
+def multi_interval_bounds():
+ """Create multiple ConformalBounds objects for multi-interval testing."""
+ n_obs = 30
+ bounds_list = []
+ for i in range(3):
+ width_factor = (i + 1) * 0.5
+ lower = np.random.uniform(-1, 0, n_obs)
+ upper = lower + np.random.uniform(0.2 * width_factor, 1.0 * width_factor, n_obs)
+ bounds_list.append(ConformalBounds(lower_bounds=lower, upper_bounds=upper))
+ return bounds_list
+
+
+@pytest.fixture
+def nested_intervals():
+ """Create properly nested intervals for testing interval relationships."""
+ n_obs = 20
+ # Create nested intervals: each inner interval contained within outer
+ center = np.random.uniform(-1, 1, n_obs)
+
+ # Widest interval (lowest confidence)
+ wide_lower = center - 2.0
+ wide_upper = center + 2.0
+
+ # Medium interval
+ med_lower = center - 1.0
+ med_upper = center + 1.0
+
+ # Narrowest interval (highest confidence)
+ narrow_lower = center - 0.5
+ narrow_upper = center + 0.5
+
+ return [
+ ConformalBounds(lower_bounds=wide_lower, upper_bounds=wide_upper),
+ ConformalBounds(lower_bounds=med_lower, upper_bounds=med_upper),
+ ConformalBounds(lower_bounds=narrow_lower, upper_bounds=narrow_upper),
+ ]
+
+
+@pytest.fixture
+def coverage_feedback():
+ """Sample coverage feedback for adaptation testing."""
+ return [0.85, 0.78, 0.92]
+
+
+@pytest.fixture
+def small_dataset():
+ """Small dataset for computational testing."""
+ n_obs = 10
+ bounds = []
+ for _ in range(2):
+ lower = np.random.uniform(-0.5, 0, n_obs)
+ upper = lower + np.random.uniform(0.1, 0.5, n_obs)
+ bounds.append(ConformalBounds(lower_bounds=lower, upper_bounds=upper))
+ return bounds
+
+
+@pytest.fixture
+def test_predictions_and_widths():
+ """Combined point predictions and interval widths for LCB testing."""
+ np.random.seed(42)
+ n_points = 15
+ point_estimates = np.random.uniform(-2, 2, n_points)
+ interval_widths = np.random.uniform(0.2, 1.5, n_points)
+ return point_estimates, interval_widths
+
+
+@pytest.fixture
+def conformal_bounds_deterministic():
+ """Deterministic conformal bounds for reproducible testing."""
+ lower_bounds1 = np.array([1.0, 2.0, 3.0, 4.0])
+ upper_bounds1 = np.array([1.5, 2.5, 3.5, 4.5])
+
+ lower_bounds2 = np.array([0.8, 1.8, 2.8, 3.8])
+ upper_bounds2 = np.array([1.3, 2.3, 3.3, 4.3])
+
+ return [
+ ConformalBounds(lower_bounds=lower_bounds1, upper_bounds=upper_bounds1),
+ ConformalBounds(lower_bounds=lower_bounds2, upper_bounds=upper_bounds2),
+ ]
+
+
+@pytest.fixture
+def comprehensive_minimizing_tuning_setup(simple_minimization_parameter_grid):
+ """Fixture for comprehensive integration test setup (objective, warm starts, tuner, searcher).
+
+ Uses a simple quadratic minimization function that's easy for conformal prediction to learn,
+ ensuring the test validates that conformal search outperforms random search.
+ """
+
+ def optimization_objective(configuration: Dict) -> float:
+ # Extract 2-dimensional vector from configuration
+ x = np.array(
+ [
+ configuration["x1"],
+ configuration["x2"],
+ ]
+ )
+
+ # Use simple quadratic function for minimization (minimum at [2, -1])
+ return simple_quadratic_minimization(x)
+
+ warm_start_configs_raw = get_tuning_configurations(
+ parameter_grid=simple_minimization_parameter_grid,
+ n_configurations=5,
+ random_state=123,
+ sampling_method="uniform",
+ )
+ warm_start_configs = []
+ for config in warm_start_configs_raw:
+ performance = optimization_objective(config)
+ warm_start_configs.append((config, performance))
+
+ def make_tuner_and_searcher(dynamic_sampling):
+ tuner = ConformalTuner(
+ objective_function=optimization_objective,
+ search_space=simple_minimization_parameter_grid,
+ minimize=True,
+ n_candidates=1000,
+ warm_starts=warm_start_configs,
+ dynamic_sampling=dynamic_sampling,
+ )
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="qgbm",
+ sampler=ThompsonSampler(
+ n_quantiles=4,
+ adapter="DtACI",
+ enable_optimistic_sampling=False,
+ ),
+ n_pre_conformal_trials=32,
+ calibration_split_strategy="train_test_split",
+ )
+ return tuner, searcher, warm_start_configs, optimization_objective
+
+ return make_tuner_and_searcher
+
+
+@pytest.fixture
+def comprehensive_maximizing_tuning_setup(simple_maximization_parameter_grid):
+ """Fixture for comprehensive integration test setup for maximization (objective, warm starts, tuner, searcher).
+
+ Uses a simple quadratic maximization function that's easy for conformal prediction to learn,
+ ensuring the test validates that conformal search outperforms random search.
+ """
+
+ def optimization_objective(configuration: Dict) -> float:
+ # Extract 2-dimensional vector from configuration
+ x = np.array(
+ [
+ configuration["x1"],
+ configuration["x2"],
+ ]
+ )
+
+ # Use simple quadratic function for maximization (maximum at [1, 0.5])
+ return simple_quadratic_maximization(x)
+
+ warm_start_configs_raw = get_tuning_configurations(
+ parameter_grid=simple_maximization_parameter_grid,
+ n_configurations=5,
+ random_state=123,
+ sampling_method="uniform",
+ )
+ warm_start_configs = []
+ for config in warm_start_configs_raw:
+ performance = optimization_objective(config)
+ warm_start_configs.append((config, performance))
+
+ def make_tuner_and_searcher(dynamic_sampling):
+ tuner = ConformalTuner(
+ objective_function=optimization_objective,
+ search_space=simple_maximization_parameter_grid,
+ minimize=False, # Set to False for maximization
+ n_candidates=1000,
+ warm_starts=warm_start_configs,
+ dynamic_sampling=dynamic_sampling,
+ )
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="qgbm",
+ sampler=ThompsonSampler(
+ n_quantiles=4,
+ adapter="DtACI",
+ enable_optimistic_sampling=False,
+ ),
+ n_pre_conformal_trials=32,
+ calibration_split_strategy="train_test_split",
+ )
+ return tuner, searcher, warm_start_configs, optimization_objective
+
+ return make_tuner_and_searcher
+
+
+@pytest.fixture
+def moderate_shift_data():
+ """Create data with moderate distribution shift (0.1 -> 0.5 noise)."""
+ np.random.seed(42)
+ n_points = 200
+ shift_point = 100
+
+ X1 = np.random.randn(shift_point, 2)
+ y1 = X1.sum(axis=1) + 0.1 * np.random.randn(shift_point)
+
+ X2 = np.random.randn(n_points - shift_point, 2)
+ y2 = X2.sum(axis=1) + 0.5 * np.random.randn(n_points - shift_point)
+
+ return np.vstack([X1, X2]), np.hstack([y1, y2])
+
+
+@pytest.fixture
+def high_shift_data():
+ """Create data with high distribution shift (0.1 -> 0.8 -> 0.1 noise)."""
+ np.random.seed(42)
+ n_points = 300
+ shift_points = [100, 200]
+
+ X1 = np.random.randn(shift_points[0], 2)
+ y1 = X1.sum(axis=1) + 0.1 * np.random.randn(shift_points[0])
+
+ X2 = np.random.randn(shift_points[1] - shift_points[0], 2)
+ y2 = X2.sum(axis=1) + 0.8 * np.random.randn(shift_points[1] - shift_points[0])
+
+ X3 = np.random.randn(n_points - shift_points[1], 2)
+ y3 = X3.sum(axis=1) + 0.1 * np.random.randn(n_points - shift_points[1])
+
+ return np.vstack([X1, X2, X3]), np.hstack([y1, y2, y3])
+
+
+@pytest.fixture
+def dtaci_instance():
+ """Standard DtACI instance for testing."""
+ return DtACI(alpha=0.1, gamma_values=[0.01, 0.05, 0.1])
+
+
+# Quantile Estimation Test Data Fixtures
+@pytest.fixture
+def linear_regression_data():
+ """Simple linear regression with homoscedastic noise."""
+ np.random.seed(42)
+ n_samples = 200
+ X = np.linspace(-2, 2, n_samples).reshape(-1, 1)
+ y = 2.5 * X.flatten() + 1.0 + np.random.normal(0, 0.5, n_samples)
+ return X, y
+
+
+@pytest.fixture
+def heteroscedastic_data():
+ """Heteroscedastic data where variance increases with |X|."""
+ np.random.seed(42)
+ n_samples = 300
+ X = np.linspace(-3, 3, n_samples).reshape(-1, 1)
+ noise_std = 0.3 + 1.2 * np.abs(X.flatten())
+ noise = np.random.normal(0, 1, n_samples) * noise_std
+ y = 1.5 * X.flatten() ** 2 + 0.5 * X.flatten() + noise
+ return X, y
+
+
+@pytest.fixture
+def diabetes_data():
+ """Scikit-learn diabetes dataset for regression testing."""
+ from sklearn.datasets import load_diabetes
+
+ diabetes = load_diabetes()
+ return diabetes.data, diabetes.target
+
+
+@pytest.fixture
+def comprehensive_test_quantiles():
+ """Comprehensive set of quantiles for testing."""
+ return [0.05, 0.25, 0.5, 0.75, 0.95]
+
+
+@pytest.fixture
+def ensemble_test_quantiles():
+ return [0.25, 0.5, 0.75]
diff --git a/tests/integration_tests/tuning_integration.py b/tests/integration_tests/tuning_integration.py
new file mode 100644
index 0000000..3f75040
--- /dev/null
+++ b/tests/integration_tests/tuning_integration.py
@@ -0,0 +1,206 @@
+import pytest
+import numpy as np
+from typing import Dict, Tuple, Optional
+
+from confopt.tuning import ConformalTuner
+from confopt.wrapping import CategoricalRange
+from confopt.selection.acquisition import QuantileConformalSearcher, LowerBoundSampler
+
# Minimum fraction of seeds on which the adaptive run must match or beat
# the non-adaptive baseline for the comparison test to pass.
DRAW_OR_WIN_RATE_THRESHOLD = 0.75
# Number of consecutive trials per window when measuring local coverage deviation.
WINDOW_SIZE = 20
# Nominal miscoverage levels exercised by the adaptive-vs-nonadaptive test.
TARGET_ALPHAS = [0.25, 0.5, 0.75]
# Interval-width adapters under test.
ADAPTER_TYPES = ["DtACI", "ACI"]
+
+
+def complex_objective(configuration: Dict) -> float:
+ x1 = configuration["x1"]
+ x2 = configuration["x2"]
+ categorical_val = {"A": 1.0, "B": 2.5, "C": 4.0}[configuration["categorical"]]
+
+ term1 = np.sin(x1 * np.pi) * np.cos(x2 * np.pi)
+ term2 = 0.5 * (x1 - 0.3) ** 2 + 0.8 * (x2 - 0.7) ** 2
+ term3 = categorical_val * np.exp(-((x1 - 0.5) ** 2 + (x2 - 0.5) ** 2))
+
+ return term1 + term2 + term3 + np.random.normal(0, 0.05)
+
+
def calculate_coverage_rate_from_study(study) -> float:
    """Fraction of trials whose observed performance fell inside the predicted
    [lower_bound, upper_bound] interval.

    Trials without both bounds (e.g. random-search warm-up trials) are skipped.
    Returns 0.0 when no trial carries an interval, instead of raising
    ZeroDivisionError as the previous version did.
    """
    breach_count = 0
    total_intervals = 0

    for trial in study.trials:
        if trial.lower_bound is not None and trial.upper_bound is not None:
            total_intervals += 1
            if not (trial.lower_bound <= trial.performance <= trial.upper_bound):
                breach_count += 1

    if total_intervals == 0:
        return 0.0
    return 1 - (breach_count / total_intervals)
+
+
def calculate_windowed_deviations_from_study(
    study, alpha: float, window_size: int
) -> float:
    """Mean absolute deviation between per-window empirical coverage and the
    nominal coverage (1 - alpha).

    Trials lacking interval bounds are excluded. Windows are consecutive,
    non-overlapping chunks of ``window_size`` trials; a trailing partial window
    is ignored. Returns 0.0 when fewer than ``window_size`` eligible trials
    exist.
    """
    target_coverage = 1 - alpha
    trials = [
        t
        for t in study.trials
        if t.lower_bound is not None and t.upper_bound is not None
    ]

    if len(trials) < window_size:
        return 0.0

    n_windows = len(trials) // window_size
    deviations = []

    for window_index in range(n_windows):
        start_idx = window_index * window_size
        end_idx = start_idx + window_size
        window_trials = trials[start_idx:end_idx]

        breaches = sum(
            1
            for t in window_trials
            if not (t.lower_bound <= t.performance <= t.upper_bound)
        )
        window_coverage = 1 - (breaches / window_size)
        deviation = abs(window_coverage - target_coverage)
        deviations.append(deviation)

    # Cast so the annotated return type (float) holds rather than np.floating.
    return float(np.mean(deviations))
+
+
def run_experiment(
    adapter_type: Optional[str], seed: int, alpha: float
) -> Tuple[float, float]:
    """Run one full conformal tuning session and report coverage quality.

    adapter_type selects the interval-width adapter (None disables adaptation),
    seed fixes all randomness, and alpha is the nominal miscoverage level.
    Returns (global coverage rate, mean windowed coverage deviation).
    """
    np.random.seed(seed)

    # Discretized 2-D grid plus one genuinely categorical dimension.
    search_space = {
        "x1": CategoricalRange(choices=np.linspace(0, 1, 15).tolist()),
        "x2": CategoricalRange(choices=np.linspace(0, 1, 15).tolist()),
        "categorical": CategoricalRange(choices=["A", "B", "C"]),
    }

    interval_width = 1 - alpha

    # c=0 disables the LCB exploration bonus so the comparison isolates the
    # adapter's effect on interval widths.
    sampler = LowerBoundSampler(
        interval_width=interval_width,
        adapter=adapter_type,
        c=0,
    )

    searcher = QuantileConformalSearcher(
        quantile_estimator_architecture="qgbm",
        sampler=sampler,
        n_pre_conformal_trials=32,
        calibration_split_strategy="train_test_split",
    )

    tuner = ConformalTuner(
        objective_function=complex_objective,
        search_space=search_space,
        minimize=True,
        n_candidates=2000,
        dynamic_sampling=True,
    )

    tuner.tune(
        n_random_searches=15,
        searcher=searcher,
        random_state=seed,
        max_searches=60,
        verbose=False,
    )

    coverage_rate = calculate_coverage_rate_from_study(tuner.study)
    windowed_deviation = calculate_windowed_deviations_from_study(
        tuner.study, alpha, WINDOW_SIZE
    )

    return coverage_rate, windowed_deviation
+
+
@pytest.mark.slow
@pytest.mark.parametrize("target_alpha", TARGET_ALPHAS)
@pytest.mark.parametrize("adapter_type", ADAPTER_TYPES)
def test_adaptive_vs_nonadaptive_coverage(target_alpha, adapter_type):
    """Adaptive interval-width adjustment should track target coverage at
    least as well as the non-adaptive baseline, globally and per window.

    Each seed runs the tuner twice (with and without an adapter); a "win" is a
    deviation from target coverage no worse than the baseline's. The former
    print() diagnostics were removed (project convention forbids print), and
    the always-true `if adapter_type is not None` guard was dropped because
    ADAPTER_TYPES contains no None entry.
    """
    n_seeds = 5
    adaptive_wins_global = 0
    adaptive_wins_local = 0

    for seed in range(n_seeds):
        adaptive_coverage, adaptive_local_dev = run_experiment(
            adapter_type, seed, target_alpha
        )
        nonadaptive_coverage, nonadaptive_local_dev = run_experiment(
            None, seed, target_alpha
        )

        target_coverage = 1 - target_alpha
        adaptive_global_dev = abs(adaptive_coverage - target_coverage)
        nonadaptive_global_dev = abs(nonadaptive_coverage - target_coverage)

        # Ties count as wins: matching the baseline is acceptable.
        if adaptive_global_dev <= nonadaptive_global_dev:
            adaptive_wins_global += 1
        if adaptive_local_dev <= nonadaptive_local_dev:
            adaptive_wins_local += 1

    global_win_rate = adaptive_wins_global / n_seeds
    local_win_rate = adaptive_wins_local / n_seeds

    assert (
        global_win_rate >= DRAW_OR_WIN_RATE_THRESHOLD
    ), f"Global win rate: {global_win_rate}"
    assert (
        local_win_rate >= DRAW_OR_WIN_RATE_THRESHOLD
    ), f"Local win rate: {local_win_rate}"
+
+
def test_dtaci_parameter_evolution():
    """The DtACI adapter's internal alpha should actually evolve during a
    tuning run: updates occur, the history stays within (0.001, 0.999), and
    the trajectory is not constant."""
    search_space = {
        "x1": CategoricalRange(choices=np.linspace(0, 1, 8).tolist()),
        "x2": CategoricalRange(choices=np.linspace(0, 1, 8).tolist()),
        "categorical": CategoricalRange(choices=["A", "B", "C"]),
    }

    # c=0 removes the exploration bonus so only the adapter drives widths.
    sampler = LowerBoundSampler(
        interval_width=0.8,
        adapter="DtACI",
        c=0,
    )

    searcher = QuantileConformalSearcher(
        quantile_estimator_architecture="ql",
        sampler=sampler,
        n_pre_conformal_trials=32,
    )

    tuner = ConformalTuner(
        objective_function=complex_objective,
        search_space=search_space,
        minimize=True,
        n_candidates=500,
    )

    tuner.tune(
        n_random_searches=15,
        searcher=searcher,
        random_state=42,
        max_searches=100,
        verbose=False,
    )

    adapter = sampler.adapter

    assert adapter is not None
    assert adapter.update_count > 0
    assert len(adapter.alpha_history) > 0

    # Alpha must remain a valid miscoverage level at every recorded step.
    for alpha_val in adapter.alpha_history:
        assert 0.001 <= alpha_val <= 0.999

    # Non-zero variance proves the adapter did not sit at a constant value.
    assert np.var(adapter.alpha_history) != 0
diff --git a/tests/selection/estimators/test_ensembling.py b/tests/selection/estimators/test_ensembling.py
new file mode 100644
index 0000000..199c4c2
--- /dev/null
+++ b/tests/selection/estimators/test_ensembling.py
@@ -0,0 +1,387 @@
+import pytest
+import numpy as np
+from sklearn.metrics import mean_pinball_loss
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import RandomForestRegressor
+
+from confopt.selection.estimators.ensembling import (
+ PointEnsembleEstimator,
+ QuantileEnsembleEstimator,
+ QuantileLassoMeta,
+)
+from confopt.selection.estimators.quantile_estimation import (
+ QuantileGBM,
+ QuantileKNN,
+ QuantileLasso,
+)
+
+
def test_quantile_lasso_meta_fit_predict():
    """Test that QuantileLassoMeta correctly fits and predicts."""
    np.random.seed(42)
    n_samples, n_features = 100, 3
    X = np.random.randn(n_samples, n_features)
    # Known linear signal with small Gaussian noise.
    y = X @ np.array([0.5, 0.3, 0.2]) + 0.1 * np.random.randn(n_samples)

    quantile_lasso = QuantileLassoMeta(alpha=0.01, quantile=0.5)
    quantile_lasso.fit(X, y)

    # Check that coefficients sum to 1 (normalized)
    assert np.isclose(np.sum(quantile_lasso.coef_), 1.0)
    assert np.all(quantile_lasso.coef_ >= 0)  # positive constraint

    # Check prediction works
    predictions = quantile_lasso.predict(X)
    assert predictions.shape == (n_samples,)
+
+
def test_quantile_lasso_meta_different_quantiles():
    """Test that QuantileLassoMeta gives different weights for different quantiles."""
    np.random.seed(42)
    n_samples, n_features = 200, 3
    X = np.random.randn(n_samples, n_features)
    y = X @ np.array([0.5, 0.3, 0.2]) + 0.2 * np.random.randn(n_samples)

    quantile_25 = QuantileLassoMeta(alpha=0.01, quantile=0.25)
    quantile_75 = QuantileLassoMeta(alpha=0.01, quantile=0.75)

    quantile_25.fit(X, y)
    quantile_75.fit(X, y)

    # Weights might be different for different quantiles
    # NOTE(review): only simplex validity is asserted below; inequality of the
    # two weight vectors is not guaranteed and is deliberately not checked.
    assert quantile_25.coef_ is not None
    assert quantile_75.coef_ is not None
    assert np.isclose(np.sum(quantile_25.coef_), 1.0)
    assert np.isclose(np.sum(quantile_75.coef_), 1.0)
+
+
def test_quantile_lasso_meta_better_than_uniform():
    """Test that QuantileLassoMeta performs better than uniform weights for
    quantile loss.

    Uses the module-level ``mean_pinball_loss`` import; the former duplicate
    function-local import was removed.
    """
    np.random.seed(42)
    n_samples, n_features = 150, 3

    # Create data where first feature is best for the quantile
    X = np.random.randn(n_samples, n_features)
    y = 2 * X[:, 0] + 0.1 * X[:, 1] + 0.05 * X[:, 2] + 0.1 * np.random.randn(n_samples)

    quantile = 0.25

    # Quantile Lasso
    quantile_lasso = QuantileLassoMeta(alpha=0.01, quantile=quantile)
    quantile_lasso.fit(X, y)
    pred_quantile_lasso = quantile_lasso.predict(X)

    # Uniform weights
    uniform_weights = np.ones(n_features) / n_features
    pred_uniform = X @ uniform_weights

    # Compare pinball losses
    loss_quantile_lasso = mean_pinball_loss(y, pred_quantile_lasso, alpha=quantile)
    loss_uniform = mean_pinball_loss(y, pred_uniform, alpha=quantile)

    # QuantileLasso should perform at least as well as uniform weights
    assert loss_quantile_lasso <= loss_uniform * 1.05  # Allow small tolerance
+
+
def create_diverse_quantile_estimators(random_state=42):
    """Three structurally different quantile estimators (boosting, nearest
    neighbours, linear) so ensemble tests have genuinely diverse components."""
    return [
        QuantileGBM(
            learning_rate=0.1,
            n_estimators=50,
            min_samples_split=10,
            min_samples_leaf=5,
            max_depth=3,
            random_state=random_state,
        ),
        QuantileKNN(n_neighbors=15),
        QuantileLasso(
            max_iter=1000,
            p_tol=1e-6,
            random_state=random_state,
        ),
    ]
+
+
def create_diverse_point_estimators(random_state=42):
    """A linear and a tree-based point estimator for point-ensemble tests."""
    return [
        LinearRegression(),
        RandomForestRegressor(
            n_estimators=50,
            max_depth=3,
            random_state=random_state,
        ),
    ]
+
+
def evaluate_quantile_performance(y_true, y_pred, quantiles):
    """Average pinball loss across all requested quantiles (lower is better).

    Column i of ``y_pred`` is assumed to hold predictions for quantiles[i].
    """
    losses = [
        mean_pinball_loss(y_true, y_pred[:, column], alpha=quantile)
        for column, quantile in enumerate(quantiles)
    ]
    return sum(losses) / len(quantiles)
+
+
def evaluate_point_performance(y_true, y_pred):
    """Mean squared error between observed and predicted values."""
    squared_errors = np.square(y_true - y_pred)
    return np.mean(squared_errors)
+
+
def test_point_ensemble_get_stacking_training_data(toy_dataset, estimator1, estimator2):
    """Out-of-fold stacking data must cover every training row exactly once,
    with one prediction column per base estimator."""
    X, y = toy_dataset

    model = PointEnsembleEstimator(
        estimators=[estimator1, estimator2], cv=2, random_state=42, alpha=0.01
    )

    val_indices, val_targets, val_predictions = model._get_stacking_training_data(X, y)

    assert len(np.unique(val_indices)) == len(X)
    assert val_predictions.shape == (len(X), 2)
    # Targets must stay aligned with the (fold-permuted) validation indices.
    assert np.array_equal(val_targets, y[val_indices])
+
+
@pytest.mark.parametrize("weighting_strategy", ["uniform", "linear_stack"])
def test_point_ensemble_compute_weights(
    toy_dataset, estimator1, competing_estimator, weighting_strategy
):
    """Computed ensemble weights must form a simplex; the uniform strategy
    must yield exactly equal weights."""
    X, y = toy_dataset

    model = PointEnsembleEstimator(
        estimators=[estimator1, competing_estimator],
        cv=2,
        weighting_strategy=weighting_strategy,
        random_state=42,
        alpha=0.01,
    )

    weights = model._compute_point_weights(X, y)

    assert len(weights) == 2
    assert np.isclose(np.sum(weights), 1.0)
    assert np.all(weights >= 0)

    if weighting_strategy == "uniform":
        assert np.allclose(weights, np.array([0.5, 0.5]))
+
+
def test_point_ensemble_predict_with_uniform_weights(
    toy_dataset, estimator1, estimator2
):
    """With equal weights, predictions are the plain average of the two base
    estimators' outputs."""
    X, _ = toy_dataset

    model = PointEnsembleEstimator(
        estimators=[estimator1, estimator2],
        weighting_strategy="uniform",
        alpha=0.01,
    )
    # Weights are injected directly so predict() is exercised without fit().
    model.weights = np.array([0.5, 0.5])

    predictions = model.predict(X)
    expected = np.array([3, 5, 7, 9])

    assert predictions[0] == 3
    assert predictions[-1] == 9
    assert np.array_equal(predictions, expected)
+
+
def test_quantile_ensemble_get_stacking_training_data(
    toy_dataset, quantiles, quantile_estimator1, quantile_estimator2
):
    """Quantile stacking data must provide one column per (estimator,
    quantile) pair and cover every training row."""
    X, y = toy_dataset

    model = QuantileEnsembleEstimator(
        estimators=[quantile_estimator1, quantile_estimator2],
        cv=2,
        random_state=42,
        alpha=0.01,
    )

    (
        val_indices,
        val_targets,
        val_predictions,
    ) = model._get_stacking_training_data(X, y, quantiles)

    assert len(val_indices) == len(val_targets) == len(X)
    assert val_predictions.shape[0] == len(X)
    # Two estimators, each contributing len(quantiles) prediction columns.
    assert val_predictions.shape[1] == 2 * len(quantiles)
+
+
@pytest.mark.parametrize("weighting_strategy", ["uniform", "linear_stack"])
def test_quantile_ensemble_compute_quantile_weights(
    toy_dataset,
    quantiles,
    quantile_estimator1,
    quantile_estimator2,
    weighting_strategy,
):
    """Per-quantile weights must form a simplex (non-negative, summing to one)
    for every weighting strategy.

    The previous if/elif branches asserted byte-identical properties for both
    strategies, so the duplicated checks are merged (DRY).
    """
    X, y = toy_dataset

    model = QuantileEnsembleEstimator(
        estimators=[quantile_estimator1, quantile_estimator2],
        cv=2,
        weighting_strategy=weighting_strategy,
        random_state=42,
        alpha=0.01,
    )

    weights = model._compute_quantile_weights(X, y, quantiles)

    assert weights.shape == (len(quantiles), 2)
    for quantile_weights in weights:
        assert np.isclose(np.sum(quantile_weights), 1.0)
        assert np.all(quantile_weights >= 0)
+
+
def test_quantile_ensemble_predict_quantiles(
    toy_dataset, quantiles, quantile_estimator1, quantile_estimator2
):
    """Equal per-quantile weights must average the two base estimators'
    outputs, and predict() must consult every base estimator."""
    X, _ = toy_dataset
    n_samples = len(X)

    model = QuantileEnsembleEstimator(
        estimators=[quantile_estimator1, quantile_estimator2],
        weighting_strategy="uniform",
        alpha=0.01,
    )
    # State injected directly so predict() is exercised without fit().
    model.quantiles = quantiles
    model.quantile_weights = np.array([[0.5, 0.5], [0.5, 0.5], [0.5, 0.5]])

    predictions = model.predict(X)
    expected = np.tile([3.0, 4.0, 5.0], (n_samples, 1))
    assert np.array_equal(predictions, expected)

    quantile_estimator1.predict.assert_called_with(X)
    quantile_estimator2.predict.assert_called_with(X)
+
+
@pytest.mark.slow
@pytest.mark.parametrize(
    "data_fixture_name",
    [
        "heteroscedastic_data",
        "diabetes_data",
    ],
)
@pytest.mark.parametrize("weighting_strategy", ["linear_stack"])
def test_ensemble_outperforms_components_multiple_repetitions(
    request,
    data_fixture_name,
    weighting_strategy,
    ensemble_test_quantiles,
):
    """The stacked quantile ensemble should beat (or tie) its best component
    on average pinball loss in a majority of randomized repetitions."""
    X, y = request.getfixturevalue(data_fixture_name)
    # Small train split (30%) keeps the slow repetitions affordable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.7, random_state=42
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    n_repetitions = 10
    success_threshold = 0.51  # strictly more than half the repetitions

    pinball_wins = 0

    for rep in range(n_repetitions):
        estimators = create_diverse_quantile_estimators(random_state=42 + rep)

        individual_losses = []
        for estimator in estimators:
            estimator.fit(X_train, y_train, quantiles=ensemble_test_quantiles)
            y_pred_individual = estimator.predict(X_test)
            loss = evaluate_quantile_performance(
                y_test, y_pred_individual, ensemble_test_quantiles
            )
            individual_losses.append(loss)

        best_individual_loss = min(individual_losses)

        # Fresh estimator instances so the ensemble trains from scratch.
        ensemble = QuantileEnsembleEstimator(
            estimators=create_diverse_quantile_estimators(random_state=42 + rep),
            cv=5,
            weighting_strategy=weighting_strategy,
            random_state=42 + rep,
            alpha=0.01,  # Reduced alpha for better performance with quantile Lasso
        )

        ensemble.fit(X_train, y_train, quantiles=ensemble_test_quantiles)
        y_pred_ensemble = ensemble.predict(X_test)
        ensemble_loss = evaluate_quantile_performance(
            y_test, y_pred_ensemble, ensemble_test_quantiles
        )

        if ensemble_loss <= best_individual_loss:
            pinball_wins += 1

    pinball_success_rate = pinball_wins / n_repetitions
    assert pinball_success_rate > success_threshold
+
+
@pytest.mark.slow
@pytest.mark.parametrize(
    "data_fixture_name",
    [
        "heteroscedastic_data",
        "diabetes_data",
    ],
)
@pytest.mark.parametrize("weighting_strategy", ["linear_stack"])
def test_point_ensemble_outperforms_components_multiple_repetitions(
    request,
    data_fixture_name,
    weighting_strategy,
):
    """The stacked point ensemble should beat (or tie) its best component on
    MSE in a majority of randomized repetitions."""
    X, y = request.getfixturevalue(data_fixture_name)
    # Small train split (30%) keeps the slow repetitions affordable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.7, random_state=42
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    n_repetitions = 10
    success_threshold = 0.51  # strictly more than half the repetitions

    mse_wins = 0

    for rep in range(n_repetitions):
        estimators = create_diverse_point_estimators(random_state=42 + rep)

        individual_losses = []
        for estimator in estimators:
            estimator.fit(X_train, y_train)
            y_pred_individual = estimator.predict(X_test)
            loss = evaluate_point_performance(y_test, y_pred_individual)
            individual_losses.append(loss)

        best_individual_loss = min(individual_losses)

        # Fresh estimator instances so the ensemble trains from scratch.
        ensemble = PointEnsembleEstimator(
            estimators=create_diverse_point_estimators(random_state=42 + rep),
            cv=5,
            weighting_strategy=weighting_strategy,
            random_state=42 + rep,
            alpha=0.01,  # Reduced alpha for better performance
        )

        ensemble.fit(X_train, y_train)
        y_pred_ensemble = ensemble.predict(X_test)
        ensemble_loss = evaluate_point_performance(y_test, y_pred_ensemble)

        if ensemble_loss <= best_individual_loss:
            mse_wins += 1

    mse_success_rate = mse_wins / n_repetitions
    assert mse_success_rate > success_threshold
diff --git a/tests/selection/estimators/test_quantile_estimation.py b/tests/selection/estimators/test_quantile_estimation.py
new file mode 100644
index 0000000..5f5187f
--- /dev/null
+++ b/tests/selection/estimators/test_quantile_estimation.py
@@ -0,0 +1,187 @@
+import pytest
+import numpy as np
+from typing import List, Dict, Any
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from unittest.mock import Mock
+from confopt.selection.estimators.quantile_estimation import (
+ QuantileLasso,
+ QuantileGBM,
+ QuantileForest,
+ QuantileKNN,
+ QuantileGP,
+ QuantileLeaf,
+ QuantRegWrapper,
+)
+
+
def assess_quantile_quality(
    y_true: np.ndarray, predictions: np.ndarray, quantiles: List[float]
) -> Dict[str, Any]:
    """Summarize monotonicity violations and coverage accuracy of a matrix of
    quantile predictions (one column per quantile, ascending order expected).

    A "hard" violation is an adjacent quantile pair that decreases by more
    than the tolerance; a "soft" violation is an adjacent pair equal within
    tolerance.
    """
    n_samples, n_quantiles = predictions.shape
    tolerance = 1e-6

    # Differences between adjacent quantile columns, vectorized over all rows.
    adjacent_diffs = np.diff(predictions, axis=1)
    hard_mask = adjacent_diffs < -tolerance
    soft_mask = np.abs(adjacent_diffs) <= tolerance

    hard_violations = int(np.sum(hard_mask))
    soft_violations = int(np.sum(soft_mask))
    violation_magnitudes = np.abs(adjacent_diffs[hard_mask])

    total_comparisons = n_samples * (n_quantiles - 1)

    # Empirical coverage of each quantile column against its nominal level.
    coverage_errors = [
        abs(np.mean(y_true <= predictions[:, column]) - quantile)
        for column, quantile in enumerate(quantiles)
    ]

    return {
        "hard_violations": hard_violations,
        "soft_violations": soft_violations,
        "hard_rate": hard_violations / total_comparisons,
        "soft_rate": soft_violations / total_comparisons,
        "mean_violation_magnitude": np.mean(violation_magnitudes)
        if violation_magnitudes.size
        else 0.0,
        "coverage_errors": coverage_errors,
        "mean_coverage_error": np.mean(coverage_errors),
        "total_comparisons": total_comparisons,
    }
+
+
# Acceptance thresholds for quantile-prediction quality, keyed by how the
# estimator produces its quantiles:
# - "single_fit": one model emits all quantiles jointly, so quantile crossings
#   (hard violations) are expected to be absent.
# - "multi_fit": one model per quantile, so a small rate of crossings between
#   independently fitted models is tolerated.
QUALITY_THRESHOLDS = {
    "single_fit": {
        "max_hard_violation_rate": 0.0,
        "max_soft_violation_rate": 0.10,
        "max_coverage_error": 0.20,
    },
    "multi_fit": {
        "max_hard_violation_rate": 0.08,
        "max_soft_violation_rate": 0.18,
        "max_coverage_error": 0.20,
    },
}
+
+
@pytest.mark.parametrize(
    "data_fixture_name",
    [
        "linear_regression_data",
        "heteroscedastic_data",
        "diabetes_data",
    ],
)
@pytest.mark.parametrize(
    "estimator_class,estimator_params,estimator_type",
    [
        (
            QuantileGP,
            {"kernel": "matern", "random_state": 42},
            "single_fit",
        ),
        (
            QuantileForest,
            {"n_estimators": 30, "max_depth": 6, "random_state": 42},
            "single_fit",
        ),
        (
            QuantileLeaf,
            {"n_estimators": 30, "max_depth": 6, "random_state": 42},
            "single_fit",
        ),
        (QuantileKNN, {"n_neighbors": 8}, "single_fit"),
        (
            QuantileGBM,
            {
                "learning_rate": 0.1,
                "n_estimators": 30,
                "min_samples_split": 8,
                "min_samples_leaf": 4,
                "max_depth": 4,
                "random_state": 42,
            },
            "multi_fit",
        ),
        (
            QuantileLasso,
            {"max_iter": 1000, "p_tol": 1e-6, "random_state": 42},
            "multi_fit",
        ),
    ],
)
def test_quantile_estimator_comprehensive_quality(
    request,
    data_fixture_name,
    estimator_class,
    estimator_params,
    estimator_type,
    comprehensive_test_quantiles,
):
    """Every quantile estimator must produce finite, correctly shaped
    predictions whose monotonicity and coverage meet the thresholds for its
    fitting style (see QUALITY_THRESHOLDS)."""
    X, y = request.getfixturevalue(data_fixture_name)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Standardize features to avoid penalizing scale-sensitive estimators
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    quantiles = comprehensive_test_quantiles
    estimator = estimator_class(**estimator_params)

    try:
        estimator.fit(X_train, y_train, quantiles)
        predictions = estimator.predict(X_test)
    except Exception as e:
        pytest.fail(
            "Estimator {} failed on {}: {}".format(
                estimator_class.__name__, data_fixture_name, str(e)
            )
        )

    assert predictions.shape == (len(X_test), len(quantiles))
    assert not np.any(np.isnan(predictions))
    assert not np.any(np.isinf(predictions))

    # Thresholds are read-only here, so no defensive copy is needed (the
    # previous `.copy()` was dead code).
    base_thresholds = QUALITY_THRESHOLDS[estimator_type]

    quality_stats = assess_quantile_quality(y_test, predictions, quantiles)

    assert quality_stats["hard_rate"] <= base_thresholds["max_hard_violation_rate"]
    assert quality_stats["soft_rate"] <= base_thresholds["max_soft_violation_rate"]
    assert quality_stats["mean_coverage_error"] <= base_thresholds["max_coverage_error"]
+
+
def test_quantreg_wrapper_with_intercept():
    """With has_intercept=True the first fitted parameter is treated as the
    intercept and the remainder multiply the feature columns."""
    mock_results = Mock()
    mock_results.params = np.array([1.0, 2.0, 3.0])

    wrapper = QuantRegWrapper(mock_results, has_intercept=True)
    X_test = np.array([[1, 2], [3, 4]])

    predictions = wrapper.predict(X_test)
    # Row-wise: intercept + x @ coefficients.
    expected = np.array([1 + 1 * 2 + 2 * 3, 1 + 3 * 2 + 4 * 3])

    np.testing.assert_array_equal(predictions, expected)
+
+
def test_quantreg_wrapper_without_intercept():
    """With has_intercept=False every fitted parameter is a feature
    coefficient; predictions are a pure dot product."""
    mock_results = Mock()
    mock_results.params = np.array([2.0, 3.0])

    wrapper = QuantRegWrapper(mock_results, has_intercept=False)
    X_test = np.array([[1, 2], [3, 4]])

    predictions = wrapper.predict(X_test)
    # Row-wise: x @ coefficients, no intercept term.
    expected = np.array([1 * 2 + 2 * 3, 3 * 2 + 4 * 3])

    np.testing.assert_array_equal(predictions, expected)
diff --git a/tests/selection/sampling/__init__.py b/tests/selection/sampling/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/selection/sampling/test_bound_samplers.py b/tests/selection/sampling/test_bound_samplers.py
new file mode 100644
index 0000000..957ed2b
--- /dev/null
+++ b/tests/selection/sampling/test_bound_samplers.py
@@ -0,0 +1,413 @@
+"""
+Tests for bound-based acquisition strategies in conformal prediction optimization.
+
+This module tests the bound-based acquisition samplers that use prediction interval
+bounds for optimization decisions. Tests focus on methodological correctness of
+bound extraction, exploration-exploitation balance, adaptive interval width
+adjustment, and mathematical properties of the acquisition functions.
+
+Test coverage includes:
+- PessimisticLowerBoundSampler: Conservative bound-based acquisition
+- LowerBoundSampler: LCB-style exploration with decay schedules
+- Adaptive interval width mechanisms and coverage feedback
+- Mathematical properties and edge cases
+"""
+
+import pytest
+import numpy as np
+from unittest.mock import patch
+from confopt.selection.sampling.bound_samplers import (
+ PessimisticLowerBoundSampler,
+ LowerBoundSampler,
+)
+
+
class TestPessimisticLowerBoundSampler:
    """Test conservative acquisition strategy using pessimistic lower bounds."""

    @pytest.mark.parametrize("interval_width", [0.7, 0.8, 0.9, 0.95])
    def test_initialization_interval_width(self, interval_width):
        """Test initialization with different interval widths."""
        sampler = PessimisticLowerBoundSampler(interval_width=interval_width)

        assert sampler.interval_width == interval_width
        assert sampler.alpha == 1 - interval_width
        assert 0 < sampler.alpha < 1

    @pytest.mark.parametrize("adapter", [None, "DtACI", "ACI"])
    def test_initialization_adapter_types(self, adapter):
        """Test initialization with different adapter configurations."""
        sampler = PessimisticLowerBoundSampler(interval_width=0.8, adapter=adapter)

        if adapter is None:
            assert sampler.adapter is None
        else:
            assert sampler.adapter is not None

    def test_fetch_alphas_single_value(self):
        """Test alpha retrieval returns single value list."""
        sampler = PessimisticLowerBoundSampler(interval_width=0.85)
        alphas = sampler.fetch_alphas()

        assert isinstance(alphas, list)
        assert len(alphas) == 1
        assert abs(alphas[0] - 0.15) < 1e-10

    def test_fetch_alphas_consistency(self):
        """Test alpha values remain consistent with interval width."""
        interval_widths = [0.7, 0.8, 0.9]
        for width in interval_widths:
            sampler = PessimisticLowerBoundSampler(interval_width=width)
            alphas = sampler.fetch_alphas()
            assert alphas[0] == 1 - width

    @patch("confopt.selection.sampling.bound_samplers.update_single_interval_width")
    def test_update_interval_width_with_adapter(self, mock_update):
        """Test interval width update with adapter present."""
        mock_update.return_value = 0.12
        sampler = PessimisticLowerBoundSampler(interval_width=0.8, adapter="ACI")
        original_alpha = sampler.alpha

        sampler.update_interval_width(beta=0.85)

        mock_update.assert_called_once_with(sampler.adapter, original_alpha, 0.85)
        assert sampler.alpha == 0.12

    @patch("confopt.selection.sampling.bound_samplers.update_single_interval_width")
    def test_update_interval_width_without_adapter(self, mock_update):
        """Test interval width update without adapter."""
        mock_update.return_value = 0.2
        sampler = PessimisticLowerBoundSampler(interval_width=0.8, adapter=None)
        original_alpha = sampler.alpha

        sampler.update_interval_width(beta=0.85)

        mock_update.assert_called_once_with(None, original_alpha, 0.85)
        assert sampler.alpha == 0.2

    @pytest.mark.parametrize("beta", [0.5, 0.75, 0.85, 0.95])
    def test_update_interval_width_coverage_range(self, beta):
        """Test update with different coverage rates."""
        sampler = PessimisticLowerBoundSampler(interval_width=0.8, adapter="ACI")
        # A bare no-op `sampler.alpha` expression previously sat here; removed.

        sampler.update_interval_width(beta=beta)

        # Alpha should be adjusted based on coverage
        assert isinstance(sampler.alpha, float)
        assert 0 < sampler.alpha < 1

    def test_interval_width_bounds(self):
        """Test interval width parameter bounds."""
        # Valid ranges
        for width in [0.5, 0.8, 0.99]:
            sampler = PessimisticLowerBoundSampler(interval_width=width)
            assert 0 < sampler.alpha < 1

        # Edge case: very high confidence
        sampler = PessimisticLowerBoundSampler(interval_width=0.999)
        assert abs(sampler.alpha - 0.001) < 1e-10

    def test_alpha_interval_width_relationship(self):
        """Test mathematical relationship between alpha and interval width."""
        widths = np.linspace(0.5, 0.95, 10)
        for width in widths:
            sampler = PessimisticLowerBoundSampler(interval_width=width)
            assert abs(sampler.alpha + sampler.interval_width - 1.0) < 1e-10
+
+
+class TestLowerBoundSampler:
+ """Test LCB acquisition strategy with adaptive exploration."""
+
    @pytest.mark.parametrize("interval_width", [0.7, 0.8, 0.9])
    @pytest.mark.parametrize("adapter", [None, "DtACI", "ACI"])
    def test_initialization_inheritance(self, interval_width, adapter):
        """Test proper inheritance from PessimisticLowerBoundSampler."""
        sampler = LowerBoundSampler(interval_width=interval_width, adapter=adapter)

        # Parent-class invariants must hold on the subclass as well.
        assert sampler.interval_width == interval_width
        assert sampler.alpha == 1 - interval_width
        if adapter is None:
            assert sampler.adapter is None
        else:
            assert sampler.adapter is not None
+
    @pytest.mark.parametrize(
        "beta_decay", [None, "inverse_square_root_decay", "logarithmic_decay"]
    )
    def test_initialization_decay_strategies(self, beta_decay):
        """Test initialization with different decay strategies."""
        sampler = LowerBoundSampler(beta_decay=beta_decay)

        # Every strategy starts at t=1 with full exploration weight (beta=1).
        assert sampler.beta_decay == beta_decay
        assert sampler.t == 1
        assert sampler.beta == 1
+
    @pytest.mark.parametrize("c", [0.1, 1.0, 5.0, 10.0])
    def test_initialization_exploration_constant(self, c):
        """Test initialization with different exploration constants."""
        sampler = LowerBoundSampler(c=c)

        # c is stored as-is; it scales the decay schedules tested below.
        assert sampler.c == c
+
    @pytest.mark.parametrize("beta_max", [1.0, 5.0, 10.0, 20.0])
    def test_initialization_beta_max(self, beta_max):
        """Test initialization with different maximum beta values."""
        sampler = LowerBoundSampler(beta_max=beta_max)

        # beta_max is stored as-is as the cap on the exploration coefficient.
        assert sampler.beta_max == beta_max
+
    def test_time_step_initialization(self):
        """Test initial time step and exploration parameter."""
        sampler = LowerBoundSampler()

        assert sampler.t == 1
        assert sampler.beta == 1
        # mu_max starts at -inf so any first observed value replaces it.
        assert sampler.mu_max == float("-inf")
+
    def test_update_exploration_step_time_increment(self):
        """Test time step increment in exploration update."""
        sampler = LowerBoundSampler()
        initial_t = sampler.t

        sampler.update_exploration_step()

        # Each call advances the internal clock by exactly one step.
        assert sampler.t == initial_t + 1
+
    @pytest.mark.parametrize(
        "decay_type", ["inverse_square_root_decay", "logarithmic_decay"]
    )
    def test_update_exploration_decay_formulas(self, decay_type):
        """Test exploration decay formula implementations.

        Only the overall decreasing trend is checked here; exact per-step
        values are pinned in the dedicated tests below.
        """
        c = 2.0
        sampler = LowerBoundSampler(beta_decay=decay_type, c=c)

        # Run multiple steps to test decay
        betas = []
        for _ in range(10):
            sampler.update_exploration_step()
            betas.append(sampler.beta)

        # Beta should generally decrease (with possible fluctuations due to log term)
        assert betas[-1] < betas[0]
        assert all(beta >= 0 for beta in betas)
+
    def test_update_exploration_inverse_square_root_decay(self):
        """Test inverse square root decay: beta = sqrt(c / t)."""
        c = 4.0
        sampler = LowerBoundSampler(beta_decay="inverse_square_root_decay", c=c)

        sampler.update_exploration_step()  # t=2
        expected_beta = np.sqrt(c / 2)
        assert abs(sampler.beta - expected_beta) < 1e-10

        sampler.update_exploration_step()  # t=3
        expected_beta = np.sqrt(c / 3)
        assert abs(sampler.beta - expected_beta) < 1e-10
+
    def test_update_exploration_logarithmic_decay(self):
        """Test logarithmic decay: beta = sqrt(c * ln(t) / t)."""
        c = 2.0
        sampler = LowerBoundSampler(beta_decay="logarithmic_decay", c=c)

        sampler.update_exploration_step()  # t=2
        expected_beta = np.sqrt((c * np.log(2)) / 2)
        assert abs(sampler.beta - expected_beta) < 1e-10

        sampler.update_exploration_step()  # t=3
        expected_beta = np.sqrt((c * np.log(3)) / 3)
        assert abs(sampler.beta - expected_beta) < 1e-10
+
    def test_update_exploration_no_decay(self):
        """Test behavior when no decay is specified."""
        sampler = LowerBoundSampler(beta_decay=None)
        initial_beta = sampler.beta

        # Without a decay schedule, beta stays constant while t advances.
        for _ in range(5):
            sampler.update_exploration_step()
            assert sampler.beta == initial_beta
+
    def test_update_exploration_invalid_decay(self):
        """Test error handling for invalid decay strategies."""
        sampler = LowerBoundSampler()
        # Injected after construction to bypass any validation in __init__.
        sampler.beta_decay = "invalid_decay"

        with pytest.raises(ValueError, match="beta_decay must be"):
            sampler.update_exploration_step()
+
    def test_calculate_ucb_predictions_basic(self, test_predictions_and_widths):
        """Test basic LCB calculation functionality."""
        point_estimates, interval_widths = test_predictions_and_widths
        sampler = LowerBoundSampler()

        lcb_values = sampler.calculate_ucb_predictions(point_estimates, interval_widths)

        # Calculation is elementwise, so the output shape matches the inputs.
        assert lcb_values.shape == point_estimates.shape
        assert isinstance(lcb_values, np.ndarray)
+
    def test_calculate_ucb_predictions_formula(self, test_predictions_and_widths):
        """Test LCB formula implementation: estimate - beta * width."""
        point_estimates, interval_widths = test_predictions_and_widths
        beta = 2.0
        sampler = LowerBoundSampler()
        # Beta set directly to decouple the formula check from decay logic.
        sampler.beta = beta

        lcb_values = sampler.calculate_ucb_predictions(point_estimates, interval_widths)
        expected_values = point_estimates - beta * interval_widths

        np.testing.assert_array_almost_equal(lcb_values, expected_values)
+
    def test_calculate_ucb_predictions_beta_effect(self, test_predictions_and_widths):
        """Test effect of different beta values on LCB calculations."""
        point_estimates, interval_widths = test_predictions_and_widths

        beta_low = LowerBoundSampler()
        beta_low.beta = 0.5

        beta_high = LowerBoundSampler()
        beta_high.beta = 3.0

        lcb_low = beta_low.calculate_ucb_predictions(point_estimates, interval_widths)
        lcb_high = beta_high.calculate_ucb_predictions(point_estimates, interval_widths)

        # Higher beta should lead to lower (more conservative) LCB values
        # NOTE(review): strict inequality assumes the fixture's interval
        # widths are all strictly positive — confirm in the fixture.
        assert np.all(lcb_high < lcb_low)
+
+ def test_calculate_ucb_predictions_edge_cases(self):
+ """Test LCB calculation with edge case inputs."""
+ sampler = LowerBoundSampler()
+
+ # Zero interval widths: LCB collapses to the raw point estimates.
+ point_estimates = np.array([1, 2, 3])
+ interval_widths = np.zeros(3)
+ lcb_values = sampler.calculate_ucb_predictions(point_estimates, interval_widths)
+ np.testing.assert_array_equal(lcb_values, point_estimates)
+
+ # Single-element input: output keeps the one-element shape.
+ single_point = np.array([5.0])
+ single_width = np.array([1.0])
+ lcb_single = sampler.calculate_ucb_predictions(single_point, single_width)
+ assert lcb_single.shape == (1,)
+
+ def test_calculate_ucb_predictions_negative_inputs(self):
+ """Test the LCB formula holds regardless of the sign of the point estimates."""
+ sampler = LowerBoundSampler()
+ sampler.beta = 1.5
+
+ point_estimates = np.array([-2, -1, 0, 1, 2])
+ interval_widths = np.array([0.5, 1.0, 1.5, 1.0, 0.5])
+
+ lcb_values = sampler.calculate_ucb_predictions(point_estimates, interval_widths)
+ expected = point_estimates - 1.5 * interval_widths
+
+ np.testing.assert_array_almost_equal(lcb_values, expected)
+
+ @pytest.mark.parametrize("t_steps", [1, 5, 10, 50])
+ def test_exploration_decay_convergence(self, t_steps):
+ """Test exploration parameter convergence over multiple steps."""
+ sampler = LowerBoundSampler(beta_decay="logarithmic_decay", c=1.0)
+
+ for _ in range(t_steps):
+ sampler.update_exploration_step()
+
+ # Beta should decrease as t increases
+ assert sampler.beta < 1.0
+ assert sampler.beta > 0
+ # Presumably t is initialized to 1 and incremented once per step — confirm.
+ assert sampler.t == t_steps + 1
+
+ def test_exploration_decay_asymptotic_behavior(self):
+ """Test asymptotic behavior of exploration decay."""
+ sampler = LowerBoundSampler(beta_decay="inverse_square_root_decay", c=1.0)
+
+ # Run many steps so c / sqrt(t) decays well below its starting value.
+ for _ in range(1000):
+ sampler.update_exploration_step()
+
+ # Beta should have decayed to a small but strictly positive value.
+ assert 0 < sampler.beta < 0.1
+
+ def test_inheritance_method_access(self):
+ """Test access to inherited methods from parent class."""
+ sampler = LowerBoundSampler(interval_width=0.85, adapter="ACI")
+
+ # Inherited alpha accessor: alpha = 1 - interval_width = 0.15.
+ alphas = sampler.fetch_alphas()
+ assert len(alphas) == 1
+ assert abs(alphas[0] - 0.15) < 1e-10
+
+ # Inherited mutator: beta here is empirical coverage feedback, not the
+ # exploration beta used by the LCB formula.
+ sampler.update_interval_width(beta=0.8)
+ assert isinstance(sampler.alpha, float)
+
+ def test_mathematical_properties_lcb_ordering(self, test_predictions_and_widths):
+ """Test mathematical ordering properties of LCB values."""
+ point_estimates, interval_widths = test_predictions_and_widths
+ sampler = LowerBoundSampler()
+ sampler.beta = 1.0
+
+ lcb_values = sampler.calculate_ucb_predictions(point_estimates, interval_widths)
+
+ # LCB can never exceed the point estimate wherever a positive width is
+ # subtracted; zero-width entries are excluded by the mask.
+ mask = interval_widths > 0
+ assert np.all(lcb_values[mask] <= point_estimates[mask])
+
+ def test_exploration_constant_impact(self, test_predictions_and_widths):
+ """Test impact of exploration constant c on acquisition behavior."""
+ point_estimates, interval_widths = test_predictions_and_widths
+
+ # One step is taken so beta reflects each sampler's c before comparing.
+ sampler_conservative = LowerBoundSampler(c=0.1)
+ sampler_conservative.update_exploration_step()
+
+ sampler_aggressive = LowerBoundSampler(c=10.0)
+ sampler_aggressive.update_exploration_step()
+
+ lcb_conservative = sampler_conservative.calculate_ucb_predictions(
+ point_estimates, interval_widths
+ )
+ lcb_aggressive = sampler_aggressive.calculate_ucb_predictions(
+ point_estimates, interval_widths
+ )
+
+ # A larger c yields a larger beta, hence lower (more explorative) LCB values.
+ assert np.mean(lcb_aggressive) < np.mean(lcb_conservative)
+
+ def test_beta_max_constraint(self):
+ """Test that beta values respect maximum constraint."""
+ beta_max = 5.0
+ sampler = LowerBoundSampler(
+ beta_max=beta_max, c=100.0
+ ) # Large c to potentially exceed beta_max
+
+ # beta_max must cap beta even when c alone would push it far higher.
+ assert sampler.beta <= beta_max
+
+ @pytest.mark.parametrize("array_size", [1, 10, 100, 1000])
+ def test_calculate_ucb_predictions_scalability(self, array_size):
+ """Test LCB calculation handles inputs of varying size with matching output shape."""
+ sampler = LowerBoundSampler()
+
+ # NOTE(review): unseeded RNG — only shape assertions below, so this is
+ # deterministic enough, but seeding would make failures reproducible.
+ point_estimates = np.random.uniform(-5, 5, array_size)
+ interval_widths = np.random.uniform(0.1, 2.0, array_size)
+
+ lcb_values = sampler.calculate_ucb_predictions(point_estimates, interval_widths)
+
+ assert lcb_values.shape == (array_size,)
+ assert len(lcb_values) == array_size
+
+ def test_state_consistency_after_updates(self):
+ """Test state consistency after interleaved exploration and width updates."""
+ sampler = LowerBoundSampler(interval_width=0.8, adapter="ACI", c=2.0)
+ original_interval_width = sampler.interval_width
+
+ # Interleave the two kinds of update to check they do not corrupt each other.
+ sampler.update_exploration_step()
+ sampler.update_interval_width(beta=0.85)
+ sampler.update_exploration_step()
+
+ # State should remain consistent
+ assert isinstance(sampler.alpha, float)
+ assert 0 < sampler.alpha < 1
+ assert sampler.t >= 1
+ assert sampler.beta >= 0
+ # The adapter mutates alpha only; the configured interval_width is immutable.
+ assert sampler.interval_width == original_interval_width
diff --git a/tests/selection/sampling/test_expected_improvement_samplers.py b/tests/selection/sampling/test_expected_improvement_samplers.py
new file mode 100644
index 0000000..e390de3
--- /dev/null
+++ b/tests/selection/sampling/test_expected_improvement_samplers.py
@@ -0,0 +1,102 @@
+"""
+Tests for Expected Improvement acquisition strategies in conformal prediction optimization.
+
+This module tests the Expected Improvement sampler that estimates expected improvement
+through Monte Carlo sampling from prediction intervals. Tests focus on mathematical
+correctness of EI estimation, exploration-exploitation balance, adaptive interval
+width adjustment, and acquisition function properties.
+
+Test coverage includes:
+- ExpectedImprovementSampler: Monte Carlo EI estimation with conformal intervals
+- Best value tracking and improvement computation accuracy
+- Adaptive interval width mechanisms and coverage feedback
+- Mathematical properties of EI acquisition function
+- Edge cases and boundary conditions
+"""
+
+import pytest
+import numpy as np
+from confopt.selection.sampling.expected_improvement_samplers import (
+ ExpectedImprovementSampler,
+)
+
+
+class TestExpectedImprovementSampler:
+ """Test Expected Improvement acquisition strategy using conformal prediction intervals."""
+
+ @pytest.mark.parametrize("n_quantiles", [4, 6, 8])
+ def test_initialization_even_quantiles(self, n_quantiles):
+ """Test initialization with valid even quantile numbers."""
+ sampler = ExpectedImprovementSampler(n_quantiles=n_quantiles)
+
+ # Quantiles pair into two-sided intervals, hence half as many alphas.
+ assert sampler.n_quantiles == n_quantiles
+ assert len(sampler.alphas) == n_quantiles // 2
+
+ @pytest.mark.parametrize("n_quantiles", [3, 5, 7])
+ def test_initialization_odd_quantiles_raises_error(self, n_quantiles):
+ """Test that odd quantile numbers raise validation errors."""
+ with pytest.raises(ValueError):
+ ExpectedImprovementSampler(n_quantiles=n_quantiles)
+
+ @pytest.mark.parametrize("adapter", [None, "DtACI", "ACI"])
+ def test_initialization_adapter_types(self, adapter):
+ """Test initialization with different adapter configurations."""
+ sampler = ExpectedImprovementSampler(n_quantiles=4, adapter=adapter)
+
+ # One adapter is expected per tracked alpha level when adaptation is on.
+ if adapter is None:
+ assert sampler.adapters is None
+ else:
+ assert sampler.adapters is not None
+ assert len(sampler.adapters) == len(sampler.alphas)
+
+ def test_update_best_value(self):
+ """Test best-value tracking under minimization (lower is better)."""
+ sampler = ExpectedImprovementSampler(current_best_value=10.0)
+
+ # Better value should update
+ sampler.update_best_value(5.0)
+ assert sampler.current_best_value == 5.0
+
+ # Should not update if new value is worse
+ sampler.update_best_value(10.0)
+ assert sampler.current_best_value == 5.0
+
+ def test_fetch_alphas_returns_correct_format(self):
+ """Test alpha retrieval returns proper list format."""
+ sampler = ExpectedImprovementSampler(n_quantiles=6)
+ alphas = sampler.fetch_alphas()
+
+ assert isinstance(alphas, list)
+ assert len(alphas) == 3 # n_quantiles // 2
+ assert all(0 < alpha < 1 for alpha in alphas)
+
+ def test_calculate_expected_improvement_negative_values(
+ self, simple_conformal_bounds
+ ):
+ """Test EI values are negative for minimization compatibility."""
+ sampler = ExpectedImprovementSampler(
+ n_quantiles=4, num_ei_samples=20, current_best_value=0.1
+ )
+
+ ei_values = sampler.calculate_expected_improvement(simple_conformal_bounds)
+
+ # EI is negated so that "more improvement" sorts as a smaller value,
+ # matching the minimization convention used by the searchers.
+ assert np.all(ei_values <= 0)
+ n_observations = len(simple_conformal_bounds[0].lower_bounds)
+ assert ei_values.shape == (n_observations,)
+
+ def test_calculate_expected_improvement_deterministic_sampling(
+ self, simple_conformal_bounds
+ ):
+ """Test EI calculation consistency with fixed random seed."""
+ sampler = ExpectedImprovementSampler(n_quantiles=4, num_ei_samples=50)
+
+ # Seeding the global RNG implies the sampler presumably draws Monte Carlo
+ # samples via np.random rather than a private Generator — confirm.
+ np.random.seed(42)
+ ei_values1 = sampler.calculate_expected_improvement(simple_conformal_bounds)
+
+ np.random.seed(42)
+ ei_values2 = sampler.calculate_expected_improvement(simple_conformal_bounds)
+
+ # Results should be identical with same seed
+ np.testing.assert_array_almost_equal(ei_values1, ei_values2)
diff --git a/tests/selection/sampling/test_sampling_utils.py b/tests/selection/sampling/test_sampling_utils.py
new file mode 100644
index 0000000..82c2aac
--- /dev/null
+++ b/tests/selection/sampling/test_sampling_utils.py
@@ -0,0 +1,142 @@
+import pytest
+import numpy as np
+from confopt.selection.sampling.utils import (
+ initialize_quantile_alphas,
+ initialize_multi_adapters,
+ initialize_single_adapter,
+ update_multi_interval_widths,
+ update_single_interval_width,
+ validate_even_quantiles,
+ flatten_conformal_bounds,
+)
+
+
+@pytest.mark.parametrize("n_quantiles", [2, 4, 6, 8, 10])
+def test_initialize_quantile_alphas_even_counts(n_quantiles):
+ """Test quantile alpha initialization with valid even counts."""
+ alphas = initialize_quantile_alphas(n_quantiles)
+
+ # Should return half the input quantiles
+ assert len(alphas) == n_quantiles // 2
+
+ # All alphas should be in valid range
+ assert all(0 < alpha < 1 for alpha in alphas)
+
+ # Spot check against the documented spacing for four quantiles:
+ if n_quantiles == 4:
+ expected_alphas = [0.4, 0.8]
+ np.testing.assert_allclose(alphas, expected_alphas, rtol=1e-10)
+
+
+@pytest.mark.parametrize("n_quantiles", [1, 3, 5, 7])
+def test_initialize_quantile_alphas_odd_counts_raises(n_quantiles):
+ """Test that odd quantile counts raise ValueError (quantiles must pair up)."""
+ with pytest.raises(ValueError):
+ initialize_quantile_alphas(n_quantiles)
+
+
+def test_update_multi_interval_widths_with_adapters(coverage_feedback):
+ """Test multi-interval width updates with adaptation."""
+ alphas = [0.2, 0.1, 0.05]
+ adapters = initialize_multi_adapters(alphas, "DtACI")
+
+ # Copy so the comparison below is not against an aliased, mutated list.
+ initial_alphas = alphas.copy()
+
+ # Update with coverage feedback
+ updated_alphas = update_multi_interval_widths(adapters, alphas, coverage_feedback)
+
+ # Should return list of same length
+ assert len(updated_alphas) == len(initial_alphas)
+
+ # Only type/range invariants are asserted; DtACI's exact step depends on
+ # the coverage_feedback fixture, so no specific values are pinned here.
+ assert isinstance(updated_alphas, list)
+ assert all(isinstance(alpha, float) for alpha in updated_alphas)
+
+ # All alphas should remain in valid range
+ assert all(0 < alpha < 1 for alpha in updated_alphas)
+
+
+def test_update_multi_interval_widths_without_adapters():
+ """Test multi-interval width updates are a no-op when adapters is None."""
+ alphas = [0.2, 0.1, 0.05]
+ betas = [0.8, 0.9, 0.95]
+
+ updated_alphas = update_multi_interval_widths(None, alphas, betas)
+
+ # Should return original alphas unchanged
+ assert updated_alphas == alphas
+
+
+def test_update_single_interval_width():
+ """Test single interval width update with adaptation."""
+ alpha = 0.1
+ adapter = initialize_single_adapter(alpha, "DtACI")
+ beta = 0.85
+
+ updated_alpha = update_single_interval_width(adapter, alpha, beta)
+
+ # Should return a float in valid range
+ assert isinstance(updated_alpha, float)
+ assert 0 < updated_alpha < 1
+ # Assumes one DtACI step with beta != target coverage always moves alpha — confirm.
+ assert updated_alpha != alpha # Should be updated
+
+
+def test_validate_even_quantiles_valid():
+ """Test validation passes for even quantiles."""
+ # Should not raise any exception; the second argument only labels the error.
+ validate_even_quantiles(4, "test_sampler")
+ validate_even_quantiles(6, "another_sampler")
+
+
+@pytest.mark.parametrize("n_quantiles", [1, 3, 5, 7])
+def test_validate_even_quantiles_invalid(n_quantiles):
+ """Test validation raises for odd quantiles, embedding the sampler name in the message."""
+ with pytest.raises(
+ ValueError, match="Number of test_sampler quantiles must be even"
+ ):
+ validate_even_quantiles(n_quantiles, "test_sampler")
+
+
+def test_flatten_conformal_bounds_structure(multi_interval_bounds):
+ """Test conformal bounds flattening produces correct structure."""
+ flattened = flatten_conformal_bounds(multi_interval_bounds)
+
+ # Two columns (lower, upper) per interval, one row per observation.
+ n_obs = len(multi_interval_bounds[0].lower_bounds)
+ n_intervals = len(multi_interval_bounds)
+ expected_shape = (n_obs, n_intervals * 2)
+
+ # Should have correct shape
+ assert flattened.shape == expected_shape
+
+ # Should be numpy array
+ assert isinstance(flattened, np.ndarray)
+
+
+def test_flatten_conformal_bounds_interleaving(small_dataset):
+ """Test that bounds are correctly interleaved in flattened representation."""
+ flattened = flatten_conformal_bounds(small_dataset)
+
+ # Column layout per interval i: column 2i holds lower bounds, 2i+1 upper bounds.
+ for i, bounds in enumerate(small_dataset):
+ lower_col = i * 2
+ upper_col = i * 2 + 1
+
+ np.testing.assert_array_equal(flattened[:, lower_col], bounds.lower_bounds)
+ np.testing.assert_array_equal(flattened[:, upper_col], bounds.upper_bounds)
+
+
+def test_flatten_conformal_bounds_preserves_intervals(nested_intervals):
+ """Test that flattening preserves interval relationships."""
+ flattened = flatten_conformal_bounds(nested_intervals)
+
+ # The nested_intervals fixture is assumed ordered widest-first — confirm;
+ # the column pairs below rely on that ordering.
+ for obs_idx in range(flattened.shape[0]):
+ # Extract bounds for this observation
+ wide_lower, wide_upper = flattened[obs_idx, 0], flattened[obs_idx, 1]
+ med_lower, med_upper = flattened[obs_idx, 2], flattened[obs_idx, 3]
+ narrow_lower, narrow_upper = flattened[obs_idx, 4], flattened[obs_idx, 5]
+
+ # Verify nesting: narrow ⊆ medium ⊆ wide
+ assert wide_lower <= med_lower <= narrow_lower
+ assert narrow_upper <= med_upper <= wide_upper
diff --git a/tests/selection/sampling/test_thompson_samplers.py b/tests/selection/sampling/test_thompson_samplers.py
new file mode 100644
index 0000000..1f7f2e2
--- /dev/null
+++ b/tests/selection/sampling/test_thompson_samplers.py
@@ -0,0 +1,204 @@
+import pytest
+import numpy as np
+from confopt.selection.sampling.thompson_samplers import ThompsonSampler
+from confopt.wrapping import ConformalBounds
+
+
+@pytest.mark.parametrize("n_quantiles", [2, 4, 6, 8])
+def test_thompson_sampler_initialization_valid_quantiles(n_quantiles):
+ """Test Thompson sampler initialization with valid even quantile counts."""
+ sampler = ThompsonSampler(n_quantiles=n_quantiles)
+
+ # Quantiles pair into two-sided intervals, hence half as many alphas.
+ assert len(sampler.alphas) == n_quantiles // 2
+ assert sampler.n_quantiles == n_quantiles
+ assert not sampler.enable_optimistic_sampling
+ assert sampler.adapters is None # Default no adapter
+
+
+@pytest.mark.parametrize("adapter", ["DtACI", "ACI", None])
+def test_thompson_sampler_initialization_with_adapters(adapter):
+ """Test Thompson sampler initialization with different adapter strategies."""
+ sampler = ThompsonSampler(n_quantiles=4, adapter=adapter)
+
+ if adapter is None:
+ assert sampler.adapters is None
+ else:
+ assert len(sampler.adapters) == 2 # n_quantiles // 2
+ # Duck-typed check: each adapter exposes an update() hook.
+ assert all(hasattr(a, "update") for a in sampler.adapters)
+
+
+def test_update_interval_width_with_adapters(coverage_feedback):
+ """Test interval width updating with adaptation enabled."""
+ sampler = ThompsonSampler(n_quantiles=6, adapter="DtACI")
+ # Copy so this snapshot is not aliased to the list the sampler mutates.
+ initial_alphas = sampler.alphas.copy()
+
+ sampler.update_interval_width(coverage_feedback)
+
+ assert len(sampler.alphas) == len(initial_alphas)
+ # Alphas should have changed based on coverage feedback
+ assert not np.array_equal(sampler.alphas, initial_alphas)
+
+
+def test_update_interval_width_without_adapters():
+ """Test interval width updating is a no-op when no adapters are configured."""
+ sampler = ThompsonSampler(n_quantiles=4, adapter=None)
+ initial_alphas = sampler.alphas.copy()
+ betas = [0.85, 0.92]
+
+ # Should return original alphas unchanged when no adapters
+ sampler.update_interval_width(betas)
+ assert np.array_equal(sampler.alphas, initial_alphas)
+
+
+def test_calculate_thompson_predictions_shape(simple_conformal_bounds):
+ """Test Thompson predictions return one scalar draw per observation."""
+ sampler = ThompsonSampler(n_quantiles=4)
+ predictions = sampler.calculate_thompson_predictions(simple_conformal_bounds)
+
+ n_observations = len(simple_conformal_bounds[0].lower_bounds)
+ assert predictions.shape == (n_observations,)
+
+
+def test_calculate_thompson_predictions_values_within_bounds(simple_conformal_bounds):
+ """Test that Thompson predictions fall within conformal bounds."""
+ sampler = ThompsonSampler(n_quantiles=4)
+ predictions = sampler.calculate_thompson_predictions(simple_conformal_bounds)
+
+ # Envelope across both intervals: elementwise min of lowers, max of uppers.
+ all_lower = np.minimum(
+ simple_conformal_bounds[0].lower_bounds, simple_conformal_bounds[1].lower_bounds
+ )
+ all_upper = np.maximum(
+ simple_conformal_bounds[0].upper_bounds, simple_conformal_bounds[1].upper_bounds
+ )
+
+ # All predictions should be within the overall bounds
+ assert np.all(predictions >= all_lower)
+ assert np.all(predictions <= all_upper)
+
+
+@pytest.mark.parametrize("n_quantiles", [2, 4, 6])
+def test_calculate_thompson_predictions_stochasticity(
+ simple_conformal_bounds, n_quantiles
+):
+ """Test that Thompson predictions show appropriate stochastic behavior."""
+ sampler = ThompsonSampler(n_quantiles=n_quantiles)
+
+ # Generate multiple samples
+ samples = []
+ for _ in range(50):
+ predictions = sampler.calculate_thompson_predictions(simple_conformal_bounds)
+ samples.append(predictions)
+
+ samples_array = np.array(samples)
+
+ # Nonzero per-observation variance over 50 unseeded draws; assumes fixture
+ # bounds are non-degenerate (width > 0), else variance could be zero.
+ variance_per_observation = np.var(samples_array, axis=0)
+ assert np.all(variance_per_observation > 0) # Should have non-zero variance
+
+
+def test_calculate_thompson_predictions_optimistic_sampling_enabled(
+ simple_conformal_bounds,
+):
+ """Test Thompson predictions with optimistic sampling enabled."""
+ sampler = ThompsonSampler(n_quantiles=4, enable_optimistic_sampling=True)
+ point_estimates = np.array([0.2, 0.4, 0.6]) # Conservative point estimates
+
+ predictions = sampler.calculate_thompson_predictions(
+ simple_conformal_bounds, point_predictions=point_estimates
+ )
+
+ # Optimistic sampling (minimization): draws are clipped from above so no
+ # prediction exceeds its point estimate.
+ assert np.all(predictions <= point_estimates)
+
+
+def test_calculate_thompson_predictions_mathematical_properties(
+ simple_conformal_bounds,
+):
+ """Test mathematical properties of Thompson sampling distribution.
+
+ Thompson sampling uniformly samples from the flattened bounds matrix,
+ which contains all lower and upper bounds from all intervals.
+ For simple_conformal_bounds, each observation should sample uniformly
+ from the set of bounds: [lower1, upper1, lower2, upper2].
+ """
+ sampler = ThompsonSampler(n_quantiles=4) # Creates 2 intervals
+
+ # Extract expected values for each observation from the bounds
+ expected_values_per_obs = []
+ for obs_idx in range(len(simple_conformal_bounds[0].lower_bounds)):
+ values = [
+ simple_conformal_bounds[0].lower_bounds[obs_idx], # interval 1 lower
+ simple_conformal_bounds[0].upper_bounds[obs_idx], # interval 1 upper
+ simple_conformal_bounds[1].lower_bounds[obs_idx], # interval 2 lower
+ simple_conformal_bounds[1].upper_bounds[obs_idx], # interval 2 upper
+ ]
+ expected_values_per_obs.append(values)
+
+ # Generate many samples for statistical analysis
+ n_samples = 10000
+ samples = []
+ for _ in range(n_samples):
+ predictions = sampler.calculate_thompson_predictions(simple_conformal_bounds)
+ samples.append(predictions)
+
+ samples_array = np.array(samples)
+
+ # For each observation, rigorously test uniform sampling from expected values
+ for obs_idx in range(len(simple_conformal_bounds[0].lower_bounds)):
+ obs_samples = samples_array[:, obs_idx]
+ expected_values = expected_values_per_obs[obs_idx]
+
+ # Test 1: the support is exactly the discrete set of the four bounds.
+ # Assumes the four bounds are distinct per observation — confirm fixture.
+ unique_samples = np.unique(obs_samples)
+ np.testing.assert_array_almost_equal(
+ np.sort(unique_samples),
+ np.sort(expected_values),
+ decimal=10,
+ err_msg=f"Observation {obs_idx} samples not from expected bounds set",
+ )
+
+ # Test 2: Each value should appear with approximately equal frequency (uniform)
+ expected_freq = n_samples / len(expected_values)
+ tolerance = 0.05 * n_samples # 5% tolerance for randomness
+
+ for value in expected_values:
+ actual_freq = np.sum(np.isclose(obs_samples, value))
+ assert abs(actual_freq - expected_freq) < tolerance, (
+ f"Observation {obs_idx}, value {value}: expected ~{expected_freq:.0f} "
+ f"occurrences, got {actual_freq}"
+ )
+
+ # Test 3: Sample mean should equal theoretical mean of uniform distribution
+ theoretical_mean = np.mean(expected_values)
+ sample_mean = np.mean(obs_samples)
+
+ # NOTE(review): relative tolerance degenerates to 0 if the theoretical
+ # mean is 0 — this relies on the fixture keeping means away from zero.
+ mean_tolerance = 0.01 * abs(theoretical_mean) # 1% tolerance
+ assert abs(sample_mean - theoretical_mean) < mean_tolerance, (
+ f"Observation {obs_idx}: theoretical mean {theoretical_mean:.6f}, "
+ f"sample mean {sample_mean:.6f}"
+ )
+
+
+def test_thompson_sampler_deterministic_with_seed():
+ """Test that Thompson sampler produces deterministic results with fixed seed."""
+ sampler = ThompsonSampler(n_quantiles=4)
+
+ # Create fixed bounds
+ bounds = [
+ ConformalBounds(
+ lower_bounds=np.array([0.1, 0.2]), upper_bounds=np.array([0.5, 0.6])
+ )
+ ]
+
+ # Seeding the global RNG implies the sampler presumably draws via np.random
+ # rather than a private Generator — confirm.
+ np.random.seed(42)
+ predictions1 = sampler.calculate_thompson_predictions(bounds)
+
+ # Reset seed and get predictions again
+ np.random.seed(42)
+ predictions2 = sampler.calculate_thompson_predictions(bounds)
+
+ # Should be identical with same seed
+ np.testing.assert_array_equal(predictions1, predictions2)
diff --git a/tests/selection/test_acquisition.py b/tests/selection/test_acquisition.py
new file mode 100644
index 0000000..2f31b92
--- /dev/null
+++ b/tests/selection/test_acquisition.py
@@ -0,0 +1,206 @@
+import pytest
+import numpy as np
+from confopt.selection.acquisition import (
+ QuantileConformalSearcher,
+)
+from confopt.selection.sampling.bound_samplers import (
+ PessimisticLowerBoundSampler,
+ LowerBoundSampler,
+)
+from confopt.selection.sampling.thompson_samplers import ThompsonSampler
+from confopt.selection.sampling.expected_improvement_samplers import (
+ ExpectedImprovementSampler,
+)
+
+from conftest import (
+ QUANTILE_ESTIMATOR_ARCHITECTURES,
+)
+
+
+@pytest.mark.parametrize(
+ "sampler_class,sampler_kwargs",
+ [
+ (PessimisticLowerBoundSampler, {"interval_width": 0.8}),
+ (LowerBoundSampler, {"interval_width": 0.8}),
+ (ThompsonSampler, {"n_quantiles": 4}),
+ (ExpectedImprovementSampler, {"n_quantiles": 4}),
+ ],
+)
+@pytest.mark.parametrize("quantile_arch", QUANTILE_ESTIMATOR_ARCHITECTURES[:1])
+def test_quantile_conformal_searcher(
+ sampler_class, sampler_kwargs, quantile_arch, big_toy_dataset
+):
+ """Smoke-test fit/predict/update of QuantileConformalSearcher across all sampler types."""
+ X, y = big_toy_dataset
+ X_train, y_train = X[:7], y[:7]
+ X_val, y_val = X[7:], y[7:]
+
+ sampler = sampler_class(**sampler_kwargs)
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture=quantile_arch,
+ sampler=sampler,
+ n_pre_conformal_trials=5,
+ )
+
+ # Combine train and val data for new interface
+ X_combined = np.vstack((X_train, X_val))
+ y_combined = np.concatenate((y_train, y_val))
+ searcher.fit(
+ X=X_combined,
+ y=y_combined,
+ tuning_iterations=0,
+ random_state=42,
+ )
+
+ predictions = searcher.predict(X_val)
+ assert len(predictions) == len(X_val)
+
+ # update() expects a 2-D row, hence the reshape of a single observation.
+ X_update = X_val[0].reshape(1, -1)
+ y_update = y_val[0]
+ initial_X_train_len = len(searcher.X_train)
+ initial_y_train_len = len(searcher.y_train)
+
+ searcher.update(X_update, y_update)
+
+ # Data doesn't change, only updates samplers and other states:
+ assert len(searcher.X_train) == initial_X_train_len
+ assert len(searcher.y_train) == initial_y_train_len
+
+
+def test_quantile_searcher_prediction_methods(big_toy_dataset):
+ """Smoke-test predict() for the four sampler families on the 'ql' architecture.
+
+ NOTE(review): this largely duplicates test_quantile_conformal_searcher's
+ parametrized coverage — consider consolidating.
+ """
+ X, y = big_toy_dataset
+ X_train, y_train = X[:7], y[:7]
+ X_val, y_val = X[7:], y[7:]
+ X_test = X_val
+
+ lb_sampler = LowerBoundSampler(interval_width=0.8, beta_decay=None)
+ lb_searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="ql",
+ sampler=lb_sampler,
+ n_pre_conformal_trials=5,
+ )
+ # Combine train and val data for new interface
+ X_combined = np.vstack((X_train, X_val))
+ y_combined = np.concatenate((y_train, y_val))
+ lb_searcher.fit(
+ X=X_combined,
+ y=y_combined,
+ tuning_iterations=0,
+ random_state=42,
+ )
+ lb_predictions = lb_searcher.predict(X_test)
+ assert len(lb_predictions) == len(X_test)
+
+ thompson_sampler = ThompsonSampler(n_quantiles=4)
+ thompson_searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="ql",
+ sampler=thompson_sampler,
+ n_pre_conformal_trials=5,
+ )
+ thompson_searcher.fit(
+ X=X_combined,
+ y=y_combined,
+ tuning_iterations=0,
+ random_state=42,
+ )
+ thompson_predictions = thompson_searcher.predict(X_test)
+ assert len(thompson_predictions) == len(X_test)
+
+ ei_sampler = ExpectedImprovementSampler(n_quantiles=4, current_best_value=0.5)
+ ei_searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="ql",
+ sampler=ei_sampler,
+ n_pre_conformal_trials=5,
+ )
+ ei_searcher.fit(
+ X=X_combined,
+ y=y_combined,
+ tuning_iterations=0,
+ random_state=42,
+ )
+ ei_predictions = ei_searcher.predict(X_test)
+ assert len(ei_predictions) == len(X_test)
+
+ plb_sampler = PessimisticLowerBoundSampler(interval_width=0.8)
+ plb_searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="ql",
+ sampler=plb_sampler,
+ n_pre_conformal_trials=5,
+ )
+ plb_searcher.fit(
+ X=X_combined,
+ y=y_combined,
+ tuning_iterations=0,
+ random_state=42,
+ )
+ plb_predictions = plb_searcher.predict(X_test)
+ assert len(plb_predictions) == len(X_test)
+
+
+@pytest.mark.parametrize("current_best_value", [0.0, 0.5, 1.0, 10.0])
+def test_expected_improvement_best_value_update(current_best_value, big_toy_dataset):
+ """Test that Expected Improvement properly tracks and updates best values."""
+ X, y = big_toy_dataset
+ X_train, y_train = X[:10], y[:10]
+ X_val, y_val = X[10:20], y[10:20]
+
+ sampler = ExpectedImprovementSampler(
+ n_quantiles=4, current_best_value=current_best_value
+ )
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="ql",
+ sampler=sampler,
+ n_pre_conformal_trials=5,
+ )
+
+ # Combine train and val data for new interface
+ X_combined = np.vstack((X_train, X_val))
+ y_combined = np.concatenate((y_train, y_val))
+ searcher.fit(X=X_combined, y=y_combined, tuning_iterations=0, random_state=42)
+
+ # Test that sampler has correct initial best value
+ assert sampler.current_best_value == current_best_value
+
+ # NOTE(review): update() is called here with a 1-D point, whereas
+ # test_quantile_conformal_searcher reshapes to (1, -1) — confirm both
+ # shapes are accepted.
+ new_value = current_best_value - 1.0
+ searcher.update(X_val[0], new_value)
+ assert sampler.current_best_value == new_value
+
+ # Test update with worse value (should not change)
+ worse_value = current_best_value + 1.0
+ searcher.update(X_val[1], worse_value)
+ assert sampler.current_best_value == new_value # Should remain the better value
+
+
+def test_adaptive_alpha_updating(big_toy_dataset):
+ """Test that adaptive alpha updating works correctly for compatible samplers."""
+ X, y = big_toy_dataset
+ X_train, y_train = X[:15], y[:15]
+ X_val, y_val = X[15:30], y[15:30]
+
+ # Test with adaptive sampler
+ sampler = LowerBoundSampler(interval_width=0.8, adapter="DtACI")
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="ql",
+ sampler=sampler,
+ n_pre_conformal_trials=5,
+ )
+
+ # Combine train and val data for new interface
+ X_combined = np.vstack((X_train, X_val))
+ y_combined = np.concatenate((y_train, y_val))
+ searcher.fit(X=X_combined, y=y_combined, tuning_iterations=0, random_state=42)
+
+ # Copy the snapshot so later mutation by update() cannot alias it.
+ initial_alphas = searcher.sampler.fetch_alphas().copy()
+
+ # Each update feeds coverage feedback to the DtACI adapter.
+ for i in range(3):
+ test_point = X_val[i]
+ test_value = y_val[i]
+ searcher.update(test_point, test_value)
+
+ # Check that alphas change:
+ final_alphas = searcher.sampler.fetch_alphas()
+ assert len(final_alphas) == len(initial_alphas)
+ assert all(0 < alpha < 1 for alpha in final_alphas)
+ assert not np.array_equal(initial_alphas, final_alphas)
diff --git a/tests/selection/test_adaptation.py b/tests/selection/test_adaptation.py
new file mode 100644
index 0000000..0b520eb
--- /dev/null
+++ b/tests/selection/test_adaptation.py
@@ -0,0 +1,607 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression
+from confopt.selection.adaptation import DtACI, pinball_loss
+
+
class SimpleACI:
    """Simplified ACI implementation from Gibbs & Candès (2021) paper.

    Implements the basic adaptive conformal inference recursion

        alpha_{t+1} = alpha_t + gamma * (alpha - err_t)

    where err_t is a binary indicator (1 = breach/error, 0 = coverage/no
    error), exactly as in equation (2) of the paper. Used only for testing
    equivalence with DtACI when a single gamma value is configured.
    """

    def __init__(self, alpha: float = 0.1, gamma: float = 0.01):
        """Store the target miscoverage level and learning rate.

        Args:
            alpha: Target miscoverage level, strictly inside (0, 1).
            gamma: Positive learning rate for the alpha recursion.
        """
        if not 0 < alpha < 1:
            raise ValueError("alpha must be in (0, 1)")
        if gamma <= 0:
            raise ValueError("gamma must be positive")

        self.alpha = alpha
        self.gamma = gamma
        # Running estimate; starts at the target level.
        self.alpha_t = alpha
        self.alpha_history = []

    def update(self, err_t: int) -> float:
        """Advance the recursion one step given a binary error indicator.

        Args:
            err_t: 1 when the interval was breached, 0 when it covered.

        Returns:
            The updated miscoverage level alpha_{t+1}.
        """
        if err_t not in (0, 1):
            raise ValueError(f"err_t must be 0 or 1, got {err_t}")

        step = self.gamma * (self.alpha - err_t)
        # Clip so the level stays usable as a quantile order.
        self.alpha_t = np.clip(self.alpha_t + step, 0.001, 0.999)

        self.alpha_history.append(self.alpha_t)
        return self.alpha_t
+
+
class StaticCI:
    """Non-adaptive baseline whose alpha never moves, whatever the feedback."""

    def __init__(self, alpha: float = 0.1):
        if not 0 < alpha < 1:
            raise ValueError("alpha must be in (0, 1)")
        self.alpha = alpha
        self.alpha_t = alpha
        self.alpha_history = []

    def update(self, beta: float) -> float:
        """Validate beta, record the (unchanged) alpha and return it."""
        if not 0 <= beta <= 1:
            raise ValueError(f"beta must be in [0, 1], got {beta}")
        self.alpha_history.append(self.alpha_t)
        return self.alpha_t
+
+
def run_conformal_performance_test(method, X, y, target_alpha, gamma_values=None):
    """Run an online conformal prediction loop and summarize its behavior.

    Args:
        method: Either 'dtaci' or 'static' to pick the adaptation scheme.
        X, y: Data for the online simulation.
        target_alpha: Target miscoverage level.
        gamma_values: Learning rates for DtACI (ignored for the static method).

    Returns:
        Dictionary with coverage-error and alpha-adaptation metrics.
    """
    if method == "dtaci":
        predictor = DtACI(
            alpha=target_alpha,
            gamma_values=gamma_values if gamma_values is not None else [0.01, 0.05, 0.1],
        )
    elif method == "static":
        predictor = StaticCI(alpha=target_alpha)
    else:
        raise ValueError("method must be 'dtaci' or 'static'")

    initial_window = 30
    breaches = []
    alpha_trace = []

    for step in range(initial_window, len(X)):
        history_X, history_y = X[:step], y[:step]
        query_X = X[step].reshape(1, -1)
        query_y = y[step]

        # Most recent 30% (at least 10 points) is held out for calibration.
        n_cal = max(int(len(history_X) * 0.3), 10)
        fit_X, cal_X = history_X[:-n_cal], history_X[-n_cal:]
        fit_y, cal_y = history_y[:-n_cal], history_y[-n_cal:]

        model = LinearRegression()
        model.fit(fit_X, fit_y)
        cal_residuals = np.abs(cal_y - model.predict(cal_X))
        point_prediction = model.predict(query_X)[0]

        # beta = empirical p-value of the new residual among calibration ones.
        beta = np.mean(cal_residuals >= abs(query_y - point_prediction))

        current_alpha = predictor.update(beta=beta)
        alpha_trace.append(current_alpha)

        # Did the realized value fall outside the conformal interval?
        radius = np.quantile(cal_residuals, 1 - current_alpha, method="linear")
        in_interval = (
            point_prediction - radius <= query_y <= point_prediction + radius
        )
        breaches.append(int(not in_interval))

    empirical_coverage = 1 - np.mean(breaches)
    coverage_error = abs(empirical_coverage - (1 - target_alpha))

    return {
        "coverage_error": coverage_error,
        "alpha_variance": np.var(alpha_trace),
        "alpha_range": max(alpha_trace) - min(alpha_trace),
        "alpha_evolution": alpha_trace,
    }
+
+
@pytest.mark.parametrize("gamma", [0.01, 0.05, 0.1])
@pytest.mark.parametrize("target_alpha", [0.1, 0.2])
def test_dtaci_simple_aci_equivalence(gamma, target_alpha):
    """Test that DTACI with single gamma produces identical results to SimpleACI.

    Uses empirical quantile definition that matches conformal theory to ensure
    exact mathematical equivalence between beta-based (DtACI) and interval-based
    (SimpleACI) error signals. The algorithms should produce identical alpha histories."""
    np.random.seed(42)

    # Initialize both algorithms with same parameters
    dtaci = DtACI(alpha=target_alpha, gamma_values=[gamma], use_weighted_average=True)
    simple_aci = SimpleACI(alpha=target_alpha, gamma=gamma)

    # Generate synthetic data for testing
    n_samples = 100
    X = np.random.randn(n_samples, 2)
    y = X[:, 0] + 0.5 * X[:, 1] + 0.1 * np.random.randn(n_samples)

    dtaci_alphas = []
    simple_aci_alphas = []

    # Simulate online conformal prediction
    for i in range(30, n_samples):
        # Split data
        X_past = X[:i]
        y_past = y[:i]
        X_test = X[i].reshape(1, -1)
        y_test = y[i]

        # Use simple train/calibration split
        n_cal = 20
        X_train, X_cal = X_past[:-n_cal], X_past[-n_cal:]
        y_train, y_cal = y_past[:-n_cal], y_past[-n_cal:]

        model = LinearRegression()
        model.fit(X_train, y_train)

        y_cal_pred = model.predict(X_cal)
        cal_residuals = np.abs(y_cal - y_cal_pred)
        y_test_pred = model.predict(X_test)[0]
        test_residual = abs(y_test - y_test_pred)

        current_alpha = dtaci.alpha_t

        # Compute interval coverage using empirical quantile that matches conformal theory
        # This ensures exact equivalence with beta calculation
        sorted_residuals = np.sort(cal_residuals)
        # NOTE(review): n_cal is rebound here; it equals the split size above
        # (len(cal_residuals) == 20), but the shadowing is fragile if the
        # split changes.
        n_cal = len(cal_residuals)
        target_count = (1 - current_alpha) * n_cal
        k = int(np.floor(target_count))
        # sorted_residuals[k] is the empirical (1 - alpha) quantile; clamp to
        # the array ends when alpha is extreme.
        if k == 0:
            quantile = sorted_residuals[0]
        elif k >= n_cal:
            quantile = sorted_residuals[-1]
        else:
            quantile = sorted_residuals[k]
        lower_bound = y_test_pred - quantile
        upper_bound = y_test_pred + quantile
        covered = int(lower_bound <= y_test <= upper_bound)
        err_t = int(not covered)  # 1 if not covered (breach), 0 if covered

        # beta is the empirical p-value of the test residual among calibration
        # residuals; the test relies on this matching DtACI's internal
        # breach indicator exactly (see docstring).
        beta = np.mean(cal_residuals >= test_residual)
        dtaci_alpha = dtaci.update(beta=beta)
        simple_aci_alpha = simple_aci.update(err_t=err_t)

        dtaci_alphas.append(dtaci_alpha)
        simple_aci_alphas.append(simple_aci_alpha)

    # Alpha updates should be identical
    assert np.allclose(dtaci_alphas, simple_aci_alphas, atol=1e-12)

    # Alpha histories should be identical
    assert np.allclose(dtaci.alpha_history, simple_aci.alpha_history, atol=1e-12)
+
+
def test_simple_aci_basic_functionality():
    """SimpleACI should start at alpha and follow the ACI update rule."""
    aci = SimpleACI(alpha=0.1, gamma=0.01)

    assert aci.alpha == 0.1
    assert aci.gamma == 0.01
    assert aci.alpha_t == 0.1
    assert len(aci.alpha_history) == 0

    # Breach (err_t = 1): alpha moves down by gamma * (1 - alpha).
    expected = 0.1 + 0.01 * (0.1 - 1)  # 0.091
    assert abs(aci.update(err_t=1) - expected) < 1e-12
    assert len(aci.alpha_history) == 1

    # Coverage (err_t = 0): alpha moves up by gamma * alpha.
    expected = expected + 0.01 * 0.1  # 0.092
    assert abs(aci.update(err_t=0) - expected) < 1e-12
    assert len(aci.alpha_history) == 2
+
+
def test_simple_aci_parameter_validation():
    """Constructor and update should reject out-of-range inputs."""
    for bad_alpha in (0.0, 1.0):
        with pytest.raises(ValueError, match="alpha must be in"):
            SimpleACI(alpha=bad_alpha)

    for bad_gamma in (0.0, -0.01):
        with pytest.raises(ValueError, match="gamma must be positive"):
            SimpleACI(alpha=0.1, gamma=bad_gamma)

    aci = SimpleACI(alpha=0.1, gamma=0.01)
    for bad_err in (-1, 2):
        with pytest.raises(ValueError, match="err_t must be 0 or 1"):
            aci.update(err_t=bad_err)
+
+
@pytest.mark.parametrize(
    "beta,theta,alpha,expected",
    [
        (0.8, 0.9, 0.1, 0.09),
        (0.95, 0.9, 0.1, 0.005),
        (0.9, 0.9, 0.1, 0.0),
        (0.5, 0.8, 0.2, 0.24),
        (0.7, 0.6, 0.3, 0.03),
    ],
)
def test_pinball_loss_mathematical_correctness(beta, theta, alpha, expected):
    """Pinball loss should reproduce the closed-form values from the paper."""
    observed = pinball_loss(beta=beta, theta=theta, alpha=alpha)
    assert abs(observed - expected) < 1e-10
+
+
def test_pinball_loss_asymmetric_penalty():
    """Under-coverage must be penalized more heavily than over-coverage."""
    alpha, theta = 0.1, 0.9

    # ℓ(0.8, 0.9) = 0.1*(0.8-0.9) - min{0, 0.8-0.9} = -0.01 + 0.1 = 0.09
    under_coverage_loss = pinball_loss(beta=0.8, theta=theta, alpha=alpha)
    # ℓ(1.0, 0.9) = 0.1*(1.0-0.9) - min{0, 1.0-0.9} = 0.01 - 0 = 0.01
    over_coverage_loss = pinball_loss(beta=1.0, theta=theta, alpha=alpha)

    assert abs(under_coverage_loss - 0.09) < 1e-10
    assert abs(over_coverage_loss - 0.01) < 1e-10
    assert under_coverage_loss > over_coverage_loss
+
+
def test_pinball_loss_properties():
    """Pinball loss is non-negative and vanishes exactly at beta == theta."""
    alpha = 0.1
    beta_grid = [0.0, 0.3, 0.5, 0.7, 1.0]
    theta_grid = [0.1, 0.4, 0.6, 0.9]

    for beta in beta_grid:
        for theta in theta_grid:
            loss = pinball_loss(beta, theta, alpha)
            assert loss >= 0
            if abs(beta - theta) < 1e-10:
                assert abs(loss) < 1e-10
+
+
@pytest.mark.parametrize("alpha", [0.05, 0.1, 0.2, 0.5])
def test_dtaci_initialization_parameters(alpha):
    """DtACI should initialize eta, sigma, candidates and weights per the paper."""
    dtaci = DtACI(alpha=alpha)

    # Recompute the theoretical parameter formulas by hand.
    log_term = np.log(dtaci.interval * dtaci.k) + 2
    expected_eta = (
        np.sqrt(3 / dtaci.interval)
        * np.sqrt(log_term)
        / ((1 - alpha) ** 2 * alpha**2)
    )
    expected_sigma = 1 / (2 * dtaci.interval)

    assert abs(dtaci.eta - expected_eta) < 1e-10
    assert abs(dtaci.sigma - expected_sigma) < 1e-12
    assert np.allclose(dtaci.alpha_t_candidates, alpha)
    assert np.allclose(dtaci.weights, 1.0 / dtaci.k)
    assert abs(np.sum(dtaci.weights) - 1.0) < 1e-10
+
+
def test_dtaci_invalid_parameters():
    """Constructor should reject out-of-range alpha and non-positive gammas."""
    for bad_alpha in (0.0, 1.0):
        with pytest.raises(ValueError, match="alpha must be in"):
            DtACI(alpha=bad_alpha)

    with pytest.raises(ValueError, match="gamma values must be positive"):
        DtACI(alpha=0.1, gamma_values=[0.1, 0.0, 0.2])
+
+
@pytest.mark.parametrize("beta", [0.0, 0.25, 0.5, 0.75, 1.0])
def test_dtaci_update_weight_normalization(beta, dtaci_instance):
    """Expert weights stay non-negative and can always be normalized."""
    for _ in range(10):
        dtaci_instance.update(beta=beta)

        weights = dtaci_instance.weights
        # Weights are not required to sum to one, only to be normalizable.
        assert np.all(weights >= 0)
        total_weight = np.sum(weights)
        assert (
            total_weight > 0
        ), "Weight sum should be positive for probability computation"

        normalized = weights / total_weight
        assert abs(np.sum(normalized) - 1.0) < 1e-10
        assert np.all(normalized >= 0)

        # Candidate alphas must remain valid miscoverage levels.
        assert np.all(dtaci_instance.alpha_t_candidates > 0)
        assert np.all(dtaci_instance.alpha_t_candidates < 1)
+
+
def test_dtaci_theoretical_weight_updates():
    """One update should reproduce the paper's exponential-weights step."""
    dtaci = DtACI(alpha=0.1, gamma_values=[0.01, 0.05])
    weights_before = dtaci.weights.copy()
    candidates_before = dtaci.alpha_t_candidates.copy()

    beta = 0.85
    dtaci.update(beta=beta)

    # Recompute the exponential-weighting update by hand.
    losses = np.array(
        [
            pinball_loss(beta=beta, theta=candidate, alpha=dtaci.alpha)
            for candidate in candidates_before
        ]
    )
    reweighted = weights_before * np.exp(-dtaci.eta * losses)
    # Regularization mixes in a uniform share of the total mass.
    expected = (1 - dtaci.sigma) * reweighted + (
        dtaci.sigma * np.sum(reweighted) / dtaci.k
    )

    assert np.allclose(dtaci.weights, expected, atol=1e-12)
+
+
def test_dtaci_expert_alpha_updates():
    """Each expert alpha should follow its own ACI step with its own gamma."""
    dtaci = DtACI(alpha=0.1, gamma_values=[0.01, 0.05])
    candidates_before = dtaci.alpha_t_candidates.copy()

    beta = 0.85
    dtaci.update(beta=beta)

    # Expected per-expert step: alpha_i + gamma_i * (alpha - err_i), clipped.
    for idx, (prior_alpha, gamma) in enumerate(
        zip(candidates_before, dtaci.gamma_values)
    ):
        err_indicator = float(beta < prior_alpha)
        expected = np.clip(
            prior_alpha + gamma * (dtaci.alpha - err_indicator), 0.001, 0.999
        )
        assert abs(dtaci.alpha_t_candidates[idx] - expected) < 1e-12
+
+
def test_dtaci_both_selection_methods():
    """Weighted-average and random-sampling selection both yield valid alphas."""
    np.random.seed(42)
    target_alpha = 0.1
    betas = [0.85, 0.92, 0.88, 0.95, 0.80]

    for use_weighted_average in (True, False):
        dtaci = DtACI(
            alpha=target_alpha,
            gamma_values=[0.01, 0.05],
            use_weighted_average=use_weighted_average,
        )
        alphas = [dtaci.update(beta=beta) for beta in betas]

        assert all(0.001 <= alpha <= 0.999 for alpha in alphas)
        # At least two distinct values => the algorithm actually adapts.
        assert len(set(np.round(alphas, 6))) > 1
+
+
def test_dtaci_convergence_under_stationary_conditions():
    """With beta fixed at alpha, DtACI should settle near the target level."""
    dtaci = DtACI(alpha=0.1, gamma_values=[0.01, 0.02, 0.05])

    # beta == alpha is the equilibrium feedback signal for the ACI recursion.
    stationary_beta = 0.1
    alpha_trace = [dtaci.update(beta=stationary_beta) for _ in range(500)]

    # Judge stability on the tail, after any transient has died out.
    tail = alpha_trace[-100:]
    assert np.var(tail) < 0.01
    assert abs(np.mean(tail) - dtaci.alpha) < 0.1
+
+
def test_dtaci_directional_behavior():
    """Alpha should rise on coverage (beta > alpha) and fall on breaches.

    Follows the ACI rule alpha_{t+1} = alpha_t + gamma * (alpha - err_t),
    where err_t = 1 when beta < alpha (breach) and 0 otherwise (coverage):
    coverage nudges alpha up toward the target, a breach pushes it down.
    """
    for beta in (0.8, 0.05):
        dtaci = DtACI(alpha=0.1, gamma_values=[0.01])
        alpha_before = dtaci.alpha_t
        alpha_after = dtaci.update(beta=beta)
        if beta > alpha_before:
            assert alpha_after > alpha_before
        else:
            assert alpha_after < alpha_before
+
+
def test_dtaci_algorithm_behavior():
    """End-to-end sanity checks on weights, candidate alphas and adaptation."""
    dtaci = DtACI(alpha=0.1, gamma_values=[0.01, 0.05])
    betas = [0.85, 0.92, 0.88, 0.95, 0.80]

    for beta in betas:
        weights_before = dtaci.weights.copy()
        candidates_before = dtaci.alpha_t_candidates.copy()

        alpha_t = dtaci.update(beta=beta)

        # Weights stay valid (non-negative, positive total mass).
        assert np.all(dtaci.weights >= 0)
        assert np.sum(dtaci.weights) > 0

        # Differing expert losses must move the weights.
        losses = [
            pinball_loss(beta, candidate, dtaci.alpha)
            for candidate in candidates_before
        ]
        if not np.allclose(losses, losses[0]):
            assert not np.allclose(dtaci.weights, weights_before, atol=1e-10)

        # All alpha values remain inside the clipped range.
        assert np.all(dtaci.alpha_t_candidates >= 0.001)
        assert np.all(dtaci.alpha_t_candidates <= 0.999)
        assert 0.001 <= alpha_t <= 0.999

    # A second pass should still produce more than one distinct alpha.
    follow_up_alphas = [dtaci.update(beta=beta) for beta in betas]
    assert len(set(np.round(follow_up_alphas, 6))) > 1
+
+
@pytest.mark.parametrize("target_alpha", [0.1, 0.2, 0.5])
def test_dtaci_moderate_shift_performance(moderate_shift_data, target_alpha):
    """DtACI should track coverage and visibly adapt under moderate shift."""
    X, y = moderate_shift_data
    metrics = run_conformal_performance_test("dtaci", X, y, target_alpha)

    assert metrics["coverage_error"] < 0.05
    # Non-trivial variance/range => alpha actually adapted over time.
    assert metrics["alpha_variance"] > 0.00001
    assert metrics["alpha_range"] > 0.0001
+
+
@pytest.mark.parametrize("target_alpha", [0.1, 0.2, 0.5])
def test_dtaci_high_shift_performance(high_shift_data, target_alpha):
    """DtACI should hold coverage and adapt strongly under high shift."""
    X, y = high_shift_data
    metrics = run_conformal_performance_test("dtaci", X, y, target_alpha)

    assert metrics["coverage_error"] < 0.05
    # High shift should force a wider alpha excursion than the moderate case.
    assert metrics["alpha_variance"] > 0.00001
    assert metrics["alpha_range"] > 0.005
+
+
def generate_shifted_data(
    n_points=300, shift_points=None, noise_levels=None, random_seed=42
):
    """Generate synthetic regression data with abrupt noise-level shifts.

    Args:
        n_points: Total number of data points.
        shift_points: Sorted indices where the noise distribution changes.
        noise_levels: One noise scale per segment (len(shift_points) + 1).
        random_seed: Random seed for reproducibility.

    Returns:
        X, y: Feature matrix of shape (n_points, 2) and target vector of
        length n_points.

    Raises:
        ValueError: If noise_levels does not provide one level per segment.
    """
    if shift_points is None:
        shift_points = [80, 160, 240]
    if noise_levels is None:
        noise_levels = [0.1, 0.6, 0.2, 0.8]
    # Each boundary opens a new segment, so exactly one noise level per
    # segment is required; previously a mismatch surfaced as a bare
    # IndexError deep inside the loop.
    if len(noise_levels) != len(shift_points) + 1:
        raise ValueError(
            "noise_levels must have exactly len(shift_points) + 1 entries"
        )

    np.random.seed(random_seed)

    segments = []
    start_idx = 0
    for segment_idx, boundary in enumerate(shift_points + [n_points]):
        segment_size = boundary - start_idx
        X_segment = np.random.randn(segment_size, 2)
        noise = noise_levels[segment_idx] * np.random.randn(segment_size)
        # Target is a simple additive signal plus segment-specific noise.
        segments.append((X_segment, X_segment.sum(axis=1) + noise))
        start_idx = boundary

    X = np.vstack([segment[0] for segment in segments])
    y = np.hstack([segment[1] for segment in segments])

    return X, y
+
+
@pytest.mark.parametrize("target_alpha", [0.1, 0.2])
def test_dtaci_vs_static_conformal_multiple_repetitions(target_alpha):
    """DtACI should beat static conformal prediction on highly shifted data.

    Over repeated random datasets, DtACI must achieve a strictly lower
    coverage error at least 75% of the time and a better error on average.
    """
    n_repetitions = 20
    gamma_values = [0.01, 0.05, 0.1]
    dtaci_errors = []
    static_errors = []

    for repetition in range(n_repetitions):
        X, y = generate_shifted_data(random_seed=42 + repetition)

        dtaci_metrics = run_conformal_performance_test(
            "dtaci", X, y, target_alpha, gamma_values
        )
        static_metrics = run_conformal_performance_test(
            "static", X, y, target_alpha
        )

        dtaci_errors.append(dtaci_metrics["coverage_error"])
        static_errors.append(static_metrics["coverage_error"])

    dtaci_wins = sum(
        dtaci_error < static_error
        for dtaci_error, static_error in zip(dtaci_errors, static_errors)
    )

    # DtACI should win at least 75% of the repetitions.
    assert dtaci_wins / n_repetitions >= 0.75
    # And it should also be better on average.
    assert np.mean(dtaci_errors) <= np.mean(static_errors)
diff --git a/tests/selection/test_conformalization.py b/tests/selection/test_conformalization.py
new file mode 100644
index 0000000..3bf60b6
--- /dev/null
+++ b/tests/selection/test_conformalization.py
@@ -0,0 +1,266 @@
+import numpy as np
+import pytest
+from confopt.selection.conformalization import (
+ QuantileConformalEstimator,
+ alpha_to_quantiles,
+)
+from confopt.wrapping import ConformalBounds
+from confopt.utils.preprocessing import train_val_split
+from conftest import (
+ AMENDED_SINGLE_FIT_QUANTILE_ESTIMATOR_ARCHITECTURES,
+ AMENDED_QUANTILE_ESTIMATOR_ARCHITECTURES,
+)
+
# Max absolute deviation tolerated between empirical and nominal coverage.
POINT_ESTIMATOR_COVERAGE_TOLERANCE = 0.15
QUANTILE_ESTIMATOR_COVERAGE_TOLERANCE = 0.15
# Minimum fraction of comparisons where the conformalized estimator must be
# at least as close to nominal coverage as the raw one.
MINIMUM_CONFORMAL_WIN_RATE = 0.51

# Optional per-architecture tolerance overrides for rare problematic estimators
ARCH_TOLERANCE_OVERRIDES: dict[str, float] = {
    # Example only (keep empty unless specific architectures are identified):
    # "problem_arch": 0.10,
}
+
+
def validate_intervals(
    intervals: list[ConformalBounds],
    y_true: np.ndarray,
    alphas: list[float],
    tolerance: float,
) -> tuple[list[float], list[bool]]:
    """Compute empirical coverage per alpha and flag out-of-tolerance levels.

    Args:
        intervals: One bounds object per alpha, aligned with `alphas`;
            each exposes `lower_bounds` and `upper_bounds` arrays.
        y_true: Realized target values.
        alphas: Nominal miscoverage levels.
        tolerance: Max allowed |coverage - (1 - alpha)| before flagging.

    Returns:
        Tuple of (coverages, errors): the empirical coverage per alpha and a
        boolean per alpha marking deviation beyond `tolerance`. (Bug fix:
        the annotation previously claimed `tuple[float, bool]`, but two
        parallel lists are returned.)
    """
    coverages = []
    errors = []
    for i, alpha in enumerate(alphas):
        lower_bound = intervals[i].lower_bounds
        upper_bound = intervals[i].upper_bounds
        coverage = np.mean((y_true >= lower_bound) & (y_true <= upper_bound))
        # Flag levels whose empirical coverage misses the nominal target.
        error = abs(coverage - (1 - alpha)) > tolerance

        coverages.append(coverage)
        errors.append(error)

    return coverages, errors
+
+
@pytest.mark.parametrize("alpha", [0.1, 0.2, 0.3])
def test_alpha_to_quantiles(alpha):
    """alpha_to_quantiles should split alpha symmetrically across both tails."""
    lower_quantile, upper_quantile = alpha_to_quantiles(alpha)
    assert lower_quantile == alpha / 2
    assert upper_quantile == 1 - alpha / 2
    assert lower_quantile <= upper_quantile
+
+
@pytest.mark.slow
@pytest.mark.parametrize(
    "data_fixture_name",
    ["diabetes_data"],
)
@pytest.mark.parametrize(
    "estimator_architecture", AMENDED_QUANTILE_ESTIMATOR_ARCHITECTURES
)
@pytest.mark.parametrize("tuning_iterations", [0])
@pytest.mark.parametrize("alphas", [[0.1], [0.1, 0.3, 0.9]])
@pytest.mark.parametrize(
    "calibration_split_strategy", ["train_test_split", "cv", "adaptive"]
)
def test_quantile_fit_and_predict_intervals_shape_and_coverage(
    request,
    data_fixture_name,
    estimator_architecture,
    tuning_iterations,
    alphas,
    calibration_split_strategy,
):
    """A fitted estimator should emit one well-covered interval per alpha."""
    X, y = request.getfixturevalue(data_fixture_name)
    X_train, y_train, X_test, y_test = train_val_split(
        X, y, train_split=0.8, normalize=False, ordinal=False, random_state=42
    )

    estimator = QuantileConformalEstimator(
        quantile_estimator_architecture=estimator_architecture,
        alphas=alphas,
        n_pre_conformal_trials=15,
        n_calibration_folds=3,
        calibration_split_strategy=calibration_split_strategy,
    )
    estimator.fit(
        X=X_train,
        y=y_train,
        tuning_iterations=tuning_iterations,
        random_state=42,
    )
    assert len(estimator.fold_scores_per_alpha) == len(alphas)

    intervals = estimator.predict_intervals(X_test)
    assert len(intervals) == len(alphas)

    # Allow per-architecture overrides for known-problematic estimators.
    tolerance = ARCH_TOLERANCE_OVERRIDES.get(
        estimator_architecture, QUANTILE_ESTIMATOR_COVERAGE_TOLERANCE
    )
    _, coverage_errors = validate_intervals(intervals, y_test, alphas, tolerance)
    assert not any(coverage_errors)
+
+
def test_quantile_calculate_betas_output_properties(
    dummy_expanding_quantile_gaussian_dataset,
):
    """calculate_betas should return one valid probability per alpha."""
    X, y = dummy_expanding_quantile_gaussian_dataset
    X_train, y_train, X_val, y_val = train_val_split(
        X, y, train_split=0.8, normalize=False, ordinal=False, random_state=42
    )

    estimator = QuantileConformalEstimator(
        quantile_estimator_architecture=AMENDED_QUANTILE_ESTIMATOR_ARCHITECTURES[0],
        alphas=[0.1, 0.2, 0.3],
        n_pre_conformal_trials=15,
    )
    estimator.fit(X=X_train, y=y_train, random_state=42)

    betas = estimator.calculate_betas(X_val[0], y_val[0])
    assert len(betas) == len(estimator.alphas)
    assert all(0 <= beta <= 1 for beta in betas)
+
+
@pytest.mark.parametrize(
    "n_trials,expected_conformalize",
    [
        (5, False),
        (50, True),
    ],
)
def test_quantile_conformalization_decision_logic(n_trials, expected_conformalize):
    """Conformalization should switch on only once enough trials are available.

    Bug fix: the test data was previously drawn from unseeded `np.random`,
    making the test inputs nondeterministic between runs. A seeded generator
    is used instead; the decision under test depends only on the sample size,
    so behavior is unchanged for valid runs.
    """
    estimator = QuantileConformalEstimator(
        quantile_estimator_architecture=AMENDED_SINGLE_FIT_QUANTILE_ESTIMATOR_ARCHITECTURES[
            0
        ],
        alphas=[0.2],
        n_pre_conformal_trials=20,
    )
    rng = np.random.default_rng(42)
    X = rng.random((n_trials, 3))
    y = rng.random(n_trials)
    X_train, y_train, _, _ = train_val_split(
        X, y, train_split=0.8, normalize=False, ordinal=False, random_state=42
    )
    estimator.fit(X=X_train, y=y_train)
    assert estimator.conformalize_predictions == expected_conformalize
+
+
@pytest.mark.parametrize(
    "initial_alphas,new_alphas",
    [
        ([0.2], [0.15, 0.25]),
        ([0.1, 0.2], [0.05, 0.15, 0.3]),
        ([0.3], [0.1]),
    ],
)
def test_quantile_alpha_update_mechanism(initial_alphas, new_alphas):
    """update_alphas should stage new alphas without touching the originals."""
    estimator = QuantileConformalEstimator(
        quantile_estimator_architecture=AMENDED_QUANTILE_ESTIMATOR_ARCHITECTURES[0],
        alphas=initial_alphas,
    )
    estimator.update_alphas(new_alphas)
    # New alphas are staged separately; the constructor-time list is kept.
    assert estimator.updated_alphas == new_alphas
    assert estimator.alphas == initial_alphas
+
+
@pytest.mark.slow
@pytest.mark.parametrize(
    "data_fixture_name",
    [
        # "heteroscedastic_data",
        "diabetes_data",
    ],
)
@pytest.mark.parametrize("estimator_architecture", ["qrf", "qgbm"])
@pytest.mark.parametrize("alphas", [[0.2, 0.4, 0.6, 0.8]])
@pytest.mark.parametrize("calibration_split_strategy", ["cv"])
def test_conformalized_vs_non_conformalized_quantile_estimator_coverage(
    request,
    data_fixture_name,
    estimator_architecture,
    alphas,
    calibration_split_strategy,
):
    """Conformalized coverage should beat raw quantile coverage often enough.

    Bug fix: the per-alpha comparison loop at the bottom was indented at
    function level, outside the repetition loop — only the final repetition
    was scored while the denominator counted n_repeats * len(alphas)
    comparisons, so the win rate could never reach the threshold. The loop
    now runs inside every repetition, matching the accumulation logic.
    """
    X, y = request.getfixturevalue(data_fixture_name)

    n_repeats = 10
    np.random.seed(42)
    random_states = [np.random.randint(0, 10000) for _ in range(n_repeats)]
    better_or_equal_count = 0
    for random_state in random_states:
        (X_train, y_train, X_test, y_test,) = train_val_split(
            X,
            y,
            # A low value, given we care about distributional coverage
            # on hold out set and we want to simulate a finite training dataset:
            train_split=0.7,
            normalize=False,
            ordinal=False,
            random_state=random_state,
        )

        conformalized_estimator = QuantileConformalEstimator(
            quantile_estimator_architecture=estimator_architecture,
            alphas=alphas,
            # Low threshold so conformalization activates for this data size.
            n_pre_conformal_trials=32,
            calibration_split_strategy=calibration_split_strategy,
            n_calibration_folds=5,
            normalize_features=True,
        )
        conformalized_estimator.fit(
            X=X_train,
            y=y_train,
            random_state=random_state,
        )

        non_conformalized_estimator = QuantileConformalEstimator(
            quantile_estimator_architecture=estimator_architecture,
            alphas=alphas,
            # Threshold far above the data size, so conformalization never
            # activates and raw quantile predictions are used.
            n_pre_conformal_trials=10000,
            calibration_split_strategy=calibration_split_strategy,
            n_calibration_folds=5,
            normalize_features=True,
        )
        non_conformalized_estimator.fit(
            X=X_train,
            y=y_train,
            random_state=random_state,
        )

        # Sanity-check that the two estimators landed on opposite sides of
        # the conformalization threshold.
        assert conformalized_estimator.conformalize_predictions
        assert not non_conformalized_estimator.conformalize_predictions

        conformalized_intervals = conformalized_estimator.predict_intervals(X_test)
        non_conformalized_intervals = non_conformalized_estimator.predict_intervals(
            X_test
        )
        conformalized_coverages, _ = validate_intervals(
            conformalized_intervals,
            y_test,
            alphas,
            QUANTILE_ESTIMATOR_COVERAGE_TOLERANCE,
        )
        non_conformalized_coverages, _ = validate_intervals(
            non_conformalized_intervals,
            y_test,
            alphas,
            QUANTILE_ESTIMATOR_COVERAGE_TOLERANCE,
        )

        # Score every alpha within THIS repetition (previously outside the
        # loop, scoring only the last repetition).
        for i, alpha in enumerate(alphas):
            target_coverage = 1 - alpha
            conformalized_error = abs(conformalized_coverages[i] - target_coverage)
            non_conformalized_error = abs(
                non_conformalized_coverages[i] - target_coverage
            )

            if conformalized_error <= non_conformalized_error:
                better_or_equal_count += 1

    total_comparisons = n_repeats * len(alphas)
    percentage_better_or_equal = better_or_equal_count / total_comparisons
    assert percentage_better_or_equal >= MINIMUM_CONFORMAL_WIN_RATE
diff --git a/tests/selection/test_estimation.py b/tests/selection/test_estimation.py
new file mode 100644
index 0000000..f75b5fa
--- /dev/null
+++ b/tests/selection/test_estimation.py
@@ -0,0 +1,136 @@
+import pytest
+
+from confopt.selection.estimation import (
+ initialize_estimator,
+ average_scores_across_folds,
+)
+
+from confopt.selection.estimator_configuration import ESTIMATOR_REGISTRY
+
+
@pytest.mark.parametrize("estimator_architecture", list(ESTIMATOR_REGISTRY.keys()))
def test_initialize_estimator_returns_expected_type(estimator_architecture):
    """Each registered architecture should map to its registered class."""
    initialized = initialize_estimator(estimator_architecture, random_state=42)
    registered_class = ESTIMATOR_REGISTRY[estimator_architecture].estimator_class
    assert isinstance(initialized, registered_class)
+
+
@pytest.mark.parametrize("random_state", [None, 42, 123])
def test_initialize_estimator_with_random_state(random_state):
    """random_state passed to initialize_estimator should reach the estimator.

    Bug fix: initialization_params previously hard-coded random_state=42,
    which conflicted with the parametrized value (e.g. 123) and turned the
    test into an accidental precedence check. The params now mirror the
    parametrized value, and `is not None` is used so a 0 seed would not be
    silently dropped.
    """
    initialization_params = (
        {"random_state": random_state} if random_state is not None else {}
    )
    estimator = initialize_estimator(
        estimator_architecture="gbm",
        initialization_params=initialization_params,
        random_state=random_state,
    )
    assert estimator.random_state == random_state
+
+
@pytest.mark.parametrize("split_type", ["k_fold", "ordinal_split"])
@pytest.mark.parametrize("n_searches", [1, 3, 10])
def test_point_tuner_returns_valid_configuration(
    point_tuner, estimation_test_data, split_type, n_searches
):
    """Tuning should yield a dict whose keys all belong to the search space."""
    X_train, _, y_train, _ = estimation_test_data

    # Use an estimator we know exists.
    estimator_architecture = "gbm"
    search_space = ESTIMATOR_REGISTRY[estimator_architecture].estimator_parameter_space

    best_config = point_tuner.tune(
        X_train,
        y_train,
        estimator_architecture,
        n_searches=n_searches,
        train_split=0.8,
        split_type=split_type,
    )

    assert isinstance(best_config, dict)
    # Every tuned parameter must come from the declared parameter space.
    assert set(best_config.keys()).issubset(set(search_space.keys()))
+
+
@pytest.mark.parametrize("split_type", ["k_fold", "ordinal_split"])
def test_quantile_tuner_returns_valid_configuration(
    quantile_tuner_with_quantiles, estimation_test_data, split_type
):
    """Quantile tuner should return a config drawn from the valid space."""
    tuner, quantiles = quantile_tuner_with_quantiles
    X_train, _, y_train, _ = estimation_test_data

    # Pick the first registered quantile-capable architecture.
    estimator_architecture = next(
        arch
        for arch, config in ESTIMATOR_REGISTRY.items()
        if config.is_quantile_estimator()
    )
    search_space = ESTIMATOR_REGISTRY[estimator_architecture].estimator_parameter_space

    best_config = tuner.tune(
        X_train,
        y_train,
        estimator_architecture,
        n_searches=3,
        train_split=0.8,
        split_type=split_type,
    )

    assert isinstance(best_config, dict)
    # Every tuned parameter must come from the declared parameter space.
    assert set(best_config.keys()).issubset(set(search_space.keys()))
+
+
def test_tuning_with_forced_configurations_prioritizes_them(
    point_tuner, estimation_test_data
):
    """With a single search, the forced configuration must be returned."""
    X_train, _, y_train, _ = estimation_test_data

    estimator_architecture = "gbm"
    forced_config = ESTIMATOR_REGISTRY[estimator_architecture].default_params

    tuned_config = point_tuner.tune(
        X_train,
        y_train,
        estimator_architecture,
        n_searches=1,  # exactly one search => the forced config wins outright
        train_split=0.8,
        split_type="ordinal_split",
        forced_param_configurations=[forced_config],
    )

    assert tuned_config == forced_config
+
+
def test_correct_averaging_and_ordering():
    """Duplicate configs are averaged; first-seen order is preserved."""
    configs = [
        {"param": "first"},
        {"param": "second"},
        {"param": "first"},  # duplicate of the first entry
        {"param": "third"},
    ]
    scores = [1.0, 2.0, 3.0, 4.0]

    unique_configs, unique_scores = average_scores_across_folds(configs, scores)

    # Order of first appearance must survive deduplication.
    assert [config["param"] for config in unique_configs] == [
        "first",
        "second",
        "third",
    ]

    # "first" appears twice, so its score is the mean (1.0 + 3.0) / 2.
    assert unique_scores[0] == 2.0
    assert unique_scores[1] == 2.0
    assert unique_scores[2] == 4.0
diff --git a/tests/test_estimation.py b/tests/test_estimation.py
deleted file mode 100644
index 42a2b1d..0000000
--- a/tests/test_estimation.py
+++ /dev/null
@@ -1,470 +0,0 @@
-from typing import Dict
-
-import numpy as np
-import pytest
-
-from confopt.config import GBM_NAME, RF_NAME, QGBM_NAME, QRF_NAME
-from confopt.estimation import (
- QuantileConformalRegression,
- LocallyWeightedConformalRegression,
- initialize_point_estimator,
- initialize_quantile_estimator,
- cross_validate_configurations,
-)
-
-DEFAULT_SEED = 1234
-DEFAULT_SEARCH_POINT_ESTIMATOR = GBM_NAME
-DEFAULT_SEARCH_QUANTILE_ESTIMATOR = QRF_NAME
-
-
-def get_discretized_quantile_dict(
- X: np.array, y: np.array, quantile_level: float
-) -> Dict:
- """
- Helper function to create dictionary of quantiles per X value.
-
- Parameters
- ----------
- X :
- Explanatory variables.
- y :
- Target variable.
- quantile_level :
- Desired quantile to take.
-
- Returns
- -------
- quantile_dict :
- Dictionary relating X values to their quantile.
- """
- quantile_dict = {}
- for discrete_x_coordinate in np.unique(X):
- conditional_y_at_x = y[X == discrete_x_coordinate]
- quantile_dict[discrete_x_coordinate] = np.quantile(
- conditional_y_at_x, quantile_level
- )
- return quantile_dict
-
-
-def test_initialize_point_estimator():
- initialized_estimator = initialize_point_estimator(
- estimator_architecture=DEFAULT_SEARCH_POINT_ESTIMATOR,
- initialization_params={},
- random_state=DEFAULT_SEED,
- )
-
- assert hasattr(initialized_estimator, "predict")
-
-
-def test_initialize_point_estimator__reproducibility():
- initialized_estimator_first_call = initialize_point_estimator(
- estimator_architecture=DEFAULT_SEARCH_POINT_ESTIMATOR,
- initialization_params={},
- random_state=DEFAULT_SEED,
- )
- initialized_estimator_second_call = initialize_point_estimator(
- estimator_architecture=DEFAULT_SEARCH_POINT_ESTIMATOR,
- initialization_params={},
- random_state=DEFAULT_SEED,
- )
- assert (
- initialized_estimator_first_call.random_state
- == initialized_estimator_second_call.random_state
- )
-
-
-def test_initialize_quantile_estimator():
- dummy_pinball_loss_alpha = [0.25, 0.75]
-
- initialized_estimator = initialize_quantile_estimator(
- estimator_architecture=DEFAULT_SEARCH_QUANTILE_ESTIMATOR,
- initialization_params={},
- pinball_loss_alpha=dummy_pinball_loss_alpha,
- random_state=DEFAULT_SEED,
- )
-
- assert hasattr(initialized_estimator, "predict")
-
-
-def test_initialize_quantile_estimator__reproducibility():
- dummy_pinball_loss_alpha = [0.25, 0.75]
-
- initialized_estimator_first_call = initialize_quantile_estimator(
- estimator_architecture=DEFAULT_SEARCH_QUANTILE_ESTIMATOR,
- initialization_params={},
- pinball_loss_alpha=dummy_pinball_loss_alpha,
- random_state=DEFAULT_SEED,
- )
- initialized_estimator_second_call = initialize_quantile_estimator(
- estimator_architecture=DEFAULT_SEARCH_QUANTILE_ESTIMATOR,
- initialization_params={},
- pinball_loss_alpha=dummy_pinball_loss_alpha,
- random_state=DEFAULT_SEED,
- )
-
- assert (
- initialized_estimator_first_call.random_state
- == initialized_estimator_second_call.random_state
- )
-
-
-def test_cross_validate_configurations__point_estimator(
- dummy_gbm_configurations, dummy_stationary_gaussian_dataset
-):
- X, y = (
- dummy_stationary_gaussian_dataset[:, 0].reshape(-1, 1),
- dummy_stationary_gaussian_dataset[:, 1],
- )
-
- scored_configurations, scores = cross_validate_configurations(
- configurations=dummy_gbm_configurations,
- estimator_architecture=DEFAULT_SEARCH_POINT_ESTIMATOR,
- X=X,
- y=y,
- k_fold_splits=3,
- random_state=DEFAULT_SEED,
- )
-
- assert len(scored_configurations) == len(scores)
- assert len(scored_configurations) == len(dummy_gbm_configurations)
-
- stringified_scored_configurations = []
- for configuration in scored_configurations:
- stringified_scored_configurations.append(
- str(dict(sorted(configuration.items())))
- )
- assert sorted(list(set(stringified_scored_configurations))) == sorted(
- stringified_scored_configurations
- )
-
- for score in scores:
- assert score >= 0
-
-
-def test_cross_validate_configurations__point_estimator__reproducibility(
- dummy_gbm_configurations, dummy_stationary_gaussian_dataset
-):
- X, y = (
- dummy_stationary_gaussian_dataset[:, 0].reshape(-1, 1),
- dummy_stationary_gaussian_dataset[:, 1],
- )
-
- (
- scored_configurations_first_call,
- scores_first_call,
- ) = cross_validate_configurations(
- configurations=dummy_gbm_configurations,
- estimator_architecture=DEFAULT_SEARCH_POINT_ESTIMATOR,
- X=X,
- y=y,
- k_fold_splits=3,
- random_state=DEFAULT_SEED,
- )
- (
- scored_configurations_second_call,
- scores_second_call,
- ) = cross_validate_configurations(
- configurations=dummy_gbm_configurations,
- estimator_architecture=DEFAULT_SEARCH_POINT_ESTIMATOR,
- X=X,
- y=y,
- k_fold_splits=3,
- random_state=DEFAULT_SEED,
- )
-
- assert scored_configurations_first_call == scored_configurations_second_call
- assert scores_first_call == scores_second_call
-
-
-@pytest.mark.parametrize("confidence_level", [0.2, 0.8])
-@pytest.mark.parametrize("tuning_param_combinations", [0, 1, 3])
-@pytest.mark.parametrize("quantile_estimator_architecture", [QGBM_NAME, QRF_NAME])
-def test_quantile_conformal_regression__fit(
- dummy_fixed_quantile_dataset,
- confidence_level,
- tuning_param_combinations,
- quantile_estimator_architecture,
-):
- X, y = (
- dummy_fixed_quantile_dataset[:, 0].reshape(-1, 1),
- dummy_fixed_quantile_dataset[:, 1],
- )
- train_split = 0.8
- X_train, y_train = (
- X[: round(len(X) * train_split), :],
- y[: round(len(y) * train_split)],
- )
- X_val, y_val = X[round(len(X) * train_split) :, :], y[round(len(y) * train_split) :]
-
- qcr = QuantileConformalRegression(
- quantile_estimator_architecture=quantile_estimator_architecture,
- )
- qcr.fit(
- X_train=X_train,
- y_train=y_train,
- X_val=X_val,
- y_val=y_val,
- confidence_level=confidence_level,
- tuning_iterations=tuning_param_combinations,
- random_state=DEFAULT_SEED,
- )
-
- assert qcr.nonconformity_scores is not None
- assert qcr.quantile_estimator is not None
-
-
-@pytest.mark.parametrize("confidence_level", [0.2, 0.8])
-@pytest.mark.parametrize("tuning_param_combinations", [5])
-@pytest.mark.parametrize("quantile_estimator_architecture", [QGBM_NAME, QRF_NAME])
-def test_quantile_conformal_regression__predict(
- dummy_fixed_quantile_dataset,
- confidence_level,
- tuning_param_combinations,
- quantile_estimator_architecture,
-):
- X, y = (
- dummy_fixed_quantile_dataset[:, 0].reshape(-1, 1),
- dummy_fixed_quantile_dataset[:, 1],
- )
- train_split = 0.8
- X_train, y_train = (
- X[: round(len(X) * train_split), :],
- y[: round(len(y) * train_split)],
- )
- X_val, y_val = X[round(len(X) * train_split) :, :], y[round(len(y) * train_split) :]
-
- qcr = QuantileConformalRegression(
- quantile_estimator_architecture=quantile_estimator_architecture,
- )
- qcr.fit(
- X_train=X_train,
- y_train=y_train,
- X_val=X_val,
- y_val=y_val,
- confidence_level=confidence_level,
- tuning_iterations=tuning_param_combinations,
- random_state=DEFAULT_SEED,
- )
- y_low_bounds, y_high_bounds = qcr.predict(X_val, confidence_level=confidence_level)
-
- # Check lower bound is always lower than higher bound:
- for y_low, y_high in zip(y_low_bounds, y_high_bounds):
- assert y_low <= y_high
-
- # Compute observed quantiles per X slice during training
- # (would only work for univariate dummy datasets):
- low_quantile_dict_train = get_discretized_quantile_dict(
- X_train.reshape(
- -1,
- ),
- y_train,
- confidence_level + ((1 - confidence_level) / 2),
- )
- high_quantile_dict_train = get_discretized_quantile_dict(
- X_train.reshape(
- -1,
- ),
- y_train,
- (1 - confidence_level) / 2,
- )
- # Check that predictions return observed quantiles during training
- # Prediction error deviations of more than this amount
- # will count as a breach:
- y_breach_threshold = 1
- # More than this percentage of breaches will fail the test:
- breach_tolerance = 0.3
- low_margin_breaches, high_margin_breaches = 0, 0
- for x_obs, y_low, y_high in zip(
- X_train.reshape(
- -1,
- ),
- y_low_bounds,
- y_high_bounds,
- ):
- if abs(y_low - low_quantile_dict_train[x_obs]) > y_breach_threshold:
- low_margin_breaches += 1
- if abs(y_high - high_quantile_dict_train[x_obs]) > y_breach_threshold:
- high_margin_breaches += 1
- assert low_margin_breaches < len(X_train) * breach_tolerance
- assert high_margin_breaches < len(X_train) * breach_tolerance
-
- # Check conformal interval coverage on validation data
- # (note validation data is actively used by the searcher
- # to calibrate its conformal intervals, so this is not an
- # OOS test, just a sanity check):
- interval_breach_states = []
- for y_obs, y_low, y_high in zip(y_val, y_low_bounds, y_high_bounds):
- is_interval_breach = 0 if y_high > y_obs > y_low else 1
- interval_breach_states.append(is_interval_breach)
-
- interval_breach_rate = sum(interval_breach_states) / len(interval_breach_states)
- breach_margin = 0.01
- assert (
- (confidence_level - breach_margin)
- <= (1 - interval_breach_rate)
- <= (confidence_level + breach_margin)
- )
-
-
-@pytest.mark.parametrize("confidence_level", [0.2, 0.8])
-@pytest.mark.parametrize("tuning_param_combinations", [0, 1, 3])
-@pytest.mark.parametrize("point_estimator_architecture", [GBM_NAME, RF_NAME])
-@pytest.mark.parametrize("demeaning_estimator_architecture", [GBM_NAME])
-@pytest.mark.parametrize("variance_estimator_architecture", [GBM_NAME])
-def test_locally_weighted_conformal_regression__fit(
- dummy_fixed_quantile_dataset,
- confidence_level,
- tuning_param_combinations,
- point_estimator_architecture,
- demeaning_estimator_architecture,
- variance_estimator_architecture,
-):
- X, y = (
- dummy_fixed_quantile_dataset[:, 0].reshape(-1, 1),
- dummy_fixed_quantile_dataset[:, 1],
- )
- train_split = 0.8
- X_train, y_train = (
- X[: round(len(X) * train_split), :],
- y[: round(len(y) * train_split)],
- )
- pe_split = 0.8
- X_pe, y_pe = (
- X_train[: round(len(X_train) * pe_split), :],
- y_train[: round(len(y_train) * pe_split)],
- )
- X_ve, y_ve = (
- X_train[round(len(X_train) * pe_split) :, :],
- y_train[round(len(y_train) * pe_split) :],
- )
- X_val, y_val = X[round(len(X) * train_split) :, :], y[round(len(y) * train_split) :]
-
- lwcr = LocallyWeightedConformalRegression(
- point_estimator_architecture=point_estimator_architecture,
- demeaning_estimator_architecture=demeaning_estimator_architecture,
- variance_estimator_architecture=variance_estimator_architecture,
- )
- lwcr.fit(
- X_pe=X_pe,
- y_pe=y_pe,
- X_ve=X_ve,
- y_ve=y_ve,
- X_val=X_val,
- y_val=y_val,
- tuning_iterations=tuning_param_combinations,
- random_state=DEFAULT_SEED,
- )
-
- assert lwcr.nonconformity_scores is not None
- assert lwcr.pe_estimator is not None
- assert lwcr.ve_estimator is not None
-
-
-@pytest.mark.parametrize("confidence_level", [0.2, 0.8])
-@pytest.mark.parametrize("tuning_param_combinations", [5])
-@pytest.mark.parametrize("point_estimator_architecture", [GBM_NAME, RF_NAME])
-@pytest.mark.parametrize("demeaning_estimator_architecture", [GBM_NAME])
-@pytest.mark.parametrize("variance_estimator_architecture", [GBM_NAME])
-def test_locally_weighted_conformal_regression__predict(
- dummy_fixed_quantile_dataset,
- confidence_level,
- tuning_param_combinations,
- point_estimator_architecture,
- demeaning_estimator_architecture,
- variance_estimator_architecture,
-):
- X, y = (
- dummy_fixed_quantile_dataset[:, 0].reshape(-1, 1),
- dummy_fixed_quantile_dataset[:, 1],
- )
- train_split = 0.8
- X_train, y_train = (
- X[: round(len(X) * train_split), :],
- y[: round(len(y) * train_split)],
- )
- pe_split = 0.8
- X_pe, y_pe = (
- X_train[: round(len(X_train) * pe_split), :],
- y_train[: round(len(y_train) * pe_split)],
- )
- X_ve, y_ve = (
- X_train[round(len(X_train) * pe_split) :, :],
- y_train[round(len(y_train) * pe_split) :],
- )
- X_val, y_val = X[round(len(X) * train_split) :, :], y[round(len(y) * train_split) :]
-
- lwcr = LocallyWeightedConformalRegression(
- point_estimator_architecture=point_estimator_architecture,
- demeaning_estimator_architecture=demeaning_estimator_architecture,
- variance_estimator_architecture=variance_estimator_architecture,
- )
- lwcr.fit(
- X_pe=X_pe,
- y_pe=y_pe,
- X_ve=X_ve,
- y_ve=y_ve,
- X_val=X_val,
- y_val=y_val,
- tuning_iterations=tuning_param_combinations,
- random_state=DEFAULT_SEED,
- )
-
- y_low_bounds, y_high_bounds = lwcr.predict(X_val, confidence_level=confidence_level)
-
- # Check lower bound is always lower than higher bound:
- for y_low, y_high in zip(y_low_bounds, y_high_bounds):
- assert y_low <= y_high
-
- # Compute observed quantiles per X slice during training (only works for univariate dummy datasets):
- low_quantile_dict_train = get_discretized_quantile_dict(
- X_train.reshape(
- -1,
- ),
- y_train,
- confidence_level + ((1 - confidence_level) / 2),
- )
- high_quantile_dict_train = get_discretized_quantile_dict(
- X_train.reshape(
- -1,
- ),
- y_train,
- (1 - confidence_level) / 2,
- )
-
- # Check that predictions return observed quantiles during training
- # Prediction error deviations of more than this amount
- # will count as a breach:
- y_breach_threshold = 1
- # More than this percentage of breaches will fail the test:
- breach_tolerance = 0.3
- low_margin_breaches, high_margin_breaches = 0, 0
- for x_obs, y_low, y_high in zip(
- X_train.reshape(
- -1,
- ),
- y_low_bounds,
- y_high_bounds,
- ):
- if abs(y_low - low_quantile_dict_train[x_obs]) > y_breach_threshold:
- low_margin_breaches += 1
- if abs(y_high - high_quantile_dict_train[x_obs]) > y_breach_threshold:
- high_margin_breaches += 1
- assert low_margin_breaches < len(X_train) * breach_tolerance
- assert high_margin_breaches < len(X_train) * breach_tolerance
-
- # Check conformal interval coverage on validation data
- # (note validation data is actively used by the searcher
- # to calibrate its conformal intervals, so this is not an
- # OOS test, just a sanity check):
- interval_breach_states = []
- for y_obs, y_low, y_high in zip(y_val, y_low_bounds, y_high_bounds):
- is_interval_breach = 0 if y_high > y_obs > y_low else 1
- interval_breach_states.append(is_interval_breach)
-
- interval_breach_rate = sum(interval_breach_states) / len(interval_breach_states)
- breach_margin = 0.01
- assert (
- (confidence_level - breach_margin)
- <= (1 - interval_breach_rate)
- <= (confidence_level + breach_margin)
- )
diff --git a/tests/test_optimization.py b/tests/test_optimization.py
deleted file mode 100644
index 2914213..0000000
--- a/tests/test_optimization.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import time
-
-import pytest
-
-from confopt.optimization import derive_optimal_tuning_count, RuntimeTracker
-
-
-def test_runtime_tracker__return_runtime():
- dummy_tracker = RuntimeTracker()
- sleep_time = 5
- time.sleep(sleep_time)
- time_elapsed = dummy_tracker.return_runtime()
- assert sleep_time - 1 < round(time_elapsed) < sleep_time + 1
-
-
-def test_runtime_tracker__pause_runtime():
- dummy_tracker = RuntimeTracker()
- dummy_tracker.pause_runtime()
- sleep_time = 5
- time.sleep(sleep_time)
- dummy_tracker.resume_runtime()
- time_elapsed = dummy_tracker.return_runtime()
- assert time_elapsed < 1
-
-
-@pytest.mark.parametrize("base_model_runtime", [1, 100])
-@pytest.mark.parametrize("search_model_runtime", [1, 100])
-@pytest.mark.parametrize("search_to_base_runtime_ratio", [0.5, 2])
-@pytest.mark.parametrize("search_retraining_freq", [1, 10])
-def test_derive_optimal_tuning_count(
- base_model_runtime,
- search_model_runtime,
- search_to_base_runtime_ratio,
- search_retraining_freq,
-):
- n_iterations = derive_optimal_tuning_count(
- baseline_model_runtime=base_model_runtime,
- search_model_runtime=search_model_runtime,
- search_to_baseline_runtime_ratio=search_to_base_runtime_ratio,
- search_model_retraining_freq=search_retraining_freq,
- )
- assert n_iterations >= 1
- assert isinstance(n_iterations, int)
-
-
-def test_derive_optimal_tuning_count__no_iterations():
- n_iterations = derive_optimal_tuning_count(
- baseline_model_runtime=1,
- search_model_runtime=1,
- search_to_baseline_runtime_ratio=1,
- search_model_retraining_freq=1,
- )
- assert n_iterations == 1
diff --git a/tests/test_tuning.py b/tests/test_tuning.py
index c7058cd..36b2edf 100644
--- a/tests/test_tuning.py
+++ b/tests/test_tuning.py
@@ -1,430 +1,353 @@
-import random
-from copy import deepcopy
-
-import numpy as np
-import pandas as pd
import pytest
+import numpy as np
+from typing import Dict
+from itertools import product
-from confopt.config import GBM_NAME
-from confopt.optimization import RuntimeTracker
-from confopt.tuning import (
- score_predictions,
- get_best_configuration_idx,
- process_and_split_estimation_data,
- normalize_estimation_data,
- update_adaptive_confidence_level,
-)
-
-DEFAULT_SEED = 1234
-
+from confopt.tuning import ConformalTuner, stop_search
+from confopt.wrapping import CategoricalRange, IntRange
+from confopt.utils.tracking import RuntimeTracker
+from confopt.selection.acquisition import QuantileConformalSearcher, LowerBoundSampler
-@pytest.mark.parametrize("optimization_direction", ["direct", "inverse"])
-def test_get_best_configuration_idx(optimization_direction):
- lower_bound = np.array([5, 4, 3, 2, 1])
- higher_bound = lower_bound + 1
- dummy_performance_bounds = (lower_bound, higher_bound)
- best_idx = get_best_configuration_idx(
- configuration_performance_bounds=dummy_performance_bounds,
- optimization_direction=optimization_direction,
+def test_stop_search_no_remaining_configurations():
+ assert stop_search(
+ n_remaining_configurations=0,
+ current_iter=5,
+ current_runtime=10.0,
+ max_runtime=100.0,
+ max_searches=50,
)
- assert best_idx >= 0
- if optimization_direction == "direct":
- assert best_idx == np.argmax(higher_bound)
- elif optimization_direction == "inverse":
- assert best_idx == np.argmin(lower_bound)
+@pytest.mark.parametrize("max_runtime", [10.0, 15.0, 20.0])
+def test_stop_search_runtime_exceeded(max_runtime):
+ current_runtime = 25.0
+ should_stop = current_runtime >= max_runtime
+ assert (
+ stop_search(
+ n_remaining_configurations=10,
+ current_iter=5,
+ current_runtime=current_runtime,
+ max_runtime=max_runtime,
+ max_searches=50,
+ )
+ == should_stop
+ )
-@pytest.mark.parametrize(
- "scoring_function", ["accuracy_score", "mean_squared_error", "log_loss"]
-)
-def test_score_predictions__perfect_score(scoring_function):
- dummy_y_obs = np.array([1, 0, 1, 0, 1, 1])
- dummy_y_pred = deepcopy(dummy_y_obs)
- score = score_predictions(
- y_obs=dummy_y_obs, y_pred=dummy_y_pred, scoring_function=scoring_function
+@pytest.mark.parametrize("max_searches", [10, 20, 30])
+def test_stop_search_iterations_exceeded(max_searches):
+ current_iter = 25
+ should_stop = current_iter >= max_searches
+ assert (
+ stop_search(
+ n_remaining_configurations=10,
+ current_iter=current_iter,
+ current_runtime=5.0,
+ max_runtime=100.0,
+ max_searches=max_searches,
+ )
+ == should_stop
)
- if scoring_function == "accuracy_score":
- assert score == 1
- elif scoring_function == "mean_squared_error":
- assert score == 0
- elif scoring_function == "log_loss":
- assert 0 < score < 0.001
-
-def test_process_and_split_estimation_data(dummy_configurations):
- train_split = 0.5
- dummy_searched_configurations = pd.DataFrame(dummy_configurations).to_numpy()
- stored_dummy_searched_configurations = deepcopy(dummy_searched_configurations)
- dummy_searched_performances = np.array(
- [random.random() for _ in range(len(dummy_configurations))]
- )
- stored_dummy_searched_performances = deepcopy(dummy_searched_performances)
-
- X_train, y_train, X_val, y_val = process_and_split_estimation_data(
- searched_configurations=dummy_searched_configurations,
- searched_performances=dummy_searched_performances,
- train_split=train_split,
- filter_outliers=False,
- outlier_scope=None,
- random_state=DEFAULT_SEED,
+def test_stop_search_continue_search():
+ assert not stop_search(
+ n_remaining_configurations=10,
+ current_iter=5,
+ current_runtime=10.0,
+ max_runtime=100.0,
+ max_searches=50,
)
- assert len(X_val) == len(y_val)
- assert len(X_train) == len(y_train)
- assert len(X_val) + len(X_train) == len(dummy_searched_configurations)
+def test_check_objective_function_wrong_argument_count(dummy_parameter_grid):
+ def invalid_objective(config1, config2):
+ return 1.0
- assert (
- abs(len(X_train) - round(len(dummy_searched_configurations) * train_split)) <= 1
- )
- assert (
- abs(len(X_val) - round(len(dummy_searched_configurations) * (1 - train_split)))
- <= 1
- )
+ with pytest.raises(
+ ValueError, match="Objective function must take exactly one argument"
+ ):
+ ConformalTuner(
+ objective_function=invalid_objective,
+ search_space=dummy_parameter_grid,
+ minimize=True,
+ )
- # Assert there is no mutability of input:
- assert np.array_equal(
- dummy_searched_configurations, stored_dummy_searched_configurations
- )
- assert np.array_equal(
- dummy_searched_performances, stored_dummy_searched_performances
- )
+def test_check_objective_function_wrong_argument_name(dummy_parameter_grid):
+ def invalid_objective(config):
+ return 1.0
-def test_process_and_split_estimation_data__reproducibility(dummy_configurations):
- train_split = 0.5
- dummy_searched_configurations = pd.DataFrame(dummy_configurations).to_numpy()
- dummy_searched_performances = np.array(
- [random.random() for _ in range(len(dummy_configurations))]
- )
+ with pytest.raises(
+ ValueError,
+ match="The objective function must take exactly one argument named 'configuration'",
+ ):
+ ConformalTuner(
+ objective_function=invalid_objective,
+ search_space=dummy_parameter_grid,
+ minimize=True,
+ )
- (
- X_train_first_call,
- y_train_first_call,
- X_val_first_call,
- y_val_first_call,
- ) = process_and_split_estimation_data(
- searched_configurations=dummy_searched_configurations,
- searched_performances=dummy_searched_performances,
- train_split=train_split,
- filter_outliers=False,
- outlier_scope=None,
- random_state=DEFAULT_SEED,
- )
- (
- X_train_second_call,
- y_train_second_call,
- X_val_second_call,
- y_val_second_call,
- ) = process_and_split_estimation_data(
- searched_configurations=dummy_searched_configurations,
- searched_performances=dummy_searched_performances,
- train_split=train_split,
- filter_outliers=False,
- outlier_scope=None,
- random_state=DEFAULT_SEED,
- )
- assert np.array_equal(X_train_first_call, X_train_second_call)
- assert np.array_equal(y_train_first_call, y_train_second_call)
- assert np.array_equal(X_val_first_call, X_val_second_call)
- assert np.array_equal(y_val_first_call, y_val_second_call)
+def test_evaluate_configuration(tuner):
+ config = {"param_1": 0.5, "param_2": 10, "param_3": "option1"}
+ performance, runtime = tuner._evaluate_configuration(config)
-def test_normalize_estimation_data(dummy_configurations):
- # Proportion of all candidate configurations that
- # have already been searched:
- searched_split = 0.5
- # Split of searched configurations that is used as
- # training data for the search estimator:
- train_split = 0.5
+ assert performance == 2
+ assert runtime >= 0
- dummy_searched_configurations = dummy_configurations[
- : round(len(dummy_configurations) * searched_split)
- ]
- dummy_searchable_configurations = pd.DataFrame(
- dummy_configurations[round(len(dummy_configurations) * searched_split) :]
- ).to_numpy()
- stored_dummy_searchable_configurations = deepcopy(dummy_searchable_configurations)
- dummy_training_searched_configurations = pd.DataFrame(
- dummy_searched_configurations[
- : round(len(dummy_searched_configurations) * train_split)
- ]
- ).to_numpy()
- stored_dummy_training_searched_configurations = deepcopy(
- dummy_training_searched_configurations
- )
- dummy_validation_searched_configurations = pd.DataFrame(
- dummy_searched_configurations[
- round(len(dummy_searched_configurations) * train_split) :
- ]
- ).to_numpy()
- stored_dummy_validation_searched_configurations = deepcopy(
- dummy_validation_searched_configurations
- )
- (
- normalized_training_searched_configurations,
- normalized_validation_searched_configurations,
- normalized_searchable_configurations,
- ) = normalize_estimation_data(
- training_searched_configurations=dummy_training_searched_configurations,
- validation_searched_configurations=dummy_validation_searched_configurations,
- searchable_configurations=dummy_searchable_configurations,
- )
+def test_random_search_with_warm_start(
+ mock_constant_objective_function, dummy_parameter_grid
+):
+ warm_start_configs = [
+ ({"param_1": 0.5, "param_2": 10, "param_3": "option1"}, 0.8),
+ ]
- assert len(normalized_training_searched_configurations) == len(
- dummy_training_searched_configurations
- )
- assert len(normalized_validation_searched_configurations) == len(
- normalized_validation_searched_configurations
- )
- assert len(normalized_searchable_configurations) == len(
- normalized_searchable_configurations
+ tuner = ConformalTuner(
+ objective_function=mock_constant_objective_function,
+ search_space=dummy_parameter_grid,
+ minimize=True,
+ warm_starts=warm_start_configs,
)
- # Assert there is no mutability of inputs:
- assert np.array_equal(
- dummy_training_searched_configurations,
- stored_dummy_training_searched_configurations,
- )
- assert np.array_equal(
- dummy_validation_searched_configurations,
- stored_dummy_validation_searched_configurations,
- )
- assert np.array_equal(
- dummy_searchable_configurations, stored_dummy_searchable_configurations
- )
+ tuner.initialize_tuning_resources()
+ tuner.search_timer = RuntimeTracker()
+ assert len(tuner.study.trials) == 1
+ assert tuner.study.trials[0].acquisition_source == "warm_start"
-@pytest.mark.parametrize("breach", [True, False])
-@pytest.mark.parametrize("true_confidence_level", [0.2, 0.8])
-@pytest.mark.parametrize("learning_rate", [0.01, 0.1])
-def test_update_adaptive_interval(breach, true_confidence_level, learning_rate):
- updated_confidence_level = update_adaptive_confidence_level(
- true_confidence_level=true_confidence_level,
- last_confidence_level=true_confidence_level,
- breach=breach,
- learning_rate=learning_rate,
+ tuner.random_search(
+ max_random_iter=3,
+ verbose=False,
)
- assert 0 < updated_confidence_level < 1
- if breach:
- assert updated_confidence_level >= true_confidence_level
- else:
- assert updated_confidence_level <= true_confidence_level
-
+ assert len(tuner.study.trials) == 4
+ assert tuner.study.trials[0].acquisition_source == "warm_start"
+ assert all(trial.acquisition_source == "rs" for trial in tuner.study.trials[1:])
-def test_get_tuning_configurations(dummy_initialized_conformal_searcher__gbm_mse):
- stored_search_space = dummy_initialized_conformal_searcher__gbm_mse.search_space
- tuning_configurations = (
- dummy_initialized_conformal_searcher__gbm_mse._get_tuning_configurations()
- )
+def test_random_search_with_nan_performance(dummy_parameter_grid):
+ def nan_objective(configuration: Dict) -> float:
+ return np.nan
- for configuration in tuning_configurations:
- for param_name, param_value in configuration.items():
- # Check configuration only has parameter names from parameter grid prompt:
- assert param_name in stored_search_space.keys()
- # Check values in configuration come from range in parameter grid prompt:
- assert param_value in stored_search_space[param_name]
- # Test for mutability:
- assert (
- stored_search_space
- == dummy_initialized_conformal_searcher__gbm_mse.search_space
+ tuner = ConformalTuner(
+ objective_function=nan_objective,
+ search_space=dummy_parameter_grid,
+ minimize=True,
)
+ tuner.initialize_tuning_resources()
+ tuner.search_timer = RuntimeTracker()
-def test_get_tuning_configurations__reproducibility(
- dummy_initialized_conformal_searcher__gbm_mse,
-):
- assert (
- dummy_initialized_conformal_searcher__gbm_mse._get_tuning_configurations()
- == dummy_initialized_conformal_searcher__gbm_mse._get_tuning_configurations()
+ tuner.random_search(
+ max_random_iter=3,
+ verbose=False,
)
+ # Should handle NaN gracefully and not crash
+ assert len(tuner.study.trials) == 0
-def test_evaluate_configuration_performance(
- dummy_initialized_conformal_searcher__gbm_mse, dummy_gbm_configurations
-):
- # Arbitrarily select the first configuration in the list:
- dummy_configuration = dummy_gbm_configurations[0]
- stored_dummy_configuration = deepcopy(dummy_configuration)
- performance = dummy_initialized_conformal_searcher__gbm_mse._evaluate_configuration_performance(
- configuration=dummy_configuration, random_state=DEFAULT_SEED
- )
+@pytest.mark.parametrize("random_state", [42, 123, 999])
+def test_tune_method_reproducibility(dummy_parameter_grid, random_state):
+ """Test that tune method produces identical results with same random seed"""
- assert performance > 0
- # Test for mutability:
- assert stored_dummy_configuration == dummy_configuration
+ def complex_objective(configuration: Dict) -> float:
+ # Complex objective with multiple terms
+ x1 = configuration["param_1"]
+ x2 = configuration["param_2"]
+ x3_val = {"option1": 1, "option2": 2, "option3": 3}[configuration["param_3"]]
+ return x1**2 + np.sin(x2) + x3_val * 0.5
+ def run_tune_session():
+ # Create fresh searcher for each run to avoid state contamination
+ searcher = QuantileConformalSearcher(
+ quantile_estimator_architecture="ql",
+ sampler=LowerBoundSampler(
+ interval_width=0.1,
+ adapter="DtACI",
+ beta_decay="logarithmic_decay",
+ c=1,
+ ),
+ n_pre_conformal_trials=5,
+ )
-def test_evaluate_configuration_performance__reproducibility(
- dummy_initialized_conformal_searcher__gbm_mse, dummy_gbm_configurations
-):
- # Arbitrarily select the first configuration in the list:
- dummy_configuration = dummy_gbm_configurations[0]
+ tuner = ConformalTuner(
+ objective_function=complex_objective,
+ search_space=dummy_parameter_grid,
+ minimize=True,
+ n_candidates=200,
+ )
- assert dummy_initialized_conformal_searcher__gbm_mse._evaluate_configuration_performance(
- configuration=dummy_configuration, random_state=DEFAULT_SEED
- ) == dummy_initialized_conformal_searcher__gbm_mse._evaluate_configuration_performance(
- configuration=dummy_configuration, random_state=DEFAULT_SEED
- )
+ tuner.tune(
+ n_random_searches=10,
+ searcher=searcher,
+ optimizer_framework=None,
+ random_state=random_state,
+ max_searches=25,
+ max_runtime=None,
+ verbose=False,
+ )
+ return tuner.study
-def test_random_search(dummy_initialized_conformal_searcher__gbm_mse):
- n_searches = 5
- max_runtime = 30
- dummy_initialized_conformal_searcher__gbm_mse.search_timer = RuntimeTracker()
-
- (
- searched_configurations,
- searched_performances,
- searched_timestamps,
- runtime_per_search,
- ) = dummy_initialized_conformal_searcher__gbm_mse._random_search(
- n_searches=n_searches,
- max_runtime=max_runtime,
- random_state=DEFAULT_SEED,
- )
+ # Run twice with same seed
+ study1 = run_tune_session()
+ study2 = run_tune_session()
- for performance in searched_performances:
- assert performance > 0
- assert len(searched_configurations) > 0
- assert len(searched_performances) > 0
- assert len(searched_timestamps) > 0
- assert (
- len(searched_configurations)
- == len(searched_performances)
- == len(searched_timestamps)
- )
- assert len(searched_configurations) == n_searches
- assert 0 < runtime_per_search < max_runtime
+ # Verify identical results
+ assert len(study1.trials) == len(study2.trials)
+ for trial1, trial2 in zip(study1.trials, study2.trials):
+ assert trial1.configuration == trial2.configuration
+ assert trial1.performance == trial2.performance
+ # Skip acquisition_source comparison as it contains object addresses
-def test_random_search__reproducibility(
- dummy_initialized_conformal_searcher__gbm_mse,
-):
- n_searches = 5
- max_runtime = 30
- dummy_initialized_conformal_searcher__gbm_mse.search_timer = RuntimeTracker()
-
- (
- searched_configurations_first_call,
- searched_performances_first_call,
- _,
- _,
- ) = dummy_initialized_conformal_searcher__gbm_mse._random_search(
- n_searches=n_searches,
- max_runtime=max_runtime,
- random_state=DEFAULT_SEED,
- )
- (
- searched_configurations_second_call,
- searched_performances_second_call,
- _,
- _,
- ) = dummy_initialized_conformal_searcher__gbm_mse._random_search(
- n_searches=n_searches,
- max_runtime=max_runtime,
- random_state=DEFAULT_SEED,
- )
- assert searched_configurations_first_call == searched_configurations_second_call
- assert searched_performances_first_call == searched_performances_second_call
-
-
-def test_search(dummy_initialized_conformal_searcher__gbm_mse):
- # TODO: Below I hard coded a slice of possible inputs, but consider
- # pytest parametrizing these (though test will be very heavy,
- # so tag as slow and only run when necessary)
- confidence_level = 0.2
- conformal_model_type = GBM_NAME
- conformal_retraining_frequency = 1
- conformal_learning_rate = 0.01
- enable_adaptive_intervals = True
- max_runtime = 120
- min_training_iterations = 20
-
- stored_search_space = dummy_initialized_conformal_searcher__gbm_mse.search_space
- stored_tuning_configurations = (
- dummy_initialized_conformal_searcher__gbm_mse.tuning_configurations
+@pytest.mark.slow
+@pytest.mark.parametrize("dynamic_sampling", [True, False])
+def test_tune_method_comprehensive_integration(
+ comprehensive_minimizing_tuning_setup, dynamic_sampling
+):
+ """Comprehensive integration test for tune method (single run, logic only)"""
+ tuner, searcher, warm_start_configs, _ = comprehensive_minimizing_tuning_setup(
+ dynamic_sampling
)
- dummy_initialized_conformal_searcher__gbm_mse.search(
- conformal_search_estimator=conformal_model_type,
- confidence_level=confidence_level,
- n_random_searches=min_training_iterations,
- runtime_budget=max_runtime,
- conformal_retraining_frequency=conformal_retraining_frequency,
- conformal_learning_rate=conformal_learning_rate,
- enable_adaptive_intervals=enable_adaptive_intervals,
- verbose=0,
+ tuner.tune(
+ n_random_searches=15,
+ searcher=searcher,
+ optimizer_framework=None,
+ random_state=42,
+ max_searches=50,
+ max_runtime=5 * 60,
+ verbose=False,
)
+ study = tuner.study
- assert (
- len(dummy_initialized_conformal_searcher__gbm_mse.searched_configurations) > 0
- )
- assert len(dummy_initialized_conformal_searcher__gbm_mse.searched_performances) > 0
- assert len(
- dummy_initialized_conformal_searcher__gbm_mse.searched_configurations
- ) == len(dummy_initialized_conformal_searcher__gbm_mse.searched_performances)
- # Test for mutability:
- assert (
- stored_search_space
- == dummy_initialized_conformal_searcher__gbm_mse.search_space
- )
- assert (
- stored_tuning_configurations
- == dummy_initialized_conformal_searcher__gbm_mse.tuning_configurations
- )
+ # Test 1: Verify correct number of trials
+ assert len(study.trials) == 50
+ # Test 2: Verify warm starts are present
+ warm_start_trials = [
+ t for t in study.trials if t.acquisition_source == "warm_start"
+ ]
+ assert len(warm_start_trials) == 3
+ warm_start_performances = [t.performance for t in warm_start_trials]
+ expected_performances = [perf for _, perf in warm_start_configs]
+ assert set(warm_start_performances) == set(expected_performances)
+
+ # Test 3: Verify trial sources
+ rs_trials = [t for t in study.trials if t.acquisition_source == "rs"]
+ conformal_trials = [
+ t for t in study.trials if t.acquisition_source not in ["warm_start", "rs"]
+ ]
+ assert len(rs_trials) == 12
+ assert len(conformal_trials) == 35
+
+ # Test 4: Verify configurations are diverse
+ all_configs = [t.configuration for t in study.trials]
+ unique_configs = set(str(config) for config in all_configs)
+ assert len(unique_configs) == len(all_configs)
+
+ # Test 5: Verify study methods work correctly
+ best_config = study.get_best_configuration()
+ best_value = study.get_best_performance()
+ assert best_config in all_configs
+ assert best_value == min(t.performance for t in study.trials)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("minimize", [True, False])
+@pytest.mark.parametrize("dynamic_sampling", [True, False])
+def test_conformal_vs_random_performance_averaged(
+ comprehensive_minimizing_tuning_setup,
+ comprehensive_maximizing_tuning_setup,
+ minimize,
+ dynamic_sampling,
+):
+ """Compare conformal vs random search win rate over multiple runs."""
+ n_repeats = 20
+ conformal_wins, total_comparisons = 0, 0
-def test_search__reproducibility(dummy_initialized_conformal_searcher__gbm_mse):
- confidence_level = 0.2
- conformal_model_type = GBM_NAME
- conformal_retraining_frequency = 1
- conformal_learning_rate = 0.01
- enable_adaptive_intervals = True
- max_runtime = 120
- min_training_iterations = 20
-
- searcher_first_call = deepcopy(dummy_initialized_conformal_searcher__gbm_mse)
- searcher_second_call = deepcopy(dummy_initialized_conformal_searcher__gbm_mse)
-
- searcher_first_call.search(
- conformal_search_estimator=conformal_model_type,
- confidence_level=confidence_level,
- n_random_searches=min_training_iterations,
- runtime_budget=max_runtime,
- conformal_retraining_frequency=conformal_retraining_frequency,
- conformal_learning_rate=conformal_learning_rate,
- enable_adaptive_intervals=enable_adaptive_intervals,
- verbose=0,
- random_state=DEFAULT_SEED,
- )
- searcher_second_call.search(
- conformal_search_estimator=conformal_model_type,
- confidence_level=confidence_level,
- n_random_searches=min_training_iterations,
- runtime_budget=max_runtime,
- conformal_retraining_frequency=conformal_retraining_frequency,
- conformal_learning_rate=conformal_learning_rate,
- enable_adaptive_intervals=enable_adaptive_intervals,
- verbose=0,
- random_state=DEFAULT_SEED,
- )
+ if minimize:
+ tuning_setup = comprehensive_minimizing_tuning_setup
+ else:
+ tuning_setup = comprehensive_maximizing_tuning_setup
+
+ for seed in range(n_repeats):
+        # Run conformal tuner (10 random + 30 conformal searches)
+ conformal_tuner, searcher, _, _ = tuning_setup(dynamic_sampling)
+ conformal_tuner.tune(
+ n_random_searches=10,
+ searcher=searcher,
+ optimizer_framework=None,
+ random_state=seed,
+ max_searches=40,
+ max_runtime=5 * 60,
+ verbose=False,
+ )
+ conformal_best = conformal_tuner.get_best_value()
+
+ # Run pure random search tuner (40 random searches, no conformal)
+ random_tuner, searcher, _, _ = tuning_setup(dynamic_sampling)
+ random_tuner.tune(
+ n_random_searches=40,
+ searcher=searcher,
+ optimizer_framework=None,
+ random_state=seed,
+ max_searches=40, # This ensures only 40 random searches, no conformal
+ max_runtime=5 * 60,
+ verbose=False,
+ )
+ random_best = random_tuner.get_best_value()
+
+ if minimize:
+ conformal_wins_round = conformal_best < random_best
+ else:
+ conformal_wins_round = conformal_best > random_best
+
+ if conformal_wins_round:
+ conformal_wins += 1
+ total_comparisons += 1
+
+ assert conformal_wins / total_comparisons >= 0.8
+
+
+@pytest.mark.parametrize("minimize", [True, False])
+def test_best_fetcher_methods(minimize):
+ grid = {
+ "x": CategoricalRange(choices=[0, 1]),
+ "y": IntRange(min_value=0, max_value=2),
+ }
+
+ def objective(configuration):
+ return configuration["x"] + configuration["y"] * 10
+
+ tuner = ConformalTuner(
+ objective_function=objective,
+ search_space=grid,
+ minimize=minimize,
+ n_candidates=100,
+ )
+ tuner.initialize_tuning_resources()
+ tuner.search_timer = RuntimeTracker()
+
+ total_configs = len(list(product([0, 1], [0, 1, 2])))
+ tuner.random_search(max_random_iter=total_configs, verbose=False)
+
+ # Use built-in methods to get best config and value
+ best_config = tuner.get_best_params()
+ best_value = tuner.get_best_value()
+
+ if minimize:
+ expected_config = {"x": 0, "y": 0}
+ else:
+ expected_config = {"x": 1, "y": 2}
+ expected_value = objective(expected_config)
- assert (
- searcher_first_call.searched_configurations
- == searcher_second_call.searched_configurations
- )
- assert (
- searcher_first_call.searched_performances
- == searcher_second_call.searched_performances
- )
+ assert best_config == expected_config
+ assert best_value == expected_value
diff --git a/tests/test_utils.py b/tests/test_utils.py
deleted file mode 100644
index f9fc341..0000000
--- a/tests/test_utils.py
+++ /dev/null
@@ -1,81 +0,0 @@
-from confopt.utils import (
- get_tuning_configurations,
- get_perceptron_layers,
-)
-
-DEFAULT_SEED = 1234
-
-
-def test_get_perceptron_layers():
- dummy_n_layers_grid = [2, 3, 4]
- dummy_layer_size_grid = [16, 32, 64, 128]
-
- layer_list = get_perceptron_layers(
- n_layers_grid=dummy_n_layers_grid,
- layer_size_grid=dummy_layer_size_grid,
- random_seed=DEFAULT_SEED,
- )
-
- for layer in layer_list:
- assert isinstance(layer, tuple)
- assert min(dummy_n_layers_grid) <= len(layer) <= max(dummy_n_layers_grid)
- for layer_size in layer:
- assert (
- min(dummy_layer_size_grid) <= layer_size <= max(dummy_layer_size_grid)
- )
-
-
-def test_get_perceptron_layers__reproducibility():
- dummy_n_layers_grid = [2, 3, 4]
- dummy_layer_size_grid = [16, 32, 64, 128]
-
- layer_list_first_call = get_perceptron_layers(
- n_layers_grid=dummy_n_layers_grid,
- layer_size_grid=dummy_layer_size_grid,
- random_seed=DEFAULT_SEED,
- )
- layer_list_second_call = get_perceptron_layers(
- n_layers_grid=dummy_n_layers_grid,
- layer_size_grid=dummy_layer_size_grid,
- random_seed=DEFAULT_SEED,
- )
- for layer_first_call, layer_second_call in zip(
- layer_list_first_call, layer_list_second_call
- ):
- assert layer_first_call == layer_second_call
-
-
-def test_get_tuning_configurations(dummy_parameter_grid):
- dummy_n_configurations = 10
-
- tuning_configurations = get_tuning_configurations(
- parameter_grid=dummy_parameter_grid,
- n_configurations=dummy_n_configurations,
- random_state=DEFAULT_SEED,
- )
- assert len(tuning_configurations) < dummy_n_configurations
- for configuration in tuning_configurations:
- for k, v in configuration.items():
- # Check configuration only has parameter names from parameter grid prompt:
- assert k in dummy_parameter_grid.keys()
- # Check values in configuration come from range in parameter grid prompt:
- assert v in dummy_parameter_grid[k]
-
-
-def test_get_tuning_configurations__reproducibility(dummy_parameter_grid):
- dummy_n_configurations = 10
-
- tuning_configurations_first_call = get_tuning_configurations(
- parameter_grid=dummy_parameter_grid,
- n_configurations=dummy_n_configurations,
- random_state=DEFAULT_SEED,
- )
- tuning_configurations_second_call = get_tuning_configurations(
- parameter_grid=dummy_parameter_grid,
- n_configurations=dummy_n_configurations,
- random_state=DEFAULT_SEED,
- )
- for configuration_first_call, configuration_second_call in zip(
- tuning_configurations_first_call, tuning_configurations_second_call
- ):
- assert configuration_first_call == configuration_second_call
diff --git a/tests/utils/configurations/test_encoding.py b/tests/utils/configurations/test_encoding.py
new file mode 100644
index 0000000..2664149
--- /dev/null
+++ b/tests/utils/configurations/test_encoding.py
@@ -0,0 +1,66 @@
+from confopt.utils.configurations.encoding import ConfigurationEncoder
+from confopt.wrapping import IntRange, FloatRange, CategoricalRange
+
+
+def test_configuration_encoder():
+ """Test that ConfigurationEncoder properly encodes configurations"""
+ # Create configurations with mixed parameter types
+ configs = [
+ {"numeric1": 1.0, "numeric2": 5, "cat1": "a", "cat2": True},
+ {"numeric1": 2.0, "numeric2": 10, "cat1": "b", "cat2": False},
+ {"numeric1": 3.0, "numeric2": 15, "cat1": "a", "cat2": True},
+ ]
+
+ # Define search space with categorical parameters
+ search_space = {
+ "numeric1": FloatRange(min_value=0.0, max_value=10.0),
+ "numeric2": IntRange(min_value=0, max_value=20),
+ "cat1": CategoricalRange(choices=["a", "b", "c"]),
+ "cat2": CategoricalRange(choices=[True, False]),
+ }
+
+ # Test initialization
+ encoder = ConfigurationEncoder(search_space)
+
+ # Verify categorical mappings are created correctly
+ assert "cat1" in encoder.categorical_mappings
+ assert "cat2" in encoder.categorical_mappings
+
+ # Test transformation
+ df = encoder.transform(configs)
+
+ # Check shape - should have columns for numeric1, numeric2, cat1_a, cat1_b, cat1_c, cat2_False, cat2_True
+ assert df.shape[0] == 3 # 3 rows
+
+ # Verify numeric columns are preserved
+ assert "numeric1" in df.columns
+ assert "numeric2" in df.columns
+
+ # Check one-hot encoding worked correctly for string categorical values
+ cat1_cols = [col for col in df.columns if col.startswith("cat1_")]
+ assert (
+ len(cat1_cols) == 3
+ ) # "a", "b", and "c" (all possible values from search space)
+
+ cat1_a_col = next(col for col in cat1_cols if "a" in col)
+ cat1_b_col = next(col for col in cat1_cols if "b" in col)
+
+ # First row has cat1="a", so a=1, b=0
+ assert df.loc[0, cat1_a_col] == 1
+ assert df.loc[0, cat1_b_col] == 0
+
+ # Second row has cat1="b", so a=0, b=1
+ assert df.loc[1, cat1_a_col] == 0
+ assert df.loc[1, cat1_b_col] == 1
+
+ # Check boolean categorical values
+ cat2_cols = [col for col in df.columns if col.startswith("cat2_")]
+ assert len(cat2_cols) == 2 # True and False
+
+ # Boolean values are encoded with their string representation: 'cat2_True' and 'cat2_False'
+ cat2_true_col = "cat2_True"
+ cat2_false_col = "cat2_False"
+
+ # First row has cat2=True, so True=1, False=0
+ assert df.loc[0, cat2_true_col] == 1
+ assert df.loc[0, cat2_false_col] == 0
diff --git a/tests/utils/configurations/test_sampling_configurations.py b/tests/utils/configurations/test_sampling_configurations.py
new file mode 100644
index 0000000..1904a92
--- /dev/null
+++ b/tests/utils/configurations/test_sampling_configurations.py
@@ -0,0 +1,60 @@
+import pytest
+
+
+from confopt.utils.configurations.sampling import get_tuning_configurations
+
+RANDOM_STATE = 1234
+
+
+@pytest.mark.parametrize("method", ["uniform", "sobol"])
+def test_reproducibility(dummy_parameter_grid, method):
+ configs1 = get_tuning_configurations(
+ parameter_grid=dummy_parameter_grid,
+ n_configurations=10,
+ random_state=RANDOM_STATE,
+ sampling_method=method,
+ )
+ configs2 = get_tuning_configurations(
+ parameter_grid=dummy_parameter_grid,
+ n_configurations=10,
+ random_state=RANDOM_STATE,
+ sampling_method=method,
+ )
+ assert configs1 == configs2
+
+
+@pytest.mark.parametrize("method", ["uniform", "sobol"])
+def test_config_value_ranges(dummy_parameter_grid, method):
+ n = 50
+ configs = get_tuning_configurations(
+ parameter_grid=dummy_parameter_grid,
+ n_configurations=n,
+ random_state=RANDOM_STATE,
+ sampling_method=method,
+ )
+ assert len(configs) == n
+
+ for config in configs:
+ int_val = config["param_2"]
+ assert isinstance(int_val, int)
+ assert 1 <= int_val <= 100
+
+ float_val = config["param_1"]
+ assert isinstance(float_val, float)
+ assert 0.01 <= float_val <= 100
+
+ cat_val = config["param_3"]
+ assert cat_val in dummy_parameter_grid["param_3"].choices
+
+
+@pytest.mark.parametrize("method", ["uniform", "sobol"])
+def test_sampling_uniqueness(dummy_parameter_grid, method):
+ n = 100
+ configs = get_tuning_configurations(
+ parameter_grid=dummy_parameter_grid,
+ n_configurations=n,
+ random_state=123,
+ sampling_method=method,
+ )
+ unique_configs = {frozenset(cfg.items()) for cfg in configs}
+ assert len(unique_configs) == len(configs)
diff --git a/tests/utils/test_optimization.py b/tests/utils/test_optimization.py
new file mode 100644
index 0000000..20c17db
--- /dev/null
+++ b/tests/utils/test_optimization.py
@@ -0,0 +1,137 @@
+import pytest
+from confopt.utils.optimization import FixedSearcherOptimizer, DecayingSearcherOptimizer
+
+
+@pytest.fixture
+def fixed_surrogate_tuner():
+ """Fixture to create a FixedSurrogateTuner instance."""
+ return FixedSearcherOptimizer(n_tuning_episodes=8, tuning_interval=6)
+
+
+def test_fixed_surrogate_tuner_initialization():
+ """Test initialization of FixedSurrogateTuner."""
+ tuner = FixedSearcherOptimizer(tuning_interval=7)
+ assert tuner.fixed_interval == 7
+
+
+def test_fixed_surrogate_tuner_select_arm(fixed_surrogate_tuner):
+ """Test that select_arm returns the fixed values."""
+ arm = fixed_surrogate_tuner.select_arm()
+ assert arm == (8, 6)
+
+
+def test_fixed_surrogate_tuner_update(fixed_surrogate_tuner):
+ """Test that update method doesn't change behavior."""
+ fixed_surrogate_tuner.update(
+ search_iter=10,
+ )
+
+ arm = fixed_surrogate_tuner.select_arm()
+ assert arm == (8, 6)
+
+
+@pytest.fixture
+def decaying_tuner():
+ """Fixture to create a DecayingSearcherOptimizer instance."""
+ return DecayingSearcherOptimizer(
+ n_tuning_episodes=10,
+ initial_tuning_interval=2,
+ decay_rate=0.5,
+ decay_type="linear",
+ max_tuning_interval=20,
+ )
+
+
+def test_decaying_tuner_initialization():
+ """Test that the DecayingSearcherOptimizer initializes correctly."""
+ tuner = DecayingSearcherOptimizer(
+ initial_tuning_interval=3,
+ )
+ assert tuner.initial_tuning_interval == 3
+
+ tuner = DecayingSearcherOptimizer(
+ initial_tuning_interval=4,
+ )
+ assert tuner.initial_tuning_interval == 4
+
+
+def test_decaying_tuner_invalid_decay_type():
+ """Test that invalid decay_type raises ValueError."""
+ with pytest.raises(ValueError, match="decay_type must be one of"):
+ DecayingSearcherOptimizer(decay_type="invalid")
+
+
+def test_decaying_tuner_linear_decay(decaying_tuner):
+ """Test linear decay calculation."""
+ # At iteration 0
+ decaying_tuner.update(search_iter=0)
+ arm = decaying_tuner.select_arm()
+ assert arm[0] == 10 # n_tuning_episodes should remain constant
+ assert arm[1] == 2 # initial_tuning_interval
+
+ # At iteration 2: interval = 2 + 0.5 * 2 = 3, rounded to 3
+ decaying_tuner.update(search_iter=2)
+ arm = decaying_tuner.select_arm()
+ assert arm[0] == 10
+ assert arm[1] == 3
+
+ # At iteration 10: interval = 2 + 0.5 * 10 = 7, rounded to 7
+ decaying_tuner.update(search_iter=10)
+ arm = decaying_tuner.select_arm()
+ assert arm[0] == 10
+ assert arm[1] == 7
+
+
+def test_decaying_tuner_exponential_decay():
+ """Test exponential decay calculation."""
+ tuner = DecayingSearcherOptimizer(
+ n_tuning_episodes=5,
+ initial_tuning_interval=2,
+ decay_rate=0.1,
+ decay_type="exponential",
+ max_tuning_interval=20,
+ )
+
+ # At iteration 0
+ tuner.update(search_iter=0)
+ arm = tuner.select_arm()
+ assert arm[0] == 5
+ assert arm[1] == 2 # initial_tuning_interval
+
+ # At iteration 5: interval = 2 * (1.1)^5 ≈ 3.22, rounded to 3
+ tuner.update(search_iter=5)
+ arm = tuner.select_arm()
+ assert arm[0] == 5
+ assert arm[1] == 3
+
+
+def test_decaying_tuner_logarithmic_decay():
+ """Test logarithmic decay calculation."""
+ tuner = DecayingSearcherOptimizer(
+ n_tuning_episodes=8,
+ initial_tuning_interval=2,
+ decay_rate=2.0,
+ decay_type="logarithmic",
+ max_tuning_interval=20,
+ )
+
+ # At iteration 0
+ tuner.update(search_iter=0)
+ arm = tuner.select_arm()
+ assert arm[0] == 8
+ assert arm[1] == 2 # initial_tuning_interval
+
+ # At iteration 4: interval = 2 + 2.0 * log(5) ≈ 5.22, rounded to 5
+ tuner.update(search_iter=4)
+ arm = tuner.select_arm()
+ assert arm[0] == 8
+ assert arm[1] == 5
+
+
+def test_decaying_tuner_max_interval_cap(decaying_tuner):
+ """Test that tuning interval is capped at max_tuning_interval."""
+ # Set a very high iteration to exceed max_tuning_interval
+ decaying_tuner.update(search_iter=100)
+ arm = decaying_tuner.select_arm()
+ assert arm[0] == 10
+ assert arm[1] == 20 # Should be capped at max_tuning_interval
diff --git a/tests/test_preprocessing.py b/tests/utils/test_preprocessing.py
similarity index 97%
rename from tests/test_preprocessing.py
rename to tests/utils/test_preprocessing.py
index cd4dd65..e780d4f 100644
--- a/tests/test_preprocessing.py
+++ b/tests/utils/test_preprocessing.py
@@ -1,7 +1,7 @@
import numpy as np
import pytest
-from confopt.preprocessing import train_val_split
+from confopt.utils.preprocessing import train_val_split
DEFAULT_SEED = 1234
diff --git a/tests/utils/test_tracking.py b/tests/utils/test_tracking.py
new file mode 100644
index 0000000..0aa4753
--- /dev/null
+++ b/tests/utils/test_tracking.py
@@ -0,0 +1,22 @@
+import time
+
+
+from confopt.utils.tracking import RuntimeTracker
+
+
+def test_runtime_tracker__return_runtime():
+ dummy_tracker = RuntimeTracker()
+ sleep_time = 2
+ time.sleep(sleep_time)
+ time_elapsed = dummy_tracker.return_runtime()
+ assert sleep_time - 1 < round(time_elapsed) < sleep_time + 1
+
+
+def test_runtime_tracker__pause_runtime():
+ dummy_tracker = RuntimeTracker()
+ dummy_tracker.pause_runtime()
+ sleep_time = 2
+ time.sleep(sleep_time)
+ dummy_tracker.resume_runtime()
+ time_elapsed = dummy_tracker.return_runtime()
+ assert time_elapsed < 1