feat: optuna

Satvik-Singh192 · Satvik-Singh192 · commit c6245a92b746 · 2025-11-12T13:44:49.000+05:30
diff --git a/README.md b/README.md
@@ -24,7 +24,8 @@ QuantResearchStarter aims to provide a clean, well-documented starting point for
 * **Factor library** — example implementations of momentum, value, size, and volatility factors.
 * **Vectorized backtesting engine** — supports transaction costs, slippage, portfolio constraints, and configurable rebalancing frequencies (daily, weekly, monthly).
 * **Risk & performance analytics** — returns, drawdowns, Sharpe, turnover, and other risk metrics.
-* **CLI & scripts** — small tools to generate data, compute factors, and run backtests from the terminal.
+* **Hyperparameter optimization** — automated tuning with Optuna, pruning, and distributed study support.
+* **CLI & scripts** — small tools to generate data, compute factors, run backtests, and optimize hyperparameters from the terminal.
 * **Production-ready utilities** — type hints, tests, continuous integration, and documentation scaffolding.
 
 ---
@@ -153,9 +154,42 @@ Run `python -m quant_research_starter.cli --help` or `python -m quant_research_s
 * `python -m quant_research_starter.cli generate-data` — create synthetic price series or download data from adapters
 * `python -m quant_research_starter.cli compute-factors` — calculate and export factor scores
 * `python -m quant_research_starter.cli backtest` — run the vectorized backtest and export results
+* `python -m quant_research_starter.cli autotune` — optimize hyperparameters with Optuna
 
 **Note:** If you have the `qrs` command in your PATH, you can use `qrs` instead of `python -m quant_research_starter.cli`.
 
+### Hyperparameter Tuning (Autotune)
+
+The `autotune` command automates hyperparameter search using Optuna with pruning support for efficient optimization.
+
+**Basic usage:**
+```bash
+# Optimize momentum factor hyperparameters
+qrs autotune -f momentum -n 100 -m sharpe_ratio
+
+# Use YAML configuration file
+qrs autotune -c examples/autotune_config.yaml
+```
+
+**Key features:**
+- **Pruning**: Early stopping of bad trials to save computation time
+- **Distributed tuning**: Optional RDB storage for shared studies — use PostgreSQL or MySQL for multi-worker setups; SQLite is best kept to local, single-machine runs
+- **Flexible objectives**: Optimize any metric (Sharpe ratio, total return, CAGR, etc.)
+- **Factor support**: Optimize momentum, volatility, and other factor hyperparameters
+
+**Example YAML configuration:**
+```yaml
+data_file: "data_sample/sample_prices.csv"
+factor_type: "momentum"
+n_trials: 100
+metric: "sharpe_ratio"
+output: "output/tuning_results.json"
+pruner: "median"  # Options: none, median, percentile
+storage: "sqlite:///optuna.db"  # Optional: for distributed runs
+```
+
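+See `examples/autotune_config.yaml` for a complete example configuration. When `-c/--config` is given, any setting present in the YAML file overrides the corresponding command-line option.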
+
 ---
 
 ## Project structure (overview)
@@ -167,6 +201,7 @@ QuantResearchStarter/
 │  ├─ factors/           # factor implementations
 │  ├─ backtest/          # backtester & portfolio logic
 │  ├─ analytics/         # performance and risk metrics
+│  ├─ tuning/            # Optuna hyperparameter optimization
 │  ├─ cli/               # command line entry points
 │  └─ dashboard/         # optional Streamlit dashboard
 ├─ examples/             # runnable notebooks & example strategies
diff --git a/examples/autotune_config.yaml b/examples/autotune_config.yaml
@@ -0,0 +1,27 @@
+# Example configuration for hyperparameter tuning with Optuna
+# Usage: qrs autotune -c examples/autotune_config.yaml
+
+# Data configuration
+data_file: "data_sample/sample_prices.csv"
+
+# Factor to optimize
+factor_type: "momentum"  # Options: momentum, value, size, volatility
+
+# Optimization settings
+n_trials: 100  # Number of trials to run
+metric: "sharpe_ratio"  # Metric to optimize (sharpe_ratio, total_return, cagr, etc.)
+
+# Output configuration
+output: "output/tuning_results.json"
+study_name: "momentum_factor_study"
+
+# Pruning configuration (for early stopping of bad trials)
+# Options: none, median, percentile
+pruner: "median"
+
+# Optional: RDB storage for distributed tuning runs
+# Uncomment and configure for multi-worker setups
+# storage: "sqlite:///optuna.db"
+# For PostgreSQL: "postgresql://user:password@localhost/dbname"
+# For MySQL: "mysql://user:password@localhost/dbname"
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -31,6 +31,8 @@ dependencies = [
     "uvicorn>=0.23.0",
     "python-dotenv>=1.0.0",
     "requests>=2.31.0",
+    "optuna>=3.0.0",
+    "pyyaml>=6.0",
 ]
 
 [project.optional-dependencies]
diff --git a/src/quant_research_starter/cli.py b/src/quant_research_starter/cli.py
@@ -6,12 +6,14 @@
 import click
 import matplotlib.pyplot as plt
 import pandas as pd
+import yaml
 from tqdm import tqdm
 
 from .backtest import VectorizedBacktest
 from .data import SampleDataLoader, SyntheticDataGenerator
 from .factors import MomentumFactor, SizeFactor, ValueFactor, VolatilityFactor
 from .metrics import RiskMetrics, create_equity_curve_plot
+from .tuning import OptunaRunner, create_backtest_objective
 
 
 @click.group()
@@ -247,5 +249,136 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly):
     click.echo(f"Results saved -> {output}")
 
 
+@cli.command()
+@click.option(
+    "--config",
+    "-c",
+    type=click.Path(exists=True),
+    help="YAML configuration file for hyperparameter tuning",
+)
+@click.option(
+    "--data-file",
+    "-d",
+    default="data_sample/sample_prices.csv",
+    help="Price data file path",
+)
+@click.option(
+    "--factor-type",
+    "-f",
+    type=click.Choice(["momentum", "value", "size", "volatility"]),
+    default="momentum",
+    help="Factor type to optimize",
+)
+@click.option(
+    "--n-trials",
+    "-n",
+    default=100,
+    help="Number of optimization trials",
+)
+@click.option(
+    "--metric",
+    "-m",
+    default="sharpe_ratio",
+    help="Metric to optimize (sharpe_ratio, total_return, cagr, etc.)",
+)
+@click.option(
+    "--output",
+    "-o",
+    default="output/tuning_results.json",
+    help="Output file for tuning results",
+)
+@click.option(
+    "--storage",
+    "-s",
+    default=None,
+    help="RDB storage URL (e.g., sqlite:///optuna.db) for distributed tuning",
+)
+@click.option(
+    "--pruner",
+    "-p",
+    type=click.Choice(["none", "median", "percentile"]),
+    default="median",
+    help="Pruning strategy for early stopping",
+)
+@click.option(
+    "--study-name",
+    default="optuna_study",
+    help="Name of the Optuna study",
+)
+def autotune(
+    config,
+    data_file,
+    factor_type,
+    n_trials,
+    metric,
+    output,
+    storage,
+    pruner,
+    study_name,
+):
+    """Run hyperparameter optimization with Optuna.
+
+    Settings are taken from the command-line options. When ``--config`` is
+    given, every key present in the YAML file overrides the corresponding
+    option; keys that are absent keep the command-line value.
+
+    Prices are read from ``data_file`` when it exists, otherwise the bundled
+    sample prices are used. The best parameters, best value and trial count
+    are echoed, and the results are written to ``output`` by the runner.
+
+    Raises:
+        click.BadParameter: if the configuration file is not a YAML mapping.
+    """
+    click.echo("Starting hyperparameter optimization...")
+
+    # Load configuration from YAML if provided
+    if config:
+        with open(config, "r", encoding="utf-8") as f:
+            config_data = yaml.safe_load(f)
+
+        # safe_load returns None for an empty document; treat that as
+        # "no overrides" instead of crashing on `.get` below.
+        if config_data is None:
+            config_data = {}
+        if not isinstance(config_data, dict):
+            raise click.BadParameter(
+                "configuration file must contain a YAML mapping of settings",
+                param_hint="--config",
+            )
+
+        data_file = config_data.get("data_file", data_file)
+        factor_type = config_data.get("factor_type", factor_type)
+        n_trials = config_data.get("n_trials", n_trials)
+        metric = config_data.get("metric", metric)
+        output = config_data.get("output", output)
+        storage = config_data.get("storage", storage)
+        pruner = config_data.get("pruner", pruner)
+        study_name = config_data.get("study_name", study_name)
+
+    # Load data
+    if Path(data_file).exists():
+        prices = pd.read_csv(data_file, index_col=0, parse_dates=True)
+    else:
+        click.echo("Data file not found, using sample data...")
+        loader = SampleDataLoader()
+        prices = loader.load_sample_prices()
+
+    click.echo(f"Optimizing {factor_type} factor with {n_trials} trials...")
+    click.echo(f"Optimizing metric: {metric}")
+
+    # Create objective function
+    objective = create_backtest_objective(
+        prices=prices,
+        factor_type=factor_type,
+        metric=metric,
+    )
+
+    # Only these metrics are treated as "higher is better"; every other
+    # metric name is minimized.
+    # NOTE(review): confirm that is intended for any further metrics the
+    # objective supports.
+    maximize_metrics = {"sharpe_ratio", "total_return", "cagr"}
+    direction = "maximize" if metric in maximize_metrics else "minimize"
+
+    # Create and run Optuna runner
+    runner = OptunaRunner(
+        search_space={},  # Not used when using create_backtest_objective
+        objective=objective,
+        n_trials=n_trials,
+        study_name=study_name,
+        storage=storage,
+        pruner=pruner,
+        direction=direction,
+    )
+
+    # Run optimization
+    results = runner.optimize()
+
+    # Save results
+    runner.save_results(output)
+
+    click.echo("\n" + "=" * 60)
+    click.echo("Optimization Results")
+    click.echo("=" * 60)
+    click.echo(f"Best parameters: {results['best_params']}")
+    click.echo(f"Best {metric}: {results['best_value']:.4f}")
+    click.echo(f"Total trials: {len(results['trial_history'])}")
+    click.echo(f"Results saved -> {output}")
+
+
 if __name__ == "__main__":
     cli()
diff --git a/src/quant_research_starter/tuning/__init__.py b/src/quant_research_starter/tuning/__init__.py
@@ -0,0 +1,6 @@
+"""Hyperparameter tuning with Optuna."""
+
+# Re-export every name the CLI imports from this package: cli.py does
+# `from .tuning import OptunaRunner, create_backtest_objective`, so both
+# must be importable from here.
+# NOTE(review): assumes create_backtest_objective is defined in
+# optuna_runner.py — confirm against that module.
+from .optuna_runner import OptunaRunner, create_backtest_objective
+
+__all__ = ["OptunaRunner", "create_backtest_objective"]
+
diff --git a/src/quant_research_starter/tuning/optuna_runner.py b/src/quant_research_starter/tuning/optuna_runner.py

Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,8 @@ dependencies = [`
`31`	`31`	`"uvicorn>=0.23.0",`
`32`	`32`	`"python-dotenv>=1.0.0",`
`33`	`33`	`"requests>=2.31.0",`
	`34`	`+ "optuna>=3.0.0",`
	`35`	`+ "pyyaml>=6.0",`
`34`	`36`	`]`
`35`	`37`
`36`	`38`	`[project.optional-dependencies]`