feat: numba implemented, benchmarking provided #143

Satvik-Singh192 · web-flow · commit a27eedaa1377 · 2025-11-15T19:56:01.000+05:30
closes #92
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -0,0 +1,42 @@
+name: Performance Benchmarks
+
+# Runs the optimisation benchmarks on manual dispatch, or on pushes to main
+# that touch the backtest code, the benchmark code, or this workflow file.
+# The captured benchmark output is published as a build artifact.
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - 'src/quant_research_starter/backtest/**'
+      - 'src/quant_research_starter/benchmarks/**'
+      - '.github/workflows/benchmark.yml'
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install numpy pandas numba
+          pip install -e .
+
+      - name: Run benchmarks
+        # Best effort: a failing benchmark must not fail the job, but it is
+        # surfaced as a warning annotation instead of being silently ignored.
+        run: |
+          cd src/quant_research_starter/benchmarks
+          python bench_opt.py > benchmark_results.txt 2>&1 || echo "::warning::bench_opt.py exited with a non-zero status; see benchmark_results.txt in the benchmark-results artifact"
+
+      - name: Upload benchmark results
+        # upload-artifact v3 has been retired by GitHub; v4 accepts the same inputs.
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: benchmark-results
+          path: src/quant_research_starter/benchmarks/benchmark_results.txt
+          retention-days: 30
+
diff --git a/src/quant_research_starter/backtest/cython_opt.pyx b/src/quant_research_starter/backtest/cython_opt.pyx
@@ -0,0 +1,55 @@
+"""Cython-optimized backtest operations (skeleton)."""
+
+cimport cython
+import numpy as np
+cimport numpy as np
+
+DTYPE = np.float64
+ctypedef np.float64_t DTYPE_t
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def compute_strategy_returns_cython(
+    np.ndarray[DTYPE_t, ndim=2] weights_prev,
+    np.ndarray[DTYPE_t, ndim=2] returns,
+    np.ndarray[DTYPE_t, ndim=1] turnover,
+    DTYPE_t transaction_cost
+):
+    """Compute strategy returns with transaction costs (Cython version).
+
+    For every row ``i`` the result is
+    ``sum_j(weights_prev[i, j] * returns[i, j]) - turnover[i] * transaction_cost``.
+
+    Args:
+        weights_prev: 2-D float64 array of weights.
+        returns: 2-D float64 array with the same shape as ``weights_prev``.
+        turnover: 1-D float64 array with one entry per row of ``weights_prev``.
+        transaction_cost: Multiplier applied to ``turnover``.
+
+    Returns:
+        1-D float64 array with one net return per row.
+
+    Raises:
+        ValueError: If the array shapes are inconsistent.
+    """
+    # Py_ssize_t is the native index type; a C int could overflow on huge arrays.
+    cdef Py_ssize_t n_days = weights_prev.shape[0]
+    cdef Py_ssize_t n_assets = weights_prev.shape[1]
+    cdef np.ndarray[DTYPE_t, ndim=1] strat_ret = np.zeros(n_days, dtype=DTYPE)
+    cdef Py_ssize_t i, j
+    cdef DTYPE_t ret_sum
+
+    # Bounds checking is disabled for the loops below, so mismatched shapes
+    # would read out of bounds; reject them explicitly.
+    if returns.shape[0] != n_days or returns.shape[1] != n_assets:
+        raise ValueError("returns must have the same shape as weights_prev")
+    if turnover.shape[0] != n_days:
+        raise ValueError("turnover must have one entry per row of weights_prev")
+
+    for i in range(n_days):
+        ret_sum = 0.0
+        for j in range(n_assets):
+            ret_sum += weights_prev[i, j] * returns[i, j]
+        strat_ret[i] = ret_sum - (turnover[i] * transaction_cost)
+
+    return strat_ret
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def compute_turnover_cython(
+    np.ndarray[DTYPE_t, ndim=2] weights,
+    np.ndarray[DTYPE_t, ndim=2] weights_prev
+):
+    """Compute turnover (L1 change / 2) (Cython version).
+
+    For every row ``i`` the result is
+    ``0.5 * sum_j(abs(weights[i, j] - weights_prev[i, j]))``.
+
+    Args:
+        weights: 2-D float64 array of weights.
+        weights_prev: 2-D float64 array with the same shape as ``weights``.
+
+    Returns:
+        1-D float64 array with one turnover value per row.
+
+    Raises:
+        ValueError: If the two arrays do not have the same shape.
+    """
+    # Py_ssize_t is the native index type; a C int could overflow on huge arrays.
+    cdef Py_ssize_t n_days = weights.shape[0]
+    cdef Py_ssize_t n_assets = weights.shape[1]
+    cdef np.ndarray[DTYPE_t, ndim=1] turnover = np.zeros(n_days, dtype=DTYPE)
+    cdef Py_ssize_t i, j
+    cdef DTYPE_t total_change
+
+    # Bounds checking is disabled for the loops below, so a shape mismatch
+    # would read out of bounds; reject it explicitly.
+    if weights_prev.shape[0] != n_days or weights_prev.shape[1] != n_assets:
+        raise ValueError("weights_prev must have the same shape as weights")
+
+    for i in range(n_days):
+        total_change = 0.0
+        for j in range(n_assets):
+            total_change += abs(weights[i, j] - weights_prev[i, j])
+        turnover[i] = total_change * 0.5
+
+    return turnover
+
diff --git a/src/quant_research_starter/backtest/numba_opt.py b/src/quant_research_starter/backtest/numba_opt.py
@@ -0,0 +1,156 @@
+"""Numba-accelerated backtest operations."""
+
+import numpy as np
+
+try:
+    from numba import jit, prange
+
+    NUMBA_AVAILABLE = True
+except ImportError:
+    NUMBA_AVAILABLE = False
+
+    def jit(*args, **kwargs):
+        def decorator(func):
+            return func
+
+        return decorator
+
+    prange = range
+
+
+@jit(nopython=True, cache=True)
+def compute_strategy_returns(
+    weights_prev: np.ndarray,
+    returns: np.ndarray,
+    turnover: np.ndarray,
+    transaction_cost: float,
+) -> np.ndarray:
+    """Compute strategy returns with transaction costs."""
+    n_days, n_assets = returns.shape
+    strat_ret = np.zeros(n_days)
+
+    for i in prange(n_days):
+        ret_sum = 0.0
+        for j in prange(n_assets):
+            ret_sum += weights_prev[i, j] * returns[i, j]
+        strat_ret[i] = ret_sum - (turnover[i] * transaction_cost)
+
+    return strat_ret
+
+
+@jit(nopython=True, cache=True)
+def compute_turnover(weights: np.ndarray, weights_prev: np.ndarray) -> np.ndarray:
+    """Compute turnover (L1 change / 2)."""
+    n_days, n_assets = weights.shape
+    turnover = np.zeros(n_days)
+
+    for i in prange(n_days):
+        total_change = 0.0
+        for j in prange(n_assets):
+            total_change += abs(weights[i, j] - weights_prev[i, j])
+        turnover[i] = total_change * 0.5
+
+    return turnover
+
+
+@jit(nopython=True, cache=True)
+def compute_portfolio_value(
+    strategy_returns: np.ndarray, initial_capital: float
+) -> np.ndarray:
+    """Compute cumulative portfolio value."""
+    n_days = len(strategy_returns)
+    portfolio_value = np.zeros(n_days + 1)
+    portfolio_value[0] = initial_capital
+
+    for i in prange(n_days):
+        portfolio_value[i + 1] = portfolio_value[i] * (1.0 + strategy_returns[i])
+
+    return portfolio_value[1:]
+
+
+@jit(nopython=True, cache=True)
+def compute_returns_from_prices(prices: np.ndarray) -> np.ndarray:
+    """Compute percentage returns from prices."""
+    n_days, n_assets = prices.shape
+    returns = np.zeros((n_days - 1, n_assets))
+
+    for i in prange(n_days - 1):
+        for j in prange(n_assets):
+            if prices[i, j] > 0:
+                returns[i, j] = (prices[i + 1, j] - prices[i, j]) / prices[i, j]
+
+    return returns
+
+
+@jit(nopython=True, cache=True)
+def rank_based_weights(
+    signals: np.ndarray, max_leverage: float, long_pct: float, short_pct: float
+) -> np.ndarray:
+    """Compute rank-based portfolio weights."""
+    n_assets = len(signals)
+    weights = np.zeros(n_assets)
+
+    valid_mask = np.zeros(n_assets, dtype=np.bool_)
+    n_valid = 0
+    for i in range(n_assets):
+        if not np.isnan(signals[i]):
+            valid_mask[i] = True
+            n_valid += 1
+
+    if n_valid == 0:
+        return weights
+
+    valid_values = np.zeros(n_valid)
+    valid_indices = np.zeros(n_valid, dtype=np.int64)
+    idx = 0
+    for i in range(n_assets):
+        if valid_mask[i]:
+            valid_values[idx] = signals[i]
+            valid_indices[idx] = i
+            idx += 1
+
+    sorted_idx = np.argsort(valid_values)
+    ranks = np.zeros(n_valid)
+    for i in range(n_valid):
+        ranks[sorted_idx[i]] = i + 1.0
+
+    sorted_ranks = np.sort(ranks)
+    long_idx = int(n_valid * long_pct)
+    short_idx = int(n_valid * short_pct)
+    long_threshold = sorted_ranks[long_idx] if long_idx < n_valid else sorted_ranks[-1]
+    short_threshold = sorted_ranks[short_idx] if short_idx >= 0 else sorted_ranks[0]
+
+    long_count = 0
+    short_count = 0
+
+    for idx in range(n_valid):
+        i = valid_indices[idx]
+        rank_val = ranks[idx]
+        if rank_val >= long_threshold:
+            weights[i] = 1.0
+            long_count += 1
+        elif rank_val <= short_threshold:
+            weights[i] = -1.0
+            short_count += 1
+
+    if long_count > 0:
+        long_weight = 1.0 / long_count
+        for i in range(n_assets):
+            if weights[i] > 0:
+                weights[i] = long_weight
+    if short_count > 0:
+        short_weight = -1.0 / short_count
+        for i in range(n_assets):
+            if weights[i] < 0:
+                weights[i] = short_weight
+
+    total_leverage = 0.0
+    for i in range(n_assets):
+        total_leverage += abs(weights[i])
+
+    if total_leverage > max_leverage and total_leverage > 0:
+        scale = max_leverage / total_leverage
+        for i in range(n_assets):
+            weights[i] *= scale
+
+    return weights
diff --git a/src/quant_research_starter/backtest/profile_backtest.py b/src/quant_research_starter/backtest/profile_backtest.py
@@ -0,0 +1,55 @@
+"""Simple profiler to identify hotspots in backtest."""
+
+import cProfile
+import pstats
+import sys
+from io import StringIO
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from quant_research_starter.backtest.vectorized import VectorizedBacktest
+from quant_research_starter.data import SampleDataLoader
+
+
+def profile_backtest():
+    """Profile the backtest to identify hotspots."""
+    loader = SampleDataLoader()
+    prices = loader.load_sample_prices()
+
+    signals = prices.pct_change(20).fillna(0)
+
+    profiler = cProfile.Profile()
+    profiler.enable()
+
+    backtest = VectorizedBacktest(
+        prices=prices,
+        signals=signals,
+        initial_capital=1_000_000,
+        transaction_cost=0.001,
+    )
+    backtest.run(weight_scheme="rank")
+
+    profiler.disable()
+
+    s = StringIO()
+    stats = pstats.Stats(profiler, stream=s)
+    stats.sort_stats("cumulative")
+    stats.print_stats(20)
+
+    print("Top 20 functions by cumulative time:")
+    print(s.getvalue())
+
+    stats.sort_stats("tottime")
+    stats.print_stats(20)
+
+    print("\nTop 20 functions by total time:")
+    s2 = StringIO()
+    stats = pstats.Stats(profiler, stream=s2)
+    stats.sort_stats("tottime")
+    stats.print_stats(20)
+    print(s2.getvalue())
+
+
+# Script entry point: run the profiler only when this file is executed
+# directly, not when it is imported.
+if __name__ == "__main__":
+    profile_backtest()
diff --git a/src/quant_research_starter/backtest/setup_cython.py b/src/quant_research_starter/backtest/setup_cython.py
@@ -0,0 +1,18 @@
+"""Setup script for Cython extensions."""
+
+import numpy
+from Cython.Build import cythonize
+from setuptools import Extension, setup
+
+extensions = [
+    Extension(
+        "cython_opt",
+        ["cython_opt.pyx"],
+        include_dirs=[numpy.get_include()],
+        extra_compile_args=["-O3"],
+    )
+]
+
+setup(
+    ext_modules=cythonize(extensions, compiler_directives={"language_level": "3"}),
+)
diff --git a/src/quant_research_starter/benchmarks/bench_opt.py b/src/quant_research_starter/benchmarks/bench_opt.py