-
Notifications
You must be signed in to change notification settings - Fork 7
feat: numba #143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: numba #143
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
# Runs the backtest benchmark script and publishes its console output as a
# build artifact. Triggered manually, or by pushes to main that touch the
# backtest code, the benchmarks, or this workflow file.
name: Performance Benchmarks

on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - 'src/quant_research_starter/backtest/**'
      - 'src/quant_research_starter/benchmarks/**'
      - '.github/workflows/benchmark.yml'

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install numpy pandas numba
          pip install -e .

      # Best-effort: `|| true` keeps the job green even if the benchmark
      # script fails; stdout and stderr are both captured in the results file.
      - name: Run benchmarks
        run: |
          cd src/quant_research_starter/benchmarks
          python bench_opt.py > benchmark_results.txt 2>&1 || true

      # upload-artifact v3 is deprecated and no longer accepted by
      # GitHub-hosted runners, so v4 is required here.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: benchmark-results
          path: src/quant_research_starter/benchmarks/benchmark_results.txt
          retention-days: 30
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| """Cython-optimized backtest operations (skeleton).""" | ||
|
|
||
| cimport cython | ||
| import numpy as np | ||
| cimport numpy as np | ||
|
|
||
| DTYPE = np.float64 | ||
| ctypedef np.float64_t DTYPE_t | ||
|
|
||
|
|
||
@cython.boundscheck(False)
@cython.wraparound(False)
def compute_strategy_returns_cython(
    np.ndarray[DTYPE_t, ndim=2] weights_prev not None,
    np.ndarray[DTYPE_t, ndim=2] returns not None,
    np.ndarray[DTYPE_t, ndim=1] turnover not None,
    DTYPE_t transaction_cost
):
    """Compute strategy returns with transaction costs (Cython version).

    For every row ``i`` the result is
    ``sum_j(weights_prev[i, j] * returns[i, j]) - turnover[i] * transaction_cost``.

    Parameters
    ----------
    weights_prev : 2-D float64 array
        Weights multiplied element-wise with ``returns``.
    returns : 2-D float64 array
        Must have the same shape as ``weights_prev``.
    turnover : 1-D float64 array
        One value per row of ``weights_prev``.
    transaction_cost : float
        Multiplier applied to ``turnover`` before it is subtracted.

    Returns
    -------
    1-D float64 array with one entry per row of ``weights_prev``.

    Raises
    ------
    ValueError
        If the input shapes are inconsistent.
    """
    # Py_ssize_t is the native type of array extents; a C int could truncate.
    cdef Py_ssize_t n_days = weights_prev.shape[0]
    cdef Py_ssize_t n_assets = weights_prev.shape[1]
    cdef np.ndarray[DTYPE_t, ndim=1] strat_ret = np.zeros(n_days, dtype=DTYPE)
    cdef Py_ssize_t i, j
    cdef DTYPE_t ret_sum

    # Bounds checking is disabled for the loops below, so inconsistent shapes
    # must be rejected up front instead of reading past the end of a buffer.
    if returns.shape[0] != n_days or returns.shape[1] != n_assets:
        raise ValueError("returns must have the same shape as weights_prev")
    if turnover.shape[0] != n_days:
        raise ValueError("turnover must have one entry per row of weights_prev")

    for i in range(n_days):
        ret_sum = 0.0
        for j in range(n_assets):
            ret_sum += weights_prev[i, j] * returns[i, j]
        strat_ret[i] = ret_sum - (turnover[i] * transaction_cost)

    return strat_ret
|
|
||
|
|
||
@cython.boundscheck(False)
@cython.wraparound(False)
def compute_turnover_cython(
    np.ndarray[DTYPE_t, ndim=2] weights not None,
    np.ndarray[DTYPE_t, ndim=2] weights_prev not None
):
    """Compute turnover (L1 change / 2) (Cython version).

    For every row ``i`` the result is
    ``0.5 * sum_j(|weights[i, j] - weights_prev[i, j]|)``.

    Parameters
    ----------
    weights : 2-D float64 array
        Weights compared against ``weights_prev``.
    weights_prev : 2-D float64 array
        Must have the same shape as ``weights``.

    Returns
    -------
    1-D float64 array with one entry per row of ``weights``.

    Raises
    ------
    ValueError
        If the two inputs do not share the same shape.
    """
    # Py_ssize_t is the native type of array extents; a C int could truncate.
    cdef Py_ssize_t n_days = weights.shape[0]
    cdef Py_ssize_t n_assets = weights.shape[1]
    cdef np.ndarray[DTYPE_t, ndim=1] turnover = np.zeros(n_days, dtype=DTYPE)
    cdef Py_ssize_t i, j
    cdef DTYPE_t total_change

    # Bounds checking is disabled for the loops below, so inconsistent shapes
    # must be rejected up front instead of reading past the end of a buffer.
    if weights_prev.shape[0] != n_days or weights_prev.shape[1] != n_assets:
        raise ValueError("weights_prev must have the same shape as weights")

    for i in range(n_days):
        total_change = 0.0
        for j in range(n_assets):
            total_change += abs(weights[i, j] - weights_prev[i, j])
        turnover[i] = total_change * 0.5

    return turnover
|
|
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,156 @@ | ||||||
| """Numba-accelerated backtest operations.""" | ||||||
|
|
||||||
| import numpy as np | ||||||
|
|
||||||
# Numba is optional: when it cannot be imported, `jit` becomes a no-op
# decorator and `prange` falls back to the builtin `range`, so decorated
# functions still run as plain Python.
try:
    from numba import jit, prange

    NUMBA_AVAILABLE = True
except ImportError:
    NUMBA_AVAILABLE = False

    def jit(*args, **kwargs):
        """No-op stand-in for ``numba.jit``.

        Supports both decorator spellings: ``@jit`` (the function is passed
        directly) and ``@jit(...)`` (options are passed and a decorator is
        returned). All options are ignored.
        """
        # Bare ``@jit``: the only positional argument is the function itself.
        if len(args) == 1 and not kwargs and callable(args[0]):
            return args[0]

        def decorator(func):
            return func

        return decorator

    prange = range
|
|
||||||
|
|
||||||
@jit(nopython=True, cache=True)
def compute_strategy_returns(
    weights_prev: np.ndarray,
    returns: np.ndarray,
    turnover: np.ndarray,
    transaction_cost: float,
) -> np.ndarray:
    """Compute strategy returns with transaction costs.

    For every row ``i`` the result is
    ``sum_j(weights_prev[i, j] * returns[i, j]) - turnover[i] * transaction_cost``.

    Args:
        weights_prev: 2-D array indexed as ``[i, j]`` alongside ``returns``.
        returns: 2-D array; its shape defines the number of rows and columns
            that are iterated.
        turnover: 1-D array indexed by row.
        transaction_cost: Multiplier applied to ``turnover`` before it is
            subtracted.

    Returns:
        1-D array with one entry per row of ``returns``.
    """
    n_days, n_assets = returns.shape
    strat_ret = np.zeros(n_days)

    # Rows are independent, so the outer loop is the only candidate for
    # prange. Per Numba's documentation prange only runs in parallel when the
    # decorator sets parallel=True; otherwise it behaves like range.
    for i in prange(n_days):
        ret_sum = 0.0
        # The inner loop is a sequential reduction into ret_sum; Numba does
        # not parallelise nested prange loops, so a plain range is used.
        for j in range(n_assets):
            ret_sum += weights_prev[i, j] * returns[i, j]
        strat_ret[i] = ret_sum - (turnover[i] * transaction_cost)

    return strat_ret
|
|
||||||
|
|
||||||
| @jit(nopython=True, cache=True) | ||||||
| def compute_turnover(weights: np.ndarray, weights_prev: np.ndarray) -> np.ndarray: | ||||||
| """Compute turnover (L1 change / 2).""" | ||||||
| n_days, n_assets = weights.shape | ||||||
| turnover = np.zeros(n_days) | ||||||
|
|
||||||
| for i in prange(n_days): | ||||||
| total_change = 0.0 | ||||||
| for j in prange(n_assets): | ||||||
|
||||||
| for j in prange(n_assets): | |
| for j in range(n_assets): |
Copilot
AI
Nov 15, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using prange for sequential computation with dependencies. The compute_portfolio_value function uses prange (line 65) but each iteration depends on the previous value (portfolio_value[i]), making parallelization impossible. This should use regular range instead of prange as the operations cannot be parallelized.
| for i in prange(n_days): | |
| for i in range(n_days): |
Copilot
AI
Nov 15, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nested prange usage in Numba may cause performance issues. Using prange for both the outer loop (line 77) and inner loop (line 78) can lead to thread contention and may not provide the expected parallelization benefits. Consider using prange only for the outer loop and regular range for the inner loop.
| for j in prange(n_assets): | |
| for j in range(n_assets): |
Copilot
AI
Nov 15, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
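np.bool_ dtype usage. Line 93 uses dtype=np.bool_. Note that np.bool_ itself is not deprecated: it was the np.bool alias of the builtin bool that was deprecated in NumPy 1.20 (and removed in 1.24), so dtype=np.bool should not be used. dtype=np.bool_ can stay as is; dtype=bool is an equivalent spelling in plain NumPy code.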
| valid_mask = np.zeros(n_assets, dtype=np.bool_) | |
| valid_mask = np.zeros(n_assets, dtype=bool) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| """Simple profiler to identify hotspots in backtest.""" | ||
|
|
||
| import cProfile | ||
| import pstats | ||
| import sys | ||
| from io import StringIO | ||
| from pathlib import Path | ||
|
|
||
| sys.path.insert(0, str(Path(__file__).parent.parent.parent)) | ||
|
|
||
| from quant_research_starter.backtest.vectorized import VectorizedBacktest | ||
| from quant_research_starter.data import SampleDataLoader | ||
|
|
||
|
|
||
def profile_backtest():
    """Profile the backtest to identify hotspots.

    Loads the sample prices, derives signals from the 20-period percentage
    change (missing values filled with 0), runs a ``VectorizedBacktest`` with
    the "rank" weight scheme under ``cProfile``, and prints two reports: the
    top 20 entries sorted by cumulative time, then the top 20 sorted by total
    time.
    """
    loader = SampleDataLoader()
    prices = loader.load_sample_prices()

    signals = prices.pct_change(20).fillna(0)

    profiler = cProfile.Profile()
    profiler.enable()
    try:
        backtest = VectorizedBacktest(
            prices=prices,
            signals=signals,
            initial_capital=1_000_000,
            transaction_cost=0.001,
        )
        backtest.run(weight_scheme="rank")
    finally:
        # Stop profiling even if the backtest raises.
        profiler.disable()

    reports = (
        ("cumulative", "Top 20 functions by cumulative time:"),
        ("tottime", "\nTop 20 functions by total time:"),
    )
    for sort_key, heading in reports:
        # A fresh buffer per report keeps each section's output separate;
        # every pass is rendered exactly once and printed under its heading.
        buffer = StringIO()
        stats = pstats.Stats(profiler, stream=buffer)
        stats.sort_stats(sort_key)
        stats.print_stats(20)

        print(heading)
        print(buffer.getvalue())
|
Comment on lines
+43
to
+51
|
||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| profile_backtest() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| """Setup script for Cython extensions.""" | ||
|
|
||
| import numpy | ||
| from Cython.Build import cythonize | ||
| from setuptools import Extension, setup | ||
|
|
||
# The single extension module, built from the Cython source in this directory.
# NumPy's headers are added to the include path and the C compiler is passed
# the -O3 flag.
cython_opt_extension = Extension(
    name="cython_opt",
    sources=["cython_opt.pyx"],
    include_dirs=[numpy.get_include()],
    extra_compile_args=["-O3"],
)
extensions = [cython_opt_extension]

# Translate the .pyx source with Python 3 language semantics, then hand the
# resulting extension modules to setuptools.
cythonized_modules = cythonize(
    extensions,
    compiler_directives={"language_level": "3"},
)

setup(ext_modules=cythonized_modules)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nested prange usage in Numba may cause performance issues. Using prange for both the outer loop (line 32) and inner loop (line 34) can lead to thread contention and may not provide the expected parallelization benefits. Consider using prange only for the outer loop and regular range for the inner loop, or using a flattened parallelization strategy.