-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbacktest_stats.py
More file actions
406 lines (324 loc) · 13.1 KB
/
backtest_stats.py
File metadata and controls
406 lines (324 loc) · 13.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
"""
Backtest Statistics and Benchmarking Module
This module provides utilities for running statistical tests on backtesting strategies,
evaluating performance across multiple symbols, and analyzing which indicators contribute
most to trading success.
"""
import asyncio
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Any, Optional, Union
from numba import jit, njit
# Import necessary components
import backtester
from predictor import StockPredictor
from backtester import (
AptosBacktester,
select_stocks,
create_signal_generator,
create_multi_stock_signal_generator,
)
# JIT-optimized utility functions
@njit
def calculate_momentum_scores(prices):
    """Return a momentum score: the sum of per-step simple returns.

    Args:
        prices: 1-D numeric array of prices, oldest first.

    Returns:
        float: sum of ``prices[i] / prices[i-1] - 1`` over the series;
        0.0 when fewer than two prices are given.
    """
    # Accumulate directly instead of materializing a full returns array
    # and summing it afterwards — same result, no temporary allocation.
    total = 0.0
    for i in range(1, len(prices)):
        total += prices[i] / prices[i - 1] - 1
    return total
@jit(nopython=True)
def calculate_sharpe_ratio(returns, risk_free_rate=0.02):
    """Annualized Sharpe ratio of daily returns (Numba-compiled).

    Args:
        returns: 1-D array of daily strategy returns.
        risk_free_rate: annual risk-free rate, converted to a daily rate
            assuming 252 trading days per year.

    Returns:
        float: annualized Sharpe ratio; 0.0 when volatility is zero.
    """
    excess_returns = returns - (risk_free_rate / 252)  # Daily risk-free rate
    # Compute the standard deviation once and reuse it in both the
    # zero-volatility guard and the ratio (original computed it twice).
    volatility = np.std(excess_returns)
    if volatility == 0:
        return 0.0
    return (np.mean(excess_returns) / volatility) * np.sqrt(252)
@jit(nopython=True)
def calculate_drawdown(equity_curve):
    """Maximum peak-to-trough drawdown of an equity curve (Numba-compiled).

    Args:
        equity_curve: 1-D array of portfolio values over time.

    Returns:
        float: largest fractional drop from a running peak, in [0, 1];
        0.0 for an empty curve.
    """
    # Guard: the original indexed equity_curve[0] unconditionally and
    # raised on an empty input.
    if len(equity_curve) == 0:
        return 0.0
    peak = equity_curve[0]
    max_drawdown = 0.0
    for i in range(1, len(equity_curve)):
        if equity_curve[i] > peak:
            peak = equity_curve[i]
        drawdown = (peak - equity_curve[i]) / peak
        if drawdown > max_drawdown:
            max_drawdown = drawdown
    return max_drawdown
@jit(nopython=True)
def calculate_win_rate(trade_returns):
    """Fraction of trades with a strictly positive return (Numba-compiled).

    Args:
        trade_returns: 1-D array of per-trade returns.

    Returns:
        float: winners / total trades; 0 when there are no trades.
    """
    total = len(trade_returns)
    if total == 0:
        return 0
    winners = 0
    for trade_return in trade_returns:
        if trade_return > 0:
            winners += 1
    return winners / total
@jit(nopython=True)
def compare_returns(strategy_returns, benchmark_returns):
    """Return True when the strategy's compounded growth beats the benchmark's.

    Both arguments are aligned 1-D arrays of per-period simple returns;
    growth factors are compounded over the length of the strategy array.
    """
    strategy_growth = 1.0
    benchmark_growth = 1.0
    for i in range(len(strategy_returns)):
        strategy_growth = strategy_growth * (1 + strategy_returns[i])
        benchmark_growth = benchmark_growth * (1 + benchmark_returns[i])
    return strategy_growth > benchmark_growth
# yfinance supplies the technology-sector stock universe used by the
# random-selection trial runners below.
import yfinance as yf

# Top technology-company tickers, fetched once at import time.
# NOTE(review): this performs a network call on module import — confirm
# that is acceptable for every consumer of this module.
tech_sec = list(
    yf.Sector("technology").top_companies.index
)  # Example usage to ensure yfinance is imported correctly
import random

# Configure logging
# NOTE(review): log_directory appears unused — log_file is created in the CWD.
log_directory = "."
log_file = f"backtest_stats_{date.today().strftime('%Y%m%d')}.log"
# Create logger
logger = logging.getLogger("backtest_stats")
logger.setLevel(logging.INFO)
# Ensure we only add handlers once (guards against duplicate log lines
# if this module is imported more than once)
if not logger.handlers:
    # File handler
    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    )
    logger.addHandler(file_handler)
    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    )
    logger.addHandler(console_handler)
try:
    from private_strat import (
        get_entry_signal,
    )  # Import custom signal generator if available
except ImportError:
    # If this import fails, get_entry_signal stays undefined and
    # analyze_indicators() will raise NameError when it calls it.
    logger.warning("Custom signal generator not found!")
async def run_multi_stock_backtest(
    indicators_to_drop=None, vizualize=True, random_stocks=False
) -> Tuple[AptosBacktester, pd.DataFrame, Dict[str, Any]]:
    """
    Run a backtest on multiple stocks with specified configuration

    Args:
        indicators_to_drop: Optional pair of indicators to exclude from the strategy
        vizualize: When True, plot the backtest results after the run
        random_stocks: When True, sample 5 random tech-sector tickers
            instead of the momentum-ranked NASDAQ 100 selection

    Returns:
        tuple: (backtester, history_df, metrics) - Results of the backtest
    """
    # Select stocks for backtest
    if random_stocks:
        # Sample 5 distinct tickers from the tech sector
        selected_stocks = random.sample(tech_sec, 5)
    else:
        # Momentum-ranked selection from the NASDAQ 100 over a fixed
        # 200-day lookback (the original comment claiming a "randomized"
        # lookback was wrong — the value is constant)
        selected_stocks = select_stocks(
            num_stocks=20,
            universe="NASDAQ 100",
            lookback_days=200,
            strategy="momentum",
            interval="1d",
            end_date=None,  # Default to today
        )
    # Create backtester with multiple stocks. Local name `bt` avoids
    # shadowing the imported `backtester` module.
    bt = AptosBacktester(symbols=selected_stocks, initial_capital=100000)
    # Backtest window: the last ~600 calendar days, ending today
    start_date = (date.today() - timedelta(days=600)).strftime("%Y-%m-%d")
    end_date = date.today().strftime("%Y-%m-%d")
    # Run backtest with the predictor-driven multi-stock signal generator.
    # (StockPredictor is already imported at module level; the original
    # re-imported it here redundantly.)
    history_df, metrics = bt.run_backtest(
        start_date=start_date,
        end_date=end_date,
        signal_generator=create_multi_stock_signal_generator(
            predictor_class=StockPredictor,
            always_reverse=False,
            autodetect_reversal=True,
            indicators_to_drop=indicators_to_drop,
        ),
    )
    # Display results
    logger.info("\n=== Multi-Stock Backtest Results ===")
    logger.info(f"Period: {start_date} to {end_date}")
    logger.info(f"Stocks: {', '.join(selected_stocks)}")
    logger.info(f"Total Return: {metrics['total_return']*100:.2f}%")
    logger.info(f"Sharpe Ratio: {metrics['sharpe']:.2f}")
    logger.info(f"Max Drawdown: {metrics['max_drawdown']*100:.2f}%")
    logger.info(f"Win Rate: {metrics['win_rate']*100:.2f}%")
    logger.info(f"Number of Trades: {metrics.get('num_trades', 0)}")
    if indicators_to_drop:
        logger.info(f"Indicators dropped: {indicators_to_drop}")
    # Plot results
    if vizualize:
        bt.plot_results(history_df)
    return bt, history_df, metrics
def counter_of_win_over_mkt(num_trials=20):
    """
    Run multiple backtest trials to count how often the strategy beats the market

    Each trial picks 5 random tech-sector tickers and a randomized
    historical window, runs the multi-stock strategy, and records whether
    it beat the market and the risk-free benchmark.

    Args:
        num_trials: Number of backtest trials to run

    Returns:
        dict: Statistics about market outperformance. Rates are computed
        over trials that completed without error (``completed`` key);
        ``num_trials`` is the number attempted.
    """
    beat_market = 0
    beat_risk_free = 0
    total_return = 0
    completed = 0  # trials that finished without raising
    logger.info(
        f"Running {num_trials} backtest trials to measure market outperformance..."
    )
    for i in range(num_trials):
        logger.info(f"Trial {i+1}/{num_trials}")
        # Select random stocks for this trial (choices() may repeat a ticker)
        symbols = random.choices(tech_sec, k=5)
        # Create backtester instance (local name avoids shadowing the
        # imported `backtester` module)
        bt = AptosBacktester(symbols=symbols, initial_capital=100000)
        # Randomized window: 500-750 days long, ending 6-18 months ago
        lookback_days = random.randint(500, 750)
        end_date = date.today() - timedelta(days=random.randint(180, 540))
        start_date = (end_date - timedelta(days=lookback_days)).strftime("%Y-%m-%d")
        end_date = end_date.strftime("%Y-%m-%d")
        try:
            # Run backtest using multi-stock signal generator
            history, metrics = bt.run_backtest(
                start_date=start_date,
                end_date=end_date,
                signal_generator=create_multi_stock_signal_generator(
                    predictor_class=StockPredictor,
                    always_reverse=False,
                    autodetect_reversal=False,
                ),
            )
            # Plot results for visual inspection
            bt.plot_results(history)
            # Evaluate each benchmark check once and reuse the result —
            # the original called both methods twice per trial (once for
            # counting, once for logging)
            won_vs_market = bt.whether_beat_market()
            won_vs_risk_free = bt.whether_beat_risk_free(metrics)
            if won_vs_market:
                beat_market += 1
            if won_vs_risk_free:
                beat_risk_free += 1
            total_return += metrics["total_return"]
            completed += 1
            logger.info(
                f"Trial {i+1}, "
                f"Return={metrics['total_return']:.2%}, "
                f"Beat Market={won_vs_market}, "
                f"Beat Risk-Free={won_vs_risk_free}"
            )
        except Exception as e:
            logger.error(f"Error in trial {i+1}: {e}")
    # Rates over completed trials only — the original divided by
    # num_trials, so failed trials silently diluted every statistic
    market_win_rate = beat_market / completed if completed > 0 else 0
    risk_free_win_rate = beat_risk_free / completed if completed > 0 else 0
    avg_return = total_return / completed if completed > 0 else 0
    results = {
        "market_win_rate": market_win_rate,
        "risk_free_win_rate": risk_free_win_rate,
        "avg_return": avg_return,
        "num_trials": num_trials,
        "completed": completed,
        "beat_market_count": beat_market,
        "beat_risk_free_count": beat_risk_free,
    }
    logger.info("\n=== Market Outperformance Results ===")
    logger.info(
        f"Win Rate vs Market: {market_win_rate:.2%} ({beat_market}/{completed})"
    )
    logger.info(
        f"Win Rate vs Risk-Free: {risk_free_win_rate:.2%} ({beat_risk_free}/{completed})"
    )
    logger.info(f"Average Return: {avg_return:.2%}")
    return results
async def analyze_indicators(vizualize=True):
    """
    Analyze which indicators contribute most to trading success
    by running tests with different indicators dropped

    Args:
        vizualize: forwarded to run_multi_stock_backtest to control
            per-run plotting

    Returns:
        list[dict]: one entry per dropped indicator pair with keys
        indicator_pair/return/impact/sharpe/win_rate, sorted by impact
        (baseline return minus test return) in descending order

    Raises:
        NameError: if the optional private_strat module failed to import,
            get_entry_signal is undefined when called below
    """
    # First get the full list of indicators from a representative symbol
    predictor = StockPredictor(
        symbol="AAPL", start_date="2020-01-01", end_date="2025-05-17"
    )
    predictor.load_data()
    _, _, _, info = get_entry_signal(predictor)
    buy_indicators = info["buy_signal_indicators"]
    sell_indicators = info["sell_signal_indicators"]
    logger.info(f"Buy indicators are {buy_indicators}")
    logger.info(f"Sell indicators are {sell_indicators}")
    # Baseline test with all indicators
    logger.info("Running baseline test with all indicators...")
    _, _, baseline_metrics = await run_multi_stock_backtest(vizualize=vizualize)
    baseline_return = baseline_metrics["total_return"]
    logger.info(f"Baseline return: {baseline_return:.2%}")
    # Test dropping (buy, sell) indicator pairs, one pair per run
    results = []
    for indicator_pair in zip(buy_indicators, sell_indicators):
        logger.info(f"Testing with indicators dropped: {indicator_pair}")
        try:
            _, _, test_metrics = await run_multi_stock_backtest(
                indicators_to_drop=indicator_pair, vizualize=vizualize
            )
            test_return = test_metrics["total_return"]
            # Impact = return lost by dropping the pair. A plain scalar
            # subtraction — the original wrapped this in @jit(nopython=True),
            # which only added compilation overhead.
            impact = baseline_return - test_return
            results.append(
                {
                    "indicator_pair": indicator_pair,
                    "return": test_return,
                    "impact": impact,
                    "sharpe": test_metrics["sharpe"],
                    "win_rate": test_metrics["win_rate"],
                }
            )
            logger.info(f"Result: Return={test_return:.2%}, Impact={impact:.2%}")
        except Exception as e:
            logger.error(f"Error testing {indicator_pair}: {e}")
    # Sort results by impact (largest loss from dropping first)
    results.sort(key=lambda x: x["impact"], reverse=True)
    # Display and visualize results
    logger.info("\n=== Indicator Impact Analysis ===")
    logger.info(f"Baseline return with all indicators: {baseline_return:.2%}")
    logger.info("\nIndicator pairs ranked by impact:")
    for i, result in enumerate(results):
        logger.info(
            f"{i+1}. {result['indicator_pair']} - "
            f"Impact: {result['impact']:.2%}, Return: {result['return']:.2%}, "
            f"Sharpe: {result['sharpe']:.2f}, Win Rate: {result['win_rate']:.2%}"
        )
    # Create visualization
    plt.figure(figsize=(10, 6))
    indicators = [
        f"{pair[0]}/{pair[1]}" for pair in (r["indicator_pair"] for r in results)
    ]
    # Convert impacts to percentages with vectorized NumPy. The original
    # routed this through an @jit(nopython=True) helper that called
    # np.array on a Python list — unsupported in Numba's nopython mode.
    impacts = np.array([r["impact"] for r in results]) * 100
    plt.barh(indicators, impacts)
    plt.xlabel("Impact on Return (%) (baseline_return - test_return)")
    plt.ylabel("Indicator Pair")
    plt.title(
        "Impact of Removing Indicator Pairs on Strategy Performance (More negative, more likely to drop this pair)"
    )
    plt.grid(axis="x", linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.show()
    return results
if __name__ == "__main__":
    # Uncomment the tests you want to run
    # Test 1: Run the main backtest
    # asyncio.run(main())
    # Test 2: Run the multi-stock backtest
    asyncio.run(run_multi_stock_backtest())
    # Test 3: Measure how often strategy beats the market
    # counter_of_win_over_mkt(1)
    # Test 4: Analyze indicators
    # asyncio.run(analyze_indicators(vizualize=False))
    # NOTE(review): this prompt prints unconditionally, even after Test 2
    # above has already run — confirm whether it should be guarded.
    print(
        "Select a test to run by uncommenting the relevant section in backtest_stats.py"
    )