Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ __pycache__/
*.py[cod]
*$py.class
src/quant_research_starter.egg-info/PKG-INFO

myenv/
58 changes: 43 additions & 15 deletions src/quant_research_starter/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import click
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

from .backtest import VectorizedBacktest
from .data import SampleDataLoader, SyntheticDataGenerator
Expand All @@ -30,9 +31,14 @@ def generate_data(output, symbols, days):
click.echo("Generating synthetic price data...")

generator = SyntheticDataGenerator()
prices = generator.generate_price_data(
n_symbols=symbols, days=days, start_date="2020-01-01"
)
all_prices = []
for _ in tqdm(range(symbols), desc="Generating price series"):
prices = generator.generate_price_data(
n_symbols=1, days=days, start_date="2020-01-01"
)
all_prices.append(prices)

prices = pd.concat(all_prices, axis=1)

Comment on lines +34 to 42
Copy link

Copilot AI Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generating price data one symbol at a time in a loop defeats the purpose of the vectorized implementation in SyntheticDataGenerator.generate_price_data(). The original method was designed to efficiently generate multiple symbols at once, including correlation structure between them. This change removes the ability to generate correlated price series and introduces unnecessary overhead. Additionally, all generated symbols will have the same name "SYMBOL_00" since n_symbols=1 is used repeatedly, which will cause issues when concatenating.

Suggested change
all_prices = []
for _ in tqdm(range(symbols), desc="Generating price series"):
prices = generator.generate_price_data(
n_symbols=1, days=days, start_date="2020-01-01"
)
all_prices.append(prices)
prices = pd.concat(all_prices, axis=1)
prices = generator.generate_price_data(
n_symbols=symbols, days=days, start_date="2020-01-01"
)

Copilot uses AI. Check for mistakes.
# Ensure output directory exists
output_path = Path(output)
Expand Down Expand Up @@ -94,8 +100,13 @@ def compute_factors(data_file, factors, output):
vol = VolatilityFactor(lookback=21)
factor_data["volatility"] = vol.compute(prices)

# Combine factors (simple average for demo)
combined_signals = pd.DataFrame({k: v.mean(axis=1) for k, v in factor_data.items()})
combined_signals = pd.DataFrame(
{
k: tqdm(v.mean(axis=1), desc=f"Averaging {k} factor")
for k, v in factor_data.items()
Comment on lines +105 to +106
Copy link

Copilot AI Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tqdm usage here is incorrect. tqdm is being called on the result of v.mean(axis=1) (a pandas Series), which will not display a progress bar. Instead, it will just return the Series unchanged. To show progress during dictionary comprehension, you should wrap the iterator (e.g., factor_data.items()) with tqdm, not the values being computed.

Suggested change
k: tqdm(v.mean(axis=1), desc=f"Averaging {k} factor")
for k, v in factor_data.items()
k: v.mean(axis=1)
for k, v in tqdm(factor_data.items(), desc="Averaging factors")

Copilot uses AI. Check for mistakes.
}
)

combined_signals["composite"] = combined_signals.mean(axis=1)

# Save results
Expand Down Expand Up @@ -148,7 +159,6 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly):
# Load signals
if Path(signals_file).exists():
signals_data = pd.read_csv(signals_file, index_col=0, parse_dates=True)
# Use composite signal if available, otherwise first column
if "composite" in signals_data.columns:
signals = signals_data["composite"]
else:
Expand All @@ -158,27 +168,48 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly):
momentum = MomentumFactor(lookback=63)
signals = momentum.compute(prices).mean(axis=1)

# Ensure signals align with prices
# Align dates
common_dates = prices.index.intersection(signals.index)
prices = prices.loc[common_dates]
signals = signals.loc[common_dates]

# Expand signals to all symbols (simplified - same signal for all)
# Expand signals across symbols
signal_matrix = pd.DataFrame(
dict.fromkeys(prices.columns, signals), index=signals.index
)

# Run backtest
def run_with_progress(self, weight_scheme="rank"):
returns = []
idx = self.prices.index

for i in tqdm(range(1, len(idx)), desc="Running backtest"):
ret = self._compute_daily_return(
self.prices.iloc[i - 1],
self.prices.iloc[i],
weight_scheme,
)
returns.append(ret)

results = pd.DataFrame({"returns": returns}, index=idx[1:])
Comment on lines +182 to +193
Copy link

Copilot AI Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is attempting to monkey-patch the VectorizedBacktest.run method but references self._compute_daily_return() which does not exist in the VectorizedBacktest class. The original run method uses a vectorized approach with pct_change() instead of iterating through each day. This will cause an AttributeError when the backtest runs.

Suggested change
returns = []
idx = self.prices.index
for i in tqdm(range(1, len(idx)), desc="Running backtest"):
ret = self._compute_daily_return(
self.prices.iloc[i - 1],
self.prices.iloc[i],
weight_scheme,
)
returns.append(ret)
results = pd.DataFrame({"returns": returns}, index=idx[1:])
# Compute daily returns for each symbol
daily_returns = self.prices.pct_change().fillna(0)
# Compute portfolio weights (assume equal weight or rank-based)
if weight_scheme == "rank":
weights = self.signals.rank(axis=1, pct=True)
else:
weights = self.signals
# Align weights and returns
weights = weights.loc[daily_returns.index]
# Compute portfolio returns (weighted average across symbols)
portfolio_returns = (daily_returns * weights).mean(axis=1)
results = pd.DataFrame({"returns": portfolio_returns}, index=self.prices.index)

Copilot uses AI. Check for mistakes.
Comment on lines +182 to +193
Copy link

Copilot AI Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This implementation defeats the purpose of a "vectorized" backtest. The original implementation uses pandas vectorized operations for performance, but this change introduces a Python loop that iterates through each day individually, which will be significantly slower for large datasets. This is a performance regression, not an improvement.

Suggested change
returns = []
idx = self.prices.index
for i in tqdm(range(1, len(idx)), desc="Running backtest"):
ret = self._compute_daily_return(
self.prices.iloc[i - 1],
self.prices.iloc[i],
weight_scheme,
)
returns.append(ret)
results = pd.DataFrame({"returns": returns}, index=idx[1:])
# Vectorized implementation for daily returns
# Calculate daily returns for each symbol
price_returns = self.prices.pct_change().iloc[1:]
# Get weights for each day and symbol
weights = self._get_weights(weight_scheme)
weights = weights.loc[price_returns.index, price_returns.columns]
# Calculate portfolio returns as weighted sum
portfolio_returns = (weights * price_returns).sum(axis=1)
results = pd.DataFrame({"returns": portfolio_returns}, index=price_returns.index)

Copilot uses AI. Check for mistakes.
results["portfolio_value"] = (
self.initial_capital * (1 + results["returns"]).cumprod()
)
results["final_value"] = results["portfolio_value"].iloc[-1]
results["total_return"] = results["final_value"] / self.initial_capital - 1

return results

Comment on lines +193 to +201
Copy link

Copilot AI Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return value structure is inconsistent with the original VectorizedBacktest.run() method. The original returns a dictionary (from _generate_results()) containing multiple keys like 'positions', 'cash', 'trades', etc., but this implementation returns a DataFrame. This breaking change will cause the code at line 213 (metrics_calc = RiskMetrics(results["returns"])) to fail because results is now a DataFrame, not a dictionary.

Suggested change
results = pd.DataFrame({"returns": returns}, index=idx[1:])
results["portfolio_value"] = (
self.initial_capital * (1 + results["returns"]).cumprod()
)
results["final_value"] = results["portfolio_value"].iloc[-1]
results["total_return"] = results["final_value"] / self.initial_capital - 1
return results
results_df = pd.DataFrame({"returns": returns}, index=idx[1:])
results_df["portfolio_value"] = (
self.initial_capital * (1 + results_df["returns"]).cumprod()
)
final_value = results_df["portfolio_value"].iloc[-1]
total_return = final_value / self.initial_capital - 1
# Return a dictionary matching the original structure
return {
"returns": results_df["returns"],
"portfolio_value": results_df["portfolio_value"],
"final_value": final_value,
"total_return": total_return,
# Add other keys as needed to match the original structure
}

Copilot uses AI. Check for mistakes.
VectorizedBacktest.run = run_with_progress

backtest = VectorizedBacktest(
Comment on lines +181 to 204
Copy link

Copilot AI Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Monkey-patching a class method inside a CLI command function is a poor practice that affects global state and can lead to unexpected behavior if the command is called multiple times or in tests. This breaks the original vectorized implementation of VectorizedBacktest.run(). Consider either: (1) creating a subclass of VectorizedBacktest with the desired progress bar implementation, or (2) adding progress bar support directly to the VectorizedBacktest class in its own module.

Suggested change
def run_with_progress(self, weight_scheme="rank"):
returns = []
idx = self.prices.index
for i in tqdm(range(1, len(idx)), desc="Running backtest"):
ret = self._compute_daily_return(
self.prices.iloc[i - 1],
self.prices.iloc[i],
weight_scheme,
)
returns.append(ret)
results = pd.DataFrame({"returns": returns}, index=idx[1:])
results["portfolio_value"] = (
self.initial_capital * (1 + results["returns"]).cumprod()
)
results["final_value"] = results["portfolio_value"].iloc[-1]
results["total_return"] = results["final_value"] / self.initial_capital - 1
return results
VectorizedBacktest.run = run_with_progress
backtest = VectorizedBacktest(
class VectorizedBacktestWithProgress(VectorizedBacktest):
def run(self, weight_scheme="rank"):
returns = []
idx = self.prices.index
for i in tqdm(range(1, len(idx)), desc="Running backtest"):
ret = self._compute_daily_return(
self.prices.iloc[i - 1],
self.prices.iloc[i],
weight_scheme,
)
returns.append(ret)
results = pd.DataFrame({"returns": returns}, index=idx[1:])
results["portfolio_value"] = (
self.initial_capital * (1 + results["returns"]).cumprod()
)
results["final_value"] = results["portfolio_value"].iloc[-1]
results["total_return"] = results["final_value"] / self.initial_capital - 1
return results
backtest = VectorizedBacktestWithProgress(

Copilot uses AI. Check for mistakes.
prices=prices,
signals=signal_matrix,
initial_capital=initial_capital,
transaction_cost=0.001,
)

results = backtest.run(weight_scheme="rank")

# Calculate metrics
# Metrics
metrics_calc = RiskMetrics(results["returns"])
metrics = metrics_calc.calculate_all()

Expand All @@ -195,18 +226,16 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly):
with open(output_path, "w") as f:
json.dump(results_dict, f, indent=2)

# Generate plot
# Plotting
if plot:
plt.figure(figsize=(12, 8))

# Plot portfolio value
plt.subplot(2, 1, 1)
plt.plot(results["portfolio_value"].index, results["portfolio_value"].values)
plt.title("Portfolio Value")
plt.ylabel("USD")
plt.grid(True)

# Plot returns
plt.subplot(2, 1, 2)
plt.bar(results["returns"].index, results["returns"].values, alpha=0.7)
plt.title("Daily Returns")
Expand All @@ -220,7 +249,6 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly):

click.echo(f"Plot saved -> {plot_path}")

# Generate Plotly HTML chart if requested
if plotly:
html_path = output_path.parent / "backtest_plot.html"

Expand Down
Loading