From ed37dcba84f8a065883b36cb0910c95d482307bc Mon Sep 17 00:00:00 2001 From: Satvik-Singh192 Date: Wed, 5 Nov 2025 17:21:18 +0530 Subject: [PATCH] feat: adds loading bar --- .../backtest/vectorized.py | 74 ++-- src/quant_research_starter/cli.py | 341 ++++++++++-------- .../data/downloaders.py | 27 +- .../data/sample_loader.py | 15 +- src/quant_research_starter/data/synthetic.py | 1 + .../factors/bollinger.py | 35 +- .../factors/momentum.py | 43 ++- src/quant_research_starter/factors/value.py | 48 ++- 8 files changed, 349 insertions(+), 235 deletions(-) diff --git a/src/quant_research_starter/backtest/vectorized.py b/src/quant_research_starter/backtest/vectorized.py index 4fc2bbf..d6de3d9 100644 --- a/src/quant_research_starter/backtest/vectorized.py +++ b/src/quant_research_starter/backtest/vectorized.py @@ -3,7 +3,7 @@ from typing import Dict, Optional import pandas as pd - +from tqdm import tqdm class VectorizedBacktest: """ @@ -74,40 +74,54 @@ def run(self, weight_scheme: str = "rank") -> Dict: # Compute daily weights from signals (rebalance only on rebalance dates) weights_list = [] - for date in returns_df.index: - if self._should_rebalance(date, prev_rebalance_date): - # Rebalance: compute new target weights - current_weights = self._calculate_weights( - aligned_signals.loc[date], weight_scheme - ) - prev_rebalance_date = date - - # Append current weights (maintain between rebalances) - weights_list.append(current_weights) + with tqdm(total=len(returns_df.index), desc="Backtesting", unit="day") as pbar: + for date in returns_df.index: + if self._should_rebalance(date, prev_rebalance_date): + # Rebalance: compute new target weights + current_weights = self._calculate_weights( + aligned_signals.loc[date], weight_scheme + ) + prev_rebalance_date = date + pbar.set_postfix(rebalance="✓", refresh=False) + else: + pbar.set_postfix(rebalance=" ", refresh=False) + + # Append current weights (maintain between rebalances) + weights_list.append(current_weights) + 
pbar.update(1) weights = pd.DataFrame( weights_list, index=returns_df.index, columns=self.prices.columns ).fillna(0.0) - # Previous day weights for PnL calculation - weights_prev = weights.shift(1).fillna(0.0) - - # Turnover for transaction costs (L1 change / 2) - turnover = (weights.fillna(0.0) - weights_prev).abs().sum(axis=1) * 0.5 - tc_series = turnover * self.transaction_cost - - # Strategy returns - strat_ret = (weights_prev * returns_df).sum(axis=1) - tc_series - - # Build portfolio value series - portfolio_value = (1 + strat_ret).cumprod() * self.initial_capital - portfolio_value = pd.concat( - [ - pd.Series(self.initial_capital, index=[self.prices.index[0]]), - portfolio_value, - ] - ) - portfolio_value = portfolio_value.reindex(self.prices.index).ffill() + with tqdm(total=4, desc="Calculating performance") as pbar: + # Previous day weights for PnL calculation + pbar.set_description("Calculating weight shifts") + weights_prev = weights.shift(1).fillna(0.0) + pbar.update(1) + + # Turnover for transaction costs (L1 change / 2) + pbar.set_description("Calculating transaction costs") + turnover = (weights.fillna(0.0) - weights_prev).abs().sum(axis=1) * 0.5 + tc_series = turnover * self.transaction_cost + pbar.update(1) + + # Strategy returns + pbar.set_description("Calculating strategy returns") + strat_ret = (weights_prev * returns_df).sum(axis=1) - tc_series + pbar.update(1) + + # Build portfolio value series + pbar.set_description("Building portfolio series") + portfolio_value = (1 + strat_ret).cumprod() * self.initial_capital + portfolio_value = pd.concat( + [ + pd.Series(self.initial_capital, index=[self.prices.index[0]]), + portfolio_value, + ] + ) + portfolio_value = portfolio_value.reindex(self.prices.index).ffill() + pbar.update(1) # Store results self.positions = weights # interpret as weights positions diff --git a/src/quant_research_starter/cli.py b/src/quant_research_starter/cli.py index 18ca8c1..edbc555 100644 --- 
a/src/quant_research_starter/cli.py +++ b/src/quant_research_starter/cli.py @@ -6,6 +6,7 @@ import click import matplotlib.pyplot as plt import pandas as pd +from tqdm import tqdm from .backtest import VectorizedBacktest from .data import SampleDataLoader, SyntheticDataGenerator @@ -29,17 +30,25 @@ def generate_data(output, symbols, days): """Generate synthetic price data.""" click.echo("Generating synthetic price data...") - generator = SyntheticDataGenerator() - prices = generator.generate_price_data( - n_symbols=symbols, days=days, start_date="2020-01-01" - ) + with tqdm(total=3, desc="Data Generation") as pbar: + pbar.set_description("Initializing data generator") + generator = SyntheticDataGenerator() + pbar.update(1) - # Ensure output directory exists - output_path = Path(output) - output_path.parent.mkdir(parents=True, exist_ok=True) + pbar.set_description("Generating price data") + prices = generator.generate_price_data( + n_symbols=symbols, days=days, start_date="2020-01-01" + ) + pbar.update(1) + + # Ensure output directory exists and save + pbar.set_description("Saving data to file") + output_path = Path(output) + output_path.parent.mkdir(parents=True, exist_ok=True) + prices.to_csv(output_path) + pbar.update(1) - prices.to_csv(output_path) - click.echo(f"Generated {symbols} symbols for {days} days -> {output}") + click.echo(f"✅ Generated {symbols} symbols for {days} days -> {output}") @cli.command() @@ -63,47 +72,66 @@ def compute_factors(data_file, factors, output): """Compute factors from price data.""" click.echo(f"Computing factors: {list(factors)}") - # Load data - if Path(data_file).exists(): - prices = pd.read_csv(data_file, index_col=0, parse_dates=True) - else: - click.echo("Data file not found, using sample data...") - loader = SampleDataLoader() - prices = loader.load_sample_prices() - - # Compute selected factors + # Load data with progress + with tqdm(total=2, desc="Loading Data") as pbar: + pbar.set_description("Checking data file") + if 
Path(data_file).exists(): + prices = pd.read_csv(data_file, index_col=0, parse_dates=True) + else: + click.echo("Data file not found, using sample data...") + loader = SampleDataLoader() + prices = loader.load_sample_prices() + pbar.update(1) + + pbar.set_description("Data validation") + n_symbols = len(prices.columns) + n_days = len(prices) + pbar.set_postfix(symbols=n_symbols, days=n_days, refresh=False) + pbar.update(1) + + # Compute selected factors with progress tracking factor_data = {} - - if "momentum" in factors: - click.echo("Computing momentum factor...") - momentum = MomentumFactor(lookback=63) - factor_data["momentum"] = momentum.compute(prices) - - if "value" in factors: - click.echo("Computing value factor...") - value = ValueFactor() - factor_data["value"] = value.compute(prices) - - if "size" in factors: - click.echo("Computing size factor...") - size = SizeFactor() - factor_data["size"] = size.compute(prices) - - if "volatility" in factors: - click.echo("Computing volatility factor...") - vol = VolatilityFactor(lookback=21) - factor_data["volatility"] = vol.compute(prices) - - # Combine factors (simple average for demo) - combined_signals = pd.DataFrame({k: v.mean(axis=1) for k, v in factor_data.items()}) - combined_signals["composite"] = combined_signals.mean(axis=1) - - # Save results - output_path = Path(output) - output_path.parent.mkdir(parents=True, exist_ok=True) - combined_signals.to_csv(output_path) - - click.echo(f"Factors computed -> {output}") + selected_factors = list(factors) + + with tqdm(total=len(selected_factors) + 2, desc="Factor Computation") as pbar: + if "momentum" in factors: + pbar.set_description("Computing momentum factor") + momentum = MomentumFactor(lookback=63) + factor_data["momentum"] = momentum.compute(prices) + pbar.update(1) + + if "value" in factors: + pbar.set_description("Computing value factor") + value = ValueFactor() + factor_data["value"] = value.compute(prices) + pbar.update(1) + + if "size" in factors: + 
pbar.set_description("Computing size factor") + size = SizeFactor() + factor_data["size"] = size.compute(prices) + pbar.update(1) + + if "volatility" in factors: + pbar.set_description("Computing volatility factor") + vol = VolatilityFactor(lookback=21) + factor_data["volatility"] = vol.compute(prices) + pbar.update(1) + + # Combine factors + pbar.set_description("Combining factors") + combined_signals = pd.DataFrame({k: v.mean(axis=1) for k, v in factor_data.items()}) + combined_signals["composite"] = combined_signals.mean(axis=1) + pbar.update(1) + + # Save results + pbar.set_description("Saving factor results") + output_path = Path(output) + output_path.parent.mkdir(parents=True, exist_ok=True) + combined_signals.to_csv(output_path) + pbar.update(1) + + click.echo(f"✅ Factors computed -> {output}") @cli.command() @@ -137,109 +165,126 @@ def backtest(data_file, signals_file, initial_capital, output, plot, plotly): """Run backtest with given signals.""" click.echo("Running backtest...") - # Load data - if Path(data_file).exists(): - prices = pd.read_csv(data_file, index_col=0, parse_dates=True) - else: - click.echo("Data file not found, using sample data...") - loader = SampleDataLoader() - prices = loader.load_sample_prices() - - # Load signals - if Path(signals_file).exists(): - signals_data = pd.read_csv(signals_file, index_col=0, parse_dates=True) - # Use composite signal if available, otherwise first column - if "composite" in signals_data.columns: - signals = signals_data["composite"] + # Load data and signals with progress + with tqdm(total=4, desc="Loading Input Data") as pbar: + pbar.set_description("Loading price data") + if Path(data_file).exists(): + prices = pd.read_csv(data_file, index_col=0, parse_dates=True) else: - signals = signals_data.iloc[:, 0] - else: - click.echo("Signals file not found, computing demo factors...") - momentum = MomentumFactor(lookback=63) - signals = momentum.compute(prices).mean(axis=1) - - # Ensure signals align with 
prices - common_dates = prices.index.intersection(signals.index) - prices = prices.loc[common_dates] - signals = signals.loc[common_dates] - - # Expand signals to all symbols (simplified - same signal for all) - signal_matrix = pd.DataFrame( - dict.fromkeys(prices.columns, signals), index=signals.index - ) - - # Run backtest - backtest = VectorizedBacktest( - prices=prices, - signals=signal_matrix, - initial_capital=initial_capital, - transaction_cost=0.001, - ) - - results = backtest.run(weight_scheme="rank") - - # Calculate metrics - metrics_calc = RiskMetrics(results["returns"]) - metrics = metrics_calc.calculate_all() - - # Save results - output_path = Path(output) - output_path.parent.mkdir(parents=True, exist_ok=True) - - results_dict = { - "metrics": metrics, - "portfolio_value": results["portfolio_value"].tolist(), - "dates": results["portfolio_value"].index.strftime("%Y-%m-%d").tolist(), - } - - with open(output_path, "w") as f: - json.dump(results_dict, f, indent=2) - - # Generate plot - if plot: - plt.figure(figsize=(12, 8)) - - # Plot portfolio value - plt.subplot(2, 1, 1) - plt.plot(results["portfolio_value"].index, results["portfolio_value"].values) - plt.title("Portfolio Value") - plt.ylabel("USD") - plt.grid(True) - - # Plot returns - plt.subplot(2, 1, 2) - plt.bar(results["returns"].index, results["returns"].values, alpha=0.7) - plt.title("Daily Returns") - plt.ylabel("Return") - plt.grid(True) - - plt.tight_layout() - plot_path = output_path.parent / "backtest_plot.png" - plt.savefig(plot_path) - plt.close() - - click.echo(f"Plot saved -> {plot_path}") - - # Generate Plotly HTML chart if requested - if plotly: - html_path = output_path.parent / "backtest_plot.html" - - create_equity_curve_plot( - dates=results_dict["dates"], - portfolio_values=results_dict["portfolio_value"], - initial_capital=initial_capital, - output_path=str(html_path), - plot_type="html", + click.echo("Data file not found, using sample data...") + loader = SampleDataLoader() + 
prices = loader.load_sample_prices() + pbar.update(1) + + pbar.set_description("Loading signal data") + if Path(signals_file).exists(): + signals_data = pd.read_csv(signals_file, index_col=0, parse_dates=True) + if "composite" in signals_data.columns: + signals = signals_data["composite"] + else: + signals = signals_data.iloc[:, 0] + else: + click.echo("Signals file not found, computing demo factors...") + momentum = MomentumFactor(lookback=63) + signals = momentum.compute(prices).mean(axis=1) + pbar.update(1) + + pbar.set_description("Aligning data") + common_dates = prices.index.intersection(signals.index) + prices = prices.loc[common_dates] + signals = signals.loc[common_dates] + pbar.update(1) + + pbar.set_description("Expanding signals") + signal_matrix = pd.DataFrame( + dict.fromkeys(prices.columns, signals), index=signals.index ) + pbar.update(1) - click.echo(f"Plotly HTML chart saved -> {html_path}") + # Run backtest (progress handled inside VectorizedBacktest) + with tqdm(total=1, desc="Running Backtest") as pbar: + backtest = VectorizedBacktest( + prices=prices, + signals=signal_matrix, + initial_capital=initial_capital, + transaction_cost=0.001, + ) - click.echo("Backtest completed!") - click.echo(f"Final portfolio value: ${results['final_value']:,.2f}") - click.echo(f"Total return: {results['total_return']:.2%}") - click.echo(f"Sharpe ratio: {metrics['sharpe_ratio']:.2f}") - click.echo(f"Results saved -> {output}") + results = backtest.run(weight_scheme="rank") + pbar.update(1) + + # Calculate metrics with progress + with tqdm(total=2, desc="Calculating Metrics") as pbar: + pbar.set_description("Computing risk metrics") + metrics_calc = RiskMetrics(results["returns"]) + metrics = metrics_calc.calculate_all() + pbar.update(1) + + pbar.set_description("Saving results") + output_path = Path(output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + results_dict = { + "metrics": metrics, + "portfolio_value": results["portfolio_value"].tolist(), + 
"dates": results["portfolio_value"].index.strftime("%Y-%m-%d").tolist(), + } + + with open(output_path, "w") as f: + json.dump(results_dict, f, indent=2) + pbar.update(1) + + # Generate plots with progress + if plot or plotly: + with tqdm(total=plot + plotly, desc="Generating Visualizations") as pbar: + if plot: + pbar.set_description("Creating matplotlib plot") + plt.figure(figsize=(12, 8)) + + # Plot portfolio value + plt.subplot(2, 1, 1) + plt.plot(results["portfolio_value"].index, results["portfolio_value"].values) + plt.title("Portfolio Value") + plt.ylabel("USD") + plt.grid(True) + + # Plot returns + plt.subplot(2, 1, 2) + plt.bar(results["returns"].index, results["returns"].values, alpha=0.7) + plt.title("Daily Returns") + plt.ylabel("Return") + plt.grid(True) + + plt.tight_layout() + plot_path = output_path.parent / "backtest_plot.png" + plt.savefig(plot_path) + plt.close() + + click.echo(f"📊 Plot saved -> {plot_path}") + pbar.update(1) + + if plotly: + pbar.set_description("Creating Plotly chart") + html_path = output_path.parent / "backtest_plot.html" + + create_equity_curve_plot( + dates=results_dict["dates"], + portfolio_values=results_dict["portfolio_value"], + initial_capital=initial_capital, + output_path=str(html_path), + plot_type="html", + ) + + click.echo(f"📈 Interactive chart saved -> {html_path}") + pbar.update(1) + + # Final results summary + click.echo("🎯 Backtest completed!") + click.echo(f"💰 Final portfolio value: ${results['final_value']:,.2f}") + click.echo(f"📈 Total return: {results['total_return']:.2%}") + click.echo(f"⚡ Sharpe ratio: {metrics['sharpe_ratio']:.2f}") + click.echo(f"💾 Results saved -> {output}") if __name__ == "__main__": - cli() + cli() \ No newline at end of file diff --git a/src/quant_research_starter/data/downloaders.py b/src/quant_research_starter/data/downloaders.py index e6a12fe..dc0651d 100644 --- a/src/quant_research_starter/data/downloaders.py +++ b/src/quant_research_starter/data/downloaders.py @@ -7,6 +7,7 @@ 
import numpy as np import pandas as pd from dotenv import load_dotenv +from tqdm import tqdm load_dotenv() @@ -41,12 +42,14 @@ def download( # In real implementation, use yfinance or similar dates = pd.date_range(start=start_date, end=end_date, freq="D") data = {} - - for symbol in symbols: - # Generate mock price data - np.random.seed(hash(symbol) % 2**32) - prices = 100 + np.cumsum(np.random.randn(len(dates)) * 0.5) - data[symbol] = prices + with tqdm(total=len(symbols),desc="Downloading Yahoo data", unit="symbol") as pbar: + for symbol in symbols: + # Generate mock price data + np.random.seed(hash(symbol) % 2**32) + prices = 100 + np.cumsum(np.random.randn(len(dates)) * 0.5) + data[symbol] = prices + pbar.update(1) + pbar.set_postfix(symbol=symbol, refresh=False) df = pd.DataFrame(data, index=dates) df.index.name = "date" @@ -79,11 +82,13 @@ def download( # Mock implementation - similar to Yahoo downloader dates = pd.date_range(start=start_date, end=end_date, freq="D") data = {} - - for symbol in symbols: - np.random.seed(hash(symbol) % 2**32) - prices = 100 + np.cumsum(np.random.randn(len(dates)) * 0.3) - data[symbol] = prices + with tqdm(total=len(symbols), desc="Downloading Alpha Vantage data", unit="symbol") as pbar: + for symbol in symbols: + np.random.seed(hash(symbol) % 2**32) + prices = 100 + np.cumsum(np.random.randn(len(dates)) * 0.3) + data[symbol] = prices + pbar.update(1) + pbar.set_postfix(symbol=symbol, refresh=False) df = pd.DataFrame(data, index=dates) df.index.name = "date" diff --git a/src/quant_research_starter/data/sample_loader.py b/src/quant_research_starter/data/sample_loader.py index b67531b..8c10652 100644 --- a/src/quant_research_starter/data/sample_loader.py +++ b/src/quant_research_starter/data/sample_loader.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd +from tqdm import tqdm class SampleDataLoader: @@ -29,12 +30,14 @@ def _generate_sample_data(self) -> pd.DataFrame: np.random.seed(42) data = {} - - for symbol in symbols: 
- # Generate realistic-looking price series with trends - returns = np.random.normal(0.0005, 0.02, len(dates)) - prices = 100 * np.cumprod(1 + returns) - data[symbol] = prices + with tqdm(total=len(symbols), desc="Generating sample data", unit="symbol") as pbar: + for symbol in symbols: + # Generate realistic-looking price series with trends + returns = np.random.normal(0.0005, 0.02, len(dates)) + prices = 100 * np.cumprod(1 + returns) + data[symbol] = prices + pbar.update(1) + pbar.set_postfix(symbol=symbol, refresh=False) df = pd.DataFrame(data, index=dates) df.index.name = "date" diff --git a/src/quant_research_starter/data/synthetic.py b/src/quant_research_starter/data/synthetic.py index 1d7abf0..90d45b7 100644 --- a/src/quant_research_starter/data/synthetic.py +++ b/src/quant_research_starter/data/synthetic.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +from tqdm import tqdm class SyntheticDataGenerator: diff --git a/src/quant_research_starter/factors/bollinger.py b/src/quant_research_starter/factors/bollinger.py index ad0ca5a..bb74e86 100644 --- a/src/quant_research_starter/factors/bollinger.py +++ b/src/quant_research_starter/factors/bollinger.py @@ -1,4 +1,5 @@ import pandas as pd +from tqdm import tqdm from .base import Factor @@ -18,15 +19,25 @@ def __init__( def compute(self, prices: pd.DataFrame) -> pd.DataFrame: # Validate data self._validate_data(prices) - - # Rolling statistics - rolling_mean = prices.rolling(self.lookback).mean() - rolling_std = prices.rolling(self.lookback).std() - - # Bollinger z-score - zscore = (prices - rolling_mean) / rolling_std - - # Save results - self._values = zscore - - return zscore + n_symbols = len(prices.columns) + with tqdm(total=4, desc=f"Computing {self.name} ({n_symbols} symbols)") as pbar: + # Rolling statistics + pbar.set_description("Calculating rolling mean") + rolling_mean = prices.rolling(self.lookback).mean() + pbar.update(1) + + pbar.set_description("Calculating rolling standard deviation") + 
rolling_std = prices.rolling(self.lookback).std() + pbar.update(1) + + # Bollinger z-score + pbar.set_description("Computing z-scores") + zscore = (prices - rolling_mean) / rolling_std + pbar.update(1) + + # Save results + pbar.set_description("Finalizing factor values") + self._values = zscore + pbar.update(1) + + return zscore \ No newline at end of file diff --git a/src/quant_research_starter/factors/momentum.py b/src/quant_research_starter/factors/momentum.py index c63eeb4..8d5b026 100644 --- a/src/quant_research_starter/factors/momentum.py +++ b/src/quant_research_starter/factors/momentum.py @@ -1,6 +1,7 @@ """Momentum factor implementations.""" import pandas as pd +from tqdm import tqdm from .base import Factor @@ -32,13 +33,22 @@ def compute(self, prices: pd.DataFrame) -> pd.DataFrame: if len(prices) < total_lookback: raise ValueError(f"Need at least {total_lookback} periods of data") - # Calculate momentum - shifted_prices = prices.shift(self.skip_period) - momentum = (shifted_prices / shifted_prices.shift(self.lookback)) - 1 + n_symbols = len(prices.columns) + n_days = len(prices) + with tqdm(total=3, desc=f"Momentum({self.lookback}d): {n_symbols} symbols") as pbar: + pbar.set_description("Shifting prices") + shifted_prices = prices.shift(self.skip_period) + pbar.update(1) - # Keep alignment: back-fill so the earliest valid window propagates forward - # This matches unit tests expecting the last value to reflect the first valid window - momentum = momentum.bfill() + pbar.set_description("Calculating momentum returns") + momentum = (shifted_prices / shifted_prices.shift(self.lookback)) - 1 + pbar.update(1) + + pbar.set_description("Back-filling missing values") + # Keep alignment: back-fill so the earliest valid window propagates forward + # This matches unit tests expecting the last value to reflect the first valid window + momentum = momentum.bfill() + pbar.update(1) self._values = momentum return momentum @@ -55,9 +65,22 @@ def compute(self, prices: 
pd.DataFrame) -> pd.DataFrame: """Compute cross-sectional momentum z-scores.""" raw_momentum = super().compute(prices) - # Z-score normalization cross-sectionally - z_scores = raw_momentum.sub(raw_momentum.mean(axis=1), axis=0) - z_scores = z_scores.div(raw_momentum.std(axis=1), axis=0) + n_days = len(raw_momentum) + + # Z-score normalization cross-sectionally with progress + with tqdm(total=3, desc="Cross-sectional normalization") as pbar: + pbar.set_description("Calculating daily means") + daily_means = raw_momentum.mean(axis=1) + pbar.update(1) + + pbar.set_description("Calculating daily standard deviations") + daily_stds = raw_momentum.std(axis=1) + pbar.update(1) + + pbar.set_description("Computing z-scores") + z_scores = raw_momentum.sub(daily_means, axis=0) + z_scores = z_scores.div(daily_stds, axis=0) + pbar.update(1) self._values = z_scores - return z_scores + return z_scores \ No newline at end of file diff --git a/src/quant_research_starter/factors/value.py b/src/quant_research_starter/factors/value.py index 8313f47..9396f1a 100644 --- a/src/quant_research_starter/factors/value.py +++ b/src/quant_research_starter/factors/value.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +from tqdm import tqdm from .base import Factor @@ -30,29 +31,40 @@ def compute(self, prices: pd.DataFrame) -> pd.DataFrame: """ self._validate_data(prices) - # Create synthetic value scores that persist but have some noise - np.random.seed(42) # For reproducible synthetic data n_assets = prices.shape[1] + n_days = len(prices) + with tqdm(total=5, desc=f"Value Factor: {n_assets} assets") as pbar: + # Create synthetic value scores that persist but have some noise + pbar.set_description("Seeding random generator") + np.random.seed(42) # For reproducible synthetic data + pbar.update(1) - # Base value scores (simulate persistent value characteristics) - base_scores = np.random.normal(0, 1, n_assets) + # Base value scores (simulate persistent value characteristics) + 
pbar.set_description("Generating base value scores") + base_scores = np.random.normal(0, 1, n_assets) + pbar.update(1) - # Add some time-varying component (value factors change slowly) - days = len(prices) - noise = np.random.normal(0, 0.1, (days, n_assets)) - time_trend = np.linspace(0, 0.5, days).reshape(-1, 1) + # Add some time-varying component (value factors change slowly) + pbar.set_description("Creating time-varying components") + noise = np.random.normal(0, 0.1, (n_days, n_assets)) + time_trend = np.linspace(0, 0.5, n_days).reshape(-1, 1) + pbar.update(1) - # Combine to create value scores - value_scores = base_scores + time_trend + noise + # Combine to create value scores + pbar.set_description("Combining value components") + value_scores = base_scores + time_trend + noise + pbar.update(1) - # Create DataFrame - value_df = pd.DataFrame( - value_scores, index=prices.index, columns=prices.columns - ) + # Create DataFrame and normalize + pbar.set_description("Normalizing cross-sectionally") + value_df = pd.DataFrame( + value_scores, index=prices.index, columns=prices.columns + ) - # Z-score normalize cross-sectionally each day - value_z = value_df.sub(value_df.mean(axis=1), axis=0) - value_z = value_z.div(value_df.std(axis=1), axis=0) + # Z-score normalize cross-sectionally each day + value_z = value_df.sub(value_df.mean(axis=1), axis=0) + value_z = value_z.div(value_df.std(axis=1), axis=0) + pbar.update(1) self._values = value_z - return value_z + return value_z \ No newline at end of file