Skip to content

Commit 7eda0a4

Browse files
jmccorriston authored and Gerry Manoim committed
Performance improvements and other changes.
1 parent 3e88615 commit 7eda0a4

File tree

5 files changed

+309
-227
lines changed

5 files changed

+309
-227
lines changed

alphalens/performance.py

Lines changed: 27 additions & 186 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import numpy as np
1818
import warnings
1919

20+
import empyrical as ep
2021
from pandas.tseries.offsets import BDay
2122
from scipy import stats
2223
from statsmodels.regression.linear_model import OLS
@@ -329,169 +330,27 @@ def factor_alpha_beta(factor_data,
329330
return alpha_beta
330331

331332

332-
def cumulative_returns(returns, period, freq=None):
333+
def cumulative_returns(returns):
333334
"""
334-
Builds cumulative returns from 'period' returns. This function simulates
335-
the cumulative effect that a series of gains or losses (the 'returns')
336-
have on an original amount of capital over a period of time.
337-
338-
if F is the frequency at which returns are computed (e.g. 1 day if
339-
'returns' contains daily values) and N is the period for which the retuns
340-
are computed (e.g. returns after 1 day, 5 hours or 3 days) then:
341-
- if N <= F the cumulative retuns are trivially computed as Compound Return
342-
- if N > F (e.g. F 1 day, and N is 3 days) then the returns overlap and the
343-
cumulative returns are computed building and averaging N interleaved sub
344-
portfolios (started at subsequent periods 1,2,..,N) each one rebalancing
345-
every N periods. This correspond to an algorithm which trades the factor
346-
every single time it is computed, which is statistically more robust and
347-
with a lower volatity compared to an algorithm that trades the factor
348-
every N periods and whose returns depend on the specific starting day of
349-
trading.
350-
351-
Also note that when the factor is not computed at a specific frequency, for
352-
exaple a factor representing a random event, it is not efficient to create
353-
multiples sub-portfolios as it is not certain when the factor will be
354-
traded and this would result in an underleveraged portfolio. In this case
355-
the simulated portfolio is fully invested whenever an event happens and if
356-
a subsequent event occur while the portfolio is still invested in a
357-
previous event then the portfolio is rebalanced and split equally among the
358-
active events.
335+
Computes cumulative returns from simple daily returns.
359336
360337
Parameters
361338
----------
362339
returns: pd.Series
363-
pd.Series containing factor 'period' forward returns, the index
364-
contains timestamps at which the trades are computed and the values
365-
correspond to returns after 'period' time
366-
period: pandas.Timedelta or string
367-
Length of period for which the returns are computed (1 day, 2 mins,
368-
3 hours etc). It can be a Timedelta or a string in the format accepted
369-
by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc)
370-
freq : pandas DateOffset, optional
371-
Used to specify a particular trading calendar. If not present
372-
returns.index.freq will be used
340+
pd.Series containing daily factor returns (i.e. '1D' returns).
373341
374342
Returns
375343
-------
376344
Cumulative returns series : pd.Series
377345
Example:
378-
2015-07-16 09:30:00 -0.012143
379-
2015-07-16 12:30:00 0.012546
380-
2015-07-17 09:30:00 0.045350
381-
2015-07-17 12:30:00 0.065897
382-
2015-07-20 09:30:00 0.030957
346+
2015-01-05 1.001310
347+
2015-01-06 1.000805
348+
2015-01-07 1.001092
349+
2015-01-08 0.999200
383350
"""
384351

385-
if not isinstance(period, pd.Timedelta):
386-
period = pd.Timedelta(period)
352+
return ep.cum_returns(returns, starting_value=1)
387353

388-
if freq is None:
389-
freq = returns.index.freq
390-
391-
if freq is None:
392-
freq = BDay()
393-
warnings.warn("'freq' not set, using business day calendar",
394-
UserWarning)
395-
396-
#
397-
# returns index contains factor computation timestamps, then add returns
398-
# timestamps too (factor timestamps + period) and save them to 'full_idx'
399-
# Cumulative returns will use 'full_idx' index,because we want a cumulative
400-
# returns value for each entry in 'full_idx'
401-
#
402-
trades_idx = returns.index.copy()
403-
returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq)
404-
full_idx = trades_idx.union(returns_idx)
405-
406-
#
407-
# Build N sub_returns from the single returns Series. Each sub_retuns
408-
# stream will contain non-overlapping returns.
409-
# In the next step we'll compute the portfolio returns averaging the
410-
# returns happening on those overlapping returns streams
411-
#
412-
sub_returns = []
413-
while len(trades_idx) > 0:
414-
415-
#
416-
# select non-overlapping returns starting with first timestamp in index
417-
#
418-
sub_index = []
419-
next = trades_idx.min()
420-
while next <= trades_idx.max():
421-
sub_index.append(next)
422-
next = utils.add_custom_calendar_timedelta(next, period, freq)
423-
# make sure to fetch the next available entry after 'period'
424-
try:
425-
i = trades_idx.get_loc(next, method='bfill')
426-
next = trades_idx[i]
427-
except KeyError:
428-
break
429-
430-
sub_index = pd.DatetimeIndex(sub_index, tz=full_idx.tz)
431-
subret = returns[sub_index]
432-
433-
# make the index to have all entries in 'full_idx'
434-
subret = subret.reindex(full_idx)
435-
436-
#
437-
# compute intermediate returns values for each index in subret that are
438-
# in between the timestaps at which the factors are computed and the
439-
# timestamps at which the 'period' returns actually happen
440-
#
441-
for pret_idx in reversed(sub_index):
442-
443-
pret = subret[pret_idx]
444-
445-
# get all timestamps between factor computation and period returns
446-
pret_end_idx = \
447-
utils.add_custom_calendar_timedelta(pret_idx, period, freq)
448-
slice = subret[(subret.index > pret_idx) & (
449-
subret.index <= pret_end_idx)].index
450-
451-
if pd.isnull(pret):
452-
continue
453-
454-
def rate_of_returns(ret, period):
455-
return ((np.nansum(ret) + 1)**(1. / period)) - 1
456-
457-
# compute intermediate 'period' returns values, note that this also
458-
# moves the final 'period' returns value from trading timestamp to
459-
# trading timestamp + 'period'
460-
for slice_idx in slice:
461-
sub_period = utils.diff_custom_calendar_timedeltas(
462-
pret_idx, slice_idx, freq)
463-
subret[slice_idx] = rate_of_returns(pret, period / sub_period)
464-
465-
subret[pret_idx] = np.nan
466-
467-
# transform returns as percentage change from previous value
468-
subret[slice[1:]] = (subret[slice] + 1).pct_change()[slice[1:]]
469-
470-
sub_returns.append(subret)
471-
trades_idx = trades_idx.difference(sub_index)
472-
473-
#
474-
# Compute portfolio cumulative returns averaging the returns happening on
475-
# overlapping returns streams.
476-
#
477-
sub_portfolios = pd.concat(sub_returns, axis=1)
478-
portfolio = pd.Series(index=sub_portfolios.index)
479-
480-
for i, (index, row) in enumerate(sub_portfolios.iterrows()):
481-
482-
# check the active portfolios, count() returns non-nans elements
483-
active_subfolios = row.count()
484-
485-
# fill forward portfolio value
486-
portfolio.iloc[i] = portfolio.iloc[i - 1] if i > 0 else 1.
487-
488-
if active_subfolios <= 0:
489-
continue
490-
491-
# current portfolio is the average of active sub_portfolios
492-
portfolio.iloc[i] *= (row + 1).mean(skipna=True)
493-
494-
return portfolio
495354

496355

497356
def positions(weights, period, freq=None):
@@ -709,7 +568,7 @@ def compute_mean_returns_spread(mean_returns,
709568

710569
def quantile_turnover(quantile_factor, quantile, period=1):
711570
"""
712-
Computes the proportion of names in a factor quantile that were
571+
Computes the daily proportion of names in a factor quantile that were
713572
not in that quantile in the previous period.
714573
715574
Parameters
@@ -718,10 +577,8 @@ def quantile_turnover(quantile_factor, quantile, period=1):
718577
DataFrame with date, asset and factor quantile.
719578
quantile : int
720579
Quantile on which to perform turnover analysis.
721-
period: string or int, optional
722-
Period over which to calculate the turnover. If it is a string it must
723-
follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m',
724-
'3h', '1D1h', etc).
580+
period: int, optional
581+
Number of days over which to calculate the turnover.
725582
Returns
726583
-------
727584
quant_turnover : pd.Series
@@ -732,14 +589,7 @@ def quantile_turnover(quantile_factor, quantile, period=1):
732589
quant_name_sets = quant_names.groupby(level=['date']).apply(
733590
lambda x: set(x.index.get_level_values('asset')))
734591

735-
if isinstance(period, int):
736-
name_shifted = quant_name_sets.shift(period)
737-
else:
738-
shifted_idx = utils.add_custom_calendar_timedelta(
739-
quant_name_sets.index, -pd.Timedelta(period),
740-
quantile_factor.index.levels[0].freq)
741-
name_shifted = quant_name_sets.reindex(shifted_idx)
742-
name_shifted.index = quant_name_sets.index
592+
name_shifted = quant_name_sets.shift(period)
743593

744594
new_names = (quant_name_sets - name_shifted).dropna()
745595
quant_turnover = new_names.apply(
@@ -765,10 +615,8 @@ def factor_rank_autocorrelation(factor_data, period=1):
765615
each period, the factor quantile/bin that factor value belongs to, and
766616
(optionally) the group the asset belongs to.
767617
- See full explanation in utils.get_clean_factor_and_forward_returns
768-
period: string or int, optional
769-
Period over which to calculate the turnover. If it is a string it must
770-
follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m',
771-
'3h', '1D1h', etc).
618+
period: int, optional
619+
Number of days over which to calculate the turnover.
772620
Returns
773621
-------
774622
autocorr : pd.Series
@@ -785,22 +633,15 @@ def factor_rank_autocorrelation(factor_data, period=1):
785633
columns='asset',
786634
values='factor')
787635

788-
if isinstance(period, int):
789-
asset_shifted = asset_factor_rank.shift(period)
790-
else:
791-
shifted_idx = utils.add_custom_calendar_timedelta(
792-
asset_factor_rank.index, -pd.Timedelta(period),
793-
factor_data.index.levels[0].freq)
794-
asset_shifted = asset_factor_rank.reindex(shifted_idx)
795-
asset_shifted.index = asset_factor_rank.index
636+
asset_shifted = asset_factor_rank.shift(period)
796637

797638
autocorr = asset_factor_rank.corrwith(asset_shifted, axis=1)
798639
autocorr.name = period
799640
return autocorr
800641

801642

802643
def common_start_returns(factor,
803-
prices,
644+
returns,
804645
before,
805646
after,
806647
cumulative=False,
@@ -845,10 +686,8 @@ def common_start_returns(factor,
845686
index: -before to after
846687
"""
847688

848-
if cumulative:
849-
returns = prices
850-
else:
851-
returns = prices.pct_change(axis=0)
689+
if not cumulative:
690+
returns = returns.apply(cumulative_returns, axis=0)
852691

853692
all_returns = []
854693

@@ -893,7 +732,7 @@ def common_start_returns(factor,
893732

894733

895734
def average_cumulative_return_by_quantile(factor_data,
896-
prices,
735+
returns,
897736
periods_before=10,
898737
periods_after=15,
899738
demeaned=True,
@@ -952,16 +791,18 @@ def average_cumulative_return_by_quantile(factor_data,
952791
---------------------------------------------------
953792
"""
954793

955-
def cumulative_return(q_fact, demean_by):
956-
return common_start_returns(q_fact, prices,
794+
def cumulative_return_around_event(q_fact, demean_by):
795+
return common_start_returns(q_fact, returns,
957796
periods_before,
958797
periods_after,
959798
True, True, demean_by)
960799

961800
def average_cumulative_return(q_fact, demean_by):
962-
q_returns = cumulative_return(q_fact, demean_by)
963-
return pd.DataFrame({'mean': q_returns.mean(axis=1),
964-
'std': q_returns.std(axis=1)}).T
801+
q_returns = cumulative_return_around_event(q_fact, demean_by)
802+
q_returns.replace([np.inf, -np.inf], np.nan, inplace=True)
803+
804+
return pd.DataFrame({'mean': q_returns.mean(skipna=True, axis=1),
805+
'std': q_returns.std(skipna=True, axis=1)}).T
965806

966807
if by_group:
967808
#

alphalens/plotting.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,11 @@ def plot_turnover_table(autocorrelation_data, quantile_turnover):
152152
for period in sorted(quantile_turnover.keys()):
153153
for quantile, p_data in quantile_turnover[period].iteritems():
154154
turnover_table.loc["Quantile {} Mean Turnover ".format(quantile),
155-
"{}".format(period)] = p_data.mean()
155+
"{}D".format(period)] = p_data.mean()
156156
auto_corr = pd.DataFrame()
157157
for period, p_data in autocorrelation_data.iteritems():
158158
auto_corr.loc["Mean Factor Rank Autocorrelation",
159-
"{}".format(period)] = p_data.mean()
159+
"{}D".format(period)] = p_data.mean()
160160

161161
print("Turnover Analysis")
162162
utils.print_table(turnover_table.apply(lambda x: x.round(3)))
@@ -607,7 +607,7 @@ def plot_factor_rank_auto_correlation(factor_autocorrelation,
607607
if ax is None:
608608
f, ax = plt.subplots(1, 1, figsize=(18, 6))
609609

610-
factor_autocorrelation.plot(title='{} Period Factor Rank Autocorrelation'
610+
factor_autocorrelation.plot(title='{}D Period Factor Rank Autocorrelation'
611611
.format(period), ax=ax)
612612
ax.set(ylabel='Autocorrelation Coefficient', xlabel='')
613613
ax.axhline(0.0, linestyle='-', color='black', lw=1)
@@ -646,7 +646,7 @@ def plot_top_bottom_quantile_turnover(quantile_turnover, period=1, ax=None):
646646
turnover = pd.DataFrame()
647647
turnover['top quantile turnover'] = quantile_turnover[max_quantile]
648648
turnover['bottom quantile turnover'] = quantile_turnover[min_quantile]
649-
turnover.plot(title='{} Period Top and Bottom Quantile Turnover'
649+
turnover.plot(title='{}D Period Top and Bottom Quantile Turnover'
650650
.format(period), ax=ax, alpha=0.6, lw=0.8)
651651
ax.set(ylabel='Proportion Of Names New To Quantile', xlabel="")
652652

@@ -711,7 +711,11 @@ def plot_monthly_ic_heatmap(mean_monthly_ic, ax=None):
711711
return ax
712712

713713

714-
def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None):
714+
def plot_cumulative_returns(factor_returns,
715+
period,
716+
freq=None,
717+
title=None,
718+
ax=None):
715719
"""
716720
Plots the cumulative returns of the returns series passed in.
717721
@@ -720,7 +724,7 @@ def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None):
720724
factor_returns : pd.Series
721725
Period wise returns of dollar neutral portfolio weighted by factor
722726
value.
723-
period: pandas.Timedelta or string
727+
period : pandas.Timedelta or string
724728
Length of period for which the returns are computed (e.g. 1 day)
725729
if 'period' is a string it must follow pandas.Timedelta constructor
726730
format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc)
@@ -742,7 +746,7 @@ def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None):
742746
if ax is None:
743747
f, ax = plt.subplots(1, 1, figsize=(18, 6))
744748

745-
factor_returns = perf.cumulative_returns(factor_returns, period, freq)
749+
factor_returns = perf.cumulative_returns(factor_returns)
746750

747751
factor_returns.plot(ax=ax, lw=3, color='forestgreen', alpha=0.6)
748752
ax.set(ylabel='Cumulative Returns',
@@ -756,7 +760,7 @@ def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None):
756760

757761
def plot_cumulative_returns_by_quantile(quantile_returns,
758762
period,
759-
freq,
763+
freq=None,
760764
ax=None):
761765
"""
762766
Plots the cumulative returns of various factor quantiles.
@@ -765,7 +769,7 @@ def plot_cumulative_returns_by_quantile(quantile_returns,
765769
----------
766770
quantile_returns : pd.DataFrame
767771
Returns by factor quantile
768-
period: pandas.Timedelta or string
772+
period : pandas.Timedelta or string
769773
Length of period for which the returns are computed (e.g. 1 day)
770774
if 'period' is a string it must follow pandas.Timedelta constructor
771775
format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc)
@@ -787,7 +791,9 @@ def plot_cumulative_returns_by_quantile(quantile_returns,
787791

788792
ret_wide = quantile_returns.unstack('factor_quantile')
789793

790-
cum_ret = ret_wide.apply(perf.cumulative_returns, period=period, freq=freq)
794+
cum_ret = ret_wide.apply(perf.cumulative_returns)
795+
796+
791797
cum_ret = cum_ret.loc[:, ::-1] # we want negative quantiles as 'red'
792798

793799
cum_ret.plot(lw=2, ax=ax, cmap=cm.coolwarm)

0 commit comments

Comments
 (0)