1717import numpy as np
1818import warnings
1919
20+ import empyrical as ep
2021from pandas .tseries .offsets import BDay
2122from scipy import stats
2223from statsmodels .regression .linear_model import OLS
@@ -329,169 +330,27 @@ def factor_alpha_beta(factor_data,
329330 return alpha_beta
330331
331332
332- def cumulative_returns (returns , period , freq = None ):
333+ def cumulative_returns (returns ):
333334 """
334- Builds cumulative returns from 'period' returns. This function simulates
335- the cumulative effect that a series of gains or losses (the 'returns')
336- have on an original amount of capital over a period of time.
337-
338- if F is the frequency at which returns are computed (e.g. 1 day if
339- 'returns' contains daily values) and N is the period for which the returns
340- are computed (e.g. returns after 1 day, 5 hours or 3 days) then:
341- - if N <= F the cumulative returns are trivially computed as Compound Return
342- - if N > F (e.g. F 1 day, and N is 3 days) then the returns overlap and the
343- cumulative returns are computed building and averaging N interleaved sub
344- portfolios (started at subsequent periods 1,2,..,N) each one rebalancing
345- every N periods. This corresponds to an algorithm which trades the factor
346- every single time it is computed, which is statistically more robust and
347- with a lower volatility compared to an algorithm that trades the factor
348- every N periods and whose returns depend on the specific starting day of
349- trading.
350-
351- Also note that when the factor is not computed at a specific frequency, for
352- example a factor representing a random event, it is not efficient to create
353- multiple sub-portfolios as it is not certain when the factor will be
354- traded and this would result in an underleveraged portfolio. In this case
355- the simulated portfolio is fully invested whenever an event happens and if
356- a subsequent event occurs while the portfolio is still invested in a
357- previous event then the portfolio is rebalanced and split equally among the
358- active events.
335+ Computes cumulative returns from simple daily returns.
359336
360337 Parameters
361338 ----------
362339 returns: pd.Series
363- pd.Series containing factor 'period' forward returns, the index
364- contains timestamps at which the trades are computed and the values
365- correspond to returns after 'period' time
366- period: pandas.Timedelta or string
367- Length of period for which the returns are computed (1 day, 2 mins,
368- 3 hours etc). It can be a Timedelta or a string in the format accepted
369- by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc)
370- freq : pandas DateOffset, optional
371- Used to specify a particular trading calendar. If not present
372- returns.index.freq will be used
340+ pd.Series containing daily factor returns (i.e. '1D' returns).
373341
374342 Returns
375343 -------
376344 Cumulative returns series : pd.Series
377345 Example:
378- 2015-07-16 09:30:00 -0.012143
379- 2015-07-16 12:30:00 0.012546
380- 2015-07-17 09:30:00 0.045350
381- 2015-07-17 12:30:00 0.065897
382- 2015-07-20 09:30:00 0.030957
346+ 2015-01-05 1.001310
347+ 2015-01-06 1.000805
348+ 2015-01-07 1.001092
349+ 2015-01-08 0.999200
383350 """
384351
385- if not isinstance (period , pd .Timedelta ):
386- period = pd .Timedelta (period )
352+ return ep .cum_returns (returns , starting_value = 1 )
387353
388- if freq is None :
389- freq = returns .index .freq
390-
391- if freq is None :
392- freq = BDay ()
393- warnings .warn ("'freq' not set, using business day calendar" ,
394- UserWarning )
395-
396- #
397- # returns index contains factor computation timestamps, then add returns
398- # timestamps too (factor timestamps + period) and save them to 'full_idx'
399- # Cumulative returns will use 'full_idx' index,because we want a cumulative
400- # returns value for each entry in 'full_idx'
401- #
402- trades_idx = returns .index .copy ()
403- returns_idx = utils .add_custom_calendar_timedelta (trades_idx , period , freq )
404- full_idx = trades_idx .union (returns_idx )
405-
406- #
407- # Build N sub_returns from the single returns Series. Each sub_returns
408- # stream will contain non-overlapping returns.
409- # In the next step we'll compute the portfolio returns averaging the
410- # returns happening on those overlapping returns streams
411- #
412- sub_returns = []
413- while len (trades_idx ) > 0 :
414-
415- #
416- # select non-overlapping returns starting with first timestamp in index
417- #
418- sub_index = []
419- next = trades_idx .min ()
420- while next <= trades_idx .max ():
421- sub_index .append (next )
422- next = utils .add_custom_calendar_timedelta (next , period , freq )
423- # make sure to fetch the next available entry after 'period'
424- try :
425- i = trades_idx .get_loc (next , method = 'bfill' )
426- next = trades_idx [i ]
427- except KeyError :
428- break
429-
430- sub_index = pd .DatetimeIndex (sub_index , tz = full_idx .tz )
431- subret = returns [sub_index ]
432-
433- # make the index to have all entries in 'full_idx'
434- subret = subret .reindex (full_idx )
435-
436- #
437- # compute intermediate returns values for each index in subret that are
438- # in between the timestamps at which the factors are computed and the
439- # timestamps at which the 'period' returns actually happen
440- #
441- for pret_idx in reversed (sub_index ):
442-
443- pret = subret [pret_idx ]
444-
445- # get all timestamps between factor computation and period returns
446- pret_end_idx = \
447- utils .add_custom_calendar_timedelta (pret_idx , period , freq )
448- slice = subret [(subret .index > pret_idx ) & (
449- subret .index <= pret_end_idx )].index
450-
451- if pd .isnull (pret ):
452- continue
453-
454- def rate_of_returns (ret , period ):
455- return ((np .nansum (ret ) + 1 )** (1. / period )) - 1
456-
457- # compute intermediate 'period' returns values, note that this also
458- # moves the final 'period' returns value from trading timestamp to
459- # trading timestamp + 'period'
460- for slice_idx in slice :
461- sub_period = utils .diff_custom_calendar_timedeltas (
462- pret_idx , slice_idx , freq )
463- subret [slice_idx ] = rate_of_returns (pret , period / sub_period )
464-
465- subret [pret_idx ] = np .nan
466-
467- # transform returns as percentage change from previous value
468- subret [slice [1 :]] = (subret [slice ] + 1 ).pct_change ()[slice [1 :]]
469-
470- sub_returns .append (subret )
471- trades_idx = trades_idx .difference (sub_index )
472-
473- #
474- # Compute portfolio cumulative returns averaging the returns happening on
475- # overlapping returns streams.
476- #
477- sub_portfolios = pd .concat (sub_returns , axis = 1 )
478- portfolio = pd .Series (index = sub_portfolios .index )
479-
480- for i , (index , row ) in enumerate (sub_portfolios .iterrows ()):
481-
482- # check the active portfolios, count() returns non-nans elements
483- active_subfolios = row .count ()
484-
485- # fill forward portfolio value
486- portfolio .iloc [i ] = portfolio .iloc [i - 1 ] if i > 0 else 1.
487-
488- if active_subfolios <= 0 :
489- continue
490-
491- # current portfolio is the average of active sub_portfolios
492- portfolio .iloc [i ] *= (row + 1 ).mean (skipna = True )
493-
494- return portfolio
495354
496355
497356def positions (weights , period , freq = None ):
@@ -709,7 +568,7 @@ def compute_mean_returns_spread(mean_returns,
709568
710569def quantile_turnover (quantile_factor , quantile , period = 1 ):
711570 """
712- Computes the proportion of names in a factor quantile that were
571+ Computes the daily proportion of names in a factor quantile that were
713572 not in that quantile in the previous period.
714573
715574 Parameters
@@ -718,10 +577,8 @@ def quantile_turnover(quantile_factor, quantile, period=1):
718577 DataFrame with date, asset and factor quantile.
719578 quantile : int
720579 Quantile on which to perform turnover analysis.
721- period: string or int, optional
722- Period over which to calculate the turnover. If it is a string it must
723- follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m',
724- '3h', '1D1h', etc).
580+ period: int, optional
581+ Number of days over which to calculate the turnover.
725582 Returns
726583 -------
727584 quant_turnover : pd.Series
@@ -732,14 +589,7 @@ def quantile_turnover(quantile_factor, quantile, period=1):
732589 quant_name_sets = quant_names .groupby (level = ['date' ]).apply (
733590 lambda x : set (x .index .get_level_values ('asset' )))
734591
735- if isinstance (period , int ):
736- name_shifted = quant_name_sets .shift (period )
737- else :
738- shifted_idx = utils .add_custom_calendar_timedelta (
739- quant_name_sets .index , - pd .Timedelta (period ),
740- quantile_factor .index .levels [0 ].freq )
741- name_shifted = quant_name_sets .reindex (shifted_idx )
742- name_shifted .index = quant_name_sets .index
592+ name_shifted = quant_name_sets .shift (period )
743593
744594 new_names = (quant_name_sets - name_shifted ).dropna ()
745595 quant_turnover = new_names .apply (
@@ -765,10 +615,8 @@ def factor_rank_autocorrelation(factor_data, period=1):
765615 each period, the factor quantile/bin that factor value belongs to, and
766616 (optionally) the group the asset belongs to.
767617 - See full explanation in utils.get_clean_factor_and_forward_returns
768- period: string or int, optional
769- Period over which to calculate the turnover. If it is a string it must
770- follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m',
771- '3h', '1D1h', etc).
618+ period: int, optional
619+ Number of days over which to calculate the factor rank autocorrelation.
772620 Returns
773621 -------
774622 autocorr : pd.Series
@@ -785,22 +633,15 @@ def factor_rank_autocorrelation(factor_data, period=1):
785633 columns = 'asset' ,
786634 values = 'factor' )
787635
788- if isinstance (period , int ):
789- asset_shifted = asset_factor_rank .shift (period )
790- else :
791- shifted_idx = utils .add_custom_calendar_timedelta (
792- asset_factor_rank .index , - pd .Timedelta (period ),
793- factor_data .index .levels [0 ].freq )
794- asset_shifted = asset_factor_rank .reindex (shifted_idx )
795- asset_shifted .index = asset_factor_rank .index
636+ asset_shifted = asset_factor_rank .shift (period )
796637
797638 autocorr = asset_factor_rank .corrwith (asset_shifted , axis = 1 )
798639 autocorr .name = period
799640 return autocorr
800641
801642
802643def common_start_returns (factor ,
803- prices ,
644+ returns ,
804645 before ,
805646 after ,
806647 cumulative = False ,
@@ -845,10 +686,8 @@ def common_start_returns(factor,
845686 index: -before to after
846687 """
847688
848- if cumulative :
849- returns = prices
850- else :
851- returns = prices .pct_change (axis = 0 )
689+ if not cumulative :
690+ returns = returns .apply (cumulative_returns , axis = 0 )
852691
853692 all_returns = []
854693
@@ -893,7 +732,7 @@ def common_start_returns(factor,
893732
894733
895734def average_cumulative_return_by_quantile (factor_data ,
896- prices ,
735+ returns ,
897736 periods_before = 10 ,
898737 periods_after = 15 ,
899738 demeaned = True ,
@@ -952,16 +791,18 @@ def average_cumulative_return_by_quantile(factor_data,
952791 ---------------------------------------------------
953792 """
954793
955- def cumulative_return (q_fact , demean_by ):
956- return common_start_returns (q_fact , prices ,
794+ def cumulative_return_around_event (q_fact , demean_by ):
795+ return common_start_returns (q_fact , returns ,
957796 periods_before ,
958797 periods_after ,
959798 True , True , demean_by )
960799
961800 def average_cumulative_return (q_fact , demean_by ):
962- q_returns = cumulative_return (q_fact , demean_by )
963- return pd .DataFrame ({'mean' : q_returns .mean (axis = 1 ),
964- 'std' : q_returns .std (axis = 1 )}).T
801+ q_returns = cumulative_return_around_event (q_fact , demean_by )
802+ q_returns .replace ([np .inf , - np .inf ], np .nan , inplace = True )
803+
804+ return pd .DataFrame ({'mean' : q_returns .mean (skipna = True , axis = 1 ),
805+ 'std' : q_returns .std (skipna = True , axis = 1 )}).T
965806
966807 if by_group :
967808 #
0 commit comments