55from sklearn .decomposition import PCA
66from statsmodels .tools .tools import add_constant
77
8- from pyacm .utils import vec , vec_quad_form , commutation_matrix
98
10- # TODO Curve in daily frequency could be None
11- # TODO Make sure it works for DI Futures
12- # TODO make sure data is accepted as decimals
139class NominalACM :
1410 """
1511 This class implements the model from the article:
@@ -42,22 +38,19 @@ class NominalACM:
4238 Yield curve data resampled to a monthly frequency by averaging
4339 the observations
4440
45- t: int
46- Number of observations in the timeseries dimension
41+ t_m: int
42+ Number of observations in the monthly timeseries dimension
43+
44+ t_d: int
45+ Number of observations in the daily timeseries dimension
4746
4847 n: int
49- Number of observations in the cross-sectional dimension. Same
50- as number of maturities available after returns are computed
48+ Number of observations in the cross-sectional dimension, the number of
49+ maturities available
5150
5251 rx_m: pd.DataFrame
5352 Excess returns in monthly frequency
5453
55- rf_m: pandas.Series
56- Risk-free rate in monthly frequency
57-
58- rf_d: pandas.Series
59- Risk-free rate in daily frequency
60-
6154 pc_factors_m: pandas.DataFrame
6255 Principal components in monthly frequency
6356
@@ -70,23 +63,26 @@ class NominalACM:
7063 pc_factors_d: pandas.DataFrame
7164 Principal components in daily frequency
7265
73- pc_loadings_d: pandas.DataFrame
74- Factor loadings of the daily PCs
75-
76- pc_explained_d: pandas.Series
77- Percent of total variance explained by each monthly principal component
78-
7966 mu, phi, Sigma, v: numpy.array
8067 Estimates of the VAR(1) parameters, the first stage of estimation.
8168 The names are the same as the original paper
8269
83- a, beta, c, sigma2 : numpy.array
70+ beta: numpy.array
8471 Estimates of the risk premium equation, the second stage of estimation.
85- The names are the same as the original paper
72+ The name is the same as the original paper
8673
8774 lambda0, lambda1: numpy.array
88- Estimates of the price of risk parameters, the third stage of estimation.
89- The names are the same as the original paper
75+ Estimates of the price of risk parameters, the third stage of
76+ estimation.
77+
78+ delta0, delta1: numpy.array
79+ Estimates of the short rate equation coefficients.
80+
81+ A, B: numpy.array
82+ Affine coefficients for the fitted yields of different maturities
83+
84+ Arn, Brn: numpy.array
85+ Affine coefficients for the risk neutral yields of different maturities
9086
9187 miy: pandas.DataFrame
9288 Model implied / fitted yields
@@ -100,25 +96,16 @@ class NominalACM:
10096 er_loadings: pandas.DataFrame
10197 Loadings of the expected returns on the principal components
10298
103- er_hist_m: pandas.DataFrame
104- Historical estimates of expected returns, computed in-sample, in monthly frequency
105-
106- er_hist_d: pandas.DataFrame
107- Historical estimates of expected returns, computed in-sample, in daily frequency
108-
109- z_lambda: pandas.DataFrame
110- Z-stat for inference on the price of risk parameters
111-
112- z_beta: pandas.DataFrame
113- Z-stat for inference on the loadings of expected returns
99+ er_hist: pandas.DataFrame
100+ Historical estimates of expected returns, computed in-sample.
114101 """
115102
116103 def __init__ (
117104 self ,
118105 curve ,
119- curve_m = None , # TODO Documentation
106+ curve_m = None ,
120107 n_factors = 5 ,
121- selected_maturities = None , # TODO may select if you trust representativeness / liquidity
108+ selected_maturities = None ,
122109 ):
123110 """
124111 Runs the baseline version of the ACM term premium model. Works for data
@@ -128,25 +115,40 @@ def __init__(
128115 ----------
129116 curve : pandas.DataFrame
130117 Annualized log-yields. Maturities (columns) must start at month 1
131- and be equally spaced in monthly frequency. The labels of the
132- columns do not matter, they be kept the same. Observations (index)
133- must be of monthly frequency or higher. The index must be a
134- pandas.DateTimeIndex.
118+ and be equally spaced in monthly frequency. Column labels must be
119+ integers from 1 to n. Observations (index) must be a pandas
120+ DatetimeIndex with daily frequency.
121+
122+ curve_m: pandas.DataFrame
123+ Annualized log-yields in monthly frequency to be used for the
124+ parameters estimates. This is here in case the user wants to use a
125+ different curve for the parameter estimation. If None is passed,
126+ the input `curve` is resampled to monthly frequency. If something
127+ is passed, maturities (columns) must start at month 1 and be
128+ equally spaced in monthly frequency. Column labels must be
129+ integers from 1 to n. Observations (index) must be a pandas
130+ DatetimeIndex with monthly frequency.
135131
136132 n_factors : int
137133 number of principal components to be used as state variables.
134+
135+ selected_maturities: list of int
136+ the maturities to be considered in the parameter estimation steps.
137+ If None is passed, all the maturities are considered. The user may
138+ choose smaller set of yields to consider due to, for example,
139+ liquidity and representativeness of certain maturities.
138140 """
139141
140- # TODO assert columns of daily and monthly are the same
141- # TODO assert monthly index frequency
142- # TODO assert columns are consecutive integers
142+ self . _assertions ( curve , curve_m , selected_maturities )
143+
144+
143145
144146 self .n_factors = n_factors
145147 self .curve = curve
146148 self .selected_maturities = selected_maturities
147149
148150 if curve_m is None :
149- self .curve_monthly = curve .resample ('M' ).last ()
151+ self .curve_monthly = curve .resample ('M' ).mean ()
150152 else :
151153 self .curve_monthly = curve_m
152154
@@ -217,7 +219,7 @@ def fwd_curve(self, date=None):
217219 df = pd .concat (
218220 [
219221 fwd_mkt .rename ("Observed" ),
220- fwd_miy .rename ("Model Implied " ),
222+ fwd_miy .rename ("Fitted " ),
221223 fwd_rny .rename ("Risk-Neutral" ),
222224 ],
223225 axis = 1 ,
@@ -234,12 +236,36 @@ def _compute_fwd_curve(curve):
234236 fwds = pd .Series (fwds .values , index = curve .index )
235237 return fwds
236238
@staticmethod
def _assertions(curve, curve_m, selected_maturities):
    """
    Validate the user inputs before any estimation is attempted.

    Parameters
    ----------
    curve : pandas.DataFrame
        Yield curve whose column labels must be the consecutive integers
        1, 2, ..., n.

    curve_m : pandas.DataFrame or None
        Optional monthly curve. When given, its columns must equal those of
        `curve` and the frequency inferred from its index must be
        month-end.

    selected_maturities : iterable or None
        Optional subset of maturities. Every entry must be a column label
        of `curve`.

    Raises
    ------
    AssertionError
        If any of the conditions above is violated. The checks are written
        as explicit raises (not `assert` statements) so that they are still
        enforced when Python runs with the `-O` flag.
    """
    # Selected maturities are available
    if selected_maturities is not None:
        missing = [m for m in selected_maturities if m not in curve.columns]
        if missing:
            raise AssertionError(
                "not all `selected_maturities` are available in `curve`"
            )

    # Consecutive monthly maturities: labels must be exactly 1, 2, ..., n.
    # Comparing against the expected list also covers an empty frame and
    # non-numeric labels without raising an unrelated error.
    labels = list(curve.columns)
    if not labels or labels != list(range(1, len(labels) + 1)):
        msg = "`curve` columns must be consecutive integers starting from 1"
        raise AssertionError(msg)

    # Only if `curve_m` is passed
    if curve_m is not None:

        # Same columns
        if not curve_m.columns.equals(curve.columns):
            raise AssertionError(
                "columns of `curve` and `curve_m` must be the same"
            )

        # Monthly frequency. pandas >= 2.2 reports month-end as 'ME', older
        # releases as 'M'; both are accepted. `infer_freq` itself raises for
        # non-datetime indexes or fewer than 3 dates, which is reported as
        # the same validation failure.
        msg = "`curve_m` must have a DatetimeIndex with monthly frequency"
        try:
            inferred = pd.infer_freq(curve_m.index)
        except (TypeError, ValueError) as err:
            raise AssertionError(msg) from err
        if inferred not in ("M", "ME"):
            raise AssertionError(msg)
263+
237264 def _get_excess_returns (self ):
238265 ttm = np .arange (1 , self .n + 1 ) / 12
239- log_prices = - ( self .curve_monthly / 100 ) * ttm # TODO this division by 100 has to go, test with decimal rates and check if output is the same
266+ log_prices = - self .curve_monthly * ttm
240267 rf = - log_prices .iloc [:, 0 ].shift (1 )
241268 rx = (log_prices - log_prices .shift (1 , axis = 0 ).shift (- 1 , axis = 1 )).subtract (rf , axis = 0 )
242- # rx = rx.shift(1, axis=1) # TODO is this needed?
243269 rx = rx .shift (1 , axis = 1 )
244270
245271 rx = rx .dropna (how = 'all' , axis = 0 )
@@ -248,8 +274,10 @@ def _get_excess_returns(self):
248274
249275 def _get_pcs (self , curve_m , curve_d ):
250276
251- curve_m_cut = curve_m .iloc [:, 2 :] # TODO The authors do this, do not know why
252- curve_d_cut = curve_d .iloc [:, 2 :] # TODO The authors do this, do not know why
277+ # The authors' code shows that they ignore the first 2 maturities for
278+ # the PC estimation.
279+ curve_m_cut = curve_m .iloc [:, 2 :]
280+ curve_d_cut = curve_d .iloc [:, 2 :]
253281
254282 mean_yields = curve_m_cut .mean ()
255283 curve_m_cut = curve_m_cut - mean_yields
@@ -264,8 +292,6 @@ def _get_pcs(self, curve_m, curve_d):
264292 index = curve_m_cut .columns ,
265293 )
266294
267- # TODO Try a different normalization, keeping the PCs with their respective variances and loadings with unit norm.
268-
269295 df_pc_m = curve_m_cut @ df_loadings
270296 sigma_factor = df_pc_m .std ()
271297 df_pc_m = df_pc_m / df_pc_m .std ()
@@ -360,7 +386,7 @@ def _retrieve_lambda(self):
360386
361387 @staticmethod
362388 def _short_rate_equation (r1 , X ):
363- r1 = r1 / 1200 # TODO remove the 100
389+ r1 = r1 / 12
364390 X = add_constant (X )
365391 Delta = inv (X .T @ X ) @ X .T @ r1
366392 delta0 = Delta .iloc [0 ]
@@ -387,7 +413,7 @@ def _affine_coefficients(self, lambda0, lambda1):
387413
388414 def _compute_yields (self , A , B ):
389415 A = A .reshape (- 1 , 1 )
390- multiplier = np .tile (self .curve .columns / 12 , (self .t_d , 1 )).T / 100 # TODO remove the 100
416+ multiplier = np .tile (self .curve .columns / 12 , (self .t_d , 1 )).T
391417 yields = (- ((np .tile (A , (1 , self .t_d )) + B @ self .pc_factors_d .T ) / multiplier ).T ).values
392418 yields = pd .DataFrame (
393419 data = yields ,
0 commit comments