Skip to content

Commit afd4107

Browse files
committed
Dealing with TODOs
1 parent 8d31562 commit afd4107

File tree

2 files changed

+83
-54
lines changed

2 files changed

+83
-54
lines changed

example_us.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@
55

66
ylds_d = pd.read_excel("sample_data/us_data.xlsx", index_col=0, sheet_name="daily")
77
ylds_d.index = pd.to_datetime(ylds_d.index)
8+
ylds_d = ylds_d / 100
89

910
ylds_m = pd.read_excel("sample_data/us_data.xlsx", index_col=0, sheet_name="monthly")
1011
ylds_m.index = pd.to_datetime(ylds_m.index)
12+
ylds_m = ylds_m.resample("M").last()
13+
ylds_m = ylds_m / 100
1114

1215
acm = NominalACM(
1316
curve=ylds_d,

pyacm/acm.py

Lines changed: 80 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@
55
from sklearn.decomposition import PCA
66
from statsmodels.tools.tools import add_constant
77

8-
from pyacm.utils import vec, vec_quad_form, commutation_matrix
98

10-
# TODO Curve in daily frequency could be None
11-
# TODO Make sure it works for DI Futures
12-
# TODO make sure data is accepted as decimals
139
class NominalACM:
1410
"""
1511
This class implements the model from the article:
@@ -42,22 +38,19 @@ class NominalACM:
4238
Yield curve data resampled to a monthly frequency by averaging
4339
the observations
4440
45-
t: int
46-
Number of observations in the timeseries dimension
41+
t_m: int
42+
Number of observations in the monthly timeseries dimension
43+
44+
t_d: int
45+
Number of observations in the daily timeseries dimension
4746
4847
n: int
49-
Number of observations in the cross-sectional dimension. Same
50-
as number of maturities available after returns are computed
48+
Number of observations in the cross-sectional dimension, the number of
49+
maturities available
5150
5251
rx_m: pd.DataFrame
5352
Excess returns in monthly frequency
5453
55-
rf_m: pandas.Series
56-
Risk-free rate in monthly frequency
57-
58-
rf_d: pandas.Series
59-
Risk-free rate in daily frequency
60-
6154
pc_factors_m: pandas.DataFrame
6255
Principal components in monthly frequency
6356
@@ -70,23 +63,26 @@ class NominalACM:
7063
pc_factors_d: pandas.DataFrame
7164
Principal components in daily frequency
7265
73-
pc_loadings_d: pandas.DataFrame
74-
Factor loadings of the daily PCs
75-
76-
pc_explained_d: pandas.Series
77-
Percent of total variance explained by each monthly principal component
78-
7966
mu, phi, Sigma, v: numpy.array
8067
Estimates of the VAR(1) parameters, the first stage of estimation.
8168
The names are the same as the original paper
8269
83-
a, beta, c, sigma2: numpy.array
70+
beta: numpy.array
8471
Estimates of the risk premium equation, the second stage of estimation.
85-
The names are the same as the original paper
72+
The name is the same as the original paper
8673
8774
lambda0, lambda1: numpy.array
88-
Estimates of the price of risk parameters, the third stage of estimation.
89-
The names are the same as the original paper
75+
Estimates of the price of risk parameters, the third stage of
76+
estimation.
77+
78+
delta0, delta1: numpy.array
79+
Estimates of the short rate equation coefficients.
80+
81+
A, B: numpy.array
82+
Affine coefficients for the fitted yields of different maturities
83+
84+
Arn, Brn: numpy.array
85+
Affine coefficients for the risk neutral yields of different maturities
9086
9187
miy: pandas.DataFrame
9288
Model implied / fitted yields
@@ -100,25 +96,16 @@ class NominalACM:
10096
er_loadings: pandas.DataFrame
10197
Loadings of the expected returns on the principal components
10298
103-
er_hist_m: pandas.DataFrame
104-
Historical estimates of expected returns, computed in-sample, in monthly frequency
105-
106-
er_hist_d: pandas.DataFrame
107-
Historical estimates of expected returns, computed in-sample, in daily frequency
108-
109-
z_lambda: pandas.DataFrame
110-
Z-stat for inference on the price of risk parameters
111-
112-
z_beta: pandas.DataFrame
113-
Z-stat for inference on the loadings of expected returns
99+
er_hist: pandas.DataFrame
100+
Historical estimates of expected returns, computed in-sample.
114101
"""
115102

116103
def __init__(
117104
self,
118105
curve,
119-
curve_m=None, # TODO Documentation
106+
curve_m=None,
120107
n_factors=5,
121-
selected_maturities=None, # TODO may select if you trust representativeness / liquidity
108+
selected_maturities=None,
122109
):
123110
"""
124111
Runs the baseline version of the ACM term premium model. Works for data
@@ -128,25 +115,40 @@ def __init__(
128115
----------
129116
curve : pandas.DataFrame
130117
Annualized log-yields. Maturities (columns) must start at month 1
131-
and be equally spaced in monthly frequency. The labels of the
132-
columns do not matter, they be kept the same. Observations (index)
133-
must be of monthly frequency or higher. The index must be a
134-
pandas.DateTimeIndex.
118+
and be equally spaced in monthly frequency. Column labels must be
119+
integers from 1 to n. Observations (index) must be a pandas
120+
DatetimeIndex with daily frequency.
121+
122+
curve_m: pandas.DataFrame
123+
Annualized log-yields in monthly frequency to be used for the
124+
parameters estimates. This is here in case the user wants to use a
125+
different curve for the parameter estimation. If None is passed,
126+
the input `curve` is resampled to monthly frequency. If something
127+
is passed, maturities (columns) must start at month 1 and be
128+
equally spaced in monthly frequency. Column labels must be
129+
integers from 1 to n. Observations (index) must be a pandas
130+
DatetimeIndex with monthly frequency.
135131
136132
n_factors : int
137133
number of principal components to be used as state variables.
134+
135+
selected_maturities: list of int
136+
the maturities to be considered in the parameter estimation steps.
137+
If None is passed, all the maturities are considered. The user may
138+
choose a smaller set of yields to consider due to, for example,
139+
liquidity and representativeness of certain maturities.
138140
"""
139141

140-
# TODO assert columns of daily and monthly are the same
141-
# TODO assert monthly index frequency
142-
# TODO assert columns are consecutive integers
142+
self._assertions(curve, curve_m, selected_maturities)
143+
144+
143145

144146
self.n_factors = n_factors
145147
self.curve = curve
146148
self.selected_maturities = selected_maturities
147149

148150
if curve_m is None:
149-
self.curve_monthly = curve.resample('M').last()
151+
self.curve_monthly = curve.resample('M').mean()
150152
else:
151153
self.curve_monthly = curve_m
152154

@@ -217,7 +219,7 @@ def fwd_curve(self, date=None):
217219
df = pd.concat(
218220
[
219221
fwd_mkt.rename("Observed"),
220-
fwd_miy.rename("Model Implied"),
222+
fwd_miy.rename("Fitted"),
221223
fwd_rny.rename("Risk-Neutral"),
222224
],
223225
axis=1,
@@ -234,12 +236,36 @@ def _compute_fwd_curve(curve):
234236
fwds = pd.Series(fwds.values, index=curve.index)
235237
return fwds
236238

239+
@staticmethod
240+
def _assertions(curve, curve_m, selected_maturities):
241+
# Selected maturities are available
242+
if selected_maturities is not None:
243+
assert all([col in curve.columns for col in selected_maturities]), \
244+
"not all `selected_columns` are available in `curve`"
245+
246+
# Consecutive monthly maturities
247+
cond1 = curve.columns[0] != 1
248+
cond2 = not all(np.diff(curve.columns.values) == 1)
249+
if cond1 or cond2:
250+
msg = "`curve` columns must be consecutive integers starting from 1"
251+
raise AssertionError(msg)
252+
253+
# Only if `curve_m` is passed
254+
if curve_m is not None:
255+
256+
# Same columns
257+
assert curve_m.columns.equals(curve.columns), \
258+
"columns of `curve` and `curve_m` must be the same"
259+
260+
# Monthly frequency
261+
assert pd.infer_freq(curve_m.index) == 'M', \
262+
"`curve_m` must have a DatetimeIndex with monthly frequency"
263+
237264
def _get_excess_returns(self):
238265
ttm = np.arange(1, self.n + 1) / 12
239-
log_prices = - (self.curve_monthly / 100) * ttm # TODO this division by 100 has to go, test with decimal rates and check if output is the same
266+
log_prices = - self.curve_monthly * ttm
240267
rf = - log_prices.iloc[:, 0].shift(1)
241268
rx = (log_prices - log_prices.shift(1, axis=0).shift(-1, axis=1)).subtract(rf, axis=0)
242-
# rx = rx.shift(1, axis=1) # TODO is this needed?
243269
rx = rx.shift(1, axis=1)
244270

245271
rx = rx.dropna(how='all', axis=0)
@@ -248,8 +274,10 @@ def _get_excess_returns(self):
248274

249275
def _get_pcs(self, curve_m, curve_d):
250276

251-
curve_m_cut = curve_m.iloc[:, 2:] # TODO The authors do this, do not know why
252-
curve_d_cut = curve_d.iloc[:, 2:] # TODO The authors do this, do not know why
277+
# The authors' code shows that they ignore the first 2 maturities for
278+
# the PC estimation.
279+
curve_m_cut = curve_m.iloc[:, 2:]
280+
curve_d_cut = curve_d.iloc[:, 2:]
253281

254282
mean_yields = curve_m_cut.mean()
255283
curve_m_cut = curve_m_cut - mean_yields
@@ -264,8 +292,6 @@ def _get_pcs(self, curve_m, curve_d):
264292
index=curve_m_cut.columns,
265293
)
266294

267-
# TODO Try a different normalization, keeping the PCs with their respective variances and loadings with unit norm.
268-
269295
df_pc_m = curve_m_cut @ df_loadings
270296
sigma_factor = df_pc_m.std()
271297
df_pc_m = df_pc_m / df_pc_m.std()
@@ -360,7 +386,7 @@ def _retrieve_lambda(self):
360386

361387
@staticmethod
362388
def _short_rate_equation(r1, X):
363-
r1 = r1 / 1200 # TODO remove the 100
389+
r1 = r1 / 12
364390
X = add_constant(X)
365391
Delta = inv(X.T @ X) @ X.T @ r1
366392
delta0 = Delta.iloc[0]
@@ -387,7 +413,7 @@ def _affine_coefficients(self, lambda0, lambda1):
387413

388414
def _compute_yields(self, A, B):
389415
A = A.reshape(-1, 1)
390-
multiplier = np.tile(self.curve.columns / 12, (self.t_d, 1)).T / 100 # TODO remove the 100
416+
multiplier = np.tile(self.curve.columns / 12, (self.t_d, 1)).T
391417
yields = (- ((np.tile(A, (1, self.t_d)) + B @ self.pc_factors_d.T) / multiplier).T).values
392418
yields = pd.DataFrame(
393419
data=yields,

0 commit comments

Comments
 (0)