Merge pull request #4 from gusamarante/feature/FRED

gusamarante · web-flow · commit ef8b0b9736c4 · 2024-12-04T23:54:29.000-03:00
Adding examples with jupyter notebooks and refining code
diff --git a/README.md b/README.md
@@ -27,14 +27,26 @@ carries all the relevant variables as atributes:
 pip install pyacm
 ```
 
-# Example
-The tricky part is getting the correct data format. The model works with 
-annualized log-yields for zero-coupon bonds, observed at daily or monthly 
-frequency. Maturities must be equally spaced in monthly frequency and start 
-at month 1. This means that you need to construct a bootstraped curve for every
-date and interpolate it at fixed monthly maturities.
-
-MORE SOON...
+
+# Original Article
+> Adrian, Tobias and Crump, Richard K. and Moench, Emanuel, 
+> Pricing the Term Structure with Linear Regressions (April 11, 2013). 
+> FRB of New York Staff Report No. 340, 
+> Available at SSRN: https://ssrn.com/abstract=1362586 or http://dx.doi.org/10.2139/ssrn.1362586
+
+The version of the article that was published by the NY FED is not 100% explicit on how the data is being manipulated, 
+but I found an earlier version of the paper on SSRN where the authors go deeper into the details on how everything is being estimated:
+- Data for zero yields uses monthly maturities starting from month 1
+- All principal components and model parameters are estimated with data resampled to a monthly frequency, averaging observations in each month
+- To get daily / real-time estimates, the factor loadings estimated from the monthly frequency are used to transform the daily data
+
+
+# Usage
+The tricky part of using this model is getting the correct data format:
+- The model works with annualized log-yields for zero-coupon bonds
+- Observations (index) must be in either monthly or daily frequency
+- Maturities (columns) must be equally spaced in **monthly** frequency and start at month 1. This means that you need to construct a bootstrapped curve for every date and interpolate it at fixed monthly maturities.
+- Whichever maturity you want to be the longest, your input data should have one column more. For example, if you want term premium estimates up to the 10-year yield (120 months), your input data should include maturities up to 121 months. This is needed to properly compute the returns.
 
 
 # Observations
diff --git a/example.py b/example.py
diff --git a/example_br.ipynb b/example_br.ipynb
diff --git a/pit_estimates.py b/pit_estimates.py
@@ -1,5 +1,8 @@
 """
-Generate point-in-time estimates
+Generate point-in-time estimates.
+
+For each date, rerun the model and save the latest estimate of expected return
+and term premium
 """
 from tqdm import tqdm
 import pandas as pd
@@ -16,7 +19,7 @@
 yield_curve = yield_curve.iloc[:, :121]  # maturities up to 10y
 yield_curve = yield_curve.dropna()
 yield_curve.index = pd.to_datetime(yield_curve.index)
-yield_curve = yield_curve[yield_curve.index >= "2007-03-01"]  # TODO deal with this missing data
+yield_curve = yield_curve[yield_curve.index >= "2007-03-01"]
 
 tp = pd.DataFrame(columns=yield_curve.columns)
 er = pd.DataFrame(columns=yield_curve.columns)
diff --git a/pyacm/__init__.py b/pyacm/__init__.py
@@ -1,3 +1,5 @@
 from pyacm.acm import NominalACM
 
-__all__ = ["NominalACM"]
+__all__ = [
+    "NominalACM",
+]
diff --git a/pyacm/acm.py b/pyacm/acm.py
@@ -26,6 +26,88 @@ class NominalACM:
           month.
         - To get daily / real-time estimates, the factor loadings estimated
           from the monthly frquency are used to transform the daily data.
+
+    Attributes
+    ----------
+    n_factors: int
+        number of principal components used
+
+    curve: pandas.DataFrame
+        Raw data of the yield curve
+
+    curve_monthly: pandas.DataFrame
+        Yield curve data resampled to a monthly frequency by averaging
+        the observations
+
+    t: int
+        Number of observations in the timeseries dimension
+
+    n: int
+        Number of observations in the cross-sectional dimension. Same
+        as number of maturities available after returns are computed
+
+    rx_m: pandas.DataFrame
+        Excess returns in monthly frequency
+
+    rf_m: pandas.Series
+        Risk-free rate in monthly frequency
+
+    rf_d: pandas.Series
+        Risk-free rate in daily frequency
+
+    pc_factors_m: pandas.DataFrame
+        Principal components in monthly frequency
+
+    pc_loadings_m: pandas.DataFrame
+        Factor loadings of the monthly PCs
+
+    pc_explained_m: pandas.Series
+        Percent of total variance explained by each monthly principal component
+
+    pc_factors_d: pandas.DataFrame
+        Principal components in daily frequency
+
+    pc_loadings_d: pandas.DataFrame
+        Factor loadings of the daily PCs
+
+    pc_explained_d: pandas.Series
+        Percent of total variance explained by each daily principal component
+
+    mu, phi, Sigma, v: numpy.array
+        Estimates of the VAR(1) parameters, the first stage of estimation.
+        The names are the same as the original paper
+
+    a, beta, c, sigma2: numpy.array
+        Estimates of the risk premium equation, the second stage of estimation.
+        The names are the same as the original paper
+
+    lambda0, lambda1: numpy.array
+        Estimates of the price of risk parameters, the third stage of estimation.
+        The names are the same as the original paper
+
+    miy: pandas.DataFrame
+        Model implied / fitted yields
+
+    rny: pandas.DataFrame
+        Risk neutral yields
+
+    tp: pandas.DataFrame
+        Term premium estimates
+
+    er_loadings: pandas.DataFrame
+        Loadings of the expected returns on the principal components
+
+    er_hist_m: pandas.DataFrame
+        Historical estimates of expected returns, computed in-sample, in monthly frequency
+
+    er_hist_d: pandas.DataFrame
+        Historical estimates of expected returns, computed in-sample, in daily frequency
+
+    z_lambda: pandas.DataFrame
+        Z-stat for inference on the price of risk parameters
+
+    z_beta: pandas.DataFrame
+        Z-stat for inference on the loadings of expected returns
     """
 
     def __init__(self, curve, n_factors=5):
@@ -48,7 +130,7 @@ def __init__(self, curve, n_factors=5):
 
         self.n_factors = n_factors
         self.curve = curve
-        self.curve_monthly = curve.resample('M').mean()
+        self.curve_monthly = curve.resample('ME').mean()
         self.t = self.curve_monthly.shape[0] - 1
         self.n = self.curve_monthly.shape[1]
         self.rx_m, self.rf_m = self._get_excess_returns()
diff --git a/setup.py b/setup.py
@@ -22,14 +22,15 @@
     long_description=long_description,
     packages=find_packages(),
     install_requires=[
+        'matplotlib',
+        'numpy',
         'pandas',
         'scikit-learn',
-        'numpy',
-        'matplotlib',
+        'tqdm',
     ],
     keywords=[
         'asset pricing',
         'yield curve',
         'term premium',
     ],
-)
+)