Skip to content

Commit 404550d

Browse files
Remove curve component (not relevant in general)
1 parent 31ce2f2 commit 404550d

File tree

1 file changed

+0
-242
lines changed

1 file changed

+0
-242
lines changed

pymc_experimental/model/modular/components.py

Lines changed: 0 additions & 242 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,6 @@
1212
# Pooling strategies accepted by the model components in this module.
POOLING_TYPES = Literal["none", "complete", "partial"]
valid_pooling = get_args(POOLING_TYPES)

# Curve families understood by ``build_curve``.
CURVE_TYPES = Literal["log", "abc", "ns", "nss", "box-cox"]
valid_curves = get_args(CURVE_TYPES)

# Labels for the entries of the ``beta`` vector of each curve family, in the
# order ``build_curve`` indexes them. ``Curve.build`` registers these labels as
# the coordinate of the feature dimension, so every member of ``CURVE_TYPES``
# needs an entry and the number of labels fixes the length of ``beta``.
FEATURE_DICT = {
    "log": ["slope"],
    # NOTE(review): build_curve only reads the first two entries (lambda and
    # slope) for "box-cox"; "intercept" is kept so existing coords do not
    # change -- confirm whether it is still wanted.
    "box-cox": ["lambda", "slope", "intercept"],
    # build_curve exponentiates the first entry, so "tau" is sampled on the
    # log scale.
    "ns": ["tau", "beta0", "beta1"],
    # Two (log-scale) decay rates followed by three coefficients, matching the
    # beta[:2] / beta[2:] split performed in build_curve.
    "nss": ["tau1", "tau2", "beta0", "beta1", "beta2"],
    "abc": ["a", "b", "c"],
}
25-
2615

2716
def _validate_pooling_params(pooling_columns: ColumnType, pooling: POOLING_TYPES):
2817
"""
@@ -191,237 +180,6 @@ def build(self, model=None):
191180
return intercept
192181

193182

194-
def build_curve(
    time_pt: "pt.TensorVariable",
    beta: "pt.TensorVariable",
    curve_type: Literal["log", "abc", "ns", "nss", "box-cox"],
):
    r"""
    Build a curve based on the time data and parameters beta.

    In this context, a "curve" is a deterministic function that maps time to a value. The curve should (in general) be
    strictly increasing with time (df(t)/dt > 0), and should (in general) exhibit diminishing marginal growth with time
    (d^2f(t)/dt^2 < 0). These properties are not strictly necessary; some curve functions (such as nss) allow for
    local reversals.

    Parameters
    ----------
    time_pt: TensorVariable
        A pytensor variable representing the time data to build the curve from.
    beta: TensorVariable
        A pytensor variable representing the parameters of the curve, indexed along its first axis. The number of
        parameters and their meaning depend on the curve_type.

        .. warning::
            Currently no checks are in place to ensure that the number of parameters in beta matches the expected
            number for the curve_type.

    curve_type: str, one of ["log", "abc", "ns", "nss", "box-cox"]
        Type of curve to build. Options are:

        - "log":
            A simple log-linear curve with a single parameter, ``beta[0]``. The curve is defined as:

            .. math::

                \beta \log(t)

        - "abc":
            A saturating curve with parameters ``beta = [a, b, c]``. The curve equals "a" at :math:`t = 0` and
            approaches "b" as :math:`t \to \infty`; "c" controls the speed of the transition (at :math:`t = 1/c` the
            curve is halfway between "a" and "b"). The curve is defined as:

            .. math::

                \frac{a + bc t}{1 + ct}

        - "ns":
            The Nelson-Siegel yield curve model, with parameters ``beta = [log(tau), beta_1, beta_2]``. The first
            entry is exponentiated to obtain :math:`\tau`, the decay rate of the exponential term; :math:`\beta_1`
            and :math:`\beta_2` control the slope and curvature of the curve. The curve is defined as:

            .. math::

                \begin{align}
                x_t &= \beta_1 \phi(t) + \beta_2 \left (\phi(t) - \exp(-t/\tau) \right ) \\
                \phi(t) &= \frac{1 - \exp(-t/\tau)}{t/\tau}
                \end{align}

        - "nss":
            The Nelson-Siegel-Svensson yield curve model, with five parameters
            ``beta = [log(tau_1), log(tau_2), beta_1, beta_2, beta_3]``. The first two entries are exponentiated to
            obtain :math:`\tau_1` and :math:`\tau_2`, the decay rates of the two exponential terms. :math:`\beta_1`
            controls the slope of the curve, and :math:`\beta_2` and :math:`\beta_3` control the curvature of the
            curve. To ensure that short-term rates are strictly positive, one typically restricts
            :math:`\beta_1 + \beta_2 > 0`.

            The curve is defined as:

            .. math::

                \begin{align}
                x_t &= \beta_1 \phi_1(t) + \beta_2 \left (\phi_1(t) - \exp(-t/\tau_1) \right)
                       + \beta_3 \left (\phi_2(t) - \exp(-t/\tau_2) \right) \\
                \phi_1(t) &= \frac{1 - \exp(-t/\tau_1)}{t/\tau_1} \\
                \phi_2(t) &= \frac{1 - \exp(-t/\tau_2)}{t/\tau_2}
                \end{align}

            Note that this definition omits the constant term that is typically included in the
            Nelson-Siegel-Svensson model; you are assumed to have already accounted for this with another component
            in the model.

        - "box-cox":
            A curve that applies a box-cox transformation to the time data, with parameters
            ``beta = [lambda, slope]``. :math:`\lambda` is the box-cox parameter that interpolates between the log
            and linear transformations, and :math:`\beta` is the slope of the curve.

            The curve is defined as:

            .. math::

                \beta \left ( \frac{t^{\lambda} - 1}{\lambda} \right )

    Returns
    -------
    TensorVariable
        A pytensor variable representing the curve.

    Raises
    ------
    ValueError
        If curve_type is not one of the supported options.
    """
    if curve_type == "box-cox":
        # The tiny offset keeps the division defined when lambda is exactly zero.
        lam = beta[0] + 1e-12
        time_scaled = (time_pt**lam - 1) / lam
        curve = beta[1] * time_scaled

    elif curve_type == "log":
        time_scaled = pt.log(time_pt)
        curve = beta[0] * time_scaled

    elif curve_type == "ns":
        # beta[0] lives on the log scale so that tau is always positive.
        tau = pt.exp(beta[0])
        t_over_tau = time_pt / tau
        time_scaled = (1 - pt.exp(-t_over_tau)) / t_over_tau
        curve = beta[1] * time_scaled + beta[2] * (time_scaled - pt.exp(-t_over_tau))

    elif curve_type == "nss":
        # The first two entries are log decay rates; the remaining three are coefficients.
        tau = pt.exp(beta[:2])
        coefs = beta[2:]
        t_over_tau_1 = time_pt / tau[0]
        t_over_tau_2 = time_pt / tau[1]
        # The loadings decay with time: phi(t) = (1 - exp(-t / tau)) / (t / tau), exactly as in the "ns" branch.
        time_scaled_1 = (1 - pt.exp(-t_over_tau_1)) / t_over_tau_1
        time_scaled_2 = (1 - pt.exp(-t_over_tau_2)) / t_over_tau_2
        curve = (
            coefs[0] * time_scaled_1
            + coefs[1] * (time_scaled_1 - pt.exp(-t_over_tau_1))
            + coefs[2] * (time_scaled_2 - pt.exp(-t_over_tau_2))
        )

    elif curve_type == "abc":
        curve = (beta[0] + beta[1] * beta[2] * time_pt) / (1 + beta[2] * time_pt)

    else:
        raise ValueError(f"Unknown curve type: {curve_type}")

    return curve
321-
322-
323-
class Curve(GLMModel):
    def __init__(
        self,
        name: str,
        t: pd.Series | pd.DataFrame,
        prior: str = "Normal",
        index_data: pd.Series | pd.DataFrame | None = None,
        pooling: POOLING_TYPES = "complete",
        curve_type: CURVE_TYPES = "log",
        prior_params: dict | None = None,
        hierarchical_params: dict | None = None,
    ):
        """
        GLM component that transforms time data through a non-linear curve.

        The component samples a parameter vector and feeds it, together with the time data, to ``build_curve``. The
        supported curve types are:

        - "log": A simple log-linear curve.
        - "abc": A three-parameter curve with parameters (a, b, c).
        - "ns": The Nelson-Siegel yield curve model.
        - "nss": The Nelson-Siegel-Svensson yield curve model.
        - "box-cox": A curve that applies a box-cox transformation to the time data.

        Parameters
        ----------
        name: str
            Name of the curve term. It names the resulting deterministic variable and prefixes the parameter
            variable (``{name}_beta``) and the feature dimension (``{name}_features``).
        t: Series or DataFrame
            Time data used to build the curve. The name of its index is used as the observation dimension. If a
            DataFrame is given, only its first column is used.
        prior: str, optional
            Name of the PyMC distribution to use for the curve parameters. Default is "Normal".
        index_data: Series or DataFrame, optional
            Index data used to build hierarchical priors. If there are multiple columns, the columns are treated as
            levels of a "telescoping" hierarchy, with the leftmost column representing the top level of the
            hierarchy, and depth increasing to the right.

            The index of the index_data must match the index of the observed data.
        pooling: str, one of ["none", "complete", "partial"], default "complete"
            Type of pooling to use for the curve parameters. If "none", no pooling is applied, and each group in the
            index_data is treated as independent. If "complete", complete pooling is applied, and all data are
            treated as coming from the same group. If "partial", a hierarchical prior is constructed that shares
            information across groups in the index_data.
        curve_type: str, one of ["log", "abc", "ns", "nss", "box-cox"]
            Type of curve to build. For details, see the build_curve function.
        prior_params: dict, optional
            Additional keyword arguments to pass to the PyMC distribution specified by the prior argument. Only
            used when pooling is "complete".
        hierarchical_params: dict, optional
            Additional keyword arguments intended to configure the priors created by
            hierarchical_prior_to_requested_depth (e.g. ``sigma_dist``, ``sigma_kwargs``, ``offset_dist``).

            NOTE(review): the value is stored on the instance, but ``build`` does not currently forward it to
            hierarchical_prior_to_requested_depth -- confirm whether that is intended.
        """
        _validate_pooling_params(index_data, pooling)

        # Replace missing option dictionaries with fresh empty ones.
        if prior_params is None:
            prior_params = {}
        if hierarchical_params is None:
            hierarchical_params = {}

        # A DataFrame is reduced to its first column.
        if isinstance(t, pd.Series):
            time_series = t
        else:
            time_series = t.iloc[:, 0]

        self.name = name
        self.t = time_series
        self.curve_type = curve_type

        self.index_data = index_data
        self.pooling = pooling

        self.prior = prior
        self.prior_params = prior_params
        self.hierarchical_params = hierarchical_params

        super().__init__()

    def build(self, model=None):
        """
        Add the curve to a PyMC model and return the resulting deterministic variable.

        Parameters
        ----------
        model: optional
            Model to add the curve to; resolved through ``pm.modelcontext``.

        Returns
        -------
        The ``pm.Deterministic`` named after this component, with the observation dimension taken from the name of
        the time data's index.
        """
        model = pm.modelcontext(model)
        obs_dim = self.t.index.name
        feature_dim = f"{self.name}_features"

        # Register the parameter labels for this curve type once per model.
        if feature_dim not in model.coords:
            model.add_coord(feature_dim, FEATURE_DICT[self.curve_type])

        with model:
            # NOTE(review): the data variable is always registered as "t", independent of the component name.
            time_data = pm.Data("t", self.t.values, dims=[obs_dim])

            if self.pooling == "complete":
                prior_dist = getattr(pm, self.prior)
                coefs = prior_dist(f"{self.name}_beta", **self.prior_params, dims=[feature_dim])
            else:
                # NOTE(review): neither self.prior nor self.hierarchical_params is passed on in this branch.
                coefs = hierarchical_prior_to_requested_depth(
                    self.name,
                    self.index_data,
                    model=model,
                    dims=[feature_dim],
                    no_pooling=self.pooling == "none",
                )

            values = build_curve(time_data, coefs, self.curve_type)
            return pm.Deterministic(f"{self.name}", values, dims=[obs_dim])
423-
424-
425183
class Regression(GLMModel):
426184
def __init__(
427185
self,

0 commit comments

Comments
 (0)