Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 34 additions & 8 deletions fife/lgb_modelers.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,14 +322,20 @@ def train_single_model(
return model

def predict(
self, subset: Union[None, pd.core.series.Series] = None, cumulative: bool = True
self,
subset: Union[None, pd.core.series.Series] = None,
custom_data: Union[None, pd.core.frame.DataFrame] = None,
cumulative: bool = True
) -> np.ndarray:
"""Use trained LightGBM models to predict the outcome for each observation and time horizon.

Args:
subset: A Boolean Series that is True for observations for which
predictions will be produced. If None, default to all
observations.
custom_data: A DataFrame in the same format as the input data for
which predictions will be produced. If None, default to the
assigned input data.
cumulative: If True, produce cumulative survival probabilities.
If False, produce marginal survival probabilities (i.e., one
minus the hazard rate).
Expand All @@ -338,10 +344,19 @@ def predict(
A numpy array of predictions by observation and lead
length.
"""
subset = default_subset_to_all(subset, self.data)
predict_data = self.data[self.categorical_features + self.numeric_features][
subset
]
if custom_data is not None:
data = self.transform_features(custom_data)
if not set(self.categorical_features + self.numeric_features).issubset(
set(data.columns)
):
raise KeyError(
f"Columns {[i for i in (self.categorical_features + self.numeric_features) if i not in data.columns]} not found in data or are of an incompatible type"
)
else:
data = self.data.copy(deep=True)
subset = default_subset_to_all(subset, data)

predict_data = data[self.categorical_features + self.numeric_features][subset]
predictions = np.array(
[
lead_specific_model.predict(predict_data)
Expand All @@ -352,9 +367,20 @@ def predict(
predictions = np.cumprod(predictions, axis=1)
return predictions

def transform_features(self) -> pd.DataFrame:
"""Transform features to suit model training."""
data = self.data.copy(deep=True)
def transform_features(self,
custom_data: Union[None, pd.core.frame.DataFrame] = None
) -> pd.DataFrame:
"""Transform features to suit model training.

Args:
custom_data: A DataFrame in the same format as the input data for
which predictions will be produced. If None, default to the
assigned input data.
"""
if custom_data is not None:
data = custom_data.copy(deep=True)
else:
data = self.data.copy(deep=True)
if self.config.get("DATETIME_AS_DATE", True):
date_cols = list(data.select_dtypes("datetime").columns) + [
col
Expand Down