from __future__ import annotations
from typing import Any, Generic, Hashable, Sequence
from pandas import DataFrame, Series, concat
from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
from typing_extensions import Self
from .pandas import DataFrameWrapper
from .types import TEstimator
[docs]
class AppendXPredictionToX(BaseEstimator, TransformerMixin, Generic[TEstimator]):
"""Append the prediction of X by the estimator to X."""
estimator: TEstimator
def __init__(
    self,
    estimator: TEstimator,
    *,
    variables: Sequence[Hashable] | None = None,
    append: bool = True,
    append_pred_diff: bool = True,
    append_pred_real_diff: bool = True,
) -> None:
    """Store the wrapped estimator and the output options.

    Columns holding the prediction are suffixed with "_pred".

    Parameters
    ----------
    estimator : TEstimator
        The estimator to be wrapped.
    variables : Sequence[Hashable] | None, optional
        The variables (columns of X) to be used for prediction,
        by default None.
    append : bool, optional
        Whether to append the original X, by default True.
    append_pred_diff : bool, optional
        Whether to append the difference between the previous
        prediction and the current prediction, by default True.
        Those columns are suffixed with "_pred_diff".
    append_pred_real_diff : bool, optional
        Whether to append the difference between the current value
        and the current prediction, by default True.
        Those columns are suffixed with "_pred_real_diff".
    """
    # Every argument is kept unmodified under an attribute of the same
    # name; no validation or conversion happens here.

    # Output options.
    self.append_pred_real_diff = append_pred_real_diff
    self.append_pred_diff = append_pred_diff
    self.append = append

    # What to predict with, and on which columns.
    self.variables = variables
    self.estimator = estimator
[docs]
def fit(self, X: DataFrame, y: Series | None = None, **fit_params: Any) -> Self:
    """Fit the wrapped estimator to predict the next row of X from the current one.

    Parameters
    ----------
    X : DataFrame
        Input rows; consecutive rows are paired as (current, next).
    y : Series | None, optional
        Not used by this method.
    **fit_params : Any
        Passed through to the wrapped estimator's ``fit``.

    Returns
    -------
    Self
        This instance, with ``estimator_`` set.
    """
    # Restrict to the configured columns when a selection was given.
    current = X if self.variables is None else X.loc[:, self.variables]

    # Positional shift (no freq): row t of `following` holds row t+1 of
    # `current`. The last row has no successor, so it is dropped from
    # both frames to keep them aligned.
    following = current.shift(-1).iloc[:-1]
    current = current.iloc[:-1]

    # The fitted attribute is assigned before fitting, so it exists even
    # if the underlying fit raises.
    self.estimator_ = DataFrameWrapper(self.estimator)
    self.estimator_.fit(current, following, **fit_params)
    return self