Source code for sklearn_utilities.pandas.column_transformer_pandas

from __future__ import annotations

from typing import Any, Callable, Sequence

from pandas import DataFrame
from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
from typing_extensions import Self

from ..id_transformer import IdTransformer


class ExcludedColumnTransformerPandas(BaseEstimator, TransformerMixin):
    """Apply a wrapped estimator to every column except the excluded ones.

    On fit, the columns of the input data frame that are *not* excluded are
    recorded, in the order they appear in the frame, and only those columns
    are handed to the wrapped estimator.

    Attributes
    ----------
    feature_names_in_ : Sequence[str]
        Column names of the data frame seen during fit.
    feature_names_out_ : Sequence[str]
        Column names that are selected from the input and passed to the
        wrapped estimator (input order, without duplicates).
    """

    feature_names_in_: Sequence[str]
    feature_names_out_: Sequence[str]

    def __init__(
        self,
        estimator: Any = IdTransformer(),
        exclude_columns: Sequence[str] | Callable[[Sequence[str]], Sequence[bool]] = [],
    ) -> None:
        """A transformer that excludes columns from the input data frame.

        Parameters
        ----------
        estimator : Any, optional
            The estimator to be wrapped, by default IdTransformer()
        exclude_columns : Sequence[str] | Callable[[Sequence[str]], Sequence[bool]], optional
            The columns to be excluded, by default []
            If callable, a function that takes the column names as an
            argument and returns a boolean mask marking the excluded ones.

        Notes
        -----
        Both defaults are evaluated once, when the class is defined, so
        every instance relying on them shares the same objects. Pass an
        explicit estimator when independent fitted state is required.
        """
        # Parameters are stored untouched; all validation happens in _fit.
        self.exclude_columns = exclude_columns
        self.estimator = estimator

    def _fit(self, X: DataFrame) -> None:
        """Record the input columns and the columns kept for the estimator."""
        self.feature_names_in_ = list(X.columns)

        if callable(self.exclude_columns):
            # The callable returns a boolean mask over the column index.
            excluded = set(
                X.columns[self.exclude_columns(X.columns)]  # type: ignore
            )
        elif isinstance(self.exclude_columns, str):
            # A bare string is one column name, not a sequence of characters.
            excluded = {self.exclude_columns}
        else:
            excluded = set(self.exclude_columns)

        # Keep the frame's own column order (a plain set difference would
        # yield an arbitrary, run-dependent order) while still dropping
        # duplicated names, as the set-based selection did.
        self.feature_names_out_ = list(
            dict.fromkeys(name for name in X.columns if name not in excluded)
        )

    def fit(self, X: DataFrame, y: Any = None, **fit_params: Any) -> Self:
        """Fit the wrapped estimator on the non-excluded columns.

        Parameters
        ----------
        X : DataFrame
            Input data frame.
        y : Any, optional
            Target forwarded to the wrapped estimator when it is not None.
        **fit_params : Any
            Extra keyword arguments forwarded to the wrapped estimator's
            ``fit``.

        Returns
        -------
        Self
            This transformer.
        """
        self._fit(X)
        selected = X[self.feature_names_out_]
        if y is None:
            # Do not force a ``y`` argument onto estimators fitted without one.
            self.estimator.fit(selected, **fit_params)
        else:
            self.estimator.fit(selected, y, **fit_params)
        return self

    def transform(
        self, X: DataFrame, y: Any = None, **transform_params: Any
    ) -> DataFrame:
        """Transform the non-excluded columns with the wrapped estimator.

        ``y`` is accepted for API compatibility and is not used.
        """
        check_is_fitted(self)
        return self.estimator.transform(X[self.feature_names_out_], **transform_params)

    def fit_transform(
        self, X: DataFrame, y: Any = None, **fit_params: Any
    ) -> DataFrame:
        """Fit on and transform the non-excluded columns in one step."""
        self._fit(X)
        return self.estimator.fit_transform(X[self.feature_names_out_], y, **fit_params)
class IncludedColumnTransformerPandas(BaseEstimator, TransformerMixin):
    """Apply a wrapped estimator to the included columns only.

    On fit, the columns to include are resolved against the input data
    frame and only those columns are handed to the wrapped estimator.

    Attributes
    ----------
    feature_names_in_ : Sequence[str]
        Column names of the data frame seen during fit.
    feature_names_out_ : Sequence[str]
        Column names that are selected from the input and passed to the
        wrapped estimator, stored as a list.
    """

    feature_names_in_: Sequence[str]
    feature_names_out_: Sequence[str]

    def __init__(
        self,
        estimator: Any = IdTransformer(),
        include_columns: Sequence[str] | Callable[[Sequence[str]], Sequence[bool]] = [],
    ) -> None:
        """A transformer that includes columns from the input data frame.

        Parameters
        ----------
        estimator : Any, optional
            The estimator to be wrapped, by default IdTransformer()
        include_columns : Sequence[str] | Callable[[Sequence[str]], Sequence[bool]], optional
            The columns to be included, by default []
            If callable, a function that takes the column names as an
            argument and returns a boolean mask marking the included ones.

        Notes
        -----
        Both defaults are evaluated once, when the class is defined, so
        every instance relying on them shares the same objects. Pass an
        explicit estimator when independent fitted state is required.
        """
        # Parameters are stored untouched; all validation happens in _fit.
        self.include_columns = include_columns
        self.estimator = estimator

    def _fit(self, X: DataFrame) -> None:
        """Record the input columns and the columns kept for the estimator."""
        self.feature_names_in_ = list(X.columns)

        if callable(self.include_columns):
            # The callable returns a boolean mask over the column index.
            selected = X.columns[self.include_columns(X.columns)]  # type: ignore
            self.feature_names_out_ = list(selected)
        elif isinstance(self.include_columns, str):
            # A bare string is one column name; wrapping it keeps the
            # selection a DataFrame instead of a Series.
            self.feature_names_out_ = [self.include_columns]
        else:
            # Always store a list: indexing a frame with a tuple would be
            # read as a single column key rather than several names.
            self.feature_names_out_ = list(self.include_columns)

    def fit(self, X: DataFrame, y: Any = None, **fit_params: Any) -> Self:
        """Fit the wrapped estimator on the included columns.

        Parameters
        ----------
        X : DataFrame
            Input data frame.
        y : Any, optional
            Target forwarded to the wrapped estimator when it is not None.
        **fit_params : Any
            Extra keyword arguments forwarded to the wrapped estimator's
            ``fit``.

        Returns
        -------
        Self
            This transformer.
        """
        self._fit(X)
        selected = X[self.feature_names_out_]
        if y is None:
            # Do not force a ``y`` argument onto estimators fitted without one.
            self.estimator.fit(selected, **fit_params)
        else:
            self.estimator.fit(selected, y, **fit_params)
        return self

    def transform(
        self, X: DataFrame, y: Any = None, **transform_params: Any
    ) -> DataFrame:
        """Transform the included columns with the wrapped estimator.

        ``y`` is accepted for API compatibility and is not used.
        """
        check_is_fitted(self)
        return self.estimator.transform(X[self.feature_names_out_], **transform_params)

    def fit_transform(
        self, X: DataFrame, y: Any = None, **fit_params: Any
    ) -> DataFrame:
        """Fit on and transform the included columns in one step."""
        self._fit(X)
        return self.estimator.fit_transform(X[self.feature_names_out_], y, **fit_params)