from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
# Numerical columns: replace missing values with the mean of each column.
numerical_transformer = SimpleImputer(
    strategy='mean',
)
# Categorical columns go through two stages, applied in order:
#   1. fill missing values with the most frequent value of the column;
#   2. one-hot encode the result; categories not seen during fit are
#      ignored at transform time (encoded as all zeros) instead of raising.
categorical_transformer = Pipeline(
    steps=[
        ('Categorical_Imputer', SimpleImputer(strategy='most_frequent')),
        ('One_Hot_Encoder', OneHotEncoder(handle_unknown='ignore')),
    ],
)
# Combine both preprocessing branches into a single transformer. Each entry
# is (name, transformer, columns). numerical_cols and categorical_cols are
# expected to be defined elsewhere in the file (not visible here).
# NOTE: with ColumnTransformer's default remainder='drop', columns that are
# in neither selection are dropped from the output.
preprocessor = ColumnTransformer(
    transformers=[
        ('Numerical_Transformer', numerical_transformer, numerical_cols),
        ('Categorical_Transformer', categorical_transformer, categorical_cols),
    ],
)