Trying to build data preparation pipeline. But got error message suggests that the column 'short_model' is not found in the DataFrame that I'm trying to use within pipeline. But I created it in the ShortModelTransformer() class. Maybe I missed smth in that class? Killing 3 days already trying to figure that out

class CalculateOutliers(BaseEstimator, TransformerMixin):

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        q25 = X['year'].quantile(0.25)
        q75 = X['year'].quantile(0.75)
        iqr = q75 - q25
        boundaries = (q25 - 1.5 * iqr, q75 + 1.5 * iqr)
        X['year'].loc[X['year'] < boundaries[0]] = round(boundaries[0])
        X['year'].loc[X['year'] > boundaries[1]] = round(boundaries[1])
        return X

class ShortModelTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def short_model(self, x):
        if not pd.isna(x):
            return x.lower().split(' ')[0]
        else:
            return x

    def transform(self, X):
        X['short_model'] = X['model'].apply(self.short_model)
        return X

class AgeCategoryTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X['age_category'] = X['year'].apply(lambda x: 'new' if x > 2013 else ('old' if x < 2006 else 'average'))
        return X

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
])

numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('encoder', StandardScaler())
])

preprocessor = ColumnTransformer(transformers=[
    ('outliers', CalculateOutliers(), ['year']),
    ('short_model', ShortModelTransformer(), ['model']),
    ('age_category', AgeCategoryTransformer(), ['year']),
    ('numerical', numerical_transformer, ['odometer']),
    ('categorical', categorical_transformer, ['fuel', 'title_status', 'transmission',
                                              'state', 'short_model', 'age_category'])
])
0

There are 0 answers