Model apparently overfitting but performing well on the test set | Learn AI Together | Page 1

barren nexus Mar 13, 2025, 2:29 PM

#

import numpy as np
import skimage as ski
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline

class ImageTransformer(TransformerMixin, BaseEstimator):
    """Turn image file paths into flattened grayscale feature vectors.

    Each path is loaded, converted to single-channel grayscale, resized to
    ``output_shape`` and flattened, so the result can be fed directly to a
    scikit-learn estimator.

    The output pixel values are floats in ``[0, 1]``: scikit-image's colour
    conversions already rescale integer images to that range, so no further
    division by 255 is applied. (Dividing again squeezes every pixel to at
    most 1/255, which makes all images look nearly identical to the model.)

    NOTE(review): the downstream model must have been trained on features
    with the same scale and polarity (e.g. light-on-dark vs dark-on-light)
    as this transformer produces; confirm against the training data.

    Parameters
    ----------
    output_shape : tuple of int, default=(28, 28)
        Height and width each image is resized to before flattening.
    show : bool, default=False
        If true, display every preprocessed image while transforming. This
        relies on ``matplotlib.pyplot`` being available as ``plt`` in this
        module. It is off by default because ``plt.show()`` blocks and is a
        debugging aid, not part of the transformation.
    """

    def __init__(self, output_shape=(28, 28), show=False):
        # Stored under the same names as the arguments so that
        # BaseEstimator.get_params / clone work.
        self.output_shape = output_shape
        self.show = show

    def fit(self, X=None, y=None):
        """Do nothing and return ``self``; the transformer is stateless.

        ``X`` and ``y`` are accepted (and ignored) because ``Pipeline.fit``
        and ``fit_transform`` always pass them.
        """
        return self

    @staticmethod
    def _to_grayscale(image):
        """Return ``image`` as a 2-D float array with values in ``[0, 1]``.

        Handles the array layouts an image file can decode to: already
        grayscale (2-D), RGB (3 channels) and RGBA (4 channels).

        Raises
        ------
        ValueError
            If the array has any other layout.
        """
        if image.ndim == 2:
            # Already single-channel: only rescale to the [0, 1] float range.
            return ski.util.img_as_float(image)
        if image.ndim == 3 and image.shape[-1] == 4:
            # Blend the alpha channel away first; rgb2gray needs 3 channels.
            image = ski.color.rgba2rgb(image)
        if image.ndim == 3 and image.shape[-1] == 3:
            return ski.color.rgb2gray(image)
        raise ValueError(
            f"Unsupported image shape {image.shape}; "
            "expected grayscale, RGB or RGBA."
        )

    def transform(self, X):
        """Load and preprocess every image path in ``X``.

        Parameters
        ----------
        X : iterable of path-like
            Locations of the image files to load.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(n_images, height * width)`` holding one
            flattened grayscale image per row, values in ``[0, 1]``.
        """
        transformed_images = []
        for image_path in X:
            image = ski.io.imread(image_path)
            grayscaled_image = self._to_grayscale(image)
            resized_image = ski.transform.resize(
                grayscaled_image, self.output_shape, anti_aliasing=True
            )

            if self.show:
                plt.imshow(resized_image, cmap='gray')
                plt.show()

            transformed_images.append(resized_image.flatten())

        if not transformed_images:
            # Keep the result 2-D even when there is nothing to transform.
            return np.empty((0, int(np.prod(self.output_shape))))
        return np.array(transformed_images)


# Chain the image preprocessing step and the classifier so that image file
# paths can be handed straight to the pipeline's predict().
full_pipeline = Pipeline(
    steps=[
        ("image_scaler", ImageTransformer()),
        ("model", rf_classifier),
    ]
)

#

Where rf_classifier is:

from sklearn.ensemble import RandomForestClassifier

# Random forest with a fixed seed so repeated runs build the same trees.
rf_classifier = RandomForestClassifier(random_state=42)

# Train on the prepared training features and labels.
rf_classifier.fit(X=X_train, y=y_train)

#

But whenever I do:

# Run preprocessing and classification on a single image file path.
full_pipeline.predict(["image.png"])

#

It always returns 7, no matter what

#

#

This is an uploaded image

#

But it predicts well on the test data:

calm moon Mar 13, 2025, 2:40 PM

#

What do you think overfitting is?

barren nexus Mar 13, 2025, 2:41 PM

#

The model gets "biased" toward predicting something

#

But it's not overfitting in this case I think, since it performs well on the test set

barren nexus Mar 13, 2025, 5:42 PM

#

Nvm, managed to make it work.

#Model apparently overfitting but performing well on the test set