import numpy as np
import skimage as ski
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
class ImageTransformer(TransformerMixin):
    """Turn image file paths into flattened grayscale feature rows.

    Every image is loaded, converted to grayscale, resized to
    ``output_shape`` and flattened, so the result can be passed straight to
    a scikit-learn estimator.

    Args:
        output_shape: ``(rows, cols)`` each image is resized to.
        show: When true, display every preprocessed image through ``plt``.
            Off by default so ``transform`` does not block on a plot window.
            Requires the surrounding module to have imported
            ``matplotlib.pyplot`` as ``plt``.
    """

    def __init__(self, output_shape=(28, 28), show=False):
        self.output_shape = output_shape
        self.show = show

    def fit(self, X=None, y=None):
        """Return ``self`` unchanged; this transformer learns nothing.

        ``X`` and ``y`` are accepted and ignored because ``Pipeline.fit`` and
        ``TransformerMixin.fit_transform`` pass them to every step.
        """
        return self

    def transform(self, X):
        """Load and preprocess the images named in ``X``.

        Args:
            X: Iterable of image paths (anything ``ski.io.imread`` accepts).

        Returns:
            Float array of shape ``(n_images, rows * cols)``; it has zero
            rows when ``X`` is empty.

        Raises:
            ValueError: If an image is neither grayscale, RGB nor RGBA.
        """
        rows = []
        for image_path in X:
            gray = self._load_grayscale(image_path)
            # The skimage conversions already yield floats in [0, 1], so no
            # further division by 255 is applied: doing so would squeeze
            # every pixel into [0, 1/255].
            # NOTE(review): if the classifier was trained on another scale
            # (e.g. raw 0-255 pixels), rescale here to match it.
            resized = ski.transform.resize(
                gray, self.output_shape, anti_aliasing=True
            )
            if self.show:
                plt.imshow(resized, cmap='gray')
                plt.show()
            rows.append(resized.ravel())
        n_features = int(np.prod(self.output_shape))
        # The reshape keeps the result 2-D even when no paths were given.
        return np.array(rows, dtype=float).reshape(-1, n_features)

    @staticmethod
    def _load_grayscale(image_path):
        """Read one image and return it as a 2-D float grayscale array."""
        image = ski.io.imread(image_path)
        if image.ndim == 2:
            # Already single-channel; only bring it to the float convention.
            return ski.util.img_as_float(image)
        if image.ndim == 3 and image.shape[-1] == 4:
            # rgba2rgb rejects anything without exactly four channels, so it
            # is applied only when an alpha channel is actually present.
            image = ski.color.rgba2rgb(image)
        if image.ndim == 3 and image.shape[-1] == 3:
            return ski.color.rgb2gray(image)
        raise ValueError(
            f"Unsupported image shape {image.shape} for {image_path!r}"
        )
# End-to-end pipeline: image paths go in, the classifier's predictions come out.
full_pipeline = Pipeline(
    steps=[
        ("image_scaler", ImageTransformer()),  # paths -> flattened pixel rows
        ("model", rf_classifier),  # estimator defined elsewhere in the script
    ]
)
# Model apparently overfitting but performing well on the test set
11 messages · Page 1 of 1 (latest)
Where rf_classifier is:
from sklearn.ensemble import RandomForestClassifier
# Build and train the random forest in one expression; fit() returns the
# fitted estimator itself, so the name is bound to the trained model.
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)
But whenever I do:
# Predict the label for a single image, passed as a one-element list of file paths.
full_pipeline.predict(["image.png"])
It always returns 7, no matter what
This is an uploaded image
But it predicts well on the test data:
What do you think overfitting is?
The model gets "biased" toward predicting something.
But I don't think it's overfitting in this case, since it performs well on the test set.
Never mind, I managed to make it work.