# Hey, I am trying to predict 'Mortalité hosp' (in-hospital mortality).
import pandas as pd
import numpy as np
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import classification_report
# Mount Google Drive so the workbook stored under MyDrive can be read.
drive.mount('/content/drive')

# Load the Excel file into a dataframe.
df = pd.read_excel('/content/drive/MyDrive/Classeur2_enfants.xlsx')

# Replace every missing cell with 0 so the estimator receives no NaN values.
# NOTE(review): 0 may not be a sensible stand-in for every column
# (e.g. 'FE %', 'age ') -- confirm, or impute per column instead.
df.fillna(value=0, inplace=True)

# Independent variables used to predict mortality.
# The target column 'Mortalité hosp' is deliberately NOT listed here: using
# the label as a feature leaks the answer and makes the report meaningless.
# 'décès précoce ' is left out as well -- presumably it is another outcome
# that is only known after the fact; TODO confirm with the data owner.
# NOTE(review): 'num ' looks like a record identifier rather than a clinical
# variable -- verify and drop it if so.
# NOTE(review): LogisticRegressionCV needs numeric input; if columns such as
# 'sexe ' or 'ATCDS ' hold text they must be encoded first -- confirm dtypes.
feature_columns = [
    'num ', 'age ', 'sexe ', 'ATCDS ', 'AAR', 'RAA', 'Dyspnée', 'ICD', 'ACFA',
    'IM isolée ', 'stade ', 'MM à IM prédom', 'stade .1', 'SOR ', 'grade ',
    'FE %', 'FE ', 'PAPS ( mmhg)', 'grade (paps)', 'IT ', 'I,Ao',
    'autres anomalies ', 'CAV complet', 'CAV partielle', 'CIA os ', 'CIV ',
    'annuloplastie Mitrale', 'Plastie de KAY', 'Commissurotomie',
    'Elargissement du feuillet post de la valve', 'fermeture du cleft',
    'DEVEGA', 'autre PT ', 'RVAo', 'PVAo', 'fermeture de CAV complet ',
    'fermeture de CAV partiel ', 'fermeture de CIA os', 'fermeture de CIV',
    'CEC (min)', 'Clampage (min)',
]
X = df[feature_columns]

# Target variable to predict.
y = df['Mortalité hosp']

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in the train and test parts, so a rare outcome cannot
# end up missing from one of them by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Logistic regression with 5-fold cross-validation. The iteration cap is
# raised above the default of 100 because unscaled features on very different
# scales can keep the solver from converging within the default.
lr_cv = LogisticRegressionCV(cv=5, max_iter=1000)
lr_cv.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = lr_cv.predict(X_test)
print(classification_report(y_test, y_pred))