Solution officielle
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
def pipeline_complet(X, y, feature_names):
    """Tune a scaling + random-forest pipeline and summarize the outcome.

    The data is split 80/20 (stratified on ``y``, seed 42). A pipeline made
    of a ``StandardScaler`` followed by a ``RandomForestClassifier`` is then
    tuned on the training part with a grid search (``cv=3``, accuracy
    scoring) over the number of trees and the maximum depth. The best
    pipeline found is finally evaluated on the held-out part.

    Args:
        X: Feature matrix; must be accepted by ``train_test_split`` and
            support ``len()``.
        y: Target labels, also used to stratify the split.
        feature_names: Accepted as part of the signature but not used by
            this function.

    Returns:
        A dict with the following keys:
            ``'meilleurs_params'``: hyper-parameters chosen by the search.
            ``'score_cv'``: best cross-validation score of the search,
                as a float.
            ``'score_test'``: accuracy on the held-out part, as a float.
            ``'taille_train'``: number of training samples.
            ``'taille_test'``: number of held-out samples.
    """
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X,
        y,
        test_size=0.2,
        stratify=y,
        random_state=42,
    )

    steps = [
        ('scaler', StandardScaler()),
        ('model', RandomForestClassifier(random_state=42)),
    ]
    # The 'model__' prefix routes each hyper-parameter to the pipeline step
    # named 'model'.
    search_space = {
        'model__n_estimators': [50, 100],
        'model__max_depth': [3, 5, 10],
    }
    search = GridSearchCV(
        estimator=Pipeline(steps),
        param_grid=search_space,
        cv=3,
        scoring='accuracy',
    )
    search.fit(X_fit, y_fit)

    best_pipeline = search.best_estimator_
    holdout_predictions = best_pipeline.predict(X_holdout)
    holdout_accuracy = accuracy_score(y_holdout, holdout_predictions)

    summary = {
        'meilleurs_params': search.best_params_,
        'score_cv': float(search.best_score_),
        'score_test': float(holdout_accuracy),
        'taille_train': len(X_fit),
        'taille_test': len(X_holdout),
    }
    return summary