from sklearn.model_selection import GridSearchCV

# チューニングするハイパーパラメータの範囲を定義
param_grid = {
    'n_estimators': [100, 500, 1000],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.3],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
}

# グリッドサーチを行う
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train_encoded)

# 最適なモデルを取得
best_model = grid_search.best_estimator_

# 最適なモデルで予測を行う
y_pred_encoded = best_model.predict(X_test)

# モデルの評価
accuracy = accuracy_score(y_test_encoded, y_pred_encoded)
precision = precision_score(y_test_encoded, y_pred_encoded, average='weighted')
recall = recall_score(y_test_encoded, y_pred_encoded, average='weighted')
f1 = f1_score(y_test_encoded, y_pred_encoded, average='weighted')
roc_auc = roc_auc_score(y_test_encoded, best_model.predict_proba(X_test), multi_class='ovr')

print("Best Parameters:", grid_search.best_params_)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("ROC AUC Score:", roc_auc)

ランダムサーチでおねがい

2024年6月8日

GridSearchCV を使用している場合、ハイパーパラメータの組み合わせをすべて試すため、計算コストが高くなる可能性があります。そのため、大規模なパラメータ空間を効率的に探索するためには、RandomizedSearchCV を使用することが推奨されます。RandomizedSearchCV は、指定した分布からランダムにパラメータの組み合わせを選択し、指定した数の候補を評価します。

以下は、RandomizedSearchCV を使ったハイパーパラメータのチューニングの例です。

python
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

# Hyperparameter search space for the randomized search.
#
# NOTE: scipy.stats.uniform(loc, scale) samples from [loc, loc + scale] (the
# second argument is a width, not an upper bound), and
# scipy.stats.randint(low, high) samples integers from [low, high), i.e. the
# upper bound is exclusive. The bounds below are written with that in mind.
param_dist = {
    'n_estimators': randint(100, 1001),     # integers 100..1000 inclusive
    'max_depth': randint(3, 10),            # integers 3..9 inclusive
    'learning_rate': uniform(0.01, 0.29),   # [0.01, 0.30]
    'subsample': uniform(0.6, 0.4),         # [0.6, 1.0]
    'colsample_bytree': uniform(0.6, 0.4),  # [0.6, 1.0]
}

# Run the randomized search: n_iter candidates are drawn from param_dist and
# each is scored with 3-fold cross-validation on accuracy. random_state pins
# the sampled candidates so the search is reproducible.
# xgb_model, X_train and y_train_encoded are expected to be defined earlier
# in the script (not visible in this snippet).
random_search = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    scoring='accuracy',
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train_encoded)

# Best estimator found by the search.
best_model = random_search.best_estimator_

# Predict on the held-out test set with the best model.
y_pred_encoded = best_model.predict(X_test)
y_proba_test = best_model.predict_proba(X_test)

# Evaluate the model. Precision / recall / F1 are support-weighted averages
# over classes; ROC AUC is computed one-vs-rest from the class probabilities.
# The metric functions are assumed to be imported from sklearn.metrics
# earlier in the script -- TODO confirm.
accuracy = accuracy_score(y_test_encoded, y_pred_encoded)
precision = precision_score(y_test_encoded, y_pred_encoded, average='weighted')
recall = recall_score(y_test_encoded, y_pred_encoded, average='weighted')
f1 = f1_score(y_test_encoded, y_pred_encoded, average='weighted')
roc_auc = roc_auc_score(y_test_encoded, y_proba_test, multi_class='ovr')

print("Best Parameters:", random_search.best_params_)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("ROC AUC Score:", roc_auc)

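このコードでは、randint 関数と uniform 関数を使用して、各ハイパーパラメータの探索範囲を指定しています。なお、scipy の uniform(loc, scale) は [loc, loc + scale] の範囲から、randint(low, high) は low 以上 high 未満の整数からサンプリングする点に注意してください。n_iter パラメータを使用して、ランダムサーチで評価するパラメータの組み合わせの数（イテレーション数）を指定します。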

未分類

Posted by ぼっち