import numpy as np
import pandas as pd
from imblearn.combine import SMOTETomek
from imblearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.svm import SVC
# Load the raw CSV (the file is cp932-encoded).
data = pd.read_csv(
    "/home/share/temp/20230301-20240331_site_zaiko_suii.csv",
    encoding="cp932",
)

# Drop the columns that are not used for modelling. Of what remains, the
# last column is the source of the target and all earlier columns are the
# features.
remaining = data.drop(columns=["SKU", "原価", "商品名", "ブランド名", "メーカー名"])
target_source = remaining.iloc[:, -1]

# Binary label: 0 where the value is negative, 1 otherwise. A NaN is not
# "less than 0", so it is labelled 1.
y = (~target_source.lt(0)).astype("int64")
X = remaining.iloc[:, :-1]

# Fill missing feature values with the per-column mean.
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)

# Rebalance the classes by combining over-sampling and under-sampling.
smt = SMOTETomek(random_state=42)
X_resampled, y_resampled = smt.fit_resample(X, y)
# Define the SVC model. probability=True enables predict_proba, which the
# AUC computation below relies on.
svc_classifier = SVC(probability=True)

# Fit the model on the resampled data.
svc_classifier.fit(X_resampled, y_resampled)

# NOTE(review): everything below is scored on the same resampled data the
# model was just fitted on, so these numbers describe training fit, not
# generalization to unseen data.
y_pred = svc_classifier.predict(X_resampled)
accuracy = accuracy_score(y_resampled, y_pred)
print("Accuracy:", accuracy)
# The newline escape had lost its backslash and printed a literal "n".
print("\nClassification Report:")
print(classification_report(y_resampled, y_pred))

# AUC, using the predicted probability in the second predict_proba column
# (class 1 for 0/1 labels).
y_pred_proba = svc_classifier.predict_proba(X_resampled)[:, 1]
auc_score = roc_auc_score(y_resampled, y_pred_proba)
print("AUC Score:", auc_score)
# Metrics reported for each fold: precision, recall and F1-score.
scoring = {
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    'f1_score': make_scorer(f1_score),
}

# Stratified 3-fold cross-validation with a fixed shuffle seed.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Cross-validate on the original (un-resampled) X / y and let the pipeline
# resample inside each training fold only. Splitting data that was already
# resampled puts synthetic points derived from training samples into the
# validation folds, which leaks information and inflates the scores.
# cross_validate clones the pipeline for every fold, so the already-fitted
# `smt` and `svc_classifier` objects only serve as configuration templates.
# NOTE(review): X was mean-imputed on the full dataset before this point, so
# a small amount of leakage from the imputation statistics remains.
cv_pipeline = Pipeline([("resample", smt), ("svc", svc_classifier)])
cv_results = cross_validate(cv_pipeline, X, y, cv=cv, scoring=scoring)

# Per-fold scores.
print("Precision:", cv_results['test_precision'])
print("Recall:", cv_results['test_recall'])
print("F1-score:", cv_results['test_f1_score'])

# Spread of each metric across the folds (population standard deviation).
precision_std = np.std(cv_results['test_precision'])
recall_std = np.std(cv_results['test_recall'])
f1_score_std = np.std(cv_results['test_f1_score'])

print("Precision Standard Deviation:", precision_std)
print("Recall Standard Deviation:", recall_std)
print("F1-score Standard Deviation:", f1_score_std)
上記のコードでは問題があるでしょうか?
Prev
ディスカッション
コメント一覧
まだ、コメントがありません