我一直在尝试用 Python 实现逻辑回归。代码基本可以运行,预测模型的准确率达到 91%,但不知为何 AUC 得分只有 0.5——这基本上是最差的得分,因为它意味着模型的预测与随机猜测无异。分类报告还给出了警告:“UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 'precision', 'predicted', average, warn_for)”(即:对于没有任何预测样本的标签,精确率和 F 值无定义,被设为 0.0)。有谁知道我应该修改什么才能让它正常工作?
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
# Load the loan data set from a comma-separated file in the working directory.
data_file = pd.read_csv('loan.csv', delimiter=',')

# variable preprocessing
# Binary target: 1 when the loan status is 'Fully Paid' or 'Current', 0 for
# every other status. Each label has to sit on a single line -- a quoted
# string cannot contain a raw line break, and 'Fully<newline>Paid' would never
# match the status text anyway.
data_file['loan_status'] = np.where(
    data_file['loan_status'].isin(['Fully Paid', 'Current']), 1, 0)

# Keep a float copy of the target for the modelling steps further down.
loan_stat = data_file['loan_status'].astype(np.float64)
# Ordinal encoding of the textual employment-length categories.
# 'n/a' and '< 1 year' both map to 0; '10+ years' is capped at 10.
# NOTE(review): pandas.read_csv treats the literal 'n/a' as a missing value by
# default, so the 'n/a' key may never match -- confirm against the loaded data.
m = {
    'n/a': 0,
    '< 1 year': 0,
    '1 year': 1,
    '2 years': 2,
    '3 years': 3,
    '4 years': 4,
    '5 years': 5,
    '6 years': 6,
    '7 years': 7,
    '8 years': 8,
    '9 years': 9,
    '10+ years': 10,
}

# Series.map yields NaN for any label that is not a key of `m`.
# astype returns a new Series instead of converting in place, so its result
# must be assigned back for the cast to have any effect.
emp_length = data_file.emp_length.map(m).astype(np.float64)
# variables combined into one dataframe
# Predictor columns, listed in the order they appear in the feature matrix.
# The order matters: it fixes which model coefficient belongs to which feature.
feature_columns = [
    'annual_inc',  # spelled out in full (was mistyped as 'annua_inc')
    'delinq_2yrs',
    'dti',
    'emp_length',
    'loan_amnt',
    'installment',
    'int_rate',
    'total_acc',
    'open_acc',
    'pub_rec',
    'acc_now_delinq',
]

# Copy so that the edits below never write back into data_file.
X = data_file[feature_columns].copy()

# Swap the raw employment-length text for its numeric encoding; assigning to
# an existing column keeps that column's position.
X['emp_length'] = emp_length

# Attach the target temporarily so that rows with a missing value in any
# predictor (or in the target) are dropped from X and y together.
X['loan_stat'] = loan_stat
X = X.dropna(axis=0)

# Separate the target from the predictors again.
y = X['loan_stat']
X = X.drop(['loan_stat'], axis=1)
# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20; on newer
# versions train_test_split must be imported from sklearn.model_selection.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so no information from the held-out data leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# L2-regularised logistic regression.
# NOTE(review): an AUC of 0.5 on hard labels together with the "no predicted
# samples" warning suggests only one class is ever predicted; if the classes
# are heavily imbalanced, consider class_weight='balanced' -- confirm on data.
model = LogisticRegression(penalty='l2', C=1)
model.fit(X_train, y_train)

# Predict once and reuse the result for every label-based metric.
y_pred = model.predict(X_test)
# Probability of the positive class (label 1): column 1 of predict_proba
# corresponds to model.classes_[1].
y_score = model.predict_proba(X_test)[:, 1]

score = accuracy_score(y_test, y_pred)
# ROC AUC ranks samples by score, so it needs continuous probabilities; hard
# 0/1 predictions collapse the curve to a single point (0.5 when only one
# class is predicted).
roc = roc_auc_score(y_test, y_score)
cr = classification_report(y_test, y_pred)
以下是数据链接:https://www.kaggle.com/wendykan/lending-club-loan-data/downloads/lending-club-loan-data.zip