# Candidate values for the regularization strength C (0.01 through 1000).
parameters = [{'C': [10**-2, 10**-1, 10**0,10**1, 10**2, 10**3]}]
# NOTE: the grid keys are applied to the estimator handed to GridSearchCV, which
# here is the OneVsRestClassifier wrapper rather than the inner
# LogisticRegression. The bare key 'C' is therefore rejected with
# "Invalid parameter C" (see the traceback that follows).
model_tunning = GridSearchCV(OneVsRestClassifier(LogisticRegression(penalty='l1')), param_grid=parameters,scoring="f1")
# This fit call is the line that raises the ValueError.
model_tunning.fit(x_train_multilabel, y_train)
ValueError Traceback (most recent call last)
<ipython-input-38-5d5850fe8978> in <module>()
2
3 model_tunning = GridSearchCV(OneVsRestClassifier(LogisticRegression(penalty='l1')), param_grid=parameters,scoring="f1")
----> 4 model_tunning.fit(x_train_multilabel, y_train)
ValueError: Invalid parameter C for estimator OneVsRestClassifier(estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
penalty='l1', random_state=None, solver='liblinear', tol=0.0001,
verbose=0, warm_start=False),
n_jobs=1). Check the list of available parameters with `estimator.get_params().keys()`.
如何对包装了 LogisticRegression 的 OneVsRestClassifier 使用 GridSearchCV 进行调参?
数据挖掘
Python
分类
逻辑回归
网格搜索
2021-10-01 00:54:12
2个回答
对嵌套的估计器进行网格搜索时,需要用双下划线 `__` 作为分隔符来限定参数名。在本例中,LogisticRegression 模型保存在 OneVsRestClassifier 的 `estimator` 属性中,因此参数名应写成 `estimator__C`:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
# Seven candidate values of C, from 100 down to 0.0001. The "estimator__"
# prefix routes each value to the LogisticRegression wrapped inside
# OneVsRestClassifier.
tuned_parameters = [{'estimator__C': [100, 10, 1, 0.1, 0.01, 0.001, 0.0001]}]

# One-vs-rest wrapper around a logistic regression with default settings.
log_reg_clf = OneVsRestClassifier(LogisticRegression())

# 3-fold cross-validated grid search, scored by micro-averaged F1.
logistic_gs = GridSearchCV(
    estimator=log_reg_clf,
    param_grid=tuned_parameters,
    scoring='f1_micro',
    cv=3,
)
logistic_gs.fit(x_train_bow, y_train)

# Show the estimator refitted with the best-scoring C.
print(logistic_gs.best_estimator_)
下面用 GridSearchCV、TF-IDF、LogisticRegression 和 OneVsRestClassifier 搭建了一个基本的 Pipeline。由于分类器在 Pipeline 中的步骤名为 `clf`,在 param_grid 中应使用 `clf__estimator__C`,而不是 `C`:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
# TF-IDF features feeding a one-vs-rest logistic regression; the whole pipeline
# is tuned with a cross-validated grid search.
tfidf_vectorizer = TfidfVectorizer(smooth_idf=True)

log_reg_clf = OneVsRestClassifier(
    estimator=LogisticRegression(
        # The grid below tries both 'l1' and 'l2' penalties. liblinear supports
        # both, whereas the default solver of recent scikit-learn releases
        # (lbfgs) rejects 'l1' and would make every 'l1' candidate fail to fit.
        solver='liblinear',
        intercept_scaling=1,
        class_weight='balanced',
        random_state=0,
    )
)

# Create regularization hyperparameter space: 10 values of C, log-spaced
# between 10**0 and 10**4.
C = np.logspace(0, 4, 10)

# Keys follow "<pipeline step>__<parameter>". Classifier parameters need the
# extra "estimator__" hop because LogisticRegression sits inside
# OneVsRestClassifier.
param_grid = [{
    'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
    'vect__max_features': (None, 5000, 10000, 50000),
    'vect__norm': ['l1', 'l2'],
    'clf__estimator__C': C,
    'clf__estimator__penalty': ['l1', 'l2'],
}]

log_reg_clf_tfidf = Pipeline([
    ('vect', tfidf_vectorizer),
    ('clf', log_reg_clf),
])

# List every tunable parameter name, handy for checking the grid keys above.
print(log_reg_clf_tfidf.get_params().keys())

# 5-fold grid search over the full pipeline, using all available cores.
gs_logReg_tfidf = GridSearchCV(
    log_reg_clf_tfidf,
    param_grid,
    scoring='accuracy',
    cv=5,
    verbose=1,
    n_jobs=-1,
)
gs_logReg_tfidf.fit(X_train, y_train)

print("The best parameters: \n", gs_logReg_tfidf.best_params_)
print("The best score: \n", gs_logReg_tfidf.best_score_)

# Predict on the held-out set with the best pipeline found by the search.
df_test_predicted_idf = gs_logReg_tfidf.predict(X_test)
其它你可能感兴趣的问题