我正在使用三种不同的现成(off-the-shelf)分类器。这是一个三类分类任务。我想为每个分类器计算最佳权重(c1weight、c2weight、c3weight)(实际任务中有更多的分类器,并且每个类别还有各自的权重)。
也许简单的网格搜索方法或sklearn 集成分类器可以做到这一点。
# Soft-voting ensemble over three stock scikit-learn classifiers.
# SVC is built with probability=True because soft voting combines the
# estimators' predicted class probabilities.
vc = VotingClassifier(
    estimators=[
        ('gbc', GradientBoostingClassifier()),
        ('rf', RandomForestClassifier()),
        ('svc', SVC(probability=True)),
    ],
    voting='soft',
    n_jobs=-1,
)
# Candidate weight vectors, one weight per estimator in the order listed above.
params = {'weights': [[1, 2, 3], [2, 1, 3], [3, 2, 1]]}
grid_Search = GridSearchCV(param_grid=params, estimator=vc)
grid_Search.fit(X_new, y)
# The fitted attribute is spelled `best_score_` (all lowercase);
# `best_Score_` does not exist and raises AttributeError.
print(grid_Search.best_score_)
我不明白如何为以下代码实现这一点。
def get_classification(text, c1weight, c2weight, c3weight):
prediction1 = classifier1.predict(text)
if prediction1 = 1:
class1 =+ 1 * c1weight
elif prediction1 = 2:
class2 =+ 1 * c1weight
else:
class3 =+ 1 * c1weight
prediction2 = classifier2.predict(text)
if prediction2 = 1:
class1 =+ 1 * c2weight
elif prediction2 = 2:
class2 =+ 1 * c2weight
else:
class3 =+ 1 * c2weight
prediction3 = classifier3.predict(text)
if prediction3 = 1:
class1 =+ 1 * c3weight
elif prediction3 = 2:
class2 =+ 1 * c3weight
else:
class3 =+ 1 * c3weight
if class1 > class2 and class1 > class3:
return ("class1",class1)
elif class2 > class1 and class2 > class3:
return ("class2",class2)
else:
return("class3",class3)
# Trial vote weights for classifier1, classifier2 and classifier3 respectively.
c1weight = 0.5
c2weight = 0.7
c3weight = 0.4

# Classify every row's text and assign the results as one whole column.
# Writing `df_raw[i, 'classification'] = ...` would address a column keyed by
# the tuple (i, 'classification') rather than the cell at row i.
# NOTE(review): each stored value is whatever get_classification returns
# (a (label, score) tuple as written in this file) — confirm that
# get_accuracy expects that shape when comparing against df_raw['label'].
df_raw['classification'] = [
    get_classification(text, c1weight, c2weight, c3weight)
    for text in df_raw['text']
]
score = get_accuracy(df_raw['classification'], df_raw['label'])
已解决:下面的示例代码帮助我理解了如何实现这一点。
def your_function(number):
    """Print ``number`` to standard output.

    Placeholder for whatever should be run once per parameter value.

    Args:
        number: The value to print.
    """
    print(number)
from sklearn.model_selection import ParameterGrid

# Candidate values to try for each named parameter.
param_grid = {'param1': [1, 2, 3]}
grid = ParameterGrid(param_grid)
for params in grid:
    # `params` is indexed by parameter name to get this iteration's value.
    your_function(params['param1'])