To me, these are different things, because the two models have different cost functions to optimize.
On the other hand, you can combine the models by building embeddings based on random-forest splits and then feeding those embeddings into a neural network as inputs.
A toy example shows that there is a non-trivial configuration of a neural network that matches the results obtained by a random forest:
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomTreesEmbedding
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Shared tree parameters for both the embedding and the random forest
# (min_impurity_split is omitted: it was removed in scikit-learn 1.0)
params = dict(n_estimators=100,
              max_depth=None,
              min_samples_split=2,
              min_samples_leaf=1,
              min_weight_fraction_leaf=0.0,
              max_leaf_nodes=None,
              min_impurity_decrease=0.0,
              random_state=42,
              verbose=0)

# Neural network trained on the one-hot leaf embeddings produced by random trees
mlp = Pipeline([("embeddings", RandomTreesEmbedding(**params)),
                ("model", MLPClassifier(activation="identity",
                                        hidden_layer_sizes=(1000,),
                                        max_iter=10000,
                                        random_state=42))]).fit(X_train, y_train)

# Plain random forest baseline with the same tree parameters
rf = Pipeline([("model", RandomForestClassifier(**params))]).fit(X_train, y_train)

mlp.score(X_test, y_test)
rf.score(X_test, y_test)
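
To see what the MLP is actually trained on, you can call transform on the fitted embedding step. This is just a small sketch reusing the mlp pipeline fitted above: RandomTreesEmbedding one-hot encodes the leaf each sample lands in for every tree, so the input to the network is a high-dimensional, sparse binary matrix rather than the raw features.

# Inspect the leaf-indicator features the MLP receives
# (assumes the `mlp` pipeline from above has already been fitted)
leaf_features = mlp.named_steps["embeddings"].transform(X_train)
print(leaf_features.shape)  # (n_samples, total number of leaves across all trees)
print(leaf_features.nnz)    # one non-zero entry per sample per tree (sparse matrix)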
