我是 scikit-learn 的初学者,我在使用特征选择模块 VarianceThreshold 时遇到了一点问题,问题是当我设置方差时Var[X]=.8*(1-.8)
的所有特征(在所有样本中具有相同的值)。在我的情况下,第五列应该被删除,.p=8/10>(threshold=0,7)
#####################################
from sklearn.feature_selection import VarianceThreshold
X=[[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00],
[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00],
[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00],
[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00],
[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.01,0.00,0.00,0.00,0.00,0.00],
[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00],
[0,1,2,1,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0.00,0.00,0.00,0.00,0.50,1.00,0.00,10,3,0.30,0.30,0.30,0.00,0.00,0.00,0.00,0.00],
[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,253,0.99,0.01,0.00,0.00,0.00,0.00,0.00,0.00],
[0,1,1,1,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00],
[0,2,3,1,223,185,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,71,255,1.00,0.00,0.01,0.01,0.00,0.00,0.00,0.00]]
sel=VarianceThreshold(threshold=(.7*(1-.7)))
这就是我在运行脚本时得到的
>>> sel.fit_transform(X)
array([[ 1., 105., 146., 1., 1., 255., 254.],
[ 1., 105., 146., 1., 1., 255., 254.],
[ 1., 105., 146., 1., 1., 255., 254.],
[ 1., 105., 146., 2., 2., 255., 254.],
[ 1., 105., 146., 2., 2., 255., 254.],
[ 1., 105., 146., 2., 2., 255., 255.],
[ 2., 29., 0., 2., 1., 10., 3.],
[ 1., 105., 146., 1., 1., 255., 253.],
[ 1., 105., 146., 2., 2., 255., 254.],
[ 3., 223., 185., 4., 4., 71., 255.]])
这里的第二列不应该出现。谢谢;)