数据挖掘 - ANOVA RBF 核函数的结果非常差 - 吾爱随笔录

我对 R 的 kernlab 包提供的 ANOVA RBF 核函数很感兴趣。

我用一个包含 34 个输入变量和 1 个输出变量的数值型数据集对它进行了测试，每个变量有 700 个不同的取值。与其他核函数相比，这个核函数给出的结果非常糟糕。例如，使用普通的 RBF 核函数，预测的 R² 可以达到 0.88；而使用 ANOVA RBF 核函数，R² 只有 0.33。我原本以为 ANOVA RBF 会是一个非常好的核函数。有什么想法吗？谢谢。

代码如下：

# Fix the RNG seed so that different models can be trained with the same seed.
set.seed(100)

# Fit the custom ANOVA RBF SVM model through caret::train().
# Requires `trainSet`, `ctrl` and the custom model list `SVManova` to exist
# already (they are defined in the other snippets of this file).
# No `metric` argument is given (e.g. metric = "ROC" is NOT used); per the
# original author's note, RMSE and R2 are then computed for regression and the
# tuning/cross-validation model with the best value is selected.
svrFitanovaacv <- train(
  R ~ .,
  data = trainSet,
  method = SVManova,
  preProc = c("center", "scale"),
  tuneLength = 10,
  trControl = ctrl
)

在 caret 包中定义自定义模型：

library(caret)
# RBF ANOVA kernel: skeleton of a custom caret model.
# Table describing the four tuning parameters (name, class, display label).
prmanova <- data.frame(
  parameter = c("C", "sigma", "degree", "epsilon"),
  class = c("numeric", "numeric", "numeric", "numeric"),
  label = c("Cost", "Sigma", "Degree", "Epsilon")
)

# Model list: a regression model backed by the kernlab package; `loop` is
# explicitly present and set to NULL.
SVManova <- list(
  type = "Regression",
  library = "kernlab",
  loop = NULL
)
SVManova$parameters <- prmanova
# Build the tuning grid for the ANOVA RBF SVM model (caret `grid` element).
#
# Args:
#   x, y:   predictors and outcome as supplied by caret; only `x` is used.
#   len:    upper exponent of the cost sequence, i.e. train()'s `tuneLength`.
#           When NULL it falls back to 3 (train()'s default tuneLength for
#           resampled fits) instead of failing inside `-40:len`.
#   search: accepted because current caret versions call the grid function
#           with a `search` argument; it is ignored and a regular grid is
#           always returned.
#
# Returns:
#   A data frame with columns sigma, epsilon, C and degree holding every
#   combination of C = 2^(-40 .. len) and degree = 1, 2, with a single sigma
#   estimate and a fixed epsilon.
svmGridanova <- function(x, y, len = NULL, search = "grid") {
  # kernlab stays attached on purpose: other code in this file calls kernlab
  # functions without a namespace prefix and relies on this side effect.
  library(kernlab)
  if (is.null(len)) {
    len <- 3L
  }
  # sigest() returns several sigma estimates (per the kernlab docs: the 0.1,
  # 0.5 and 0.9 quantiles); the middle one is dropped and the rest averaged.
  sigmas <- kernlab::sigest(as.matrix(x), na.action = na.omit,
                            scaled = TRUE, frac = 1)
  expand.grid(sigma = mean(sigmas[-2]), epsilon = 0.000001,
              C = 2^(-40:len), degree = 1:2)
}
SVManova$grid <- svmGridanova
# Fit one SVM with the ANOVA RBF kernel (caret `fit` element).
#
# Args:
#   x, y:       predictors and outcome; `x` is converted to a matrix.
#   param:      one row of the tuning grid; its sigma, degree, C and epsilon
#               entries are used.
#   classProbs: forwarded to ksvm() as `prob.model`.
#   wts, lev, last, weights: part of the caret fit signature, unused here.
#   ...:        forwarded unchanged to kernlab::ksvm().
#
# Returns:
#   The fitted object returned by kernlab::ksvm().
svmFitanova <- function(x, y, wts, param, lev, last, weights, classProbs, ...) {
  # Namespace-qualified so the call does not depend on kernlab being attached
  # in the process that runs the fit (e.g. a parallel worker).
  kernlab::ksvm(
    x = as.matrix(x), y = y,
    kernel = "anovadot",
    kpar = list(sigma = param$sigma, degree = param$degree),
    C = param$C, epsilon = param$epsilon,
    prob.model = classProbs,
    ...
  ) # no `type` is passed, so ksvm's default applies ("eps-svr" per the
    # original author's note)
}
SVManova$fit <- svmFitanova
# Prediction function for the custom model (caret `predict` element).
# Delegates to predict() on the fitted model and the new data; `preProc` and
# `submodels` are accepted for the caret interface but not used.
svmPredanova <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  predicted <- predict(modelFit, newdata)
  predicted
}
# Register the prediction function as the model's `predict` element.
SVManova[["predict"]] <- svmPredanova
# Probability function for the custom model (caret `prob` element).
# Delegates to predict() with type = "probabilities"; `preProc` and
# `submodels` are accepted for the caret interface but not used.
svmProb <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  probabilities <- predict(modelFit, newdata, type = "probabilities")
  probabilities
}
# Register the probability function as the model's `prob` element.
SVManova[["prob"]] <- svmProb
# Sort function for the custom model (caret `sort` element).
# Returns the rows of the tuning-parameter table `x` ordered by increasing C.
svmSortanova <- function(x) {
  row_order <- order(x$C)
  x[row_order, ]
}
# Register the sort function as the model's `sort` element.
SVManova[["sort"]] <- svmSortanova

加载数据：

# Read the comma-separated data file; the first row holds the column names
# and blank lines are skipped.
dataA2 <- read.csv(
  "C:/results/A2.txt",
  header = TRUE,
  sep = ",",
  blank.lines.skip = TRUE
)

# Seed the RNG before drawing the partition so the split is reproducible.
set.seed(1)

# Row indices for the training set: 75% of the data, partitioned on the
# outcome column R. `list = FALSE` is requested so the result can be used
# directly as a row index (no [[1]] extraction needed).
inTrainSet <- createDataPartition(dataA2$R, p = 0.75, list = FALSE)

# Training rows are the selected indices; test rows are everything else.
trainSet <- dataA2[inTrainSet, ]
testSet <- dataA2[-inTrainSet, ]
# -----------------------------------------------------------------------------
# Resampling scheme used when fitting the SVR models:
# repeated k-fold cross-validation, i.e. 10 separate 10-fold cross-validations,
# with parallel processing allowed.
ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10,
  allowParallel = TRUE
)

数据链接：

wuala.com/jpcgandre/Documents/Data%20SVR/?key=BOD9NTINzRHG