我对 R 的 kernlab 包提供的 ANOVA RBF 核函数很感兴趣。
我用一个包含 34 个输入变量和 1 个输出变量的数值数据集对其进行了测试,每个变量有 700 个不同的取值。与其他核函数相比,这个核函数的结果非常糟糕。例如,使用简单的 RBF 核函数,预测的 R² 可以达到 0.88,而使用 ANOVA RBF 核函数只能得到 0.33。我原本以为 ANOVA RBF 会是一个非常好的核函数。有什么想法吗?谢谢!
代码如下:
# Fit the ANOVA-kernel SVR through caret.
# The seed is fixed so that different models are trained with the same seed.
set.seed(100)
svrFitanovaacv <- train(
  R ~ .,
  data = trainSet,
  method = SVManova, # custom model list rather than a built-in method name
  preProcess = c("center", "scale"), # spelled out; no partial arg matching
  trControl = ctrl,
  tuneLength = 10 # handed to the model's grid function as `len`
)
# By default, RMSE and R2 are computed for regression, and the tuning /
# cross-validation model with the best value is selected.
在 caret 包中定义自定义模型:
library(caret)
# RBF ANOVA kernel ----
# `SVManova` is a custom-model list for caret::train(): a regression SVM
# fitted with kernlab::ksvm() using kernel = "anovadot".
# kernlab functions are namespace-qualified below so the model does not
# depend on kernlab being attached to the search path (for example on
# parallel workers).
SVManova <- list(type = "Regression", library = "kernlab", loop = NULL)

# Tuning parameters exposed to train(); all four are numeric.
prmanova <- data.frame(
  parameter = c("C", "sigma", "degree", "epsilon"),
  class = rep("numeric", 4),
  label = c("Cost", "Sigma", "Degree", "Epsilon")
)
SVManova$parameters <- prmanova

# Build the tuning grid.
#   x, y   : predictors and outcome (only `x` is used, to estimate sigma).
#   len    : upper exponent of the cost sequence 2^(-40:len); this is
#            tuneLength in train(). When NULL, 3 is used so that the
#            declared default no longer fails on `-40:NULL`.
#   search : accepted for compatibility with caret's grid interface; a
#            regular grid is returned whatever its value.
# Returns a data frame with columns sigma, epsilon, C and degree.
svmGridanova <- function(x, y, len = NULL, search = "grid") {
  if (is.null(len)) {
    len <- 3L
  }
  sigmas <- kernlab::sigest(
    as.matrix(x),
    na.action = na.omit, scaled = TRUE, frac = 1
  )
  expand.grid(
    sigma = mean(sigmas[-2]), # drop the 2nd estimate, average the others
    epsilon = 0.000001,
    C = 2^(-40:len),
    degree = 1:2
  )
}
SVManova$grid <- svmGridanova

# Fit one model for a single row of the tuning grid (`param`).
# `classProbs` is passed on as ksvm's `prob.model`; extra arguments in `...`
# go straight to ksvm(). The SVM type is left at ksvm's default
# (noted as "eps-svr" by the original author).
svmFitanova <- function(x, y, wts, param, lev, last, weights, classProbs, ...) {
  kernlab::ksvm(
    x = as.matrix(x), y = y,
    kernel = "anovadot",
    kpar = list(sigma = param$sigma, degree = param$degree),
    C = param$C, epsilon = param$epsilon,
    prob.model = classProbs,
    ...
  )
}
SVManova$fit <- svmFitanova

# Predict the outcome for `newdata` from a fitted model.
svmPredanova <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  kernlab::predict(modelFit, newdata)
}
SVManova$predict <- svmPredanova

# Predict with type = "probabilities" for `newdata`.
svmProb <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  kernlab::predict(modelFit, newdata, type = "probabilities")
}
SVManova$prob <- svmProb

# Order the tuning grid by increasing cost.
svmSortanova <- function(x) x[order(x$C), ]
SVManova$sort <- svmSortanova
加载数据:
# Read the comma-separated data file; the first row holds the column names
# and blank lines are skipped.
dataA2 <- read.csv(
  "C:/results/A2.txt",
  header = TRUE,
  sep = ",",
  blank.lines.skip = TRUE
)

# Split on the response column R with p = 0.75; list = FALSE so the result
# can be used directly as a row index.
set.seed(1)
inTrainSet <- createDataPartition(dataA2$R, p = 0.75, list = FALSE)
trainSet <- dataA2[inTrainSet, ]
testSet <- dataA2[-inTrainSet, ]
# Resampling for fitting the SVR ----
# K-fold cross-validation: 10 separate 10-fold cross-validations.
ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10,
  allowParallel = TRUE
)
数据链接:
wuala.com/jpcgandre/Documents/Data%20SVR/?key=BOD9NTINzRHG