我正在尝试根据glmnet包调整弹性网络的 alpha 和 lambda 参数。我找到了一些资料来源,它们为此目的提出了不同的选择。根据这个指令我做了一个基于caret包的优化。根据这个线程,我手动优化了参数。两种方法都给了我有效的结果,但是,方法的选择参数非常不同。请参阅下面的 R 中的可重现示例:
library("caret")
library("glmnet")
set.seed(1234)
# Some example data
N <- 1000
y <- rnorm(N, 5, 10)
x1 <- y + rnorm(N, 2, 10)
x2 <- y + rnorm(N, - 5, 20)
x3 <- y + rnorm(N, 10, 200)
x4 <- rnorm(N, 20, 50)
x5 <- rnorm(N, - 7, 200)
x6 <- rbinom(N, 1, exp(x1) / (exp(x1) + 1))
x7 <- rbinom(N, 1, exp(x2) / (exp(x2) + 1))
x8 <- rbinom(N, 1, exp(x3) / (exp(x3) + 1))
x9 <- rbinom(N, 1, exp(x4) / (exp(x4) + 1))
x10 <- rbinom(N, 1, exp(x5) / (exp(x5) + 1))
data <- data.frame(y, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10)
# Tune parameteres with caret and glmnet
# Set up grid and cross validation method for train function
lambda_grid <- seq(0, 3, 0.1)
alpha_grid <- seq(0, 1, 0.1)
trnCtrl <- trainControl(method = "repeatedCV",
number = 10,
repeats = 5)
srchGrid <- expand.grid(.alpha = alpha_grid, .lambda = lambda_grid)
# Cross validation
my_train <- train(y ~., data,
method = "glmnet",
tuneGrid = srchGrid,
trControl = trnCtrl)
# Best tuning parameters
my_train$bestTune
# Tune parameteres with glmnet only
alphasOfInterest <- seq(0, 1, 0.1)
# Step 1: Do all crossvalidations for each alpha
cvs <- lapply(alphasOfInterest, function(curAlpha) {
cv.glmnet(x = as.matrix(data[ , colnames(data) %in% "y" == FALSE]),
y = y, alpha = curAlpha, family = "gaussian")
})
# Step 2: Collect the optimum lambda for each alpha
optimumPerAlpha <- sapply(seq_along(alphasOfInterest), function(curi) {
curcvs <- cvs[[curi]]
curAlpha <- alphasOfInterest[curi]
indOfMin <- match(curcvs$lambda.min, curcvs$lambda)
c(lam = curcvs$lambda.min, alph = curAlpha, cvup = curcvs$cvup[indOfMin])
})
# Step 3: Find the overall optimum
posOfOptimum <- which.min(optimumPerAlpha["lam", ])
overall.lambda.min <- optimumPerAlpha["lam", posOfOptimum]
overall.alpha.min <- optimumPerAlpha["alph", posOfOptimum]
overall.criterionthreshold <- optimumPerAlpha["cvup", posOfOptimum]
# Step 4: Now check for each alpha which lambda is the best within the threshold
corrected1se <- sapply(seq_along(alphasOfInterest), function(curi) {
curcvs <- cvs[[curi]]
lams <- curcvs$lambda
lams[lams < overall.lambda.min] <- NA
lams[curcvs$cvm > overall.criterionthreshold] <- NA
lam1se<-max(lams, na.rm = TRUE)
c(lam = lam1se, alph = alphasOfInterest[curi])
})
# Step 5: Find the best (lowest) of these lambdas
overall.lambda.1se <- max(corrected1se["lam", ])
pos <- match(overall.lambda.1se, corrected1se["lam", ])
overall.alpha.1se <- corrected1se["alph", pos]
# Comparison --> Parameters are very different
my_train$bestTune # Parameters according to caret
c(overall.alpha.1se, overall.lambda.1se) # Parameters according to glmnet only
好像我做错了什么,但不幸的是我无法弄清楚问题所在。 问题:如何为 R 中的弹性网络调整 alpha 和 lambda?
更新:添加了模拟研究,用于比较caret和手动调整 alpha 和 lambda
根据 Hong Ooi 的建议,我在一个小型模拟研究中比较了这两种调整方法的结果。这两种方法仍然会导致非常不同的最佳参数,并且手动调整的性能略优于插入符号包。这个结果对我来说非常令人惊讶,因为caret与手动编程相比,我本以为该包会产生更好的估计。因此我想知道,如果手动调整实际上优于caret软件包,或者我是否犯了任何错误。非常欢迎任何建议!
##### Small simulation #####
alpha_caret <- numeric()
lambda_caret <- numeric()
MSE_caret <- numeric()
alpha_without_caret <- numeric()
lambda_without_caret <- numeric()
MSE_without_caret <- numeric()
R <- 20 # Simulation runs
for(r in 1:R) {
##### Tune parameteres with caret and glmnet #####
# Set up grid and cross validation method for train function
lambda_grid <- seq(0, 3, 0.1)
alpha_grid <- seq(0, 1, 0.1)
trnCtrl <- trainControl(method = "repeatedCV",
number = 10,
repeats = 5)
srchGrid <- expand.grid(.alpha = alpha_grid, .lambda = lambda_grid)
# Cross validation
my_train <- train(y ~., data,
method = "glmnet",
tuneGrid = srchGrid,
trControl = trnCtrl)
# Best parameters
alpha_caret[r] <- as.numeric(my_train$bestTune[1]) # alpha according to caret
lambda_caret[r] <- as.numeric(my_train$bestTune[2]) # lambda according to caret
# Elastic net with best parameters
mod_elnet <- glmnet(x = as.matrix(data[colnames(data) %in% "y" == FALSE]),
y = data$y, alpha = alpha_caret[r], family = "gaussian", lambda = lambda_caret[r])
# Estimation of lm with the variables that have been selected in the elastic net
vars_elnet <- names(mod_elnet$beta[ , 1])[as.numeric(mod_elnet$beta[ , 1]) != 0]
mod_elnet_lm <- lm(y ~ ., data[ , colnames(data) %in% c(vars_elnet, "y")])
# MSE
MSE_caret[r] <- mean(mod_elnet_lm$residuals^2)
##### Tune parameteres with glmnet only #####
alphasOfInterest <- seq(0, 1, 0.1)
# Step 1: Do all crossvalidations for each alpha
cvs <- lapply(alphasOfInterest, function(curAlpha) {
cv.glmnet(x = as.matrix(data[ , colnames(data) %in% "y" == FALSE]),
y = y, alpha = curAlpha, family = "gaussian")
})
# Step 2: Collect the optimum lambda for each alpha
optimumPerAlpha <- sapply(seq_along(alphasOfInterest), function(curi) {
curcvs <- cvs[[curi]]
curAlpha <- alphasOfInterest[curi]
indOfMin <- match(curcvs$lambda.min, curcvs$lambda)
c(lam = curcvs$lambda.min, alph = curAlpha, cvup = curcvs$cvup[indOfMin])
})
# Step 3: Find the overall optimum
posOfOptimum <- which.min(optimumPerAlpha["lam", ])
overall.lambda.min <- optimumPerAlpha["lam", posOfOptimum]
overall.alpha.min <- optimumPerAlpha["alph", posOfOptimum]
overall.criterionthreshold <- optimumPerAlpha["cvup", posOfOptimum]
# Step 4: Now check for each alpha which lambda is the best within the threshold
corrected1se <- sapply(seq_along(alphasOfInterest), function(curi) {
curcvs <- cvs[[curi]]
lams <- curcvs$lambda
lams[lams < overall.lambda.min] <- NA
lams[curcvs$cvm > overall.criterionthreshold] <- NA
lam1se<-max(lams, na.rm = TRUE)
c(lam = lam1se, alph = alphasOfInterest[curi])
})
# Step 5: Find the best (lowest) of these lambdas
overall.lambda.1se <- max(corrected1se["lam", ])
pos <- match(overall.lambda.1se, corrected1se["lam", ])
overall.alpha.1se <- corrected1se["alph", pos]
# Best parameters
alpha_without_caret[r] <- as.numeric(overall.alpha.1se) # alpha according to glmnet only
lambda_without_caret[r] <- as.numeric(overall.lambda.1se) # lambda according to glmnet only
# Elastic net with best parameters
mod_elnet_wc <- glmnet(x = as.matrix(data[colnames(data) %in% "y" == FALSE]),
y = data$y, alpha = alpha_without_caret[r], family = "gaussian", lambda = lambda_without_caret[r])
# Estimation of lm with the variables that have been selected in the elastic net
vars_elnet_wc <- names(mod_elnet_wc$beta[ , 1])[as.numeric(mod_elnet_wc$beta[ , 1]) != 0]
mod_elnet_wc_lm <- lm(y ~ ., data[ , colnames(data) %in% c(vars_elnet_wc, "y")])
# MSE
MSE_without_caret[r] <- mean(mod_elnet_wc_lm$residuals^2)
}
# Compare results
data.frame(alpha_caret, lambda_caret, MSE_caret, alpha_without_caret, lambda_without_caret, MSE_without_caret)
mean(MSE_caret)
mean(MSE_without_caret) # Better results
结果如下所示:
alpha_caret lambda_caret MSE_caret alpha_without_caret lambda_without_caret MSE_without_caret
1 0.9 0.2 40.28436 0.0 1.850340 40.14838
2 1.0 0.2 40.28436 0.4 1.228666 40.48928
3 1.0 0.2 40.28436 0.0 1.850340 40.14838
4 1.0 0.2 40.28436 0.2 1.693744 40.23916
5 1.0 0.2 40.28436 0.0 2.030746 40.14838
6 1.0 0.2 40.28436 0.2 1.858882 40.36684
7 1.0 0.2 40.28436 0.0 2.684526 40.14838
8 1.0 0.2 40.28436 0.1 2.127441 40.16517
9 0.7 0.1 40.16302 0.1 1.766239 40.16011
10 1.0 0.2 40.28436 0.1 2.127441 40.16517
11 0.7 0.1 40.16302 0.0 1.536185 40.14838
12 1.0 0.2 40.28436 0.2 2.239030 40.36684
13 1.0 0.2 40.28436 0.1 1.938445 40.16011
14 1.0 0.2 40.28436 0.1 2.127441 40.16517
15 1.0 0.2 40.28436 0.1 2.334864 40.16517
16 0.9 0.2 40.28436 0.1 2.127441 40.16517
17 0.8 0.1 40.16302 0.2 1.543276 40.22040
18 1.0 0.2 40.28436 0.1 2.562510 40.22040
19 1.0 0.2 40.28436 0.0 2.946264 40.14838
20 1.0 0.2 40.28436 0.1 1.938445 40.16011
手工编程的 MSE 更像是基于caret包的 MSE。估计的最佳 alpha 和 lambda 非常不同。
问题:为什么这两种方法会导致对 alpha 和 lambda 的估计如此不同?