机器算法验证 - 具有多重估算数据的 lmer - 吾爱随笔录

具有多重估算数据的 lmer

机器算法验证 r lme4-nlme 多重插补

2022-02-01 14:02:52

多重插补后如何获得 lmer 的汇总随机效应？

我正在使用鼠标对数据框进行多重插补。lme4 用于具有随机截距和随机斜率的混合模型。池化 lmer 很好，除了它不会汇集随机效应。我已经搜索了很多没有任何运气的解决方案。我尝试了 mi 包，但是我只看到估计和 std.error 的合并输出。我试过将鼠标对象导出到spss，但没有任何运气。我看到一些关于 Zelig 的讨论。我认为这可能会解决我的问题。但是，我无法弄清楚如何将包与 lmer 的估算数据一起使用。

我知道 mouse 包只支持合并固定效果。有解决办法吗？

多重插补：

library(mice)
Data <- subset(Data0, select=c(id, faculty, gender, age, age_sqr, occupation, degree, private_sector, overtime, wage))
ini <- mice(Data, maxit=0, pri=F) #get predictor matrix
pred <- ini$pred
    pred[,"id"] <- 0 #don't use id as predictor
    meth <- ini$meth
meth[c("id", "faculty", "gender", "age", "age_sqr", "occupation", "degree", "private_sector", "overtime", "wage")] <- "" #don't impute these variables, use only as predictors.
imp <- mice(Data, m=22, maxit=10, printFlag=TRUE, pred=pred, meth=meth) #impute Data with 22 imputations and 10 iterations.

多级模型：

library(lme4)
    fm1 <- with(imp, lmer(log(wage) ~ gender + age + age_sqr + occupation + degree + private_sector + overtime + (1+gender|faculty))) #my multilevel model
    summary(est <- pool(fm1)) #pool my results

合并 lmer 的更新 结果：

> summary(est <- pool(fm1))
                                est           se            t       df     Pr(>|t|)         lo 95         hi 95 nmis       fmi    lambda
(Intercept)   7,635148e+00 0,1749178710 43,649905006 212,5553 0,000000e+00  7,2903525425  7,9799443672   NA 0,2632782 0,2563786
Gender        -1,094186e-01 0,0286629154 -3,817427078 117,1059 2,171066e-04 -0,1661834550 -0,0526537238   NA 0,3846276 0,3742069
Occupation1   1,125022e-01 0,0250082538  4,498601518 157,6557 1,320753e-05  0,0631077322  0,1618966049   NA 0,3207350 0,3121722
Occupation2   2,753089e-02 0,0176032487  1,563966385 215,6197 1,192919e-01 -0,0071655902  0,0622273689   NA 0,2606725 0,2538465
Occupation3   1,881908e-04 0,0221992053  0,008477365 235,3705 9,932433e-01 -0,0435463305  0,0439227120   NA 0,2449795 0,2385910
Age           1,131147e-02 0,0087366178  1,294719230 187,0021 1,970135e-01 -0,0059235288  0,0285464629    0 0,2871640 0,2795807
Age_sqr       -7,790476e-05 0,0001033263 -0,753968159 185,4630 4,518245e-01 -0,0002817508  0,0001259413    0 0,2887420 0,2811131
Overtime      -2,376501e-03 0,0004065466 -5,845581504 243,3563 1,614693e-08 -0,0031773002 -0,0015757019    9 0,2391179 0,2328903
Private_sector  8,322438e-02 0,0203047665  4,098760934 371,9971 5,102752e-05  0,0432978716  0,1231508962   NA 0,1688478 0,1643912

缺少此信息，我在没有多重插补的情况下运行 lmer 时得到：

Random effects:
 Groups   Name        Variance Std.Dev. Corr
 Faculty  (Intercept) 0,008383 0,09156      
          Genderfemale0,002240 0,04732  1,00
 Residual             0,041845 0,20456      
Number of obs: 698, groups:  Faculty, 17

2个回答

如果利用 R 中的功能和多重插补包lapply返回的列表结构，您可以手动执行此操作。Amelia这是一个快速示例脚本。

library(Amelia)
library(lme4)
library(merTools)
library(plyr) # for collapsing estimates

Amelia类似于，mice所以你可以在mice这里的调用中替换你的变量——这个例子来自我正在处理的一个项目。

 a.out <- amelia(dat[sub1, varIndex], idvars = "SCH_ID", 
            noms = varIndex[!varIndex %in% c("SCH_ID", "math12")], 
            m = 10)

a.out是插补对象，现在我们需要在每个插补数据集上运行模型。为此，我们使用lapplyR 中的函数在列表元素上重复一个函数。此函数将函数（模型规范）应用于列表中的每个数据集 (d)，并在模型列表中返回结果。

 mods <- lapply(a.out$imputations,
           function(d) lmer((log(wage) ~ gender + age + age_sqr + 
            occupation + degree + private_sector + overtime + 
             (1+gender|faculty), data = d)

现在我们从该列表中创建一个 data.frame，通过使用 merTools 包中的函数 FEsim 和 REsim 模拟固定和随机效应的值

imputeFEs <- ldply(mods, FEsim, nsims = 1000)
imputeREs <- ldply(mods, REsim, nsims = 1000)

上面的 data.frames 包括对每个数据集的单独估计，现在我们需要使用像参数折叠这样的折叠来组合它们

imputeREs <- ddply(imputeREs, .(X1, X2), summarize, mean = mean(mean), 
               median = mean(median), sd = mean(sd), 
               level = level[1])

imputeFEs <- ddply(imputeFEs, .(var), summarize, meanEff = mean(meanEff), 
               medEff = mean(medEff), sdEff = mean(sdEff))

现在我们还可以提取一些关于插补值的随机效应的方差/协方差的统计数据。在这里，我编写了两个简单的提取器函数来执行此操作。

REsdExtract <- function(model){
  out <- unlist(lapply(VarCorr(model), attr, "stddev"))
  return(out)
}

REcorrExtract <- function(model){
  out <- unlist(lapply(VarCorr(model), attr, "corre"))
  return(min(unique(out)))
}

现在我们可以将它们应用于模型并将它们存储为向量：

modStats <- cbind(ldply(mods, REsdExtract), ldply(mods, REcorrExtract))

更新

下面的函数将使您更接近通过对或对象arm::display列表进行操作所提供的输出。希望这将在不久的将来合并到包中：lmerglmermerTools

# Functions to extract standard deviation of random effects from model
REsdExtract <- function(model){
  out <- unlist(lapply(VarCorr(model), attr, "stddev"))
  return(out)
}

#slope intercept correlation from model
REcorrExtract <- function(model){
  out <- unlist(lapply(VarCorr(model), attr, "corre"))
  return(min(unique(out)))
}

modelRandEffStats <- function(modList){
  SDs <- ldply(modList, REsdExtract)
  corrs <- ldply(modList, REcorrExtract)
  tmp <- cbind(SDs, corrs)
  names(tmp) <- c("Imp", "Int", "Slope", "id", "Corr")
  out <- data.frame(IntSD_mean = mean(tmp$Int), 
                        SlopeSD_mean = mean(tmp$Slope), 
                    Corr_mean = mean(tmp$Corr), 
                        IntSD_sd = sd(tmp$Int),
                    SlopeSD_sd = sd(tmp$Slope), 
                        Corr_sd = sd(tmp$Corr))
  return(out)
}

modelFixedEff <- function(modList){
  require(broom)
  fixEst <- ldply(modList, tidy, effects = "fixed")
  # Collapse
  out <- ddply(fixEst, .(term), summarize,
               estimate = mean(estimate), 
               std.error = mean(std.error))
  out$statistic <- out$estimate / out$std.error
  return(out)
}

print.merModList <- function(modList, digits = 3){
  len <- length(modList)
  form <- modList[[1]]@call
  print(form)
  cat("\nFixed Effects:\n")
  fedat <- modelFixedEff(modList)
  dimnames(fedat)[[1]] <- fedat$term
  pfround(fedat[-1, -1], digits)
  cat("\nError Terms Random Effect Std. Devs\n")
  cat("and covariances:\n")
  cat("\n")
  ngrps <- length(VarCorr(modmathG[[1]]))
  errorList <- vector(mode = 'list', length = ngrps)
  corrList <- vector(mode = 'list', length = ngrps)
  for(i in 1:ngrps){
    subList <- lapply(modList, function(x) VarCorr(x)[[i]])
    subList <- apply(simplify2array(subList), 1:2, mean)
    errorList[[i]] <- subList
    subList <- lapply(modList, function(x) attr(VarCorr(x)[[i]], "corre"))
    subList <- min(unique(apply(simplify2array(subList), 1:2, function(x) mean(x))))
    corrList[[i]] <- subList
  }
  errorList <- lapply(errorList, function(x) {
    diag(x) <- sqrt(diag(x))
    return(x)
    })

  lapply(errorList, pfround, digits)
  cat("\nError Term Correlations:\n")
  lapply(corrList, pfround, digits)
  residError <- mean(unlist(lapply(modList, function(x) attr(VarCorr(x), "sc"))))
  cat("\nResidual Error =", fround(residError,
                                             digits), "\n")
  cat("\n---Groups\n")
  ngrps <- lapply(modList[[1]]@flist, function(x) length(levels(x)))
  modn <- getME(modList[[1]], "devcomp")$dims["n"]
  cat(sprintf("number of obs: %d, groups: ", modn))
  cat(paste(paste(names(ngrps), ngrps, sep = ", "),
            collapse = "; "))
  cat("\n")
  cat("\nModel Fit Stats")
  mAIC <- mean(unlist(lapply(modList, AIC)))
  cat(sprintf("\nAIC = %g", round(mAIC, 1)))
  moDsigma.hat <- mean(unlist(lapply(modmathG, sigma)))
  cat("\nOverdispersion parameter =", fround(moDsigma.hat,
                                             digits), "\n")
}

您还可以在使用鼠标进行插补后使用 testEstimates 函数 testEstimates(as.mitml.result(fm1), var.comp = T)$var.comp

其它你可能感兴趣的问题

上一篇在居中和缩放之前或之后对缺失数据进行插补？下一篇R 的引导包中 cv.glm 的成本函数是什么？