来自非线性混合效应模型的 10% 误报:为什么?

机器算法验证 假设检验 混合模式 群体差异 lme4-nlme
2022-03-08 05:01:07

我进行了一项模拟研究,以使用 lme4 包中的 nlmer 估计非线性混合效应模型中组效应测试的 I 类错误率。结果显示有 8-10% 的假阳性。

我想知道我的模拟代码或模型规范是否有问题?为什么它表现出反保守主义?有哪些可能的补救措施?

提前致谢。

编码 :

list.of.packages <- c("lme4")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
require(lme4)


# start of simulation

# "real" global equation (for data simulation) : Y ~ a * X^b + X2^c + residuals         # in case of 2 groups

# true parameters
a <- 10
a_var <- .9
b <- 0.3
b_var <- .05
c <- .22
c_var <- .01


N <- 2  # number of groups

a <- rep(a[1],N)
a_var <- rep(a_var[1],N)
b <- rep(b[1],N)
b_var <- rep(b_var[1],N)
c <- rep(c[1],N)
c_var <- rep(c_var[1],N)


n <- 4  # number of experimental units per group



### nlmer


## a. Define full model (2 groups)
nlmer_Model <- function(X,X2, a,a2, b,b2, c,c2, gp2){
    (
    a + a2*gp2 
    )* X^(
    b + b2*gp2 
    )* X2^(
    c + c2*gp2 
    )
}

## b. Use deriv() to construct function:
nlmer_ModelGradient <- deriv(
    body(nlmer_Model)[[2]],
    namevec=paste(rep(c("a","b","c"),each=N), c("",2:N), sep=""),
    function.arg=nlmer_Model
)


## a. Define null model
nlmer_Model_null <- function(X,X2,a,b,c){
    (a)*X^(b)*X2^c
}

## b. Use deriv() to construct function:
nlmer_ModelGradient_null <- deriv(
    body(nlmer_Model_null)[[2]],
    namevec=c("a","b","c"),
    function.arg=nlmer_Model_null
)

recpnlmer <- c()    # anti-conQservatif (10% des p-values < 0.05 sous H0) mais peut-être plus de puissance pour détecter un effet s'il existe

for(k in 1:1000){       # répéter 10000 fois la simulation
    print(k)


    Xij <- rep(c(1:5),3)
    X2ij <- rep(c(4,40,400),each=5)

    Xijlvl <- 1:5
    X2ijlvl <- c(4,40,400)

    Xij <- rep(Xijlvl,length(X2ijlvl))
    X2ij <- rep(X2ijlvl,each=length(Xijlvl))

    plot.new()
    par(usr=c(-0,6,-.5,100))
    axis(1)
    axis(2)
    grid()
    title(main = paste(k,"ième simulation"))

    group <- c()    # traitement
    indiv <- c()    # parcelle
    X <- c()    # Cp
    X2 <- c()   # temps
    Y <- c()    # variable réponse

    for(i in 1:N){

        for(j in 1:n){


            Yij <- (a[i]+rnorm(1,0,a_var[i])) * Xij^(b[i]+rnorm(1,0,b_var[i])) * X2ij^(c[i]+rnorm(1,0,c_var[i])) + rnorm(length(c(Xij)),0,3)    # addition of individual's variability on each parameter and residual variability

            for(m in 1:length(X2ij)){
                lines(x=Xij[1:5],y=Yij[1:5],col=i)
                lines(x=Xij[6:10],y=Yij[6:10],col=i)
                lines(x=Xij[11:15],y=Yij[11:15],col=i)
            }

            group <- c(group,rep(i,length(Xij)))

            ji <- j+(i-1)*n
            indiv <- c(indiv,rep(ji,length(Xij)))
            X  <- c(X,Xij )
            X2 <- c(X2,X2ij)
            Y  <- c(Y,Yij )

        }

    }

    legend(x="topleft",legend=c(paste("group",c(1:N))), lty="solid", col=1:N)

    simdat <- data.frame(indiv=indiv,group=group,X=X,X2=X2,Y=Y)

    simdat$indivfactor <- as.factor(as.character(simdat$indiv))
    simdat$groupfactor <- as.factor(as.character(simdat$group))

    simdat[,paste(rep("gp",(N-1)),2:N,sep="")] <- dummy(simdat$groupfactor)


    nlmer_fit <- nlmer(
        Y # Response
        ~ nlmer_ModelGradient(X=X,X2=X2, a,a2, b,b2, c,c2, gp2=gp2) # Fixed effects
        ~ (a | indivfactor) + (b | indivfactor) + (c | indivfactor) + (a2 | indivfactor) + (b2 | indivfactor) + (c2 | indivfactor), # Random effects
        data = simdat, # Data
        start = c(a=10,a2=0,b=.3,b2=0,c=.2,c2=0)
        ,REML = FALSE   # FALSE nécessaire quand on veut comparer deux modèles mixtes
    )

    nlmer_fit_null <- nlmer(
        Y # Response
        ~ nlmer_ModelGradient_null(X=X, X2=X2, a,b,c) # Fixed effects
        ~ (a | indivfactor) + (b | indivfactor) + (c | indivfactor), # Random effects
        data = simdat, # Data
        start = c(a=10,b=.3,c=.2)
        ,REML = FALSE # FALSE nécessaire quand on veut comparer deux modèles mixtes

    recpnlmer <- c(recpnlmer,anova(nlmer_fit_null,nlmer_fit)$'Pr(>Chisq)'[2])

}

hist(recpnlmer, n=20, main = "p-value distribution:\nnlmer", xlab="p-value")


# probability of rejecting H0 at alpha = 5%
100*length(which(recpnlmer<0.05))/length(recpnlmer)
0个回答
没有发现任何回复~