我们试图证明经过某种处理后在细胞中发生的非常微妙的影响。让我们假设测量值是正态分布的。我们还假设未经处理的细胞有 $\mu = 1$ 和 $\sigma = 0.1$,处理过的细胞有 $\mu = 1.1$ 和 $\sigma = 0.22$。问题是:
样本量必须有多大才能使观察到的效果具有统计显著性($p < 0.05$)?
我知道非常微妙的效果需要比更明显的效果更大的样本量,但是要大多少?我还在学习统计,所以请多包涵。我尝试在 R 中进行一些模拟。假设从正态分布中随机抽取 $n$ 个样本,我试图计算平均 p 值作为样本量 $n$ 的函数。

这是找到正确样本量的正确方法吗?还是我完全偏离了这种方法?
代码:
library(ggplot2)
# Simulation parameters for the two groups being compared:
# means first, then standard deviations, for control and treated cells.
ctrl.mean    <- 1
treated.mean <- 1.1
ctrl.sd    <- 0.1
treated.sd <- 0.22
# Simulate repeated two-sample t-tests on normally distributed data.
#
# Arguments:
#   rpt        - number of simulated experiments (t-tests) to run.
#   n          - number of observations drawn for each of the two groups.
#   mean1, sd1 - mean and standard deviation of the first group.
#   mean2, sd2 - mean and standard deviation of the second group.
#
# Returns a numeric vector holding exactly `rpt` p-values, one per simulated
# t-test (an empty numeric vector when `rpt` is 0).
tsim <- function(rpt, n, mean1, sd1, mean2, sd2) {
  # seq_len(rpt) yields exactly `rpt` iterations; the previous
  # `x <- 0; while (x <= rpt)` loop ran rpt + 1 tests. vapply() also
  # avoids growing the result with c() on every iteration.
  vapply(
    seq_len(rpt),
    function(k) {
      t.test(rnorm(n, mean1, sd1), y = rnorm(n, mean2, sd2))$p.value
    },
    numeric(1)
  )
}
# Iterate through sample sizes and run the simulation at each one.
# Produces `res`, a numeric matrix with one row per sample size holding the
# sample size (column "i"), the mean p-value and the sd of the p-values.
# The last two columns are deliberately left unnamed so that as.data.frame()
# labels them V2 and V3, which the plotting code relies on.
# NOTE(review): a mean p-value is not a power estimate; the share of
# simulations below the threshold, e.g. mean(sim < 0.05), is the usual basis
# for choosing a sample size -- confirm which quantity is wanted.
num <- 50
# Sample sizes 2..num (empty when num < 2).
sizes <- seq_len(max(num - 1, 0)) + 1
# Preallocate instead of growing the result with rbind() on every iteration.
res <- matrix(
  NA_real_,
  nrow = length(sizes), ncol = 3,
  dimnames = list(NULL, c("i", "", ""))
)
for (k in seq_along(sizes)) {
  i <- sizes[k]
  sim <- tsim(1000, i, ctrl.mean, ctrl.sd, treated.mean, treated.sd)
  res[k, ] <- c(i, mean(sim), sd(sim))
}
# Plot the result
# Convert the simulation matrix to a data frame; the plot reads its columns
# i (sample size), V2 (mean p-value) and V3 (sd of the p-values).
res <- as.data.frame(res)

# Significance cut-offs to mark on the plot, keyed by their text labels.
p.cutoffs <- c("p = 0.05" = 0.05, "p = 0.01" = 0.01, "p = 0.001" = 0.001)

# For each cut-off: a dashed red reference line followed by its text label.
cutoff.layers <- do.call(c, lapply(names(p.cutoffs), function(lbl) {
  level <- -log10(p.cutoffs[[lbl]])
  list(
    annotate("segment", x = 6, xend = num, y = level, yend = level,
             colour = "red", linetype = "dashed"),
    annotate("text", x = 0, y = level, label = lbl, hjust = 0, size = 3)
  )
}))

# NOTE(review): the ribbon is centred on -log10(V2) with half-width
# |log10(V3)|, so for V3 < 1 it widens as the sd shrinks -- confirm this is
# the intended uncertainty band.
ggplot(res, aes(x = i, y = -log10(V2))) +
  geom_line() +
  geom_ribbon(
    aes(ymin = -log10(V2) - log10(V3), ymax = -log10(V2) + log10(V3)),
    alpha = 0.2
  ) +
  cutoff.layers +
  xlab("Number of replicates") +
  ylab("-log10(p-value)") +
  theme_bw()
