这是一种可能性,其中模型被修改为 (1) 以明确概率,以及 (2) 发生在离散时间。
下面的代码解释了修改后的模型,对其进行了仿真,然后使用 MLE 来恢复参数(在这个玩具示例中知道其真实值,因为我们模拟了数据)。小心:我的测试版不会完全等同于您的测试版——请参阅下面评论中的“故事”。
library(ggplot2)
library(reshape2)
## S(t) susceptible, I(t) infected, R(t) recovered at time t
## Probabilistic model in discrete time:
## S(t+1) = S(t) - DeltaS(t)
## I(t+1) = I(t) + DeltaS(t) - DeltaR(t)
## R(t+1) = R(t) + DeltaR(t)
## DeltaR(t) ~ Binomial(I(t), gamma) >= 0
## DeltaS(t) ~ Binomial(S(t), 1 - (1 - beta)^I(t)) >= 0
## Story: each infected has probability gamma of recovering during the period;
## before recoveries are realized, each susceptible interacts with each infected;
## each interaction leads to infection with probability beta;
## susceptible becomes infected if >= 1 of her interactions leads to infection
simulate <- function(T=100, S1=100, I1=10, R1=0, beta=0.005, gamma=0.10) {
stopifnot(T > 0)
stopifnot(beta >= 0 && beta <= 1)
stopifnot(gamma >= 0 && gamma <= 1)
total_pop <- S1 + I1 + R1
df <- data.frame(t=seq_len(T))
df[, c("S", "I", "R")] <- NA
for(t in seq_len(T)) {
if(t == 1) {
df$S[t] <- S1
df$I[t] <- I1
df$R[t] <- R1
next
}
DeltaS <- rbinom(n=1, size=df$S[t-1], prob=1 - (1-beta)^df$I[t-1])
DeltaR <- rbinom(n=1, size=df$I[t-1], prob=gamma)
df$S[t] <- df$S[t-1] - DeltaS
df$I[t] <- df$I[t-1] + DeltaS - DeltaR
df$R[t] <- df$R[t-1] + DeltaR
stopifnot(df$S[t] + df$I[t] + df$R[t] == total_pop) # Sanity check
}
return(df)
}
inverse_logit <- function(x) {
p <- exp(x) / (1 + exp(x)) # Maps R to [0, 1]
return(p)
}
curve(inverse_logit, -10, 10) # Sanity check
loglik <- function(logit_beta_gamma, df) {
stopifnot(length(logit_beta_gamma) == 2)
beta <- inverse_logit(logit_beta_gamma[1])
gamma <- inverse_logit(logit_beta_gamma[2])
dS <- -diff(df$S)
dR <- diff(df$R)
n <- nrow(df)
pr_dS <- 1 - (1-beta)^df$I[seq_len(n-1)] # Careful, problematic if 1 or 0
return(sum(dbinom(dS, size=df$S[seq_len(n-1)], prob=pr_dS, log=TRUE) +
dbinom(dR, size=df$I[seq_len(n-1)], prob=gamma, log=TRUE)))
}
get_estimates <- function() {
df <- simulate()
mle <- optim(par=c(-4, 0), fn=loglik, control=list(fnscale=-1), df=df)
beta_gamma_hat <- inverse_logit(mle$par)
names(beta_gamma_hat) <- c("beta", "gamma")
return(beta_gamma_hat)
}
set.seed(54321999)
df <- simulate()
df_melted <- melt(df, id.vars="t")
p <- (ggplot(df_melted, aes(x=t, y=value, color=variable)) +
geom_line(size=1.1) + theme_bw() +
xlab("time") +
theme(legend.key=element_blank()) +
theme(panel.border=element_blank()))
p
## Sampling distribution of beta_gamma_hat
estimates <- replicate(100, get_estimates())
df_estimates <- as.data.frame(t(estimates))
summary(df_estimates) # Looks reasonable given true values of (0.005, 0.10)
让我知道是否有任何不言自明的地方。
免责声明:我没有研究过 SIR 模型,除了几年前在大学课堂上非常简短的一次。我在上面模拟和估计的模型并不完全是您在问题中所述的经典微分方程 SIR 模型。另外我今天感觉有点发烧,所以检查代码是否有错误!