这需要相当多的工作,但我最终得到了它。请注意,我使用了来自 github 的开发版本(pymc 2.2grad),而不是 pypi 上可用的旧版本。
此外,这运行得非常缓慢,因为它没有很好地利用 numpy 的数组操作。对于合理大小的数据集,一些智能预处理和对 cutpoints 节点函数的重写可能会解决这个问题。
这是完整的源代码:
from scipy.stats import norm, pearsonr
from pymc import Normal, Lambda, Uniform, Exponential, stochastic, deterministic, observed, MCMC, Matplot
from numpy import mean, std, log
import numpy as np
#Set array dimensions
(I, J, K, M, N) = (5, 20, 3, 4, 1000)
#Set simulation parameters
alpha_star = np.random.normal(0, 1, size=(I,K))
beta_star = np.random.normal(1, 1, size=(I,K))
z_star = np.random.normal(0, 1, size=(J,1))
w_star = np.array([0,1,3])
#Generate data
coder = np.random.randint(I, size=(N))
doc = np.random.randint(J, size=(N))
item = np.random.randint(K, size=(N))
code = np.zeros(shape=(N))
for n in range(N):
i, j, k = coder[n], doc[n], item[n]
m = alpha_star[i,k] + beta_star[i,k] * z_star[j] + np.random.normal(0, 1)
code[n] = 1+sum(m > w_star)
# print "\t".join([str(x) for x in [i, j, k, m, code[n]]])
#Set GLM parameters
alpha = Normal('alpha', mu=0.0, tau=0.01, value=np.zeros(I*K))
beta = Normal('beta', mu=1.0, tau=0.01, value=np.ones(I*K))
z = Normal('z', mu=0.0, tau=0.01, value=np.random.normal(0,1,J))
w = Exponential('w', .1, value=np.ones(M-3))
#Link functions
mu = Lambda('mu', lambda alpha=alpha, beta=beta, z=z, i=coder, j=doc, k=item: alpha[i+I*k]+beta[i+I*k]*z[j])
@deterministic(plot=False)
def cutpoints(w=w):
w2 = [-np.inf, 0.0, 1.0]
v = 1
for i in w:
v += i
w2.append(v)
w2.append(np.inf)
cp = np.array( w2 )
return cp
@stochastic(dtype=int, observed=True)
def y(value=code, mu=mu, cp=cutpoints):
def logp(value, mu, cp):
d = norm.cdf(cp[value]-mu)-norm.cdf(cp[value-1]-mu)
lp = sum(log(d))
return lp
#Run chain
M = MCMC([alpha, beta, z, mu, w, cutpoints, y])
M.isample(10000, 5000, thin=5, verbose=0)
#Summarize results
Matplot.summary_plot([alpha], name="alpha", path="./graphs/")
Matplot.summary_plot([beta], name="beta", path="./graphs/")
Matplot.summary_plot([z], name="z", path="./graphs/")
Matplot.summary_plot([w], name="w", path="./graphs/")
print pearsonr( alpha_star.transpose().reshape((I*K,)), alpha.stats()['mean'])
print pearsonr( beta_star.transpose().reshape((I*K,)), beta.stats()['mean'])
print pearsonr( z_star.reshape((J,)), z.stats()['mean'])