当我运行以下代码时:
from scipy.stats import anderson_ksamp
a = [-1.8, -2.4, -2.4, -0.0, -1.5, -2.7, -1.8, -3.0, -1.8, -1.2, -3.0, -3.0, -2.8, -3.0, -2.1, -0.0, 0.6, -2.5, -2.4, -0.0, -2.7, -0.0, -2.5, -2.1, -0.9, -3.0, -0.6, -0.6, -1.5, -2.2, -1.2, -2.4, -2.4, -3.0, 1.5, -1.8, 1.5, -2.7, -3.0, -2.5, -2.5, -1.5, -1.5, -2.1, -2.1, -3.0, -0.6, -2.7, -3.0, -1.5, -0.6, -0.0, -2.1, 0.6, -2.0, -3.0, -3.0, -2.4, -3.0, -1.8, -0.0, -0.0, -3.0, -1.5, -3.0, -3.0, -1.5, -2.5, -3.0, -2.8, -3.0, -2.2, -0.6, -0.0, -1.5, -2.7, -2.1, -2.1, -2.2, -2.1, -0.6, -0.0, -2.5, -2.1, -1.5, -3.0, -2.2, -1.8, -2.7, -2.4, -1.5, -2.1, -2.9, -2.4, -0.9, -0.0, -0.0, -2.4, -2.7, -0.6, -2.2, -3.0, -1.5, -0.9, -3.0, -3.0, -0.0, -2.7, -2.7, -1.5, -2.2, -3.0, -0.0, 1.5, -3.0, -2.7, -2.2, -2.9, -2.2, -3.0, -1.8, -0.0, -3.0, -1.5, -2.7, -3.0, -3.0, -2.9, -3.0, -3.0, -3.0, -3.0, -2.5, -0.0, 0.9, -3.0, -0.0, -3.0, 3.0, -3.0, -3.0, -1.2, -2.1, 1.5, -0.0, -0.9, -3.0, -2.7, -1.5, -2.4, -2.1, -3.0, -0.9, -3.0, -0.8, -1.5, -2.1, -2.7, -0.0, -0.0, -2.2, -1.8, -2.1, -2.2, -3.0, 0.6, -2.4, -2.2, -2.4, -2.5, -1.5, -0.0, -2.7, -3.0, -3.0, -2.1, -0.0, -2.4, -2.4, -0.0, -2.0, -0.9, -2.4, -3.0, -1.4, -2.7, -2.7, -3.0, -3.0, -2.7, -1.2, -2.1, -3.0, -0.0, -3.0, -2.7, -2.7, -3.0, -3.0, -2.5, -3.0, -1.8, -1.5, -2.7, -2.4, -1.8, -3.0, -2.7, 2.1, -3.0, -2.2, -2.2, 0.6, -0.9, 6.0, -3.0, -2.1, -3.0, -2.1, -2.5, -3.0, -1.5, -2.5, 3.0, -2.1, -3.0, -3.0, -1.5, -2.1, -2.7, -2.5, 1.5, -2.1, -0.0, -0.0, -3.0, -0.6, 1.5, -2.7, -2.4, -2.1, -3.0, -2.7, -3.0, 9.0, -3.0, -1.7, -3.0, -0.0, -3.0, -2.2, -0.0, -0.6, -2.7, -2.7, -3.0, -3.0, -1.7, -2.1, -2.0, -3.0, -2.1, -3.0, -1.1, -3.0, -3.0, -2.4, -1.5, -3.0, -2.2, -3.0, -1.5, -2.7, -3.0, -3.0, -3.0, -2.2, -3.0, -2.1, -2.1, -2.4, -3.0, -3.0, -0.0, -3.0, -3.0, -2.1, -2.0, 1.5, -3.0, -3.0, -2.1, -2.9, -2.4, -3.0, -3.0, -1.5, -2.2, -0.9, -1.8, -2.1, -1.8, -1.5, -3.0, -1.5, -3.0, -1.5, -3.0, -2.4, 1.5, -2.7, -3.0, -1.8, -1.8, -1.5, -2.1, -2.7, -2.7, -2.7, -3.0, -1.5, -2.7, -3.0, -2.7, -3.0, -1.5, -1.5, -3.0, 0.6, -0.6, -3.0, -2.1, -2.4, -2.1, -3.0, 
-2.2, -3.0, -1.8, -1.2, -3.0, -0.8, -2.4, -2.5, -3.0, -1.5, -1.2, -0.0, -2.7, -2.4, -3.0, -3.0, -2.1, -2.1, -2.1, -3.0, -2.7, -2.4, -2.1, -1.5, -2.1, -0.6, -3.0, -3.0, -3.0, -3.0]
b = [-1.2, 6.0, -3.0, 1.5, 12.0, 1.5, 3.0, -0.0, -1.5, -0.0, 6.0, -0.0, 1.8, -1.5, -3.0, -3.0, 1.5, -0.0, 1.5, -3.0, -3.0, -0.0, -1.2, -3.0, 22.5, -0.0, -3.0, -3.0, -2.1, 3.0, 2.4, 1.5, -2.1, 4.5, -3.0, -3.0, 12.0, 6.0, -3.0, 3.0, -3.0, 12.0, -2.8, 0.6, 4.5, 3.0, -0.0, -3.0, -1.0, -3.0, -0.0, 3.0, 2.1, -0.6, -3.0, -3.0, 1.5, 1.5, -0.6, -0.0, 1.5, 6.0, -2.2, -2.1, -0.0, -0.8, 6.0, -3.0, -3.0, 9.0, -3.0, -0.9, 15.0, 1.5, -2.9, 19.5, 4.5, -3.0, 1.5, -1.8, 1.5, 0.9, -3.0, -3.0, 9.0, -0.3, 9.0, -2.1, -2.2, -3.0, 12.0, -2.1, -1.2, -3.0, 4.5, -1.2, -0.0, 12.0, -0.9, -3.0, -0.6, -3.0, -1.5, -0.0, 27.0, 4.5, -2.4, -2.7, -0.0, 3.0, 1.5, 3.0, -3.0, 16.5, -2.1, -0.6, -3.0, -1.5, -1.2, -3.0, 4.4, -2.5, -2.1, 3.0, -1.2, -3.0, 12.0, -1.8, -0.9, -3.0, -3.0, -0.0, -3.0, -1.2, -3.0, -3.0, -0.0, 1.5, -3.0, 7.5, -3.0, -2.1, 3.0, 1.5, 3.0, -3.0, 1.5, -3.0, 19.5, -3.0, -2.2, 27.0, 3.0, -1.5, -3.0, -3.0, 4.5, -1.2, 12.0, 3.0, 3.0, -2.1, 6.0, -2.2, -3.0, -2.4, 6.0, -3.0, -1.9, -0.6, -0.0, 3.6, 15.0, -3.0, -3.0, 7.5, -0.0, 4.5, -2.4, -3.0, -3.0, -3.0, -2.0, -3.0, -3.0, -3.0, -2.6, 3.0, -0.0, -3.0, 4.5, -1.2, -3.0, -3.0, 3.0, -3.0, 1.6, 1.5, -3.0, -3.0, -3.0, -3.0, 3.0, -3.0, -1.5, -3.0, -1.5, 12.0, -1.5, 3.0, 9.6, -0.0, -1.8, -3.0, -2.1, -0.6, 9.0, -3.0, 19.5, -2.4, -1.8, 15.0, -3.0, -3.0, 27.0, 7.5, 12.0, -3.0, -2.0, -3.0, 12.0, -3.0, -3.0, 0.9, -3.0, -2.5, 12.6, -1.5, -0.6, -3.0, -3.0, -0.0, -0.8, -3.0, 3.0, -0.0, 4.5, -3.0, -0.0, 0.6, 0.3, -2.7, -0.0, 4.5, -3.0, 3.0, -1.5, 9.0, -3.0, 1.5, -3.0, 6.0, -2.2, -0.0, -3.0, -3.0, 3.0, -3.0, -2.7, -0.0, -3.0, 0.6, 4.5, 1.5, 1.5, 1.5, -2.1, 7.5, -3.0, -3.0, -3.0, -2.1, -3.0, 6.0, -1.5, -2.0, -3.0, -0.6, 6.0, -1.5, -3.0, 6.0, -3.0, 3.0, 3.0, -3.0, 7.5, -3.0, -0.0, -3.0, -0.0, -3.0, -3.0, 0.6, -3.0, -3.0, -3.0, -3.0, -2.4, -0.0, 1.5, 3.0, 2.1, -3.0, 3.0, 4.5, -3.0, -2.4, -3.0, -2.2, 7.5, 2.1, -3.0, -0.0, -2.0, -3.0, -3.0, -1.8, -3.0, 4.5, -1.5, -3.0, 4.5, 3.0, 15.0, 4.5, -2.1, 12.0, 6.0, 4.5, 27.0, -3.0, 3.0, 0.6, -3.0, 0.6, -2.4, 4.5, -1.5, -2.2, 
12.0, -2.0, 1.5, 9.0, -1.5, -1.2, -2.0, 1.5, -1.2, -1.8, -3.0, -0.0, -0.0, -3.0, -0.9]
# Run the k-sample Anderson-Darling test on the two samples a and b and print the result object.
print(anderson_ksamp([a,b]))
我得到以下输出:
Anderson_ksampResult(
statistic=53.560696338122263,
critical_values=array([ 0.325, 1.226, 1.961, 2.718, 3.752]),
significance_level=7238105.535194747
)
这里的“显著性水平”(significance_level)远大于 1。这是为什么呢?
我主要想知道这是 scipy 实现本身的问题,还是 Anderson-Darling 检验的数值实现所固有的问题(因为我不了解实际计算 p 值的细节)。我也希望了解如何解决这个错误。
我知道已经有一个类似的问题,但在那个问题中,数据来自相同的分布,结果看起来只是外推带来的小误差(scipy.stats.ks_2samp 给出的 p 值为 0.962305,而 anderson_ksamp 给出了 1.075969)。而对于上面的数据,我们可以运行
from scipy.stats import ks_2samp
from statsmodels.distributions.empirical_distribution import ECDF
import matplotlib.pyplot as plt
import numpy as np
a = [-1.8, -2.4, -2.4, -0.0, -1.5, -2.7, -1.8, -3.0, -1.8, -1.2, -3.0, -3.0, -2.8, -3.0, -2.1, -0.0, 0.6, -2.5, -2.4, -0.0, -2.7, -0.0, -2.5, -2.1, -0.9, -3.0, -0.6, -0.6, -1.5, -2.2, -1.2, -2.4, -2.4, -3.0, 1.5, -1.8, 1.5, -2.7, -3.0, -2.5, -2.5, -1.5, -1.5, -2.1, -2.1, -3.0, -0.6, -2.7, -3.0, -1.5, -0.6, -0.0, -2.1, 0.6, -2.0, -3.0, -3.0, -2.4, -3.0, -1.8, -0.0, -0.0, -3.0, -1.5, -3.0, -3.0, -1.5, -2.5, -3.0, -2.8, -3.0, -2.2, -0.6, -0.0, -1.5, -2.7, -2.1, -2.1, -2.2, -2.1, -0.6, -0.0, -2.5, -2.1, -1.5, -3.0, -2.2, -1.8, -2.7, -2.4, -1.5, -2.1, -2.9, -2.4, -0.9, -0.0, -0.0, -2.4, -2.7, -0.6, -2.2, -3.0, -1.5, -0.9, -3.0, -3.0, -0.0, -2.7, -2.7, -1.5, -2.2, -3.0, -0.0, 1.5, -3.0, -2.7, -2.2, -2.9, -2.2, -3.0, -1.8, -0.0, -3.0, -1.5, -2.7, -3.0, -3.0, -2.9, -3.0, -3.0, -3.0, -3.0, -2.5, -0.0, 0.9, -3.0, -0.0, -3.0, 3.0, -3.0, -3.0, -1.2, -2.1, 1.5, -0.0, -0.9, -3.0, -2.7, -1.5, -2.4, -2.1, -3.0, -0.9, -3.0, -0.8, -1.5, -2.1, -2.7, -0.0, -0.0, -2.2, -1.8, -2.1, -2.2, -3.0, 0.6, -2.4, -2.2, -2.4, -2.5, -1.5, -0.0, -2.7, -3.0, -3.0, -2.1, -0.0, -2.4, -2.4, -0.0, -2.0, -0.9, -2.4, -3.0, -1.4, -2.7, -2.7, -3.0, -3.0, -2.7, -1.2, -2.1, -3.0, -0.0, -3.0, -2.7, -2.7, -3.0, -3.0, -2.5, -3.0, -1.8, -1.5, -2.7, -2.4, -1.8, -3.0, -2.7, 2.1, -3.0, -2.2, -2.2, 0.6, -0.9, 6.0, -3.0, -2.1, -3.0, -2.1, -2.5, -3.0, -1.5, -2.5, 3.0, -2.1, -3.0, -3.0, -1.5, -2.1, -2.7, -2.5, 1.5, -2.1, -0.0, -0.0, -3.0, -0.6, 1.5, -2.7, -2.4, -2.1, -3.0, -2.7, -3.0, 9.0, -3.0, -1.7, -3.0, -0.0, -3.0, -2.2, -0.0, -0.6, -2.7, -2.7, -3.0, -3.0, -1.7, -2.1, -2.0, -3.0, -2.1, -3.0, -1.1, -3.0, -3.0, -2.4, -1.5, -3.0, -2.2, -3.0, -1.5, -2.7, -3.0, -3.0, -3.0, -2.2, -3.0, -2.1, -2.1, -2.4, -3.0, -3.0, -0.0, -3.0, -3.0, -2.1, -2.0, 1.5, -3.0, -3.0, -2.1, -2.9, -2.4, -3.0, -3.0, -1.5, -2.2, -0.9, -1.8, -2.1, -1.8, -1.5, -3.0, -1.5, -3.0, -1.5, -3.0, -2.4, 1.5, -2.7, -3.0, -1.8, -1.8, -1.5, -2.1, -2.7, -2.7, -2.7, -3.0, -1.5, -2.7, -3.0, -2.7, -3.0, -1.5, -1.5, -3.0, 0.6, -0.6, -3.0, -2.1, -2.4, -2.1, -3.0, 
-2.2, -3.0, -1.8, -1.2, -3.0, -0.8, -2.4, -2.5, -3.0, -1.5, -1.2, -0.0, -2.7, -2.4, -3.0, -3.0, -2.1, -2.1, -2.1, -3.0, -2.7, -2.4, -2.1, -1.5, -2.1, -0.6, -3.0, -3.0, -3.0, -3.0]
b = [-1.2, 6.0, -3.0, 1.5, 12.0, 1.5, 3.0, -0.0, -1.5, -0.0, 6.0, -0.0, 1.8, -1.5, -3.0, -3.0, 1.5, -0.0, 1.5, -3.0, -3.0, -0.0, -1.2, -3.0, 22.5, -0.0, -3.0, -3.0, -2.1, 3.0, 2.4, 1.5, -2.1, 4.5, -3.0, -3.0, 12.0, 6.0, -3.0, 3.0, -3.0, 12.0, -2.8, 0.6, 4.5, 3.0, -0.0, -3.0, -1.0, -3.0, -0.0, 3.0, 2.1, -0.6, -3.0, -3.0, 1.5, 1.5, -0.6, -0.0, 1.5, 6.0, -2.2, -2.1, -0.0, -0.8, 6.0, -3.0, -3.0, 9.0, -3.0, -0.9, 15.0, 1.5, -2.9, 19.5, 4.5, -3.0, 1.5, -1.8, 1.5, 0.9, -3.0, -3.0, 9.0, -0.3, 9.0, -2.1, -2.2, -3.0, 12.0, -2.1, -1.2, -3.0, 4.5, -1.2, -0.0, 12.0, -0.9, -3.0, -0.6, -3.0, -1.5, -0.0, 27.0, 4.5, -2.4, -2.7, -0.0, 3.0, 1.5, 3.0, -3.0, 16.5, -2.1, -0.6, -3.0, -1.5, -1.2, -3.0, 4.4, -2.5, -2.1, 3.0, -1.2, -3.0, 12.0, -1.8, -0.9, -3.0, -3.0, -0.0, -3.0, -1.2, -3.0, -3.0, -0.0, 1.5, -3.0, 7.5, -3.0, -2.1, 3.0, 1.5, 3.0, -3.0, 1.5, -3.0, 19.5, -3.0, -2.2, 27.0, 3.0, -1.5, -3.0, -3.0, 4.5, -1.2, 12.0, 3.0, 3.0, -2.1, 6.0, -2.2, -3.0, -2.4, 6.0, -3.0, -1.9, -0.6, -0.0, 3.6, 15.0, -3.0, -3.0, 7.5, -0.0, 4.5, -2.4, -3.0, -3.0, -3.0, -2.0, -3.0, -3.0, -3.0, -2.6, 3.0, -0.0, -3.0, 4.5, -1.2, -3.0, -3.0, 3.0, -3.0, 1.6, 1.5, -3.0, -3.0, -3.0, -3.0, 3.0, -3.0, -1.5, -3.0, -1.5, 12.0, -1.5, 3.0, 9.6, -0.0, -1.8, -3.0, -2.1, -0.6, 9.0, -3.0, 19.5, -2.4, -1.8, 15.0, -3.0, -3.0, 27.0, 7.5, 12.0, -3.0, -2.0, -3.0, 12.0, -3.0, -3.0, 0.9, -3.0, -2.5, 12.6, -1.5, -0.6, -3.0, -3.0, -0.0, -0.8, -3.0, 3.0, -0.0, 4.5, -3.0, -0.0, 0.6, 0.3, -2.7, -0.0, 4.5, -3.0, 3.0, -1.5, 9.0, -3.0, 1.5, -3.0, 6.0, -2.2, -0.0, -3.0, -3.0, 3.0, -3.0, -2.7, -0.0, -3.0, 0.6, 4.5, 1.5, 1.5, 1.5, -2.1, 7.5, -3.0, -3.0, -3.0, -2.1, -3.0, 6.0, -1.5, -2.0, -3.0, -0.6, 6.0, -1.5, -3.0, 6.0, -3.0, 3.0, 3.0, -3.0, 7.5, -3.0, -0.0, -3.0, -0.0, -3.0, -3.0, 0.6, -3.0, -3.0, -3.0, -3.0, -2.4, -0.0, 1.5, 3.0, 2.1, -3.0, 3.0, 4.5, -3.0, -2.4, -3.0, -2.2, 7.5, 2.1, -3.0, -0.0, -2.0, -3.0, -3.0, -1.8, -3.0, 4.5, -1.5, -3.0, 4.5, 3.0, 15.0, 4.5, -2.1, 12.0, 6.0, 4.5, 27.0, -3.0, 3.0, 0.6, -3.0, 0.6, -2.4, 4.5, -1.5, -2.2, 
12.0, -2.0, 1.5, 9.0, -1.5, -1.2, -2.0, 1.5, -1.2, -1.8, -3.0, -0.0, -0.0, -3.0, -0.9]
# Evaluate both empirical CDFs on one evenly spaced grid spanning the pooled data range.
pooled = a + b
grid_lo, grid_hi = min(pooled), max(pooled)
xs = np.linspace(grid_lo, grid_hi, num=10000)
ecdf1 = ECDF(a)
ecdf2 = ECDF(b)

# Draw the two ECDF curves together on a single figure.
plt.figure(figsize=(12, 8))
plt.plot(xs, ecdf1(xs), xs, ecdf2(xs))
plt.show()

# Two-sample Kolmogorov-Smirnov test on the same samples, for comparison.
print(ks_2samp(a, b))
得到 2.6169e-17 的 p 值,并且 ECDF 图显示 a 和 b 所来自的分布差异非常大:
KstestResult(statistic=0.3232876712328767, pvalue=2.8586245432606456e-17)