You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

162 lines
6.5 KiB
Python

from __future__ import division, print_function, absolute_import
import numpy as np
from numpy.testing import (assert_, assert_equal, assert_allclose,
assert_almost_equal) # avoid new uses
from pytest import raises as assert_raises
from scipy.stats._hypotests import (epps_singleton_2samp, cramervonmises,
_cdf_cvm)
from scipy.stats import distributions
from .common_tests import check_named_results
class TestEppsSingleton(object):
def test_statistic_1(self):
# first example in Goerg & Kaiser, also in original paper of
# Epps & Singleton. Note: values do not match exactly, the
# value of the interquartile range varies depending on how
# quantiles are computed
x = np.array([-0.35, 2.55, 1.73, 0.73, 0.35,
2.69, 0.46, -0.94, -0.37, 12.07])
y = np.array([-1.15, -0.15, 2.48, 3.25, 3.71,
4.29, 5.00, 7.74, 8.38, 8.60])
w, p = epps_singleton_2samp(x, y)
assert_almost_equal(w, 15.14, decimal=1)
assert_almost_equal(p, 0.00442, decimal=3)
def test_statistic_2(self):
# second example in Goerg & Kaiser, again not a perfect match
x = np.array((0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 10,
10, 10, 10))
y = np.array((10, 4, 0, 5, 10, 10, 0, 5, 6, 7, 10, 3, 1, 7, 0, 8, 1,
5, 8, 10))
w, p = epps_singleton_2samp(x, y)
assert_allclose(w, 8.900, atol=0.001)
assert_almost_equal(p, 0.06364, decimal=3)
def test_epps_singleton_array_like(self):
np.random.seed(1234)
x, y = np.arange(30), np.arange(28)
w1, p1 = epps_singleton_2samp(list(x), list(y))
w2, p2 = epps_singleton_2samp(tuple(x), tuple(y))
w3, p3 = epps_singleton_2samp(x, y)
assert_(w1 == w2 == w3)
assert_(p1 == p2 == p3)
def test_epps_singleton_size(self):
# raise error if less than 5 elements
x, y = (1, 2, 3, 4), np.arange(10)
assert_raises(ValueError, epps_singleton_2samp, x, y)
def test_epps_singleton_nonfinite(self):
# raise error if there are non-finite values
x, y = (1, 2, 3, 4, 5, np.inf), np.arange(10)
assert_raises(ValueError, epps_singleton_2samp, x, y)
x, y = np.arange(10), (1, 2, 3, 4, 5, np.nan)
assert_raises(ValueError, epps_singleton_2samp, x, y)
def test_epps_singleton_1d_input(self):
x = np.arange(100).reshape(-1, 1)
assert_raises(ValueError, epps_singleton_2samp, x, x)
def test_names(self):
x, y = np.arange(20), np.arange(30)
res = epps_singleton_2samp(x, y)
attributes = ('statistic', 'pvalue')
check_named_results(res, attributes)
class TestCvm(object):
    """Tests for ``_cdf_cvm`` and the one-sample ``cramervonmises`` test."""

    # the expected values of the cdfs are taken from Table 1 in
    # Csorgo / Faraway: The Exact and Asymptotic Distribution of
    # Cramér-von Mises Statistics, 1996.
    def test_cdf_4(self):
        """Tabulated cdf values for a sample size of 4."""
        points = [0.02983, 0.04111, 0.12331, 0.94251]
        expected = [0.01, 0.05, 0.5, 0.999]
        assert_allclose(_cdf_cvm(points, 4), expected, atol=1e-4)

    def test_cdf_10(self):
        """Tabulated cdf values for a sample size of 10."""
        points = [0.02657, 0.03830, 0.12068, 0.56643]
        expected = [0.01, 0.05, 0.5, 0.975]
        assert_allclose(_cdf_cvm(points, 10), expected, atol=1e-4)

    def test_cdf_1000(self):
        """Tabulated cdf values for a sample size of 1000."""
        points = [0.02481, 0.03658, 0.11889, 1.16120]
        expected = [0.01, 0.05, 0.5, 0.999]
        assert_allclose(_cdf_cvm(points, 1000), expected, atol=1e-4)

    def test_cdf_inf(self):
        """Tabulated cdf values when no sample size is passed."""
        points = [0.02480, 0.03656, 0.11888, 1.16204]
        expected = [0.01, 0.05, 0.5, 0.999]
        assert_allclose(_cdf_cvm(points), expected, atol=1e-4)

    def test_cdf_support(self):
        """The cdf is exactly 0 and 1 at (or beyond) the support bounds."""
        # cdf has support on [1/(12*n), n/3]
        at_bounds = [1/(12*533), 533/3]
        assert_equal(_cdf_cvm(at_bounds, 533), [0, 1])
        # points computed for n + 1 lie just outside the support for n = 27
        beyond_bounds = [1/(12*(27 + 1)), (27 + 1)/3]
        assert_equal(_cdf_cvm(beyond_bounds, 27), [0, 1])

    def test_cdf_large_n(self):
        """The finite-sample cdf for n = 10000 matches the asymptotic cdf."""
        # test that asymptotic cdf and cdf for large samples are close
        points = [0.02480, 0.03656, 0.11888, 1.16204, 100]
        finite_sample = _cdf_cvm(points, 10000)
        asymptotic = _cdf_cvm(points)
        assert_allclose(finite_sample, asymptotic, atol=1e-4)

    def test_large_x(self):
        """The cdf stays strictly inside (0.99999, 1) for a large argument."""
        # for large values of x and n, the series used to compute the cdf
        # converges slowly.
        # this leads to bug in R package goftest and MAPLE code that is
        # the basis of the implementation in scipy
        # note: cdf = 1 for x >= 1000/3 and n = 1000
        finite_sample = _cdf_cvm(333.3, 1000)
        assert_(0.99999 < finite_sample < 1.0)
        asymptotic = _cdf_cvm(333.3)
        assert_(0.99999 < asymptotic < 1.0)

    def test_low_p(self):
        """A cdf value above 1 is reported as a p-value of zero."""
        # _cdf_cvm can return values larger than 1. In that case, we just
        # return a p-value of zero.
        n = 12
        constant_sample = np.ones(n)*0.8
        res = cramervonmises(constant_sample, 'norm')
        raw_cdf = _cdf_cvm(res.statistic, n)
        assert_(raw_cdf > 1.0)
        assert_equal(res.pvalue, 0)

    def test_invalid_input(self):
        """2-d, single-element and empty samples raise ``ValueError``."""
        bad_samples = (
            np.arange(10).reshape((2, 5)),
            [1.5],
            (),
        )
        for sample in bad_samples:
            assert_raises(ValueError, cramervonmises, sample, "norm")

    def test_values_R(self):
        """Statistic and p-value agree with reference values from R."""
        # compared against R package goftest, version 1.1.1
        normal_sample = [-1.7, 2, 0, 1.3, 4, 0.1, 0.6]

        # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm")
        res = cramervonmises(normal_sample, "norm")
        assert_allclose(res.statistic, 0.288156, atol=1e-6)
        assert_allclose(res.pvalue, 0.1453465, atol=1e-6)

        # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6),
        #                   "pnorm", mean = 3, sd = 1.5)
        res = cramervonmises(normal_sample, "norm", (3, 1.5))
        assert_allclose(res.statistic, 0.9426685, atol=1e-6)
        assert_allclose(res.pvalue, 0.002026417, atol=1e-6)

        # goftest::cvm.test(c(1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5), "pexp")
        expon_sample = [1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5]
        res = cramervonmises(expon_sample, "expon")
        assert_allclose(res.statistic, 0.8421854, atol=1e-6)
        assert_allclose(res.pvalue, 0.004433406, atol=1e-6)

    def test_callable_cdf(self):
        """A callable cdf gives the same result as the distribution name."""
        sample, shape_params = np.arange(5), (1.4, 0.7)

        from_callable = cramervonmises(sample, distributions.expon.cdf)
        from_name = cramervonmises(sample, "expon")
        assert_equal((from_callable.statistic, from_callable.pvalue),
                     (from_name.statistic, from_name.pvalue))

        from_callable = cramervonmises(sample, distributions.beta.cdf,
                                       shape_params)
        from_name = cramervonmises(sample, "beta", shape_params)
        assert_equal((from_callable.statistic, from_callable.pvalue),
                     (from_name.statistic, from_name.pvalue))