from __future__ import division, print_function, absolute_import import numpy as np from numpy.testing import (assert_, assert_equal, assert_allclose, assert_almost_equal) # avoid new uses from pytest import raises as assert_raises from scipy.stats._hypotests import (epps_singleton_2samp, cramervonmises, _cdf_cvm) from scipy.stats import distributions from .common_tests import check_named_results class TestEppsSingleton(object): def test_statistic_1(self): # first example in Goerg & Kaiser, also in original paper of # Epps & Singleton. Note: values do not match exactly, the # value of the interquartile range varies depending on how # quantiles are computed x = np.array([-0.35, 2.55, 1.73, 0.73, 0.35, 2.69, 0.46, -0.94, -0.37, 12.07]) y = np.array([-1.15, -0.15, 2.48, 3.25, 3.71, 4.29, 5.00, 7.74, 8.38, 8.60]) w, p = epps_singleton_2samp(x, y) assert_almost_equal(w, 15.14, decimal=1) assert_almost_equal(p, 0.00442, decimal=3) def test_statistic_2(self): # second example in Goerg & Kaiser, again not a perfect match x = np.array((0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 10, 10, 10, 10)) y = np.array((10, 4, 0, 5, 10, 10, 0, 5, 6, 7, 10, 3, 1, 7, 0, 8, 1, 5, 8, 10)) w, p = epps_singleton_2samp(x, y) assert_allclose(w, 8.900, atol=0.001) assert_almost_equal(p, 0.06364, decimal=3) def test_epps_singleton_array_like(self): np.random.seed(1234) x, y = np.arange(30), np.arange(28) w1, p1 = epps_singleton_2samp(list(x), list(y)) w2, p2 = epps_singleton_2samp(tuple(x), tuple(y)) w3, p3 = epps_singleton_2samp(x, y) assert_(w1 == w2 == w3) assert_(p1 == p2 == p3) def test_epps_singleton_size(self): # raise error if less than 5 elements x, y = (1, 2, 3, 4), np.arange(10) assert_raises(ValueError, epps_singleton_2samp, x, y) def test_epps_singleton_nonfinite(self): # raise error if there are non-finite values x, y = (1, 2, 3, 4, 5, np.inf), np.arange(10) assert_raises(ValueError, epps_singleton_2samp, x, y) x, y = np.arange(10), (1, 2, 3, 4, 5, np.nan) assert_raises(ValueError, epps_singleton_2samp, x, y) def test_epps_singleton_1d_input(self): x = np.arange(100).reshape(-1, 1) assert_raises(ValueError, epps_singleton_2samp, x, x) def test_names(self): x, y = np.arange(20), np.arange(30) res = epps_singleton_2samp(x, y) attributes = ('statistic', 'pvalue') check_named_results(res, attributes) class TestCvm(object): # the expected values of the cdfs are taken from Table 1 in # Csorgo / Faraway: The Exact and Asymptotic Distribution of # Cramér-von Mises Statistics, 1996. def test_cdf_4(self): assert_allclose( _cdf_cvm([0.02983, 0.04111, 0.12331, 0.94251], 4), [0.01, 0.05, 0.5, 0.999], atol=1e-4) def test_cdf_10(self): assert_allclose( _cdf_cvm([0.02657, 0.03830, 0.12068, 0.56643], 10), [0.01, 0.05, 0.5, 0.975], atol=1e-4) def test_cdf_1000(self): assert_allclose( _cdf_cvm([0.02481, 0.03658, 0.11889, 1.16120], 1000), [0.01, 0.05, 0.5, 0.999], atol=1e-4) def test_cdf_inf(self): assert_allclose( _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204]), [0.01, 0.05, 0.5, 0.999], atol=1e-4) def test_cdf_support(self): # cdf has support on [1/(12*n), n/3] assert_equal(_cdf_cvm([1/(12*533), 533/3], 533), [0, 1]) assert_equal(_cdf_cvm([1/(12*(27 + 1)), (27 + 1)/3], 27), [0, 1]) def test_cdf_large_n(self): # test that asymptotic cdf and cdf for large samples are close assert_allclose( _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204, 100], 10000), _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204, 100]), atol=1e-4) def test_large_x(self): # for large values of x and n, the series used to compute the cdf # converges slowly. # this leads to bug in R package goftest and MAPLE code that is # the basis of the implemenation in scipy # note: cdf = 1 for x >= 1000/3 and n = 1000 assert_(0.99999 < _cdf_cvm(333.3, 1000) < 1.0) assert_(0.99999 < _cdf_cvm(333.3) < 1.0) def test_low_p(self): # _cdf_cvm can return values larger than 1. In that case, we just # return a p-value of zero. n = 12 res = cramervonmises(np.ones(n)*0.8, 'norm') assert_(_cdf_cvm(res.statistic, n) > 1.0) assert_equal(res.pvalue, 0) def test_invalid_input(self): x = np.arange(10).reshape((2, 5)) assert_raises(ValueError, cramervonmises, x, "norm") assert_raises(ValueError, cramervonmises, [1.5], "norm") assert_raises(ValueError, cramervonmises, (), "norm") def test_values_R(self): # compared against R package goftest, version 1.1.1 # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm") res = cramervonmises([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], "norm") assert_allclose(res.statistic, 0.288156, atol=1e-6) assert_allclose(res.pvalue, 0.1453465, atol=1e-6) # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), # "pnorm", mean = 3, sd = 1.5) res = cramervonmises([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], "norm", (3, 1.5)) assert_allclose(res.statistic, 0.9426685, atol=1e-6) assert_allclose(res.pvalue, 0.002026417, atol=1e-6) # goftest::cvm.test(c(1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5), "pexp") res = cramervonmises([1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5], "expon") assert_allclose(res.statistic, 0.8421854, atol=1e-6) assert_allclose(res.pvalue, 0.004433406, atol=1e-6) def test_callable_cdf(self): x, args = np.arange(5), (1.4, 0.7) r1 = cramervonmises(x, distributions.expon.cdf) r2 = cramervonmises(x, "expon") assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue)) r1 = cramervonmises(x, distributions.beta.cdf, args) r2 = cramervonmises(x, "beta", args) assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue))