346 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			346 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import numpy as np
 | |
| from numpy.testing import assert_equal, assert_array_equal
 | |
| import pytest
 | |
| 
 | |
| from scipy import stats
 | |
| from scipy.conftest import skip_xp_invalid_arg
 | |
| from scipy.stats import rankdata, tiecorrect
 | |
| from scipy._lib._array_api import xp_assert_equal, make_xp_test_case
 | |
| 
 | |
class TestTieCorrect:
    """Checks of ``tiecorrect``, the tie correction factor computed from ranks."""

    def test_empty(self):
        """No ranks at all means nothing to correct: the factor is 1.0."""
        factor = tiecorrect(np.array([], dtype=np.float64))
        assert_equal(factor, 1.0)

    def test_one(self):
        """A lone rank cannot be tied with anything: the factor is 1.0."""
        factor = tiecorrect(np.array([1.0], dtype=np.float64))
        assert_equal(factor, 1.0)

    def test_no_correction(self):
        """Strictly increasing values (no ties) leave the factor at 1.0."""
        for stop in (2.0, 3.0):
            factor = tiecorrect(np.arange(stop))
            assert_equal(factor, 1.0)

    def test_basic(self):
        """Compare against 1 - sum(T**3 - T) / (N**3 - N) on small inputs."""
        # Each entry pairs the ranks with the sizes of the tied groups in them.
        scenarios = [
            # One tie of two elements, located at the end.
            ([1.0, 2.5, 2.5], [2.0]),
            # One tie of two elements, located at the start.
            ([1.5, 1.5, 3.0], [2.0]),
            # One tie of three elements.
            ([1.0, 3.0, 3.0, 3.0], [3.0]),
            # Two ties, of lengths 2 and 3.
            ([1.5, 1.5, 4.0, 4.0, 4.0], [2.0, 3.0]),
        ]
        for values, tie_sizes in scenarios:
            ranks = np.array(values)
            count = ranks.size
            tie_term = sum(t**3 - t for t in tie_sizes)
            reference = 1.0 - tie_term / (count**3 - count)
            assert_equal(tiecorrect(ranks), reference)

    def test_overflow(self):
        """Large tied groups must still match the closed-form value exactly."""
        ntie, k = 2000, 5
        values = np.repeat(np.arange(k), ntie)
        total = values.size  # ntie * k
        reference = 1.0 - k * (ntie**3 - ntie) / float(total**3 - total)
        assert_equal(tiecorrect(rankdata(values)), reference)
 | |
| 
 | |
| 
 | |
@make_xp_test_case(stats.rankdata)
class TestRankData:
    """Tests of ``stats.rankdata``.

    Methods that accept ``xp`` build their inputs and references with that
    array namespace; the remaining methods exercise NumPy arrays and lists.
    """

    def desired_dtype(self, method='average', has_nans=False, *, xp):
        """Return the dtype the tests expect from ``rankdata``.

        Parameters
        ----------
        method : str
            Ranking method passed to ``rankdata``.
        has_nans : bool
            Whether the expected result contains NaNs.
        xp : array namespace
            Namespace used to derive the dtypes.

        Returns
        -------
        dtype
            The dtype of ``xp.asarray(1.)`` when `has_nans` is true or
            `method` is ``'average'``; otherwise the dtype of
            ``xp.asarray(1)``.
        """
        if has_nans:
            return xp.asarray(1.).dtype
        return xp.asarray(1.).dtype if method == 'average' else xp.asarray(1).dtype

    def test_empty(self, xp):
        """stats.rankdata of empty array should return an empty array."""
        a = xp.asarray([], dtype=xp.int64)
        r = rankdata(a)
        xp_assert_equal(r, xp.asarray([], dtype=self.desired_dtype(xp=xp)))

    def test_list(self):
        """Plain Python lists are accepted as input."""
        # test that NumPy still accepts lists
        r = rankdata([])
        assert_array_equal(r, np.array([]))

        r = rankdata([40, 10, 30, 10, 50])
        assert_equal(r, [4.0, 1.5, 3.0, 1.5, 5.0])

    @pytest.mark.parametrize("shape", [(0, 1, 2)])
    @pytest.mark.parametrize("axis", [None, *range(3)])
    def test_empty_multidim(self, shape, axis, xp):
        """Empty multi-dimensional input keeps its shape unless ``axis=None``."""
        a = xp.empty(shape, dtype=xp.int64)
        r = rankdata(a, axis=axis)
        # With axis=None the expected result is 1-D (and empty).
        expected_shape = (0,) if axis is None else shape
        xp_assert_equal(r, xp.empty(expected_shape, dtype=self.desired_dtype(xp=xp)))

    def test_one(self, xp):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        a = xp.asarray(data, dtype=xp.int64)
        r = rankdata(a)
        xp_assert_equal(r, xp.asarray([1.0], dtype=self.desired_dtype(xp=xp)))

    def test_basic(self, xp):
        """Basic tests of stats.rankdata."""
        desired_dtype = self.desired_dtype(xp=xp)

        data = [100, 10, 50]
        expected = xp.asarray([3.0, 1.0, 2.0], dtype=desired_dtype)
        a = xp.asarray(data, dtype=xp.int64)
        r = rankdata(a)
        xp_assert_equal(r, expected)

        data = [40, 10, 30, 10, 50]
        expected = xp.asarray([4.0, 1.5, 3.0, 1.5, 5.0], dtype=desired_dtype)
        a = xp.asarray(data, dtype=xp.int64)
        r = rankdata(a)
        xp_assert_equal(r, expected)

        data = [20, 20, 20, 10, 10, 10]
        expected = xp.asarray([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=desired_dtype)
        a = xp.asarray(data, dtype=xp.int64)
        r = rankdata(a)
        xp_assert_equal(r, expected)

        # The docstring states explicitly that the argument is flattened.
        a2d = xp.reshape(a, (2, 3))
        r = rankdata(a2d)
        xp_assert_equal(r, expected)

    @skip_xp_invalid_arg
    def test_rankdata_object_string(self):
        """Compare every method against pure-Python references.

        The inputs are string arrays and object arrays, for which the
        references below rely only on ``<`` and ``<=`` comparisons.
        """

        def min_rank(a):
            # 1 + number of strictly smaller elements.
            return [1 + sum(i < j for i in a) for j in a]

        def max_rank(a):
            # Number of elements less than or equal to each element.
            return [sum(i <= j for i in a) for j in a]

        def ordinal_rank(a):
            # Pair each value with its position so ties break by position.
            return min_rank([(x, i) for i, x in enumerate(a)])

        def average_rank(a):
            return [(i + j) / 2.0 for i, j in zip(min_rank(a), max_rank(a))]

        def dense_rank(a):
            # Rank among the distinct values only.
            b = np.unique(a)
            return [1 + sum(i < j for i in b) for j in a]

        rankf = dict(min=min_rank, max=max_rank, ordinal=ordinal_rank,
                     average=average_rank, dense=dense_rank)

        def check_ranks(a):
            for method in 'min', 'max', 'dense', 'ordinal', 'average':
                out = rankdata(a, method=method)
                assert_array_equal(out, rankf[method](a))

        val = ['foo', 'bar', 'qux', 'xyz', 'abc', 'efg', 'ace', 'qwe', 'qaz']
        check_ranks(np.random.choice(val, 200))
        check_ranks(np.random.choice(val, 200).astype('object'))

        val = np.array([0, 1, 2, 2.718, 3, 3.141], dtype='object')
        check_ranks(np.random.choice(val, 200).astype('object'))

    def test_large_int(self, xp):
        """Integers near 2**60 that differ by one must rank distinctly."""
        # The unsigned case runs only when the namespace provides uint64.
        if hasattr(xp, 'uint64'):
            data = xp.asarray([2**60, 2**60+1], dtype=xp.uint64)
            r = rankdata(data)
            xp_assert_equal(r, xp.asarray([1.0, 2.0], dtype=self.desired_dtype(xp=xp)))

        data = xp.asarray([2**60, 2**60+1], dtype=xp.int64)
        r = rankdata(data)
        xp_assert_equal(r, xp.asarray([1.0, 2.0], dtype=self.desired_dtype(xp=xp)))

        data = xp.asarray([2**60, -2**60+1], dtype=xp.int64)
        r = rankdata(data)
        xp_assert_equal(r, xp.asarray([2.0, 1.0], dtype=self.desired_dtype(xp=xp)))

    @pytest.mark.parametrize('n', [10000, 100000, 1000000])
    def test_big_tie(self, n, xp):
        """``n`` identical values all receive the average rank ``(n + 1) / 2``."""
        data = xp.ones(n)
        r = rankdata(data)
        expected_rank = 0.5 * (n + 1)
        # ``data`` is all ones, so scaling it yields the constant reference.
        ref = xp.asarray(expected_rank * data, dtype=self.desired_dtype(xp=xp))
        xp_assert_equal(r, ref)

    def test_axis(self, xp):
        """Ranking along ``axis=0`` and ``axis=1`` of a 2-D array."""
        data = xp.asarray([[0, 2, 1], [4, 2, 2]])

        expected0 = xp.asarray([[1., 1.5, 1.], [2., 1.5, 2.]])
        r0 = rankdata(data, axis=0)
        xp_assert_equal(r0, expected0)

        expected1 = xp.asarray([[1., 3., 2.], [3., 1.5, 1.5]])
        r1 = rankdata(data, axis=1)
        xp_assert_equal(r1, expected1)

    # Ranking methods shared by the parametrized tests below.
    methods = ["average", "min", "max", "dense", "ordinal"]

    @pytest.mark.parametrize("axis", [0, 1])
    @pytest.mark.parametrize("method", methods)
    def test_size_0_axis(self, axis, method, xp):
        """Zero-size input keeps its shape and yields the expected dtype."""
        shape = (3, 0)
        desired_dtype = self.desired_dtype(method, xp=xp)
        data = xp.zeros(shape)
        r = rankdata(data, method=method, axis=axis)
        assert_equal(r.shape, shape)
        assert_equal(r.dtype, desired_dtype)
        xp_assert_equal(r, xp.empty(shape, dtype=desired_dtype))

    @pytest.mark.parametrize('axis', range(3))
    @pytest.mark.parametrize('method', methods)
    def test_nan_policy_omit_3d(self, axis, method):
        """``nan_policy='omit'`` on 3-D data matches a slice-by-slice reference.

        The data mixes finite values, NaN, -inf and +inf. The reference ranks
        the non-NaN entries of every 1-D slice and puts NaN back in the
        positions that held NaN.
        """
        shape = (20, 21, 22)
        rng = np.random.RandomState(23983242)

        a = rng.random(size=shape)
        i = rng.random(size=shape) < 0.4
        j = rng.random(size=shape) < 0.1
        k = rng.random(size=shape) < 0.1
        a[i] = np.nan
        a[j] = -np.inf
        # This was previously the no-op expression ``a[k] - np.inf``, so +inf
        # never made it into the data; assign it as the lines above do.
        a[k] = np.inf

        def rank_1d_omit(a, method):
            # Rank only the non-NaN entries; NaN positions stay NaN.
            out = np.zeros_like(a)
            i = np.isnan(a)
            a_compressed = a[~i]
            res = rankdata(a_compressed, method)
            out[~i] = res
            out[i] = np.nan
            return out

        def rank_omit(a, method, axis):
            return np.apply_along_axis(lambda a: rank_1d_omit(a, method),
                                       axis, a)

        res = rankdata(a, method, axis=axis, nan_policy='omit')
        res0 = rank_omit(a, method, axis=axis)

        assert_array_equal(res, res0)

    def test_nan_policy_2d_axis_none(self):
        """``axis=None`` with 'omit' and 'propagate' on a 2-D input with NaNs."""
        # 2 2d-array test with axis=None
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [1, 2, 2]]
        assert_array_equal(rankdata(data, axis=None, nan_policy='omit'),
                           [1., np.nan, 6., 7., 4., np.nan, 2., 4., 4.])
        assert_array_equal(rankdata(data, axis=None, nan_policy='propagate'),
                           [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
                            np.nan, np.nan, np.nan])

    def test_nan_policy_raise(self):
        """``nan_policy='raise'`` raises ``ValueError`` when NaNs are present."""
        # 1 1d-array test
        data = [0, 2, 3, -2, np.nan, np.nan]
        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, nan_policy='raise')

        # 2 2d-array test
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [np.nan, 2, 2]]

        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, axis=0, nan_policy="raise")

        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, axis=1, nan_policy="raise")

    def test_nan_policy_propagate(self):
        """With 'propagate', any slice containing a NaN is ranked as all NaN."""
        # 1 1d-array test
        data = [0, 2, 3, -2, np.nan, np.nan]
        assert_array_equal(rankdata(data, nan_policy='propagate'),
                           [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan])

        # 2 2d-array test
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [1, 2, 2]]
        assert_array_equal(rankdata(data, axis=0, nan_policy='propagate'),
                           [[1, np.nan, np.nan],
                            [3, np.nan, np.nan],
                            [2, np.nan, np.nan]])
        assert_array_equal(rankdata(data, axis=1, nan_policy='propagate'),
                           [[np.nan, np.nan, np.nan],
                            [np.nan, np.nan, np.nan],
                            [1, 2.5, 2.5]])

    # Table of small hand-computed examples consumed by ``test_cases``.
    _rankdata_cases = (
        # values, method, expected
        ([], 'average', []),
        ([], 'min', []),
        ([], 'max', []),
        ([], 'dense', []),
        ([], 'ordinal', []),
        #
        ([100], 'average', [1.0]),
        ([100], 'min', [1.0]),
        ([100], 'max', [1.0]),
        ([100], 'dense', [1.0]),
        ([100], 'ordinal', [1.0]),
        #
        ([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
        ([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
        ([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
        ([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
        ([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
        #
        ([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
        ([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
        ([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
        ([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
        ([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
        #
        ([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
        ([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
        ([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
        ([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
        ([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
        #
        ([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
        ([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
        ([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
        ([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
        ([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
        #
        ([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
    )

    @pytest.mark.parametrize('case', _rankdata_cases)
    def test_cases(self, case, xp):
        """Check each ``(values, method, expected)`` row of ``_rankdata_cases``."""
        values, method, expected = case
        r = rankdata(xp.asarray(values), method=method)
        ref = xp.asarray(expected, dtype=self.desired_dtype(method, xp=xp))
        xp_assert_equal(r, ref)
 |