376 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			376 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from math import sqrt
 | |
| import numpy as np
 | |
| from scipy._lib._util import _validate_int
 | |
| from scipy.optimize import brentq
 | |
| from scipy.special import ndtri
 | |
| from ._discrete_distns import binom
 | |
| from ._common import ConfidenceInterval
 | |
| 
 | |
| 
 | |
| class BinomTestResult:
 | |
|     """
 | |
|     Result of `scipy.stats.binomtest`.
 | |
| 
 | |
|     Attributes
 | |
|     ----------
 | |
|     k : int
 | |
|         The number of successes (copied from `binomtest` input).
 | |
|     n : int
 | |
|         The number of trials (copied from `binomtest` input).
 | |
|     alternative : str
 | |
|         Indicates the alternative hypothesis specified in the input
 | |
|         to `binomtest`.  It will be one of ``'two-sided'``, ``'greater'``,
 | |
|         or ``'less'``.
 | |
|     statistic: float
 | |
|         The estimate of the proportion of successes.
 | |
|     pvalue : float
 | |
|         The p-value of the hypothesis test.
 | |
| 
 | |
|     """
 | |
|     def __init__(self, k, n, alternative, statistic, pvalue):
 | |
|         self.k = k
 | |
|         self.n = n
 | |
|         self.alternative = alternative
 | |
|         self.statistic = statistic
 | |
|         self.pvalue = pvalue
 | |
| 
 | |
|         # add alias for backward compatibility
 | |
|         self.proportion_estimate = statistic
 | |
| 
 | |
|     def __repr__(self):
 | |
|         s = ("BinomTestResult("
 | |
|              f"k={self.k}, "
 | |
|              f"n={self.n}, "
 | |
|              f"alternative={self.alternative!r}, "
 | |
|              f"statistic={self.statistic}, "
 | |
|              f"pvalue={self.pvalue})")
 | |
|         return s
 | |
| 
 | |
|     def proportion_ci(self, confidence_level=0.95, method='exact'):
 | |
|         """
 | |
|         Compute the confidence interval for ``statistic``.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         confidence_level : float, optional
 | |
|             Confidence level for the computed confidence interval
 | |
|             of the estimated proportion. Default is 0.95.
 | |
|         method : {'exact', 'wilson', 'wilsoncc'}, optional
 | |
|             Selects the method used to compute the confidence interval
 | |
|             for the estimate of the proportion:
 | |
| 
 | |
|             'exact' :
 | |
|                 Use the Clopper-Pearson exact method [1]_.
 | |
|             'wilson' :
 | |
|                 Wilson's method, without continuity correction ([2]_, [3]_).
 | |
|             'wilsoncc' :
 | |
|                 Wilson's method, with continuity correction ([2]_, [3]_).
 | |
| 
 | |
|             Default is ``'exact'``.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         ci : ``ConfidenceInterval`` object
 | |
|             The object has attributes ``low`` and ``high`` that hold the
 | |
|             lower and upper bounds of the confidence interval.
 | |
| 
 | |
|         References
 | |
|         ----------
 | |
|         .. [1] C. J. Clopper and E. S. Pearson, The use of confidence or
 | |
|                fiducial limits illustrated in the case of the binomial,
 | |
|                Biometrika, Vol. 26, No. 4, pp 404-413 (Dec. 1934).
 | |
|         .. [2] E. B. Wilson, Probable inference, the law of succession, and
 | |
|                statistical inference, J. Amer. Stat. Assoc., 22, pp 209-212
 | |
|                (1927).
 | |
|         .. [3] Robert G. Newcombe, Two-sided confidence intervals for the
 | |
|                single proportion: comparison of seven methods, Statistics
 | |
|                in Medicine, 17, pp 857-872 (1998).
 | |
| 
 | |
|         Examples
 | |
|         --------
 | |
|         >>> from scipy.stats import binomtest
 | |
|         >>> result = binomtest(k=7, n=50, p=0.1)
 | |
|         >>> result.statistic
 | |
|         0.14
 | |
|         >>> result.proportion_ci()
 | |
|         ConfidenceInterval(low=0.05819170033997342, high=0.26739600249700846)
 | |
|         """
 | |
|         if method not in ('exact', 'wilson', 'wilsoncc'):
 | |
|             raise ValueError(f"method ('{method}') must be one of 'exact', "
 | |
|                              "'wilson' or 'wilsoncc'.")
 | |
|         if not (0 <= confidence_level <= 1):
 | |
|             raise ValueError(f'confidence_level ({confidence_level}) must be in '
 | |
|                              'the interval [0, 1].')
 | |
|         if method == 'exact':
 | |
|             low, high = _binom_exact_conf_int(self.k, self.n,
 | |
|                                               confidence_level,
 | |
|                                               self.alternative)
 | |
|         else:
 | |
|             # method is 'wilson' or 'wilsoncc'
 | |
|             low, high = _binom_wilson_conf_int(self.k, self.n,
 | |
|                                                confidence_level,
 | |
|                                                self.alternative,
 | |
|                                                correction=method == 'wilsoncc')
 | |
|         return ConfidenceInterval(low=low, high=high)
 | |
| 
 | |
| 
 | |
| def _findp(func):
 | |
|     try:
 | |
|         p = brentq(func, 0, 1)
 | |
|     except RuntimeError:
 | |
|         raise RuntimeError('numerical solver failed to converge when '
 | |
|                            'computing the confidence limits') from None
 | |
|     except ValueError as exc:
 | |
|         raise ValueError('brentq raised a ValueError; report this to the '
 | |
|                          'SciPy developers') from exc
 | |
|     return p
 | |
| 
 | |
| 
 | |
| def _binom_exact_conf_int(k, n, confidence_level, alternative):
 | |
|     """
 | |
|     Compute the estimate and confidence interval for the binomial test.
 | |
| 
 | |
|     Returns proportion, prop_low, prop_high
 | |
|     """
 | |
|     if alternative == 'two-sided':
 | |
|         alpha = (1 - confidence_level) / 2
 | |
|         if k == 0:
 | |
|             plow = 0.0
 | |
|         else:
 | |
|             plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
 | |
|         if k == n:
 | |
|             phigh = 1.0
 | |
|         else:
 | |
|             phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
 | |
|     elif alternative == 'less':
 | |
|         alpha = 1 - confidence_level
 | |
|         plow = 0.0
 | |
|         if k == n:
 | |
|             phigh = 1.0
 | |
|         else:
 | |
|             phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
 | |
|     elif alternative == 'greater':
 | |
|         alpha = 1 - confidence_level
 | |
|         if k == 0:
 | |
|             plow = 0.0
 | |
|         else:
 | |
|             plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
 | |
|         phigh = 1.0
 | |
|     return plow, phigh
 | |
| 
 | |
| 
 | |
| def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction):
 | |
|     # This function assumes that the arguments have already been validated.
 | |
|     # In particular, `alternative` must be one of 'two-sided', 'less' or
 | |
|     # 'greater'.
 | |
|     p = k / n
 | |
|     if alternative == 'two-sided':
 | |
|         z = ndtri(0.5 + 0.5*confidence_level)
 | |
|     else:
 | |
|         z = ndtri(confidence_level)
 | |
| 
 | |
|     # For reference, the formulas implemented here are from
 | |
|     # Newcombe (1998) (ref. [3] in the proportion_ci docstring).
 | |
|     denom = 2*(n + z**2)
 | |
|     center = (2*n*p + z**2)/denom
 | |
|     q = 1 - p
 | |
|     if correction:
 | |
|         if alternative == 'less' or k == 0:
 | |
|             lo = 0.0
 | |
|         else:
 | |
|             dlo = (1 + z*sqrt(z**2 - 2 - 1/n + 4*p*(n*q + 1))) / denom
 | |
|             lo = center - dlo
 | |
|         if alternative == 'greater' or k == n:
 | |
|             hi = 1.0
 | |
|         else:
 | |
|             dhi = (1 + z*sqrt(z**2 + 2 - 1/n + 4*p*(n*q - 1))) / denom
 | |
|             hi = center + dhi
 | |
|     else:
 | |
|         delta = z/denom * sqrt(4*n*p*q + z**2)
 | |
|         if alternative == 'less' or k == 0:
 | |
|             lo = 0.0
 | |
|         else:
 | |
|             lo = center - delta
 | |
|         if alternative == 'greater' or k == n:
 | |
|             hi = 1.0
 | |
|         else:
 | |
|             hi = center + delta
 | |
| 
 | |
|     return lo, hi
 | |
| 
 | |
| 
 | |
| def binomtest(k, n, p=0.5, alternative='two-sided'):
 | |
|     """
 | |
|     Perform a test that the probability of success is p.
 | |
| 
 | |
|     The binomial test [1]_ is a test of the null hypothesis that the
 | |
|     probability of success in a Bernoulli experiment is `p`.
 | |
| 
 | |
|     Details of the test can be found in many texts on statistics, such
 | |
|     as section 24.5 of [2]_.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     k : int
 | |
|         The number of successes.
 | |
|     n : int
 | |
|         The number of trials.
 | |
|     p : float, optional
 | |
|         The hypothesized probability of success, i.e. the expected
 | |
|         proportion of successes.  The value must be in the interval
 | |
|         ``0 <= p <= 1``. The default value is ``p = 0.5``.
 | |
|     alternative : {'two-sided', 'greater', 'less'}, optional
 | |
|         Indicates the alternative hypothesis. The default value is
 | |
|         'two-sided'.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     result : `~scipy.stats._result_classes.BinomTestResult` instance
 | |
|         The return value is an object with the following attributes:
 | |
| 
 | |
|         k : int
 | |
|             The number of successes (copied from `binomtest` input).
 | |
|         n : int
 | |
|             The number of trials (copied from `binomtest` input).
 | |
|         alternative : str
 | |
|             Indicates the alternative hypothesis specified in the input
 | |
|             to `binomtest`.  It will be one of ``'two-sided'``, ``'greater'``,
 | |
|             or ``'less'``.
 | |
|         statistic : float
 | |
|             The estimate of the proportion of successes.
 | |
|         pvalue : float
 | |
|             The p-value of the hypothesis test.
 | |
| 
 | |
|         The object has the following methods:
 | |
| 
 | |
|         proportion_ci(confidence_level=0.95, method='exact') :
 | |
|             Compute the confidence interval for ``statistic``.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     .. versionadded:: 1.7.0
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Binomial test, https://en.wikipedia.org/wiki/Binomial_test
 | |
|     .. [2] Jerrold H. Zar, Biostatistical Analysis (fifth edition),
 | |
|            Prentice Hall, Upper Saddle River, New Jersey USA (2010)
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     >>> from scipy.stats import binomtest
 | |
| 
 | |
|     A car manufacturer claims that no more than 10% of their cars are unsafe.
 | |
|     15 cars are inspected for safety, 3 were found to be unsafe. Test the
 | |
|     manufacturer's claim:
 | |
| 
 | |
|     >>> result = binomtest(3, n=15, p=0.1, alternative='greater')
 | |
|     >>> result.pvalue
 | |
|     0.18406106910639114
 | |
| 
 | |
|     The null hypothesis cannot be rejected at the 5% level of significance
 | |
|     because the returned p-value is greater than the critical value of 5%.
 | |
| 
 | |
|     The test statistic is equal to the estimated proportion, which is simply
 | |
|     ``3/15``:
 | |
| 
 | |
|     >>> result.statistic
 | |
|     0.2
 | |
| 
 | |
|     We can use the `proportion_ci()` method of the result to compute the
 | |
|     confidence interval of the estimate:
 | |
| 
 | |
|     >>> result.proportion_ci(confidence_level=0.95)
 | |
|     ConfidenceInterval(low=0.05684686759024681, high=1.0)
 | |
| 
 | |
|     """
 | |
|     k = _validate_int(k, 'k', minimum=0)
 | |
|     n = _validate_int(n, 'n', minimum=1)
 | |
|     if k > n:
 | |
|         raise ValueError(f'k ({k}) must not be greater than n ({n}).')
 | |
| 
 | |
|     if not (0 <= p <= 1):
 | |
|         raise ValueError(f"p ({p}) must be in range [0,1]")
 | |
| 
 | |
|     if alternative not in ('two-sided', 'less', 'greater'):
 | |
|         raise ValueError(f"alternative ('{alternative}') not recognized; \n"
 | |
|                          "must be 'two-sided', 'less' or 'greater'")
 | |
|     if alternative == 'less':
 | |
|         pval = binom.cdf(k, n, p)
 | |
|     elif alternative == 'greater':
 | |
|         pval = binom.sf(k-1, n, p)
 | |
|     else:
 | |
|         # alternative is 'two-sided'
 | |
|         d = binom.pmf(k, n, p)
 | |
|         rerr = 1 + 1e-7
 | |
|         if k == p * n:
 | |
|             # special case as shortcut, would also be handled by `else` below
 | |
|             pval = 1.
 | |
|         elif k < p * n:
 | |
|             ix = _binary_search_for_binom_tst(lambda x1: -binom.pmf(x1, n, p),
 | |
|                                               -d*rerr, np.ceil(p * n), n)
 | |
|             # y is the number of terms between mode and n that are <= d*rerr.
 | |
|             # ix gave us the first term where a(ix) <= d*rerr < a(ix-1)
 | |
|             # if the first equality doesn't hold, y=n-ix. Otherwise, we
 | |
|             # need to include ix as well as the equality holds. Note that
 | |
|             # the equality will hold in very very rare situations due to rerr.
 | |
|             y = n - ix + int(d*rerr == binom.pmf(ix, n, p))
 | |
|             pval = binom.cdf(k, n, p) + binom.sf(n - y, n, p)
 | |
|         else:
 | |
|             ix = _binary_search_for_binom_tst(lambda x1: binom.pmf(x1, n, p),
 | |
|                                               d*rerr, 0, np.floor(p * n))
 | |
|             # y is the number of terms between 0 and mode that are <= d*rerr.
 | |
|             # we need to add a 1 to account for the 0 index.
 | |
|             # For comparing this with old behavior, see
 | |
|             # tst_binary_srch_for_binom_tst method in test_morestats.
 | |
|             y = ix + 1
 | |
|             pval = binom.cdf(y-1, n, p) + binom.sf(k-1, n, p)
 | |
| 
 | |
|         pval = min(1.0, pval)
 | |
| 
 | |
|     result = BinomTestResult(k=k, n=n, alternative=alternative,
 | |
|                              statistic=k/n, pvalue=pval)
 | |
|     return result
 | |
| 
 | |
| 
 | |
| def _binary_search_for_binom_tst(a, d, lo, hi):
 | |
|     """
 | |
|     Conducts an implicit binary search on a function specified by `a`.
 | |
| 
 | |
|     Meant to be used on the binomial PMF for the case of two-sided tests
 | |
|     to obtain the value on the other side of the mode where the tail
 | |
|     probability should be computed. The values on either side of
 | |
|     the mode are always in order, meaning binary search is applicable.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     a : callable
 | |
|       The function over which to perform binary search. Its values
 | |
|       for inputs lo and hi should be in ascending order.
 | |
|     d : float
 | |
|       The value to search.
 | |
|     lo : int
 | |
|       The lower end of range to search.
 | |
|     hi : int
 | |
|       The higher end of the range to search.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     int
 | |
|       The index, i between lo and hi
 | |
|       such that a(i)<=d<a(i+1)
 | |
|     """
 | |
|     while lo < hi:
 | |
|         mid = lo + (hi-lo)//2
 | |
|         midval = a(mid)
 | |
|         if midval < d:
 | |
|             lo = mid+1
 | |
|         elif midval > d:
 | |
|             hi = mid-1
 | |
|         else:
 | |
|             return mid
 | |
|     if a(lo) <= d:
 | |
|         return lo
 | |
|     else:
 | |
|         return lo-1
 |