import warnings
from collections.abc import Sequence
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Literal

import numpy as np

from scipy import stats
from scipy.optimize import minimize_scalar
from scipy.stats._common import ConfidenceInterval
from scipy.stats._qmc import check_random_state
from scipy.stats._stats_py import _var
from scipy._lib._util import _transition_to_rng, DecimalNumber, SeedType


if TYPE_CHECKING:
    import numpy.typing as npt


__all__ = [
    'dunnett'
]


@dataclass
class DunnettResult:
    """Result object returned by `scipy.stats.dunnett`.

    Attributes
    ----------
    statistic : float ndarray
        The computed statistic of the test for each comparison. The element
        at index ``i`` is the statistic for the comparison between
        group ``i`` and the control.
    pvalue : float ndarray
        The computed p-value of the test for each comparison. The element
        at index ``i`` is the p-value for the comparison between
        group ``i`` and the control.
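
    Examples
    --------
    A minimal usage sketch (data taken from the example in the `dunnett`
    docstring). Printing the result renders a per-comparison table of
    statistics, p-values and confidence limits; output is not shown here
    because the limits depend on a randomized evaluation of the
    multivariate t CDF:

    >>> from scipy.stats import dunnett
    >>> res = dunnett([3.8, 2.7, 4.0, 2.4], control=[2.9, 3.0, 2.5, 2.6, 3.2])
    >>> print(res)  # doctest: +SKIP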
|     """
 | |
|     statistic: np.ndarray
 | |
|     pvalue: np.ndarray
 | |
|     _alternative: Literal['two-sided', 'less', 'greater'] = field(repr=False)
 | |
|     _rho: np.ndarray = field(repr=False)
 | |
|     _df: int = field(repr=False)
 | |
|     _std: float = field(repr=False)
 | |
|     _mean_samples: np.ndarray = field(repr=False)
 | |
|     _mean_control: np.ndarray = field(repr=False)
 | |
|     _n_samples: np.ndarray = field(repr=False)
 | |
|     _n_control: int = field(repr=False)
 | |
|     _rng: SeedType = field(repr=False)
 | |
|     _ci: ConfidenceInterval | None = field(default=None, repr=False)
 | |
|     _ci_cl: DecimalNumber | None = field(default=None, repr=False)
 | |
| 
 | |
|     def __str__(self):
 | |
|         # Note: `__str__` prints the confidence intervals from the most
 | |
|         # recent call to `confidence_interval`. If it has not been called,
 | |
|         # it will be called with the default CL of .95.
 | |
|         if self._ci is None:
 | |
|             self.confidence_interval(confidence_level=.95)
 | |
|         s = (
 | |
|             "Dunnett's test"
 | |
|             f" ({self._ci_cl*100:.1f}% Confidence Interval)\n"
 | |
|             "Comparison               Statistic  p-value  Lower CI  Upper CI\n"
 | |
|         )
 | |
|         for i in range(self.pvalue.size):
 | |
|             s += (f" (Sample {i} - Control) {self.statistic[i]:>10.3f}"
 | |
|                   f"{self.pvalue[i]:>10.3f}"
 | |
|                   f"{self._ci.low[i]:>10.3f}"
 | |
|                   f"{self._ci.high[i]:>10.3f}\n")
 | |
| 
 | |
|         return s
 | |
| 
 | |
|     def _allowance(
 | |
|         self, confidence_level: DecimalNumber = 0.95, tol: DecimalNumber = 1e-3
 | |
|     ) -> float:
 | |
|         """Allowance.
 | |
| 
 | |
|         It is the quantity to add/subtract from the observed difference
 | |
|         between the means of observed groups and the mean of the control
 | |
|         group. The result gives confidence limits.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         confidence_level : float, optional
 | |
|             Confidence level for the computed confidence interval.
 | |
|             Default is .95.
 | |
|         tol : float, optional
 | |
|             A tolerance for numerical optimization: the allowance will produce
 | |
|             a confidence within ``10*tol*(1 - confidence_level)`` of the
 | |
|             specified level, or a warning will be emitted. Tight tolerances
 | |
|             may be impractical due to noisy evaluation of the objective.
 | |
|             Default is 1e-3.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         allowance : float
 | |
|             Allowance around the mean.
 | |
|         """
 | |
|         alpha = 1 - confidence_level
 | |
| 
 | |
|         def pvalue_from_stat(statistic):
 | |
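            # Objective for the search below: the relative deviation of the
            # p-value at `statistic` from the target alpha. It vanishes at
            # the critical value, so minimizing it recovers that value.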
            statistic = np.array(statistic)
            sf = _pvalue_dunnett(
                rho=self._rho, df=self._df,
                statistic=statistic, alternative=self._alternative,
                rng=self._rng
            )
            return abs(sf - alpha)/alpha

        # Evaluation of `pvalue_from_stat` is noisy due to the use of RQMC to
        # evaluate `multivariate_t.cdf`. `minimize_scalar` is not designed
        # to tolerate a noisy objective function and may fail to find the
        # minimum accurately. We mitigate this possibility with the validation
        # step below, but implementation of a noise-tolerant root finder or
        # minimizer would be a welcome enhancement. See gh-18150.
        res = minimize_scalar(pvalue_from_stat, method='brent', tol=tol)
        critical_value = res.x

        # validation
        # tol*10 because tol=1e-3 means we tolerate a 1% change at most
        if res.success is False or res.fun >= tol*10:
            warnings.warn(
                "Computation of the confidence interval did not converge to "
                "the desired level. The significance level corresponding with "
                f"the returned interval is approximately {alpha*(1+res.fun)}.",
                stacklevel=3
            )

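        # The allowance is the half-width of each simultaneous interval:
        # critical_value * s * sqrt(1/N_i + 1/N_0), where the critical value
        # found above accounts for all comparisons at once.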
        # From [1] p. 1101 between (1) and (3)
        allowance = critical_value*self._std*np.sqrt(
            1/self._n_samples + 1/self._n_control
        )
        return abs(allowance)

    def confidence_interval(
        self, confidence_level: DecimalNumber = 0.95
    ) -> ConfidenceInterval:
        """Compute the confidence interval for the specified confidence level.

        Parameters
        ----------
        confidence_level : float, optional
            Confidence level for the computed confidence interval.
            Default is .95.

        Returns
        -------
        ci : ``ConfidenceInterval`` object
            The object has attributes ``low`` and ``high`` that hold the
            lower and upper bounds of the confidence intervals for each
            comparison. The bounds for the comparison of group ``i`` with
            the control are at index ``i`` of each attribute.
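
        Examples
        --------
        A usage sketch, reusing the data from the example in the `dunnett`
        docstring. The exact bounds vary slightly between runs because the
        critical value is obtained from a randomized quasi-Monte Carlo
        evaluation of the multivariate t CDF, so only a sanity check on the
        bounds is shown:

        >>> from scipy.stats import dunnett
        >>> samples = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
        >>> control = [2.9, 3.0, 2.5, 2.6, 3.2]
        >>> res = dunnett(*samples, control=control)
        >>> ci = res.confidence_interval(confidence_level=0.95)
        >>> ci.low < ci.high
        array([ True,  True])
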
        """
        # check to see if the supplied confidence level matches that of the
        # previously computed CI.
        if (self._ci is not None) and (confidence_level == self._ci_cl):
            return self._ci

        if not (0 < confidence_level < 1):
            raise ValueError("Confidence level must be between 0 and 1.")

        allowance = self._allowance(confidence_level=confidence_level)
        diff_means = self._mean_samples - self._mean_control

        low = diff_means-allowance
        high = diff_means+allowance

        if self._alternative == 'greater':
            high = [np.inf] * len(diff_means)
        elif self._alternative == 'less':
            low = [-np.inf] * len(diff_means)

        self._ci_cl = confidence_level
        self._ci = ConfidenceInterval(
            low=low,
            high=high
        )
        return self._ci


@_transition_to_rng('random_state', replace_doc=False)
def dunnett(
    *samples: "npt.ArrayLike",  # noqa: D417
    control: "npt.ArrayLike",
    alternative: Literal['two-sided', 'less', 'greater'] = "two-sided",
    rng: SeedType = None
) -> DunnettResult:
    """Dunnett's test: multiple comparisons of means against a control group.

    This is an implementation of Dunnett's original, single-step test as
    described in [1]_.

    Parameters
    ----------
    sample1, sample2, ... : 1D array_like
        The sample measurements for each experimental group.
    control : 1D array_like
        The sample measurements for the control group.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis.

        The null hypothesis is that the means of the distributions underlying
        the samples and control are equal. The following alternative
        hypotheses are available (default is 'two-sided'):

        * 'two-sided': the means of the distributions underlying the samples
          and control are unequal.
        * 'less': the means of the distributions underlying the samples
          are less than the mean of the distribution underlying the control.
        * 'greater': the means of the distributions underlying the
          samples are greater than the mean of the distribution underlying
          the control.
    rng : `numpy.random.Generator`, optional
        Pseudorandom number generator state. When `rng` is None, a new
        `numpy.random.Generator` is created using entropy from the
        operating system. Types other than `numpy.random.Generator` are
        passed to `numpy.random.default_rng` to instantiate a ``Generator``.

        .. versionchanged:: 1.15.0

            As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
            transition from use of `numpy.random.RandomState` to
            `numpy.random.Generator`, this keyword was changed from `random_state` to
            `rng`. For an interim period, both keywords will continue to work, although
            only one may be specified at a time. After the interim period, function
            calls using the `random_state` keyword will emit warnings. Following a
            deprecation period, the `random_state` keyword will be removed.

    Returns
    -------
    res : `~scipy.stats._result_classes.DunnettResult`
        An object containing attributes:

        statistic : float ndarray
            The computed statistic of the test for each comparison. The element
            at index ``i`` is the statistic for the comparison between
            group ``i`` and the control.
        pvalue : float ndarray
            The computed p-value of the test for each comparison. The element
            at index ``i`` is the p-value for the comparison between
            group ``i`` and the control.

        And the following method:

        confidence_interval(confidence_level=0.95) :
            Compute the difference in means of the groups
            with the control +- the allowance.

    See Also
    --------
    tukey_hsd : performs pairwise comparison of means.
    :ref:`hypothesis_dunnett` : Extended example

    Notes
    -----
    Like the independent-sample t-test, Dunnett's test [1]_ is used to make
    inferences about the means of distributions from which samples were drawn.
    However, when multiple t-tests are performed at a fixed significance level,
    the "family-wise error rate" - the probability of incorrectly rejecting the
    null hypothesis in at least one test - will exceed the significance level.
    Dunnett's test is designed to perform multiple comparisons while
    controlling the family-wise error rate.

    Dunnett's test compares the means of multiple experimental groups
    against a single control group. Tukey's Honestly Significant Difference Test
    is another multiple-comparison test that controls the family-wise error
    rate, but `tukey_hsd` performs *all* pairwise comparisons between groups.
    When pairwise comparisons between experimental groups are not needed,
    Dunnett's test is preferable due to its higher power.

    The use of this test relies on several assumptions.

    1. The observations are independent within and among groups.
    2. The observations within each group are normally distributed.
    3. The distributions from which the samples are drawn have the same finite
       variance.

    References
    ----------
    .. [1] Dunnett, Charles W. (1955) "A Multiple Comparison Procedure for
           Comparing Several Treatments with a Control." Journal of the American
           Statistical Association, 50:272, 1096-1121,
           :doi:`10.1080/01621459.1955.10501294`
    .. [2] Thomson, M. L., & Short, M. D. (1969). Mucociliary function in
           health, chronic obstructive airway disease, and asbestosis. Journal
           of applied physiology, 26(5), 535-539.
           :doi:`10.1152/jappl.1969.26.5.535`

    Examples
    --------
    We'll use data from [2]_, Table 1. The null hypothesis is that the means of
    the distributions underlying the samples and control are equal.

    First, we test that the means of the distributions underlying the samples
    and control are unequal (``alternative='two-sided'``, the default).

    >>> import numpy as np
    >>> from scipy.stats import dunnett
    >>> samples = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
    >>> control = [2.9, 3.0, 2.5, 2.6, 3.2]
    >>> res = dunnett(*samples, control=control)
    >>> res.statistic
    array([ 0.90874545, -0.05007117])
    >>> res.pvalue
    array([0.58325114, 0.99819341])

    Now, we test that the means of the distributions underlying the samples are
    greater than the mean of the distribution underlying the control.

    >>> res = dunnett(*samples, control=control, alternative='greater')
    >>> res.statistic
    array([ 0.90874545, -0.05007117])
    >>> res.pvalue
    array([0.30230596, 0.69115597])

    For a more detailed example, see :ref:`hypothesis_dunnett`.
    """
    samples_, control_, rng = _iv_dunnett(
        samples=samples, control=control,
        alternative=alternative, rng=rng
    )

    rho, df, n_group, n_samples, n_control = _params_dunnett(
        samples=samples_, control=control_
    )

    statistic, std, mean_control, mean_samples = _statistic_dunnett(
        samples_, control_, df, n_samples, n_control
    )

    pvalue = _pvalue_dunnett(
        rho=rho, df=df, statistic=statistic, alternative=alternative, rng=rng
    )

    return DunnettResult(
        statistic=statistic, pvalue=pvalue,
        _alternative=alternative,
        _rho=rho, _df=df, _std=std,
        _mean_samples=mean_samples,
        _mean_control=mean_control,
        _n_samples=n_samples,
        _n_control=n_control,
        _rng=rng
    )


def _iv_dunnett(
    samples: Sequence["npt.ArrayLike"],
    control: "npt.ArrayLike",
    alternative: Literal['two-sided', 'less', 'greater'],
    rng: SeedType
) -> tuple[list[np.ndarray], np.ndarray, SeedType]:
    """Input validation for Dunnett's test."""
    rng = check_random_state(rng)

    if alternative not in {'two-sided', 'less', 'greater'}:
        raise ValueError(
            "alternative must be 'less', 'greater' or 'two-sided'"
        )

    ndim_msg = "Control and samples groups must be 1D arrays"
    n_obs_msg = "Control and samples groups must have at least 1 observation"

    control = np.asarray(control)
    samples_ = [np.asarray(sample) for sample in samples]

    # samples checks
    samples_control: list[np.ndarray] = samples_ + [control]
    for sample in samples_control:
        if sample.ndim > 1:
            raise ValueError(ndim_msg)

        if sample.size < 1:
            raise ValueError(n_obs_msg)

    return samples_, control, rng


def _params_dunnett(
    samples: list[np.ndarray], control: np.ndarray
) -> tuple[np.ndarray, int, int, np.ndarray, int]:
    """Specific parameters for Dunnett's test.

    The degrees of freedom equal the number of observations minus the
    number of groups (treatment groups plus the control).
    """
    n_samples = np.array([sample.size for sample in samples])

    # From [1] p. 1100 d.f. = (sum N)-(p+1)
    n_sample = n_samples.sum()
    n_control = control.size
    n = n_sample + n_control
    n_groups = len(samples)
    df = n - n_groups - 1

    # From [1] p. 1103 rho_ij = 1/sqrt((N0/Ni+1)(N0/Nj+1))
    rho = n_control/n_samples + 1
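    # Broadcasting forms the outer product (N0/Ni+1)*(N0/Nj+1), so the full
    # correlation matrix is built in one vectorized step.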
    rho = 1/np.sqrt(rho[:, None] * rho[None, :])
    np.fill_diagonal(rho, 1)

    return rho, df, n_groups, n_samples, n_control


def _statistic_dunnett(
    samples: list[np.ndarray], control: np.ndarray, df: int,
    n_samples: np.ndarray, n_control: int
) -> tuple[np.ndarray, float, np.ndarray, np.ndarray]:
    """Statistic of Dunnett's test.

    Computation based on the original single-step test from [1].
    """
    mean_control = np.mean(control)
    mean_samples = np.array([np.mean(sample) for sample in samples])
    all_samples = [control] + samples
    all_means = np.concatenate([[mean_control], mean_samples])

    # Variance estimate s^2 from [1] Eq. 1
    s2 = np.sum([_var(sample, mean=mean)*sample.size
                 for sample, mean in zip(all_samples, all_means)]) / df
    std = np.sqrt(s2)

    # z score inferred from [1] unlabeled equation after Eq. 1
    z = (mean_samples - mean_control) / np.sqrt(1/n_samples + 1/n_control)

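    # Each element of `z / std` is Dunnett's t statistic for one comparison:
    # (mean_i - mean_control) / (s * sqrt(1/N_i + 1/N_0)).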
    return z / std, std, mean_control, mean_samples


def _pvalue_dunnett(
    rho: np.ndarray, df: int, statistic: np.ndarray,
    alternative: Literal['two-sided', 'less', 'greater'],
    rng: SeedType = None
) -> np.ndarray:
    """P-value from the multivariate t-distribution.

    Critical values come from the multivariate Student's t distribution.
    """
    statistic = statistic.reshape(-1, 1)

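    # Under the null hypothesis the statistics jointly follow a multivariate
    # t distribution with correlation matrix `rho`; each tail probability
    # below is evaluated with its (randomized QMC) CDF.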
    mvt = stats.multivariate_t(shape=rho, df=df, seed=rng)
    if alternative == "two-sided":
        statistic = abs(statistic)
        pvalue = 1 - mvt.cdf(statistic, lower_limit=-statistic)
    elif alternative == "greater":
        pvalue = 1 - mvt.cdf(statistic, lower_limit=-np.inf)
    else:
        pvalue = 1 - mvt.cdf(np.inf, lower_limit=statistic)

    return np.atleast_1d(pvalue)