2957 lines
		
	
	
		
			105 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			2957 lines
		
	
	
		
			105 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Quasi-Monte Carlo engines and helpers."""
 | |
| import copy
 | |
| import math
 | |
| import numbers
 | |
| import os
 | |
| import warnings
 | |
| from abc import ABC, abstractmethod
 | |
| from functools import partial
 | |
| from typing import (
 | |
|     ClassVar,
 | |
|     Literal,
 | |
|     overload,
 | |
|     TYPE_CHECKING,
 | |
| )
 | |
| from collections.abc import Callable
 | |
| 
 | |
| import numpy as np
 | |
| 
 | |
| from scipy._lib._util import DecimalNumber, GeneratorType, IntNumber, SeedType
 | |
| 
 | |
| if TYPE_CHECKING:
 | |
|     import numpy.typing as npt
 | |
| 
 | |
| import scipy.stats as stats
 | |
| from scipy._lib._util import rng_integers, _rng_spawn, _transition_to_rng
 | |
| from scipy.sparse.csgraph import minimum_spanning_tree
 | |
| from scipy.spatial import distance, Voronoi
 | |
| from scipy.special import gammainc
 | |
| from ._sobol import (
 | |
|     _initialize_v, _cscramble, _fill_p_cumulative, _draw, _fast_forward,
 | |
|     _categorize, _MAXDIM
 | |
| )
 | |
| from ._qmc_cy import (
 | |
|     _cy_wrapper_centered_discrepancy,
 | |
|     _cy_wrapper_wrap_around_discrepancy,
 | |
|     _cy_wrapper_mixture_discrepancy,
 | |
|     _cy_wrapper_l2_star_discrepancy,
 | |
|     _cy_wrapper_update_discrepancy,
 | |
|     _cy_van_der_corput_scrambled,
 | |
|     _cy_van_der_corput,
 | |
| )
 | |
| 
 | |
| 
 | |
| __all__ = ['scale', 'discrepancy', 'geometric_discrepancy', 'update_discrepancy',
 | |
|            'QMCEngine', 'Sobol', 'Halton', 'LatinHypercube', 'PoissonDisk',
 | |
|            'MultinomialQMC', 'MultivariateNormalQMC']
 | |
| 
 | |
| 
 | |
| @overload
 | |
| def check_random_state(seed: IntNumber | None = ...) -> np.random.Generator:
 | |
|     ...
 | |
| 
 | |
| 
 | |
| @overload
 | |
| def check_random_state(seed: GeneratorType) -> GeneratorType:
 | |
|     ...
 | |
| 
 | |
| 
 | |
| # Based on scipy._lib._util.check_random_state
 | |
| # This is going to be removed at the end of the SPEC 7 transition,
 | |
| # so I'll just leave the argument name `seed` alone
 | |
| def check_random_state(seed=None):
 | |
|     """Turn `seed` into a `numpy.random.Generator` instance.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
 | |
|         If `seed` is an int or None, a new `numpy.random.Generator` is
 | |
|         created using ``np.random.default_rng(seed)``.
 | |
|         If `seed` is already a ``Generator`` or ``RandomState`` instance, then
 | |
|         the provided instance is used.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     seed : {`numpy.random.Generator`, `numpy.random.RandomState`}
 | |
|         Random number generator.
 | |
| 
 | |
|     """
 | |
|     if seed is None or isinstance(seed, numbers.Integral | np.integer):
 | |
|         return np.random.default_rng(seed)
 | |
|     elif isinstance(seed, np.random.RandomState | np.random.Generator):
 | |
|         return seed
 | |
|     else:
 | |
|         raise ValueError(f'{seed!r} cannot be used to seed a'
 | |
|                          ' numpy.random.Generator instance')
 | |
| 
 | |
| 
 | |
| def scale(
 | |
|     sample: "npt.ArrayLike",
 | |
|     l_bounds: "npt.ArrayLike",
 | |
|     u_bounds: "npt.ArrayLike",
 | |
|     *,
 | |
|     reverse: bool = False
 | |
| ) -> np.ndarray:
 | |
|     r"""Sample scaling from unit hypercube to different bounds.
 | |
| 
 | |
|     To convert a sample from :math:`[0, 1)` to :math:`[a, b), b>a`,
 | |
|     with :math:`a` the lower bounds and :math:`b` the upper bounds.
 | |
|     The following transformation is used:
 | |
| 
 | |
|     .. math::
 | |
| 
 | |
|         (b - a) \cdot \text{sample} + a
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     sample : array_like (n, d)
 | |
|         Sample to scale.
 | |
|     l_bounds, u_bounds : array_like (d,)
 | |
|         Lower and upper bounds (resp. :math:`a`, :math:`b`) of transformed
 | |
|         data. If `reverse` is True, range of the original data to transform
 | |
|         to the unit hypercube.
 | |
|     reverse : bool, optional
 | |
|         Reverse the transformation from different bounds to the unit hypercube.
 | |
|         Default is False.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     sample : array_like (n, d)
 | |
|         Scaled sample.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Transform 3 samples in the unit hypercube to bounds:
 | |
| 
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> l_bounds = [-2, 0]
 | |
|     >>> u_bounds = [6, 5]
 | |
|     >>> sample = [[0.5 , 0.75],
 | |
|     ...           [0.5 , 0.5],
 | |
|     ...           [0.75, 0.25]]
 | |
|     >>> sample_scaled = qmc.scale(sample, l_bounds, u_bounds)
 | |
|     >>> sample_scaled
 | |
|     array([[2.  , 3.75],
 | |
|            [2.  , 2.5 ],
 | |
|            [4.  , 1.25]])
 | |
| 
 | |
|     And convert back to the unit hypercube:
 | |
| 
 | |
|     >>> sample_ = qmc.scale(sample_scaled, l_bounds, u_bounds, reverse=True)
 | |
|     >>> sample_
 | |
|     array([[0.5 , 0.75],
 | |
|            [0.5 , 0.5 ],
 | |
|            [0.75, 0.25]])
 | |
| 
 | |
|     """
 | |
|     sample = np.asarray(sample)
 | |
| 
 | |
|     # Checking bounds and sample
 | |
|     if not sample.ndim == 2:
 | |
|         raise ValueError('Sample is not a 2D array')
 | |
| 
 | |
|     lower, upper = _validate_bounds(
 | |
|         l_bounds=l_bounds, u_bounds=u_bounds, d=sample.shape[1]
 | |
|     )
 | |
| 
 | |
|     if not reverse:
 | |
|         # Checking that sample is within the hypercube
 | |
|         if (sample.max() > 1.) or (sample.min() < 0.):
 | |
|             raise ValueError('Sample is not in unit hypercube')
 | |
| 
 | |
|         return sample * (upper - lower) + lower
 | |
|     else:
 | |
|         # Checking that sample is within the bounds
 | |
|         if not (np.all(sample >= lower) and np.all(sample <= upper)):
 | |
|             raise ValueError('Sample is out of bounds')
 | |
| 
 | |
|         return (sample - lower) / (upper - lower)
 | |
| 
 | |
| 
 | |
| def _ensure_in_unit_hypercube(sample: "npt.ArrayLike") -> np.ndarray:
 | |
|     """Ensure that sample is a 2D array and is within a unit hypercube
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     sample : array_like (n, d)
 | |
|         A 2D array of points.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     np.ndarray
 | |
|         The array interpretation of the input sample
 | |
| 
 | |
|     Raises
 | |
|     ------
 | |
|     ValueError
 | |
|         If the input is not a 2D array or contains points outside of
 | |
|         a unit hypercube.
 | |
|     """
 | |
|     sample = np.asarray(sample, dtype=np.float64, order="C")
 | |
| 
 | |
|     if not sample.ndim == 2:
 | |
|         raise ValueError("Sample is not a 2D array")
 | |
| 
 | |
|     if (sample.max() > 1.) or (sample.min() < 0.):
 | |
|         raise ValueError("Sample is not in unit hypercube")
 | |
| 
 | |
|     return sample
 | |
| 
 | |
| 
 | |
| def discrepancy(
 | |
|         sample: "npt.ArrayLike",
 | |
|         *,
 | |
|         iterative: bool = False,
 | |
|         method: Literal["CD", "WD", "MD", "L2-star"] = "CD",
 | |
|         workers: IntNumber = 1) -> float:
 | |
|     """Discrepancy of a given sample.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     sample : array_like (n, d)
 | |
|         The sample to compute the discrepancy from.
 | |
|     iterative : bool, optional
 | |
|         Must be False if not using it for updating the discrepancy.
 | |
|         Default is False. Refer to the notes for more details.
 | |
|     method : str, optional
 | |
|         Type of discrepancy, can be ``CD``, ``WD``, ``MD`` or ``L2-star``.
 | |
|         Refer to the notes for more details. Default is ``CD``.
 | |
|     workers : int, optional
 | |
|         Number of workers to use for parallel processing. If -1 is given all
 | |
|         CPU threads are used. Default is 1.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     discrepancy : float
 | |
|         Discrepancy.
 | |
| 
 | |
|     See Also
 | |
|     --------
 | |
|     geometric_discrepancy
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     The discrepancy is a uniformity criterion used to assess the space filling
 | |
|     of a number of samples in a hypercube. A discrepancy quantifies the
 | |
|     distance between the continuous uniform distribution on a hypercube and the
 | |
|     discrete uniform distribution on :math:`n` distinct sample points.
 | |
| 
 | |
|     The lower the value is, the better the coverage of the parameter space is.
 | |
| 
 | |
|     For a collection of subsets of the hypercube, the discrepancy is the
 | |
|     difference between the fraction of sample points in one of those
 | |
|     subsets and the volume of that subset. There are different definitions of
 | |
|     discrepancy corresponding to different collections of subsets. Some
 | |
|     versions take a root mean square difference over subsets instead of
 | |
|     a maximum.
 | |
| 
 | |
|     A measure of uniformity is reasonable if it satisfies the following
 | |
|     criteria [1]_:
 | |
| 
 | |
|     1. It is invariant under permuting factors and/or runs.
 | |
|     2. It is invariant under rotation of the coordinates.
 | |
|     3. It can measure not only uniformity of the sample over the hypercube,
 | |
|        but also the projection uniformity of the sample over non-empty
 | |
|        subset of lower dimension hypercubes.
 | |
|     4. There is some reasonable geometric meaning.
 | |
|     5. It is easy to compute.
 | |
|     6. It satisfies the Koksma-Hlawka-like inequality.
 | |
|     7. It is consistent with other criteria in experimental design.
 | |
| 
 | |
|     Four methods are available:
 | |
| 
 | |
|     * ``CD``: Centered Discrepancy - subspace involves a corner of the
 | |
|       hypercube
 | |
|     * ``WD``: Wrap-around Discrepancy - subspace can wrap around bounds
 | |
|     * ``MD``: Mixture Discrepancy - mix between CD/WD covering more criteria
 | |
|     * ``L2-star``: L2-star discrepancy - like CD BUT variant to rotation
 | |
| 
 | |
|     Methods ``CD``, ``WD``, and ``MD`` implement the right hand side of equations
 | |
|     9, 10, and 18 of [2]_, respectively; the square root is not taken. On the
 | |
|     other hand, ``L2-star`` computes the quantity given by equation 10 of
 | |
|     [3]_ as implemented by subsequent equations; the square root is taken.
 | |
| 
 | |
|     Lastly, using ``iterative=True``, it is possible to compute the
 | |
|     discrepancy as if we had :math:`n+1` samples. This is useful if we want
 | |
|     to add a point to a sampling and check the candidate which would give the
 | |
|     lowest discrepancy. Then you could just update the discrepancy with
 | |
|     each candidate using `update_discrepancy`. This method is faster than
 | |
|     computing the discrepancy for a large number of candidates.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Fang et al. "Design and modeling for computer experiments".
 | |
|        Computer Science and Data Analysis Series, 2006.
 | |
|     .. [2] Zhou Y.-D. et al. "Mixture discrepancy for quasi-random point sets."
 | |
|        Journal of Complexity, 29 (3-4) , pp. 283-301, 2013.
 | |
|     .. [3] T. T. Warnock. "Computational investigations of low discrepancy
 | |
|        point sets." Applications of Number Theory to Numerical
 | |
|        Analysis, Academic Press, pp. 319-343, 1972.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Calculate the quality of the sample using the discrepancy:
 | |
| 
 | |
|     >>> import numpy as np
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> space = np.array([[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]])
 | |
|     >>> l_bounds = [0.5, 0.5]
 | |
|     >>> u_bounds = [6.5, 6.5]
 | |
|     >>> space = qmc.scale(space, l_bounds, u_bounds, reverse=True)
 | |
|     >>> space
 | |
|     array([[0.08333333, 0.41666667],
 | |
|            [0.25      , 0.91666667],
 | |
|            [0.41666667, 0.25      ],
 | |
|            [0.58333333, 0.75      ],
 | |
|            [0.75      , 0.08333333],
 | |
|            [0.91666667, 0.58333333]])
 | |
|     >>> qmc.discrepancy(space)
 | |
|     0.008142039609053464
 | |
| 
 | |
|     We can also compute iteratively the ``CD`` discrepancy by using
 | |
|     ``iterative=True``.
 | |
| 
 | |
|     >>> disc_init = qmc.discrepancy(space[:-1], iterative=True)
 | |
|     >>> disc_init
 | |
|     0.04769081147119336
 | |
|     >>> qmc.update_discrepancy(space[-1], space[:-1], disc_init)
 | |
|     0.008142039609053513
 | |
| 
 | |
|     """
 | |
|     sample = _ensure_in_unit_hypercube(sample)
 | |
| 
 | |
|     workers = _validate_workers(workers)
 | |
| 
 | |
|     methods = {
 | |
|         "CD": _cy_wrapper_centered_discrepancy,
 | |
|         "WD": _cy_wrapper_wrap_around_discrepancy,
 | |
|         "MD": _cy_wrapper_mixture_discrepancy,
 | |
|         "L2-star": _cy_wrapper_l2_star_discrepancy,
 | |
|     }
 | |
| 
 | |
|     if method in methods:
 | |
|         return methods[method](sample, iterative, workers=workers)
 | |
|     else:
 | |
|         raise ValueError(f"{method!r} is not a valid method. It must be one of"
 | |
|                          f" {set(methods)!r}")
 | |
| 
 | |
| 
 | |
| def geometric_discrepancy(
 | |
|         sample: "npt.ArrayLike",
 | |
|         method: Literal["mindist", "mst"] = "mindist",
 | |
|         metric: str = "euclidean") -> float:
 | |
|     """Discrepancy of a given sample based on its geometric properties.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     sample : array_like (n, d)
 | |
|         The sample to compute the discrepancy from.
 | |
|     method : {"mindist", "mst"}, optional
 | |
|         The method to use. One of ``mindist`` for minimum distance (default)
 | |
|         or ``mst`` for minimum spanning tree.
 | |
|     metric : str or callable, optional
 | |
|         The distance metric to use. See the documentation
 | |
|         for `scipy.spatial.distance.pdist` for the available metrics and
 | |
|         the default.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     discrepancy : float
 | |
|         Discrepancy (higher values correspond to greater sample uniformity).
 | |
| 
 | |
|     See Also
 | |
|     --------
 | |
|     discrepancy
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     The discrepancy can serve as a simple measure of quality of a random sample.
 | |
|     This measure is based on the geometric properties of the distribution of points
 | |
|     in the sample, such as the minimum distance between any pair of points, or
 | |
|     the mean edge length in a minimum spanning tree.
 | |
| 
 | |
|     The higher the value is, the better the coverage of the parameter space is.
 | |
|     Note that this is different from `scipy.stats.qmc.discrepancy`, where lower
 | |
|     values correspond to higher quality of the sample.
 | |
| 
 | |
|     Also note that when comparing different sampling strategies using this function,
 | |
|     the sample size must be kept constant.
 | |
| 
 | |
|     It is possible to calculate two metrics from the minimum spanning tree:
 | |
|     the mean edge length and the standard deviation of edges lengths. Using
 | |
|     both metrics offers a better picture of uniformity than either metric alone,
 | |
|     with higher mean and lower standard deviation being preferable (see [1]_
 | |
|     for a brief discussion). This function currently only calculates the mean
 | |
|     edge length.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Franco J. et al. "Minimum Spanning Tree: A new approach to assess the quality
 | |
|        of the design of computer experiments." Chemometrics and Intelligent Laboratory
 | |
|        Systems, 97 (2), pp. 164-169, 2009.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Calculate the quality of the sample using the minimum euclidean distance
 | |
|     (the defaults):
 | |
| 
 | |
|     >>> import numpy as np
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> rng = np.random.default_rng(191468432622931918890291693003068437394)
 | |
|     >>> sample = qmc.LatinHypercube(d=2, rng=rng).random(50)
 | |
|     >>> qmc.geometric_discrepancy(sample)
 | |
|     0.03708161435687876
 | |
| 
 | |
|     Calculate the quality using the mean edge length in the minimum
 | |
|     spanning tree:
 | |
| 
 | |
|     >>> qmc.geometric_discrepancy(sample, method='mst')
 | |
|     0.1105149978798376
 | |
| 
 | |
|     Display the minimum spanning tree and the points with
 | |
|     the smallest distance:
 | |
| 
 | |
|     >>> import matplotlib.pyplot as plt
 | |
|     >>> from matplotlib.lines import Line2D
 | |
|     >>> from scipy.sparse.csgraph import minimum_spanning_tree
 | |
|     >>> from scipy.spatial.distance import pdist, squareform
 | |
|     >>> dist = pdist(sample)
 | |
|     >>> mst = minimum_spanning_tree(squareform(dist))
 | |
|     >>> edges = np.where(mst.toarray() > 0)
 | |
|     >>> edges = np.asarray(edges).T
 | |
|     >>> min_dist = np.min(dist)
 | |
|     >>> min_idx = np.argwhere(squareform(dist) == min_dist)[0]
 | |
|     >>> fig, ax = plt.subplots(figsize=(10, 5))
 | |
|     >>> _ = ax.set(aspect='equal', xlabel=r'$x_1$', ylabel=r'$x_2$',
 | |
|     ...            xlim=[0, 1], ylim=[0, 1])
 | |
|     >>> for edge in edges:
 | |
|     ...     ax.plot(sample[edge, 0], sample[edge, 1], c='k')
 | |
|     >>> ax.scatter(sample[:, 0], sample[:, 1])
 | |
|     >>> ax.add_patch(plt.Circle(sample[min_idx[0]], min_dist, color='red', fill=False))
 | |
|     >>> markers = [
 | |
|     ...     Line2D([0], [0], marker='o', lw=0, label='Sample points'),
 | |
|     ...     Line2D([0], [0], color='k', label='Minimum spanning tree'),
 | |
|     ...     Line2D([0], [0], marker='o', lw=0, markerfacecolor='w', markeredgecolor='r',
 | |
|     ...            label='Minimum point-to-point distance'),
 | |
|     ... ]
 | |
|     >>> ax.legend(handles=markers, loc='center left', bbox_to_anchor=(1, 0.5));
 | |
|     >>> plt.show()
 | |
| 
 | |
|     """
 | |
|     sample = _ensure_in_unit_hypercube(sample)
 | |
|     if sample.shape[0] < 2:
 | |
|         raise ValueError("Sample must contain at least two points")
 | |
| 
 | |
|     distances = distance.pdist(sample, metric=metric)  # type: ignore[call-overload]
 | |
| 
 | |
|     if np.any(distances == 0.0):
 | |
|         warnings.warn("Sample contains duplicate points.", stacklevel=2)
 | |
| 
 | |
|     if method == "mindist":
 | |
|         return np.min(distances[distances.nonzero()])
 | |
|     elif method == "mst":
 | |
|         fully_connected_graph = distance.squareform(distances)
 | |
|         mst = minimum_spanning_tree(fully_connected_graph)
 | |
|         distances = mst[mst.nonzero()]
 | |
|         # TODO consider returning both the mean and the standard deviation
 | |
|         # see [1] for a discussion
 | |
|         return np.mean(distances)
 | |
|     else:
 | |
|         raise ValueError(f"{method!r} is not a valid method. "
 | |
|                          f"It must be one of {{'mindist', 'mst'}}")
 | |
| 
 | |
| 
 | |
| def update_discrepancy(
 | |
|         x_new: "npt.ArrayLike",
 | |
|         sample: "npt.ArrayLike",
 | |
|         initial_disc: DecimalNumber) -> float:
 | |
|     """Update the centered discrepancy with a new sample.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     x_new : array_like (1, d)
 | |
|         The new sample to add in `sample`.
 | |
|     sample : array_like (n, d)
 | |
|         The initial sample.
 | |
|     initial_disc : float
 | |
|         Centered discrepancy of the `sample`.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     discrepancy : float
 | |
|         Centered discrepancy of the sample composed of `x_new` and `sample`.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     We can also compute iteratively the discrepancy by using
 | |
|     ``iterative=True``.
 | |
| 
 | |
|     >>> import numpy as np
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> space = np.array([[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]])
 | |
|     >>> l_bounds = [0.5, 0.5]
 | |
|     >>> u_bounds = [6.5, 6.5]
 | |
|     >>> space = qmc.scale(space, l_bounds, u_bounds, reverse=True)
 | |
|     >>> disc_init = qmc.discrepancy(space[:-1], iterative=True)
 | |
|     >>> disc_init
 | |
|     0.04769081147119336
 | |
|     >>> qmc.update_discrepancy(space[-1], space[:-1], disc_init)
 | |
|     0.008142039609053513
 | |
| 
 | |
|     """
 | |
|     sample = np.asarray(sample, dtype=np.float64, order="C")
 | |
|     x_new = np.asarray(x_new, dtype=np.float64, order="C")
 | |
| 
 | |
|     # Checking that sample is within the hypercube and 2D
 | |
|     if not sample.ndim == 2:
 | |
|         raise ValueError('Sample is not a 2D array')
 | |
| 
 | |
|     if (sample.max() > 1.) or (sample.min() < 0.):
 | |
|         raise ValueError('Sample is not in unit hypercube')
 | |
| 
 | |
|     # Checking that x_new is within the hypercube and 1D
 | |
|     if not x_new.ndim == 1:
 | |
|         raise ValueError('x_new is not a 1D array')
 | |
| 
 | |
|     if not (np.all(x_new >= 0) and np.all(x_new <= 1)):
 | |
|         raise ValueError('x_new is not in unit hypercube')
 | |
| 
 | |
|     if x_new.shape[0] != sample.shape[1]:
 | |
|         raise ValueError("x_new and sample must be broadcastable")
 | |
| 
 | |
|     return _cy_wrapper_update_discrepancy(x_new, sample, initial_disc)
 | |
| 
 | |
| 
 | |
| def _perturb_discrepancy(sample: np.ndarray, i1: int, i2: int, k: int,
 | |
|                          disc: float):
 | |
|     """Centered discrepancy after an elementary perturbation of a LHS.
 | |
| 
 | |
|     An elementary perturbation consists of an exchange of coordinates between
 | |
|     two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
 | |
|     this operation conserves the LHS properties.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     sample : array_like (n, d)
 | |
|         The sample (before permutation) to compute the discrepancy from.
 | |
|     i1 : int
 | |
|         The first line of the elementary permutation.
 | |
|     i2 : int
 | |
|         The second line of the elementary permutation.
 | |
|     k : int
 | |
|         The column of the elementary permutation.
 | |
|     disc : float
 | |
|         Centered discrepancy of the design before permutation.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     discrepancy : float
 | |
|         Centered discrepancy of the design after permutation.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Jin et al. "An efficient algorithm for constructing optimal design
 | |
|        of computer experiments", Journal of Statistical Planning and
 | |
|        Inference, 2005.
 | |
| 
 | |
|     """
 | |
|     n = sample.shape[0]
 | |
| 
 | |
|     z_ij = sample - 0.5
 | |
| 
 | |
|     # Eq (19)
 | |
|     c_i1j = (1. / n ** 2.
 | |
|              * np.prod(0.5 * (2. + abs(z_ij[i1, :])
 | |
|                               + abs(z_ij) - abs(z_ij[i1, :] - z_ij)), axis=1))
 | |
|     c_i2j = (1. / n ** 2.
 | |
|              * np.prod(0.5 * (2. + abs(z_ij[i2, :])
 | |
|                               + abs(z_ij) - abs(z_ij[i2, :] - z_ij)), axis=1))
 | |
| 
 | |
|     # Eq (20)
 | |
|     c_i1i1 = (1. / n ** 2 * np.prod(1 + abs(z_ij[i1, :]))
 | |
|               - 2. / n * np.prod(1. + 0.5 * abs(z_ij[i1, :])
 | |
|                                  - 0.5 * z_ij[i1, :] ** 2))
 | |
|     c_i2i2 = (1. / n ** 2 * np.prod(1 + abs(z_ij[i2, :]))
 | |
|               - 2. / n * np.prod(1. + 0.5 * abs(z_ij[i2, :])
 | |
|                                  - 0.5 * z_ij[i2, :] ** 2))
 | |
| 
 | |
|     # Eq (22), typo in the article in the denominator i2 -> i1
 | |
|     num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k])
 | |
|            - abs(z_ij[i2, k] - z_ij[:, k]))
 | |
|     denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k])
 | |
|              - abs(z_ij[i1, k] - z_ij[:, k]))
 | |
|     gamma = num / denum
 | |
| 
 | |
|     # Eq (23)
 | |
|     c_p_i1j = gamma * c_i1j
 | |
|     # Eq (24)
 | |
|     c_p_i2j = c_i2j / gamma
 | |
| 
 | |
|     alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
 | |
|     beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))
 | |
| 
 | |
|     g_i1 = np.prod(1. + abs(z_ij[i1, :]))
 | |
|     g_i2 = np.prod(1. + abs(z_ij[i2, :]))
 | |
|     h_i1 = np.prod(1. + 0.5 * abs(z_ij[i1, :]) - 0.5 * (z_ij[i1, :] ** 2))
 | |
|     h_i2 = np.prod(1. + 0.5 * abs(z_ij[i2, :]) - 0.5 * (z_ij[i2, :] ** 2))
 | |
| 
 | |
|     # Eq (25), typo in the article g is missing
 | |
|     c_p_i1i1 = ((g_i1 * alpha) / (n ** 2) - 2. * alpha * beta * h_i1 / n)
 | |
|     # Eq (26), typo in the article n ** 2
 | |
|     c_p_i2i2 = ((g_i2 / ((n ** 2) * alpha)) - (2. * h_i2 / (n * alpha * beta)))
 | |
| 
 | |
|     # Eq (26)
 | |
|     sum_ = c_p_i1j - c_i1j + c_p_i2j - c_i2j
 | |
| 
 | |
|     mask = np.ones(n, dtype=bool)
 | |
|     mask[[i1, i2]] = False
 | |
|     sum_ = sum(sum_[mask])
 | |
| 
 | |
|     disc_ep = (disc + c_p_i1i1 - c_i1i1 + c_p_i2i2 - c_i2i2 + 2 * sum_)
 | |
| 
 | |
|     return disc_ep
 | |
| 
 | |
| 
 | |
| def primes_from_2_to(n: int) -> np.ndarray:
 | |
|     """Prime numbers from 2 to *n*.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     n : int
 | |
|         Sup bound with ``n >= 6``.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     primes : list(int)
 | |
|         Primes in ``2 <= p < n``.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     Taken from [1]_ by P.T. Roy, written consent given on 23.04.2021
 | |
|     by the original author, Bruno Astrolino, for free use in SciPy under
 | |
|     the 3-clause BSD.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] `StackOverflow <https://stackoverflow.com/questions/2068372>`_.
 | |
| 
 | |
|     """
 | |
|     sieve = np.ones(n // 3 + (n % 6 == 2), dtype=bool)
 | |
|     for i in range(1, int(n ** 0.5) // 3 + 1):
 | |
|         k = 3 * i + 1 | 1
 | |
|         sieve[k * k // 3::2 * k] = False
 | |
|         sieve[k * (k - 2 * (i & 1) + 4) // 3::2 * k] = False
 | |
|     return np.r_[2, 3, ((3 * np.nonzero(sieve)[0][1:] + 1) | 1)]
 | |
| 
 | |
| 
 | |
| def n_primes(n: IntNumber) -> list[int]:
 | |
|     """List of the n-first prime numbers.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     n : int
 | |
|         Number of prime numbers wanted.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     primes : list(int)
 | |
|         List of primes.
 | |
| 
 | |
|     """
 | |
|     primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
 | |
|               61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127,
 | |
|               131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
 | |
|               197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
 | |
|               271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
 | |
|               353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431,
 | |
|               433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
 | |
|               509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
 | |
|               601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673,
 | |
|               677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761,
 | |
|               769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857,
 | |
|               859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947,
 | |
|               953, 967, 971, 977, 983, 991, 997][:n]
 | |
| 
 | |
|     if len(primes) < n:
 | |
|         big_number = 2000
 | |
|         while 'Not enough primes':
 | |
|             primes = primes_from_2_to(big_number)[:n]  # type: ignore
 | |
|             if len(primes) == n:
 | |
|                 break
 | |
|             big_number += 1000
 | |
| 
 | |
|     return primes
 | |
| 
 | |
| 
 | |
| def _van_der_corput_permutations(
 | |
|     base: IntNumber, *, rng: SeedType = None
 | |
| ) -> np.ndarray:
 | |
|     """Permutations for scrambling a Van der Corput sequence.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     base : int
 | |
|         Base of the sequence.
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. During the transition, the behavior documented above is not
 | |
|             accurate; see `check_random_state` for actual behavior. After the
 | |
|             transition, this admonition can be removed.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     permutations : array_like
 | |
|         Permutation indices.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     In Algorithm 1 of Owen 2017, a permutation of `np.arange(base)` is
 | |
|     created for each positive integer `k` such that ``1 - base**-k < 1``
 | |
|     using floating-point arithmetic. For double precision floats, the
 | |
|     condition ``1 - base**-k < 1`` can also be written as ``base**-k >
 | |
|     2**-54``, which makes it more apparent how many permutations we need
 | |
|     to create.
 | |
|     """
 | |
|     rng = check_random_state(rng)
 | |
|     count = math.ceil(54 / math.log2(base)) - 1
 | |
|     permutations = np.repeat(np.arange(base)[None], count, axis=0)
 | |
|     for perm in permutations:
 | |
|         rng.shuffle(perm)
 | |
| 
 | |
|     return permutations
 | |
| 
 | |
| 
 | |
| def van_der_corput(
 | |
|         n: IntNumber,
 | |
|         base: IntNumber = 2,
 | |
|         *,
 | |
|         start_index: IntNumber = 0,
 | |
|         scramble: bool = False,
 | |
|         permutations: "npt.ArrayLike | None" = None,
 | |
|         rng: SeedType = None,
 | |
|         workers: IntNumber = 1) -> np.ndarray:
 | |
|     """Van der Corput sequence.
 | |
| 
 | |
|     Pseudo-random number generator based on a b-adic expansion.
 | |
| 
 | |
|     Scrambling uses permutations of the remainders (see [1]_). Multiple
 | |
|     permutations are applied to construct a point. The sequence of
 | |
|     permutations has to be the same for all points of the sequence.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     n : int
 | |
|         Number of element of the sequence.
 | |
|     base : int, optional
 | |
|         Base of the sequence. Default is 2.
 | |
|     start_index : int, optional
 | |
|         Index to start the sequence from. Default is 0.
 | |
|     scramble : bool, optional
 | |
|         If True, use Owen scrambling. Otherwise no scrambling is done.
 | |
|         Default is True.
 | |
|     permutations : array_like, optional
 | |
|         Permutations used for scrambling.
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
|     workers : int, optional
 | |
|         Number of workers to use for parallel processing. If -1 is
 | |
|         given all CPU threads are used. Default is 1.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     sequence : list (n,)
 | |
|         Sequence of Van der Corput.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] A. B. Owen. "A randomized Halton algorithm in R",
 | |
|        :arxiv:`1706.02808`, 2017.
 | |
| 
 | |
|     """
 | |
|     if base < 2:
 | |
|         raise ValueError("'base' must be at least 2")
 | |
| 
 | |
|     if scramble:
 | |
|         if permutations is None:
 | |
|             permutations = _van_der_corput_permutations(
 | |
|                 base=base, rng=rng
 | |
|             )
 | |
|         else:
 | |
|             permutations = np.asarray(permutations)
 | |
| 
 | |
|         permutations = permutations.astype(np.int64)
 | |
|         return _cy_van_der_corput_scrambled(n, base, start_index,
 | |
|                                             permutations, workers)
 | |
| 
 | |
|     else:
 | |
|         return _cy_van_der_corput(n, base, start_index, workers)
 | |
| 
 | |
| 
 | |
| class QMCEngine(ABC):
 | |
|     """A generic Quasi-Monte Carlo sampler class meant for subclassing.
 | |
| 
 | |
|     QMCEngine is a base class to construct a specific Quasi-Monte Carlo
 | |
|     sampler. It cannot be used directly as a sampler.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     d : int
 | |
|         Dimension of the parameter space.
 | |
|     optimization : {None, "random-cd", "lloyd"}, optional
 | |
|         Whether to use an optimization scheme to improve the quality after
 | |
|         sampling. Note that this is a post-processing step that does not
 | |
|         guarantee that all properties of the sample will be conserved.
 | |
|         Default is None.
 | |
| 
 | |
|         * ``random-cd``: random permutations of coordinates to lower the
 | |
|           centered discrepancy. The best sample based on the centered
 | |
|           discrepancy is constantly updated. Centered discrepancy-based
 | |
|           sampling shows better space-filling robustness toward 2D and 3D
 | |
|           subprojections compared to using other discrepancy measures.
 | |
|         * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
 | |
|           The process converges to equally spaced samples.
 | |
| 
 | |
|         .. versionadded:: 1.10.0
 | |
| 
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. For an interim period, both keywords will continue to work, although
 | |
|             only one may be specified at a time. After the interim period, function
 | |
|             calls using the `seed` keyword will emit warnings. Following a
 | |
|             deprecation period, the `seed` keyword will be removed.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     By convention samples are distributed over the half-open interval
 | |
|     ``[0, 1)``. Instances of the class can access the attributes: ``d`` for
 | |
|     the dimension; and ``rng`` for the random number generator.
 | |
| 
 | |
|     **Subclassing**
 | |
| 
 | |
|     When subclassing `QMCEngine` to create a new sampler,  ``__init__`` and
 | |
|     ``random`` must be redefined.
 | |
| 
 | |
|     * ``__init__(d, rng=None)``: at least fix the dimension. If the sampler
 | |
|       does not take advantage of a ``rng`` (deterministic methods like
 | |
|       Halton), this parameter can be omitted.
 | |
|     * ``_random(n, *, workers=1)``: draw ``n`` from the engine. ``workers``
 | |
|       is used for parallelism. See `Halton` for example.
 | |
| 
 | |
|     Optionally, two other methods can be overwritten by subclasses:
 | |
| 
 | |
|     * ``reset``: Reset the engine to its original state.
 | |
|     * ``fast_forward``: If the sequence is deterministic (like Halton
 | |
|       sequence), then ``fast_forward(n)`` is skipping the ``n`` first draw.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     To create a random sampler based on ``np.random.random``, we would do the
 | |
|     following:
 | |
| 
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> class RandomEngine(qmc.QMCEngine):
 | |
|     ...     def __init__(self, d, rng=None):
 | |
|     ...         super().__init__(d=d, rng=rng)
 | |
|     ...
 | |
|     ...
 | |
|     ...     def _random(self, n=1, *, workers=1):
 | |
|     ...         return self.rng.random((n, self.d))
 | |
|     ...
 | |
|     ...
 | |
|     ...     def reset(self):
 | |
|     ...         super().__init__(d=self.d, rng=self.rng_seed)
 | |
|     ...         return self
 | |
|     ...
 | |
|     ...
 | |
|     ...     def fast_forward(self, n):
 | |
|     ...         self.random(n)
 | |
|     ...         return self
 | |
| 
 | |
|     After subclassing `QMCEngine` to define the sampling strategy we want to
 | |
|     use, we can create an instance to sample from.
 | |
| 
 | |
|     >>> engine = RandomEngine(2)
 | |
|     >>> engine.random(5)
 | |
|     array([[0.22733602, 0.31675834],  # random
 | |
|            [0.79736546, 0.67625467],
 | |
|            [0.39110955, 0.33281393],
 | |
|            [0.59830875, 0.18673419],
 | |
|            [0.67275604, 0.94180287]])
 | |
| 
 | |
|     We can also reset the state of the generator and resample again.
 | |
| 
 | |
|     >>> _ = engine.reset()
 | |
|     >>> engine.random(5)
 | |
|     array([[0.22733602, 0.31675834],  # random
 | |
|            [0.79736546, 0.67625467],
 | |
|            [0.39110955, 0.33281393],
 | |
|            [0.59830875, 0.18673419],
 | |
|            [0.67275604, 0.94180287]])
 | |
| 
 | |
|     """
 | |
| 
 | |
|     @abstractmethod
 | |
|     @_transition_to_rng('seed', replace_doc=False)
 | |
|     def __init__(
 | |
|         self,
 | |
|         d: IntNumber,
 | |
|         *,
 | |
|         optimization: Literal["random-cd", "lloyd"] | None = None,
 | |
|         rng: SeedType = None
 | |
|     ) -> None:
 | |
|         self._initialize(d, optimization=optimization, rng=rng)
 | |
| 
 | |
|     # During SPEC 7 transition:
 | |
|     # `__init__` has to be wrapped with @_transition_to_rng decorator
 | |
|     # because it is public. Subclasses previously called `__init__`
 | |
|     # directly, but this was problematic because arguments passed to
 | |
|     # subclass `__init__` as `seed` would get passed to superclass
 | |
|     # `__init__` as `rng`, rejecting `RandomState` arguments.
 | |
|     def _initialize(
 | |
|         self,
 | |
|         d: IntNumber,
 | |
|         *,
 | |
|         optimization: Literal["random-cd", "lloyd"] | None = None,
 | |
|         rng: SeedType = None
 | |
|     ) -> None:
 | |
|         if not np.issubdtype(type(d), np.integer) or d < 0:
 | |
|             raise ValueError('d must be a non-negative integer value')
 | |
| 
 | |
|         self.d = d
 | |
| 
 | |
|         if isinstance(rng, np.random.Generator):
 | |
|             # Spawn a Generator that we can own and reset.
 | |
|             self.rng = _rng_spawn(rng, 1)[0]
 | |
|         else:
 | |
|             # Create our instance of Generator, does not need spawning
 | |
|             # Also catch RandomState which cannot be spawned
 | |
|             self.rng = check_random_state(rng)
 | |
|         self.rng_seed = copy.deepcopy(self.rng)
 | |
| 
 | |
|         self.num_generated = 0
 | |
| 
 | |
|         config = {
 | |
|             # random-cd
 | |
|             "n_nochange": 100,
 | |
|             "n_iters": 10_000,
 | |
|             "rng": self.rng,
 | |
| 
 | |
|             # lloyd
 | |
|             "tol": 1e-5,
 | |
|             "maxiter": 10,
 | |
|             "qhull_options": None,
 | |
|         }
 | |
|         self._optimization = optimization
 | |
|         self.optimization_method = _select_optimizer(optimization, config)
 | |
| 
 | |
|     @abstractmethod
 | |
|     def _random(
 | |
|         self, n: IntNumber = 1, *, workers: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         ...
 | |
| 
 | |
|     def random(
 | |
|         self, n: IntNumber = 1, *, workers: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         """Draw `n` in the half-open interval ``[0, 1)``.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space.
 | |
|             Default is 1.
 | |
|         workers : int, optional
 | |
|             Only supported with `Halton`.
 | |
|             Number of workers to use for parallel processing. If -1 is
 | |
|             given all CPU threads are used. Default is 1. It becomes faster
 | |
|             than one worker for `n` greater than :math:`10^3`.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             QMC sample.
 | |
| 
 | |
|         """
 | |
|         sample = self._random(n, workers=workers)
 | |
|         if self.optimization_method is not None:
 | |
|             sample = self.optimization_method(sample)
 | |
| 
 | |
|         self.num_generated += n
 | |
|         return sample
 | |
| 
 | |
|     def integers(
 | |
|         self,
 | |
|         l_bounds: "npt.ArrayLike",
 | |
|         *,
 | |
|         u_bounds: "npt.ArrayLike | None" = None,
 | |
|         n: IntNumber = 1,
 | |
|         endpoint: bool = False,
 | |
|         workers: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         r"""
 | |
|         Draw `n` integers from `l_bounds` (inclusive) to `u_bounds`
 | |
|         (exclusive), or if endpoint=True, `l_bounds` (inclusive) to
 | |
|         `u_bounds` (inclusive).
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         l_bounds : int or array-like of ints
 | |
|             Lowest (signed) integers to be drawn (unless ``u_bounds=None``,
 | |
|             in which case this parameter is 0 and this value is used for
 | |
|             `u_bounds`).
 | |
|         u_bounds : int or array-like of ints, optional
 | |
|             If provided, one above the largest (signed) integer to be drawn
 | |
|             (see above for behavior if ``u_bounds=None``).
 | |
|             If array-like, must contain integer values.
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space.
 | |
|             Default is 1.
 | |
|         endpoint : bool, optional
 | |
|             If true, sample from the interval ``[l_bounds, u_bounds]`` instead
 | |
|             of the default ``[l_bounds, u_bounds)``. Defaults is False.
 | |
|         workers : int, optional
 | |
|             Number of workers to use for parallel processing. If -1 is
 | |
|             given all CPU threads are used. Only supported when using `Halton`
 | |
|             Default is 1.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             QMC sample.
 | |
| 
 | |
|         Notes
 | |
|         -----
 | |
|         It is safe to just use the same ``[0, 1)`` to integer mapping
 | |
|         with QMC that you would use with MC. You still get unbiasedness,
 | |
|         a strong law of large numbers, an asymptotically infinite variance
 | |
|         reduction and a finite sample variance bound.
 | |
| 
 | |
|         To convert a sample from :math:`[0, 1)` to :math:`[a, b), b>a`,
 | |
|         with :math:`a` the lower bounds and :math:`b` the upper bounds,
 | |
|         the following transformation is used:
 | |
| 
 | |
|         .. math::
 | |
| 
 | |
|             \text{floor}((b - a) \cdot \text{sample} + a)
 | |
| 
 | |
|         """
 | |
|         if u_bounds is None:
 | |
|             u_bounds = l_bounds
 | |
|             l_bounds = 0
 | |
| 
 | |
|         u_bounds = np.atleast_1d(u_bounds)
 | |
|         l_bounds = np.atleast_1d(l_bounds)
 | |
| 
 | |
|         if endpoint:
 | |
|             u_bounds = u_bounds + 1
 | |
| 
 | |
|         if (not np.issubdtype(l_bounds.dtype, np.integer) or
 | |
|                 not np.issubdtype(u_bounds.dtype, np.integer)):
 | |
|             message = ("'u_bounds' and 'l_bounds' must be integers or"
 | |
|                        " array-like of integers")
 | |
|             raise ValueError(message)
 | |
| 
 | |
|         if isinstance(self, Halton):
 | |
|             sample = self.random(n=n, workers=workers)
 | |
|         else:
 | |
|             sample = self.random(n=n)
 | |
| 
 | |
|         sample = scale(sample, l_bounds=l_bounds, u_bounds=u_bounds)
 | |
|         sample = np.floor(sample).astype(np.int64)
 | |
| 
 | |
|         return sample
 | |
| 
 | |
|     def reset(self) -> "QMCEngine":
 | |
|         """Reset the engine to base state.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         engine : QMCEngine
 | |
|             Engine reset to its base state.
 | |
| 
 | |
|         """
 | |
|         rng = copy.deepcopy(self.rng_seed)
 | |
|         self.rng = check_random_state(rng)
 | |
|         self.num_generated = 0
 | |
|         return self
 | |
| 
 | |
|     def fast_forward(self, n: IntNumber) -> "QMCEngine":
 | |
|         """Fast-forward the sequence by `n` positions.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int
 | |
|             Number of points to skip in the sequence.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         engine : QMCEngine
 | |
|             Engine reset to its base state.
 | |
| 
 | |
|         """
 | |
|         self.random(n=n)
 | |
|         return self
 | |
| 
 | |
| 
 | |
| class Halton(QMCEngine):
 | |
|     """Halton sequence.
 | |
| 
 | |
|     Pseudo-random number generator that generalize the Van der Corput sequence
 | |
|     for multiple dimensions. The Halton sequence uses the base-two Van der
 | |
|     Corput sequence for the first dimension, base-three for its second and
 | |
|     base-:math:`p` for its :math:`n`-dimension, with :math:`p` the
 | |
|     :math:`n`'th prime.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     d : int
 | |
|         Dimension of the parameter space.
 | |
|     scramble : bool, optional
 | |
|         If True, use random scrambling from [2]_. Otherwise no scrambling
 | |
|         is done.
 | |
|         Default is True.
 | |
|     optimization : {None, "random-cd", "lloyd"}, optional
 | |
|         Whether to use an optimization scheme to improve the quality after
 | |
|         sampling. Note that this is a post-processing step that does not
 | |
|         guarantee that all properties of the sample will be conserved.
 | |
|         Default is None.
 | |
| 
 | |
|         * ``random-cd``: random permutations of coordinates to lower the
 | |
|           centered discrepancy. The best sample based on the centered
 | |
|           discrepancy is constantly updated. Centered discrepancy-based
 | |
|           sampling shows better space-filling robustness toward 2D and 3D
 | |
|           subprojections compared to using other discrepancy measures.
 | |
|         * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
 | |
|           The process converges to equally spaced samples.
 | |
| 
 | |
|         .. versionadded:: 1.10.0
 | |
| 
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. For an interim period, both keywords will continue to work, although
 | |
|             only one may be specified at a time. After the interim period, function
 | |
|             calls using the `seed` keyword will emit warnings. Following a
 | |
|             deprecation period, the `seed` keyword will be removed.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     The Halton sequence has severe striping artifacts for even modestly
 | |
|     large dimensions. These can be ameliorated by scrambling. Scrambling
 | |
|     also supports replication-based error estimates and extends
 | |
|     applicability to unbounded integrands.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Halton, "On the efficiency of certain quasi-random sequences of
 | |
|        points in evaluating multi-dimensional integrals", Numerische
 | |
|        Mathematik, 1960.
 | |
|     .. [2] A. B. Owen. "A randomized Halton algorithm in R",
 | |
|        :arxiv:`1706.02808`, 2017.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Generate samples from a low discrepancy sequence of Halton.
 | |
| 
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> sampler = qmc.Halton(d=2, scramble=False)
 | |
|     >>> sample = sampler.random(n=5)
 | |
|     >>> sample
 | |
|     array([[0.        , 0.        ],
 | |
|            [0.5       , 0.33333333],
 | |
|            [0.25      , 0.66666667],
 | |
|            [0.75      , 0.11111111],
 | |
|            [0.125     , 0.44444444]])
 | |
| 
 | |
|     Compute the quality of the sample using the discrepancy criterion.
 | |
| 
 | |
|     >>> qmc.discrepancy(sample)
 | |
|     0.088893711419753
 | |
| 
 | |
|     If some wants to continue an existing design, extra points can be obtained
 | |
|     by calling again `random`. Alternatively, you can skip some points like:
 | |
| 
 | |
|     >>> _ = sampler.fast_forward(5)
 | |
|     >>> sample_continued = sampler.random(n=5)
 | |
|     >>> sample_continued
 | |
|     array([[0.3125    , 0.37037037],
 | |
|            [0.8125    , 0.7037037 ],
 | |
|            [0.1875    , 0.14814815],
 | |
|            [0.6875    , 0.48148148],
 | |
|            [0.4375    , 0.81481481]])
 | |
| 
 | |
|     Finally, samples can be scaled to bounds.
 | |
| 
 | |
|     >>> l_bounds = [0, 2]
 | |
|     >>> u_bounds = [10, 5]
 | |
|     >>> qmc.scale(sample_continued, l_bounds, u_bounds)
 | |
|     array([[3.125     , 3.11111111],
 | |
|            [8.125     , 4.11111111],
 | |
|            [1.875     , 2.44444444],
 | |
|            [6.875     , 3.44444444],
 | |
|            [4.375     , 4.44444444]])
 | |
| 
 | |
|     """
 | |
|     @_transition_to_rng('seed', replace_doc=False)
 | |
|     def __init__(
 | |
|         self, d: IntNumber, *, scramble: bool = True,
 | |
|         optimization: Literal["random-cd", "lloyd"] | None = None,
 | |
|         rng: SeedType = None
 | |
|     ) -> None:
 | |
|         # Used in `scipy.integrate.qmc_quad`
 | |
|         self._init_quad = {'d': d, 'scramble': True,
 | |
|                            'optimization': optimization}
 | |
|         super()._initialize(d=d, optimization=optimization, rng=rng)
 | |
| 
 | |
|         # important to have ``type(bdim) == int`` for performance reason
 | |
|         self.base = [int(bdim) for bdim in n_primes(d)]
 | |
|         self.scramble = scramble
 | |
| 
 | |
|         self._initialize_permutations()
 | |
| 
 | |
|     def _initialize_permutations(self) -> None:
 | |
|         """Initialize permutations for all Van der Corput sequences.
 | |
| 
 | |
|         Permutations are only needed for scrambling.
 | |
|         """
 | |
|         self._permutations: list = [None] * len(self.base)
 | |
|         if self.scramble:
 | |
|             for i, bdim in enumerate(self.base):
 | |
|                 permutations = _van_der_corput_permutations(
 | |
|                     base=bdim, rng=self.rng
 | |
|                 )
 | |
| 
 | |
|                 self._permutations[i] = permutations
 | |
| 
 | |
|     def _random(
 | |
|         self, n: IntNumber = 1, *, workers: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         """Draw `n` in the half-open interval ``[0, 1)``.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space. Default is 1.
 | |
|         workers : int, optional
 | |
|             Number of workers to use for parallel processing. If -1 is
 | |
|             given all CPU threads are used. Default is 1. It becomes faster
 | |
|             than one worker for `n` greater than :math:`10^3`.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             QMC sample.
 | |
| 
 | |
|         """
 | |
|         workers = _validate_workers(workers)
 | |
|         # Generate a sample using a Van der Corput sequence per dimension.
 | |
|         sample = [van_der_corput(n, bdim, start_index=self.num_generated,
 | |
|                                  scramble=self.scramble,
 | |
|                                  permutations=self._permutations[i],
 | |
|                                  workers=workers)
 | |
|                   for i, bdim in enumerate(self.base)]
 | |
| 
 | |
|         return np.array(sample).T.reshape(n, self.d)
 | |
| 
 | |
| 
 | |
| class LatinHypercube(QMCEngine):
 | |
|     r"""Latin hypercube sampling (LHS).
 | |
| 
 | |
|     A Latin hypercube sample [1]_ generates :math:`n` points in
 | |
|     :math:`[0,1)^{d}`. Each univariate marginal distribution is stratified,
 | |
|     placing exactly one point in :math:`[j/n, (j+1)/n)` for
 | |
|     :math:`j=0,1,...,n-1`. They are still applicable when :math:`n << d`.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     d : int
 | |
|         Dimension of the parameter space.
 | |
|     scramble : bool, optional
 | |
|         When False, center samples within cells of a multi-dimensional grid.
 | |
|         Otherwise, samples are randomly placed within cells of the grid.
 | |
| 
 | |
|         .. note::
 | |
|             Setting ``scramble=False`` does not ensure deterministic output.
 | |
|             For that, use the `rng` parameter.
 | |
| 
 | |
|         Default is True.
 | |
| 
 | |
|         .. versionadded:: 1.10.0
 | |
| 
 | |
|     optimization : {None, "random-cd", "lloyd"}, optional
 | |
|         Whether to use an optimization scheme to improve the quality after
 | |
|         sampling. Note that this is a post-processing step that does not
 | |
|         guarantee that all properties of the sample will be conserved.
 | |
|         Default is None.
 | |
| 
 | |
|         * ``random-cd``: random permutations of coordinates to lower the
 | |
|           centered discrepancy. The best sample based on the centered
 | |
|           discrepancy is constantly updated. Centered discrepancy-based
 | |
|           sampling shows better space-filling robustness toward 2D and 3D
 | |
|           subprojections compared to using other discrepancy measures.
 | |
|         * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
 | |
|           The process converges to equally spaced samples.
 | |
| 
 | |
|         .. versionadded:: 1.8.0
 | |
|         .. versionchanged:: 1.10.0
 | |
|             Add ``lloyd``.
 | |
| 
 | |
|     strength : {1, 2}, optional
 | |
|         Strength of the LHS. ``strength=1`` produces a plain LHS while
 | |
|         ``strength=2`` produces an orthogonal array based LHS of strength 2
 | |
|         [7]_, [8]_. In that case, only ``n=p**2`` points can be sampled,
 | |
|         with ``p`` a prime number. It also constrains ``d <= p + 1``.
 | |
|         Default is 1.
 | |
| 
 | |
|         .. versionadded:: 1.8.0
 | |
| 
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. For an interim period, both keywords will continue to work, although
 | |
|             only one may be specified at a time. After the interim period, function
 | |
|             calls using the `seed` keyword will emit warnings. Following a
 | |
|             deprecation period, the `seed` keyword will be removed.
 | |
| 
 | |
|     See Also
 | |
|     --------
 | |
|     :ref:`quasi-monte-carlo`
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
| 
 | |
|     When LHS is used for integrating a function :math:`f` over :math:`n`,
 | |
|     LHS is extremely effective on integrands that are nearly additive [2]_.
 | |
|     With a LHS of :math:`n` points, the variance of the integral is always
 | |
|     lower than plain MC on :math:`n-1` points [3]_. There is a central limit
 | |
|     theorem for LHS on the mean and variance of the integral [4]_, but not
 | |
|     necessarily for optimized LHS due to the randomization.
 | |
| 
 | |
|     :math:`A` is called an orthogonal array of strength :math:`t` if in each
 | |
|     n-row-by-t-column submatrix of :math:`A`: all :math:`p^t` possible
 | |
|     distinct rows occur the same number of times. The elements of :math:`A`
 | |
|     are in the set :math:`\{0, 1, ..., p-1\}`, also called symbols.
 | |
|     The constraint that :math:`p` must be a prime number is to allow modular
 | |
|     arithmetic. Increasing strength adds some symmetry to the sub-projections
 | |
|     of a sample. With strength 2, samples are symmetric along the diagonals of
 | |
|     2D sub-projections. This may be undesirable, but on the other hand, the
 | |
|     sample dispersion is improved.
 | |
| 
 | |
|     Strength 1 (plain LHS) brings an advantage over strength 0 (MC) and
 | |
|     strength 2 is a useful increment over strength 1. Going to strength 3 is
 | |
|     a smaller increment and scrambled QMC like Sobol', Halton are more
 | |
|     performant [7]_.
 | |
| 
 | |
|     To create a LHS of strength 2, the orthogonal array :math:`A` is
 | |
|     randomized by applying a random, bijective map of the set of symbols onto
 | |
|     itself. For example, in column 0, all 0s might become 2; in column 1,
 | |
|     all 0s might become 1, etc.
 | |
|     Then, for each column :math:`i` and symbol :math:`j`, we add a plain,
 | |
|     one-dimensional LHS of size :math:`p` to the subarray where
 | |
|     :math:`A^i = j`. The resulting matrix is finally divided by :math:`p`.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Mckay et al., "A Comparison of Three Methods for Selecting Values
 | |
|        of Input Variables in the Analysis of Output from a Computer Code."
 | |
|        Technometrics, 1979.
 | |
|     .. [2] M. Stein, "Large sample properties of simulations using Latin
 | |
|        hypercube sampling." Technometrics 29, no. 2: 143-151, 1987.
 | |
|     .. [3] A. B. Owen, "Monte Carlo variance of scrambled net quadrature."
 | |
|        SIAM Journal on Numerical Analysis 34, no. 5: 1884-1910, 1997
 | |
|     .. [4]  Loh, W.-L. "On Latin hypercube sampling." The annals of statistics
 | |
|        24, no. 5: 2058-2080, 1996.
 | |
|     .. [5] Fang et al. "Design and modeling for computer experiments".
 | |
|        Computer Science and Data Analysis Series, 2006.
 | |
|     .. [6] Damblin et al., "Numerical studies of space filling designs:
 | |
|        optimization of Latin Hypercube Samples and subprojection properties."
 | |
|        Journal of Simulation, 2013.
 | |
|     .. [7] A. B. Owen , "Orthogonal arrays for computer experiments,
 | |
|        integration and visualization." Statistica Sinica, 1992.
 | |
|     .. [8] B. Tang, "Orthogonal Array-Based Latin Hypercubes."
 | |
|        Journal of the American Statistical Association, 1993.
 | |
|     .. [9] Seaholm, Susan K. et al. (1988). Latin hypercube sampling and the
 | |
|        sensitivity analysis of a Monte Carlo epidemic model. Int J Biomed
 | |
|        Comput, 23(1-2), 97-112. :doi:`10.1016/0020-7101(88)90067-0`
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Generate samples from a Latin hypercube generator.
 | |
| 
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> sampler = qmc.LatinHypercube(d=2)
 | |
|     >>> sample = sampler.random(n=5)
 | |
|     >>> sample
 | |
|     array([[0.1545328 , 0.53664833], # random
 | |
|             [0.84052691, 0.06474907],
 | |
|             [0.52177809, 0.93343721],
 | |
|             [0.68033825, 0.36265316],
 | |
|             [0.26544879, 0.61163943]])
 | |
| 
 | |
|     Compute the quality of the sample using the discrepancy criterion.
 | |
| 
 | |
|     >>> qmc.discrepancy(sample)
 | |
|     0.0196... # random
 | |
| 
 | |
|     Samples can be scaled to bounds.
 | |
| 
 | |
|     >>> l_bounds = [0, 2]
 | |
|     >>> u_bounds = [10, 5]
 | |
|     >>> qmc.scale(sample, l_bounds, u_bounds)
 | |
|     array([[1.54532796, 3.609945 ], # random
 | |
|             [8.40526909, 2.1942472 ],
 | |
|             [5.2177809 , 4.80031164],
 | |
|             [6.80338249, 3.08795949],
 | |
|             [2.65448791, 3.83491828]])
 | |
| 
 | |
|     Below are other examples showing alternative ways to construct LHS with
 | |
|     even better coverage of the space.
 | |
| 
 | |
|     Using a base LHS as a baseline.
 | |
| 
 | |
|     >>> sampler = qmc.LatinHypercube(d=2)
 | |
|     >>> sample = sampler.random(n=5)
 | |
|     >>> qmc.discrepancy(sample)
 | |
|     0.0196...  # random
 | |
| 
 | |
|     Use the `optimization` keyword argument to produce a LHS with
 | |
|     lower discrepancy at higher computational cost.
 | |
| 
 | |
|     >>> sampler = qmc.LatinHypercube(d=2, optimization="random-cd")
 | |
|     >>> sample = sampler.random(n=5)
 | |
|     >>> qmc.discrepancy(sample)
 | |
|     0.0176...  # random
 | |
| 
 | |
|     Use the `strength` keyword argument to produce an orthogonal array based
 | |
|     LHS of strength 2. In this case, the number of sample points must be the
 | |
|     square of a prime number.
 | |
| 
 | |
|     >>> sampler = qmc.LatinHypercube(d=2, strength=2)
 | |
|     >>> sample = sampler.random(n=9)
 | |
|     >>> qmc.discrepancy(sample)
 | |
|     0.00526...  # random
 | |
| 
 | |
|     Options could be combined to produce an optimized centered
 | |
|     orthogonal array based LHS. After optimization, the result would not
 | |
|     be guaranteed to be of strength 2.
 | |
| 
 | |
|     **Real-world example**
 | |
| 
 | |
|     In [9]_, a Latin Hypercube sampling (LHS) strategy was used to sample a
 | |
|     parameter space to study the importance of each parameter of an epidemic
 | |
|     model. Such analysis is also called a sensitivity analysis.
 | |
| 
 | |
|     Since the dimensionality of the problem is high (6), it is computationally
 | |
|     expensive to cover the space. When numerical experiments are costly, QMC
 | |
|     enables analysis that may not be possible if using a grid.
 | |
| 
 | |
|     The six parameters of the model represented the probability of illness,
 | |
|     the probability of withdrawal, and four contact probabilities. The
 | |
|     authors assumed uniform distributions for all parameters and generated
 | |
|     50 samples.
 | |
| 
 | |
|     Using `scipy.stats.qmc.LatinHypercube` to replicate the protocol,
 | |
|     the first step is to create a sample in the unit hypercube:
 | |
| 
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> sampler = qmc.LatinHypercube(d=6)
 | |
|     >>> sample = sampler.random(n=50)
 | |
| 
 | |
|     Then the sample can be scaled to the appropriate bounds:
 | |
| 
 | |
|     >>> l_bounds = [0.000125, 0.01, 0.0025, 0.05, 0.47, 0.7]
 | |
|     >>> u_bounds = [0.000375, 0.03, 0.0075, 0.15, 0.87, 0.9]
 | |
|     >>> sample_scaled = qmc.scale(sample, l_bounds, u_bounds)
 | |
| 
 | |
|     Such a sample was used to run the model 50 times, and a polynomial
 | |
|     response surface was constructed. This allowed the authors to study the
 | |
|     relative importance of each parameter across the range of possibilities
 | |
|     of every other parameter.
 | |
| 
 | |
|     In this computer experiment, they showed a 14-fold reduction in the
 | |
|     number of samples required to maintain an error below 2% on their
 | |
|     response surface when compared to a grid sampling.
 | |
| 
 | |
|     """
 | |
| 
 | |
|     @_transition_to_rng('seed', replace_doc=False)
 | |
|     def __init__(
 | |
|         self, d: IntNumber, *,
 | |
|         scramble: bool = True,
 | |
|         strength: int = 1,
 | |
|         optimization: Literal["random-cd", "lloyd"] | None = None,
 | |
|         rng: SeedType = None
 | |
|     ) -> None:
 | |
|         # Used in `scipy.integrate.qmc_quad`
 | |
|         self._init_quad = {'d': d, 'scramble': True, 'strength': strength,
 | |
|                            'optimization': optimization}
 | |
|         super()._initialize(d=d, rng=rng, optimization=optimization)
 | |
|         self.scramble = scramble
 | |
| 
 | |
|         lhs_method_strength = {
 | |
|             1: self._random_lhs,
 | |
|             2: self._random_oa_lhs
 | |
|         }
 | |
| 
 | |
|         try:
 | |
|             self.lhs_method: Callable = lhs_method_strength[strength]
 | |
|         except KeyError as exc:
 | |
|             message = (f"{strength!r} is not a valid strength. It must be one"
 | |
|                        f" of {set(lhs_method_strength)!r}")
 | |
|             raise ValueError(message) from exc
 | |
| 
 | |
|     def _random(
 | |
|         self, n: IntNumber = 1, *, workers: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         lhs = self.lhs_method(n)
 | |
|         return lhs
 | |
| 
 | |
|     def _random_lhs(self, n: IntNumber = 1) -> np.ndarray:
 | |
|         """Base LHS algorithm."""
 | |
|         if not self.scramble:
 | |
|             samples: np.ndarray | float = 0.5
 | |
|         else:
 | |
|             samples = self.rng.uniform(size=(n, self.d))
 | |
| 
 | |
|         perms = np.tile(np.arange(1, n + 1),
 | |
|                         (self.d, 1))  # type: ignore[arg-type]
 | |
|         for i in range(self.d):
 | |
|             self.rng.shuffle(perms[i, :])
 | |
|         perms = perms.T
 | |
| 
 | |
|         samples = (perms - samples) / n
 | |
|         return samples
 | |
| 
 | |
|     def _random_oa_lhs(self, n: IntNumber = 4) -> np.ndarray:
 | |
|         """Orthogonal array based LHS of strength 2."""
 | |
|         p = np.sqrt(n).astype(int)
 | |
|         n_row = p**2
 | |
|         n_col = p + 1
 | |
| 
 | |
|         primes = primes_from_2_to(p + 1)
 | |
|         if p not in primes or n != n_row:
 | |
|             raise ValueError(
 | |
|                 "n is not the square of a prime number. Close"
 | |
|                 f" values are {primes[-2:]**2}"
 | |
|             )
 | |
|         if self.d > p + 1:
 | |
|             raise ValueError("n is too small for d. Must be n > (d-1)**2")
 | |
| 
 | |
|         oa_sample = np.zeros(shape=(n_row, n_col), dtype=int)
 | |
| 
 | |
|         # OA of strength 2
 | |
|         arrays = np.tile(np.arange(p), (2, 1))
 | |
|         oa_sample[:, :2] = np.stack(np.meshgrid(*arrays),
 | |
|                                     axis=-1).reshape(-1, 2)
 | |
|         for p_ in range(1, p):
 | |
|             oa_sample[:, 2+p_-1] = np.mod(oa_sample[:, 0]
 | |
|                                           + p_*oa_sample[:, 1], p)
 | |
| 
 | |
|         # scramble the OA
 | |
|         oa_sample_ = np.empty(shape=(n_row, n_col), dtype=int)
 | |
|         for j in range(n_col):
 | |
|             perms = self.rng.permutation(p)
 | |
|             oa_sample_[:, j] = perms[oa_sample[:, j]]
 | |
| 
 | |
|         oa_sample = oa_sample_
 | |
|         # following is making a scrambled OA into an OA-LHS
 | |
|         oa_lhs_sample = np.zeros(shape=(n_row, n_col))
 | |
|         lhs_engine = LatinHypercube(d=1, scramble=self.scramble, strength=1,
 | |
|                                     rng=self.rng)  # type: QMCEngine
 | |
|         for j in range(n_col):
 | |
|             for k in range(p):
 | |
|                 idx = oa_sample[:, j] == k
 | |
|                 lhs = lhs_engine.random(p).flatten()
 | |
|                 oa_lhs_sample[:, j][idx] = lhs + oa_sample[:, j][idx]
 | |
| 
 | |
|         oa_lhs_sample /= p
 | |
| 
 | |
|         return oa_lhs_sample[:, :self.d]
 | |
| 
 | |
| 
 | |
| class Sobol(QMCEngine):
 | |
|     """Engine for generating (scrambled) Sobol' sequences.
 | |
| 
 | |
|     Sobol' sequences are low-discrepancy, quasi-random numbers. Points
 | |
|     can be drawn using two methods:
 | |
| 
 | |
|     * `random_base2`: safely draw :math:`n=2^m` points. This method
 | |
|       guarantees the balance properties of the sequence.
 | |
|     * `random`: draw an arbitrary number of points from the
 | |
|       sequence. See warning below.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     d : int
 | |
|         Dimensionality of the sequence. Max dimensionality is 21201.
 | |
|     scramble : bool, optional
 | |
|         If True, use LMS+shift scrambling. Otherwise, no scrambling is done.
 | |
|         Default is True.
 | |
|     bits : int, optional
 | |
|         Number of bits of the generator. Control the maximum number of points
 | |
|         that can be generated, which is ``2**bits``. Maximal value is 64.
 | |
|         It does not correspond to the return type, which is always
 | |
|         ``np.float64`` to prevent points from repeating themselves.
 | |
|         Default is None, which for backward compatibility, corresponds to 30.
 | |
| 
 | |
|         .. versionadded:: 1.9.0
 | |
|     optimization : {None, "random-cd", "lloyd"}, optional
 | |
|         Whether to use an optimization scheme to improve the quality after
 | |
|         sampling. Note that this is a post-processing step that does not
 | |
|         guarantee that all properties of the sample will be conserved.
 | |
|         Default is None.
 | |
| 
 | |
|         * ``random-cd``: random permutations of coordinates to lower the
 | |
|           centered discrepancy. The best sample based on the centered
 | |
|           discrepancy is constantly updated. Centered discrepancy-based
 | |
|           sampling shows better space-filling robustness toward 2D and 3D
 | |
|           subprojections compared to using other discrepancy measures.
 | |
|         * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
 | |
|           The process converges to equally spaced samples.
 | |
| 
 | |
|         .. versionadded:: 1.10.0
 | |
| 
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. For an interim period, both keywords will continue to work, although
 | |
|             only one may be specified at a time. After the interim period, function
 | |
|             calls using the `seed` keyword will emit warnings. Following a
 | |
|             deprecation period, the `seed` keyword will be removed.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     Sobol' sequences [1]_ provide :math:`n=2^m` low discrepancy points in
 | |
|     :math:`[0,1)^{d}`. Scrambling them [3]_ makes them suitable for singular
 | |
|     integrands, provides a means of error estimation, and can improve their
 | |
|     rate of convergence. The scrambling strategy which is implemented is a
 | |
|     (left) linear matrix scramble (LMS) followed by a digital random shift
 | |
|     (LMS+shift) [2]_.
 | |
| 
 | |
|     There are many versions of Sobol' sequences depending on their
 | |
|     'direction numbers'. This code uses direction numbers from [4]_. Hence,
 | |
|     the maximum number of dimension is 21201. The direction numbers have been
 | |
|     precomputed with search criterion 6 and can be retrieved at
 | |
|     https://web.maths.unsw.edu.au/~fkuo/sobol/.
 | |
| 
 | |
|     .. warning::
 | |
| 
 | |
|        Sobol' sequences are a quadrature rule and they lose their balance
 | |
|        properties if one uses a sample size that is not a power of 2, or skips
 | |
|        the first point, or thins the sequence [5]_.
 | |
| 
 | |
|        If :math:`n=2^m` points are not enough then one should take :math:`2^M`
 | |
|        points for :math:`M>m`. When scrambling, the number R of independent
 | |
|        replicates does not have to be a power of 2.
 | |
| 
 | |
|        Sobol' sequences are generated to some number :math:`B` of bits.
 | |
|        After :math:`2^B` points have been generated, the sequence would
 | |
|        repeat. Hence, an error is raised.
 | |
|        The number of bits can be controlled with the parameter `bits`.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] I. M. Sobol', "The distribution of points in a cube and the accurate
 | |
|        evaluation of integrals." Zh. Vychisl. Mat. i Mat. Phys., 7:784-802,
 | |
|        1967.
 | |
|     .. [2] J. Matousek, "On the L2-discrepancy for anchored boxes."
 | |
|        J. of Complexity 14, 527-556, 1998.
 | |
|     .. [3] Art B. Owen, "Scrambling Sobol and Niederreiter-Xing points."
 | |
|        Journal of Complexity, 14(4):466-489, December 1998.
 | |
|     .. [4] S. Joe and F. Y. Kuo, "Constructing sobol sequences with better
 | |
|        two-dimensional projections." SIAM Journal on Scientific Computing,
 | |
|        30(5):2635-2654, 2008.
 | |
|     .. [5] Art B. Owen, "On dropping the first Sobol' point."
 | |
|        :arxiv:`2008.08051`, 2020.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Generate samples from a low discrepancy sequence of Sobol'.
 | |
| 
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> sampler = qmc.Sobol(d=2, scramble=False)
 | |
|     >>> sample = sampler.random_base2(m=3)
 | |
|     >>> sample
 | |
|     array([[0.   , 0.   ],
 | |
|            [0.5  , 0.5  ],
 | |
|            [0.75 , 0.25 ],
 | |
|            [0.25 , 0.75 ],
 | |
|            [0.375, 0.375],
 | |
|            [0.875, 0.875],
 | |
|            [0.625, 0.125],
 | |
|            [0.125, 0.625]])
 | |
| 
 | |
|     Compute the quality of the sample using the discrepancy criterion.
 | |
| 
 | |
|     >>> qmc.discrepancy(sample)
 | |
|     0.013882107204860938
 | |
| 
 | |
|     To continue an existing design, extra points can be obtained
 | |
|     by calling again `random_base2`. Alternatively, you can skip some
 | |
|     points like:
 | |
| 
 | |
|     >>> _ = sampler.reset()
 | |
|     >>> _ = sampler.fast_forward(4)
 | |
|     >>> sample_continued = sampler.random_base2(m=2)
 | |
|     >>> sample_continued
 | |
|     array([[0.375, 0.375],
 | |
|            [0.875, 0.875],
 | |
|            [0.625, 0.125],
 | |
|            [0.125, 0.625]])
 | |
| 
 | |
|     Finally, samples can be scaled to bounds.
 | |
| 
 | |
|     >>> l_bounds = [0, 2]
 | |
|     >>> u_bounds = [10, 5]
 | |
|     >>> qmc.scale(sample_continued, l_bounds, u_bounds)
 | |
|     array([[3.75 , 3.125],
 | |
|            [8.75 , 4.625],
 | |
|            [6.25 , 2.375],
 | |
|            [1.25 , 3.875]])
 | |
| 
 | |
|     """
 | |
| 
 | |
|     MAXDIM: ClassVar[int] = _MAXDIM
 | |
| 
 | |
|     @_transition_to_rng('seed', replace_doc=False)
 | |
|     def __init__(
 | |
|         self, d: IntNumber, *, scramble: bool = True,
 | |
|         bits: IntNumber | None = None, rng: SeedType = None,
 | |
|         optimization: Literal["random-cd", "lloyd"] | None = None
 | |
|     ) -> None:
 | |
|         # Used in `scipy.integrate.qmc_quad`
 | |
|         self._init_quad = {'d': d, 'scramble': True, 'bits': bits,
 | |
|                            'optimization': optimization}
 | |
| 
 | |
|         super()._initialize(d=d, optimization=optimization, rng=rng)
 | |
|         if d > self.MAXDIM:
 | |
|             raise ValueError(
 | |
|                 f"Maximum supported dimensionality is {self.MAXDIM}."
 | |
|             )
 | |
| 
 | |
|         self.bits = bits
 | |
|         self.dtype_i: type
 | |
|         self.scramble = scramble
 | |
| 
 | |
|         if self.bits is None:
 | |
|             self.bits = 30
 | |
| 
 | |
|         if self.bits <= 32:
 | |
|             self.dtype_i = np.uint32
 | |
|         elif 32 < self.bits <= 64:
 | |
|             self.dtype_i = np.uint64
 | |
|         else:
 | |
|             raise ValueError("Maximum supported 'bits' is 64")
 | |
| 
 | |
|         self.maxn = 2**self.bits
 | |
| 
 | |
|         # v is d x maxbit matrix
 | |
|         self._sv: np.ndarray = np.zeros((d, self.bits), dtype=self.dtype_i)
 | |
|         _initialize_v(self._sv, dim=d, bits=self.bits)
 | |
| 
 | |
|         if not scramble:
 | |
|             self._shift: np.ndarray = np.zeros(d, dtype=self.dtype_i)
 | |
|         else:
 | |
|             # scramble self._shift and self._sv
 | |
|             self._scramble()
 | |
| 
 | |
|         self._quasi = self._shift.copy()
 | |
| 
 | |
|         # normalization constant with the largest possible number
 | |
|         # calculate in Python to not overflow int with 2**64
 | |
|         self._scale = 1.0 / 2 ** self.bits
 | |
| 
 | |
|         self._first_point = (self._quasi * self._scale).reshape(1, -1)
 | |
|         # explicit casting to float64
 | |
|         self._first_point = self._first_point.astype(np.float64)
 | |
| 
 | |
|     def _scramble(self) -> None:
 | |
|         """Scramble the sequence using LMS+shift."""
 | |
|         # Generate shift vector
 | |
|         self._shift = np.dot(
 | |
|             rng_integers(self.rng, 2, size=(self.d, self.bits),
 | |
|                          dtype=self.dtype_i),
 | |
|             2 ** np.arange(self.bits, dtype=self.dtype_i),
 | |
|         )
 | |
|         # Generate lower triangular matrices (stacked across dimensions)
 | |
|         ltm = np.tril(rng_integers(self.rng, 2,
 | |
|                                    size=(self.d, self.bits, self.bits),
 | |
|                                    dtype=self.dtype_i))
 | |
|         _cscramble(
 | |
|             dim=self.d, bits=self.bits,  # type: ignore[arg-type]
 | |
|             ltm=ltm, sv=self._sv
 | |
|         )
 | |
| 
 | |
|     def _random(
 | |
|         self, n: IntNumber = 1, *, workers: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         """Draw next point(s) in the Sobol' sequence.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space. Default is 1.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             Sobol' sample.
 | |
| 
 | |
|         """
 | |
|         sample: np.ndarray = np.empty((n, self.d), dtype=np.float64)
 | |
| 
 | |
|         if n == 0:
 | |
|             return sample
 | |
| 
 | |
|         total_n = self.num_generated + n
 | |
|         if total_n > self.maxn:
 | |
|             msg = (
 | |
|                 f"At most 2**{self.bits}={self.maxn} distinct points can be "
 | |
|                 f"generated. {self.num_generated} points have been previously "
 | |
|                 f"generated, then: n={self.num_generated}+{n}={total_n}. "
 | |
|             )
 | |
|             if self.bits != 64:
 | |
|                 msg += "Consider increasing `bits`."
 | |
|             raise ValueError(msg)
 | |
| 
 | |
|         if self.num_generated == 0:
 | |
|             # verify n is 2**n
 | |
|             if not (n & (n - 1) == 0):
 | |
|                 warnings.warn("The balance properties of Sobol' points require"
 | |
|                               " n to be a power of 2.", stacklevel=3)
 | |
| 
 | |
|             if n == 1:
 | |
|                 sample = self._first_point
 | |
|             else:
 | |
|                 _draw(
 | |
|                     n=n - 1, num_gen=self.num_generated, dim=self.d,
 | |
|                     scale=self._scale, sv=self._sv, quasi=self._quasi,
 | |
|                     sample=sample
 | |
|                 )
 | |
|                 sample = np.concatenate(
 | |
|                     [self._first_point, sample]
 | |
|                 )[:n]
 | |
|         else:
 | |
|             _draw(
 | |
|                 n=n, num_gen=self.num_generated - 1, dim=self.d,
 | |
|                 scale=self._scale, sv=self._sv, quasi=self._quasi,
 | |
|                 sample=sample
 | |
|             )
 | |
| 
 | |
|         return sample
 | |
| 
 | |
|     def random_base2(self, m: IntNumber) -> np.ndarray:
 | |
|         """Draw point(s) from the Sobol' sequence.
 | |
| 
 | |
|         This function draws :math:`n=2^m` points in the parameter space
 | |
|         ensuring the balance properties of the sequence.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         m : int
 | |
|             Logarithm in base 2 of the number of samples; i.e., n = 2^m.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             Sobol' sample.
 | |
| 
 | |
|         """
 | |
|         n = 2 ** m
 | |
| 
 | |
|         total_n = self.num_generated + n
 | |
|         if not (total_n & (total_n - 1) == 0):
 | |
|             raise ValueError('The balance properties of Sobol\' points require '
 | |
|                              f'n to be a power of 2. {self.num_generated} points '
 | |
|                              'have been previously generated, then: '
 | |
|                              f'n={self.num_generated}+2**{m}={total_n}. '
 | |
|                              'If you still want to do this, the function '
 | |
|                              '\'Sobol.random()\' can be used.'
 | |
|                              )
 | |
| 
 | |
|         return self.random(n)
 | |
| 
 | |
|     def reset(self) -> "Sobol":
 | |
|         """Reset the engine to base state.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         engine : Sobol
 | |
|             Engine reset to its base state.
 | |
| 
 | |
|         """
 | |
|         super().reset()
 | |
|         self._quasi = self._shift.copy()
 | |
|         return self
 | |
| 
 | |
|     def fast_forward(self, n: IntNumber) -> "Sobol":
 | |
|         """Fast-forward the sequence by `n` positions.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int
 | |
|             Number of points to skip in the sequence.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         engine : Sobol
 | |
|             The fast-forwarded engine.
 | |
| 
 | |
|         """
 | |
|         if self.num_generated == 0:
 | |
|             _fast_forward(
 | |
|                 n=n - 1, num_gen=self.num_generated, dim=self.d,
 | |
|                 sv=self._sv, quasi=self._quasi
 | |
|             )
 | |
|         else:
 | |
|             _fast_forward(
 | |
|                 n=n, num_gen=self.num_generated - 1, dim=self.d,
 | |
|                 sv=self._sv, quasi=self._quasi
 | |
|             )
 | |
|         self.num_generated += n
 | |
|         return self
 | |
| 
 | |
| 
 | |
| class PoissonDisk(QMCEngine):
 | |
|     """Poisson disk sampling.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     d : int
 | |
|         Dimension of the parameter space.
 | |
|     radius : float
 | |
|         Minimal distance to keep between points when sampling new candidates.
 | |
|     hypersphere : {"volume", "surface"}, optional
 | |
|         Sampling strategy to generate potential candidates to be added in the
 | |
|         final sample. Default is "volume".
 | |
| 
 | |
|         * ``volume``: original Bridson algorithm as described in [1]_.
 | |
|           New candidates are sampled *within* the hypersphere.
 | |
|         * ``surface``: only sample the surface of the hypersphere.
 | |
|     ncandidates : int
 | |
|         Number of candidates to sample per iteration. More candidates result
 | |
|         in a denser sampling as more candidates can be accepted per iteration.
 | |
|     optimization : {None, "random-cd", "lloyd"}, optional
 | |
|         Whether to use an optimization scheme to improve the quality after
 | |
|         sampling. Note that this is a post-processing step that does not
 | |
|         guarantee that all properties of the sample will be conserved.
 | |
|         Default is None.
 | |
| 
 | |
|         * ``random-cd``: random permutations of coordinates to lower the
 | |
|           centered discrepancy. The best sample based on the centered
 | |
|           discrepancy is constantly updated. Centered discrepancy-based
 | |
|           sampling shows better space-filling robustness toward 2D and 3D
 | |
|           subprojections compared to using other discrepancy measures.
 | |
|         * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
 | |
|           The process converges to equally spaced samples.
 | |
| 
 | |
|         .. versionadded:: 1.10.0
 | |
| 
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. For an interim period, both keywords will continue to work, although
 | |
|             only one may be specified at a time. After the interim period, function
 | |
|             calls using the `seed` keyword will emit warnings. Following a
 | |
|             deprecation period, the `seed` keyword will be removed.
 | |
| 
 | |
|     l_bounds, u_bounds : array_like (d,)
 | |
|         Lower and upper bounds of target sample data.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     Poisson disk sampling is an iterative sampling strategy. Starting from
 | |
|     a seed sample, `ncandidates` are sampled in the hypersphere
 | |
|     surrounding the seed. Candidates below a certain `radius` or outside the
 | |
|     domain are rejected. New samples are added in a pool of sample seed. The
 | |
|     process stops when the pool is empty or when the number of required
 | |
|     samples is reached.
 | |
| 
 | |
|     The maximum number of point that a sample can contain is directly linked
 | |
|     to the `radius`. As the dimension of the space increases, a higher radius
 | |
|     spreads the points further and help overcome the curse of dimensionality.
 | |
|     See the :ref:`quasi monte carlo tutorial <quasi-monte-carlo>` for more
 | |
|     details.
 | |
| 
 | |
|     .. warning::
 | |
| 
 | |
|        The algorithm is more suitable for low dimensions and sampling size
 | |
|        due to its iterative nature and memory requirements.
 | |
|        Selecting a small radius with a high dimension would
 | |
|        mean that the space could contain more samples than using lower
 | |
|        dimension or a bigger radius.
 | |
| 
 | |
|     Some code taken from [2]_, written consent given on 31.03.2021
 | |
|     by the original author, Shamis, for free use in SciPy under
 | |
|     the 3-clause BSD.
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Robert Bridson, "Fast Poisson Disk Sampling in Arbitrary
 | |
|        Dimensions." SIGGRAPH, 2007.
 | |
|     .. [2] `StackOverflow <https://stackoverflow.com/questions/66047540>`__.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Generate a 2D sample using a `radius` of 0.2.
 | |
| 
 | |
|     >>> import numpy as np
 | |
|     >>> import matplotlib.pyplot as plt
 | |
|     >>> from matplotlib.collections import PatchCollection
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>>
 | |
|     >>> rng = np.random.default_rng()
 | |
|     >>> radius = 0.2
 | |
|     >>> engine = qmc.PoissonDisk(d=2, radius=radius, rng=rng)
 | |
|     >>> sample = engine.random(20)
 | |
| 
 | |
|     Visualizing the 2D sample and showing that no points are closer than
 | |
|     `radius`. ``radius/2`` is used to visualize non-intersecting circles.
 | |
|     If two samples are exactly at `radius` from each other, then their circle
 | |
|     of radius ``radius/2`` will touch.
 | |
| 
 | |
|     >>> fig, ax = plt.subplots()
 | |
|     >>> _ = ax.scatter(sample[:, 0], sample[:, 1])
 | |
|     >>> circles = [plt.Circle((xi, yi), radius=radius/2, fill=False)
 | |
|     ...            for xi, yi in sample]
 | |
|     >>> collection = PatchCollection(circles, match_original=True)
 | |
|     >>> ax.add_collection(collection)
 | |
|     >>> _ = ax.set(aspect='equal', xlabel=r'$x_1$', ylabel=r'$x_2$',
 | |
|     ...            xlim=[0, 1], ylim=[0, 1])
 | |
|     >>> plt.show()
 | |
| 
 | |
|     Such visualization can be seen as circle packing: how many circle can
 | |
|     we put in the space. It is a np-hard problem. The method `fill_space`
 | |
|     can be used to add samples until no more samples can be added. This is
 | |
|     a hard problem and parameters may need to be adjusted manually. Beware of
 | |
|     the dimension: as the dimensionality increases, the number of samples
 | |
|     required to fill the space increases exponentially
 | |
|     (curse-of-dimensionality).
 | |
| 
 | |
|     """
 | |
| 
 | |
|     @_transition_to_rng('seed', replace_doc=False)
 | |
|     def __init__(
 | |
|         self,
 | |
|         d: IntNumber,
 | |
|         *,
 | |
|         radius: DecimalNumber = 0.05,
 | |
|         hypersphere: Literal["volume", "surface"] = "volume",
 | |
|         ncandidates: IntNumber = 30,
 | |
|         optimization: Literal["random-cd", "lloyd"] | None = None,
 | |
|         rng: SeedType = None,
 | |
|         l_bounds: "npt.ArrayLike | None" = None,
 | |
|         u_bounds: "npt.ArrayLike | None" = None,
 | |
|     ) -> None:
 | |
|         # Used in `scipy.integrate.qmc_quad`
 | |
|         self._init_quad = {'d': d, 'radius': radius,
 | |
|                            'hypersphere': hypersphere,
 | |
|                            'ncandidates': ncandidates,
 | |
|                            'optimization': optimization}
 | |
|         super()._initialize(d=d, optimization=optimization, rng=rng)
 | |
| 
 | |
|         hypersphere_sample = {
 | |
|             "volume": self._hypersphere_volume_sample,
 | |
|             "surface": self._hypersphere_surface_sample
 | |
|         }
 | |
| 
 | |
|         try:
 | |
|             self.hypersphere_method = hypersphere_sample[hypersphere]
 | |
|         except KeyError as exc:
 | |
|             message = (
 | |
|                 f"{hypersphere!r} is not a valid hypersphere sampling"
 | |
|                 f" method. It must be one of {set(hypersphere_sample)!r}")
 | |
|             raise ValueError(message) from exc
 | |
| 
 | |
|         # size of the sphere from which the samples are drawn relative to the
 | |
|         # size of a disk (radius)
 | |
|         # for the surface sampler, all new points are almost exactly 1 radius
 | |
|         # away from at least one existing sample +eps to avoid rejection
 | |
|         self.radius_factor = 2 if hypersphere == "volume" else 1.001
 | |
|         self.radius = radius
 | |
|         self.radius_squared = self.radius**2
 | |
| 
 | |
|         # sample to generate per iteration in the hypersphere around center
 | |
|         self.ncandidates = ncandidates
 | |
| 
 | |
|         if u_bounds is None:
 | |
|             u_bounds = np.ones(d)
 | |
|         if l_bounds is None:
 | |
|             l_bounds = np.zeros(d)
 | |
|         self.l_bounds, self.u_bounds = _validate_bounds(
 | |
|             l_bounds=l_bounds, u_bounds=u_bounds, d=int(d)
 | |
|         )
 | |
| 
 | |
|         with np.errstate(divide='ignore'):
 | |
|             self.cell_size = self.radius / np.sqrt(self.d)
 | |
|             self.grid_size = (
 | |
|                 np.ceil((self.u_bounds - self.l_bounds) / self.cell_size)
 | |
|             ).astype(int)
 | |
| 
 | |
|         self._initialize_grid_pool()
 | |
| 
 | |
|     def _initialize_grid_pool(self):
 | |
|         """Sampling pool and sample grid."""
 | |
|         self.sample_pool = []
 | |
|         # Positions of cells
 | |
|         # n-dim value for each grid cell
 | |
|         self.sample_grid = np.empty(
 | |
|             np.append(self.grid_size, self.d),
 | |
|             dtype=np.float32
 | |
|         )
 | |
|         # Initialise empty cells with NaNs
 | |
|         self.sample_grid.fill(np.nan)
 | |
| 
 | |
|     def _random(
 | |
|         self, n: IntNumber = 1, *, workers: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         """Draw `n` in the interval ``[l_bounds, u_bounds]``.
 | |
| 
 | |
|         Note that it can return fewer samples if the space is full.
 | |
|         See the note section of the class.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space. Default is 1.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             QMC sample.
 | |
| 
 | |
|         """
 | |
|         if n == 0 or self.d == 0:
 | |
|             return np.empty((n, self.d))
 | |
| 
 | |
|         def in_limits(sample: np.ndarray) -> bool:
 | |
|             for i in range(self.d):
 | |
|                 if (sample[i] > self.u_bounds[i] or sample[i] < self.l_bounds[i]):
 | |
|                     return False
 | |
|             return True
 | |
| 
 | |
|         def in_neighborhood(candidate: np.ndarray, n: int = 2) -> bool:
 | |
|             """
 | |
|             Check if there are samples closer than ``radius_squared`` to the
 | |
|             `candidate` sample.
 | |
|             """
 | |
|             indices = ((candidate - self.l_bounds) / self.cell_size).astype(int)
 | |
|             ind_min = np.maximum(indices - n, self.l_bounds.astype(int))
 | |
|             ind_max = np.minimum(indices + n + 1, self.grid_size)
 | |
| 
 | |
|             # Check if the center cell is empty
 | |
|             if not np.isnan(self.sample_grid[tuple(indices)][0]):
 | |
|                 return True
 | |
| 
 | |
|             a = [slice(ind_min[i], ind_max[i]) for i in range(self.d)]
 | |
| 
 | |
|             # guards against: invalid value encountered in less as we are
 | |
|             # comparing with nan and returns False. Which is wanted.
 | |
|             with np.errstate(invalid='ignore'):
 | |
|                 if np.any(
 | |
|                     np.sum(
 | |
|                         np.square(candidate - self.sample_grid[tuple(a)]),
 | |
|                         axis=self.d
 | |
|                     ) < self.radius_squared
 | |
|                 ):
 | |
|                     return True
 | |
| 
 | |
|             return False
 | |
| 
 | |
|         def add_sample(candidate: np.ndarray) -> None:
 | |
|             self.sample_pool.append(candidate)
 | |
|             indices = ((candidate - self.l_bounds) / self.cell_size).astype(int)
 | |
|             self.sample_grid[tuple(indices)] = candidate
 | |
|             curr_sample.append(candidate)
 | |
| 
 | |
|         curr_sample: list[np.ndarray] = []
 | |
| 
 | |
|         if len(self.sample_pool) == 0:
 | |
|             # the pool is being initialized with a single random sample
 | |
|             add_sample(self.rng.uniform(self.l_bounds, self.u_bounds))
 | |
|             num_drawn = 1
 | |
|         else:
 | |
|             num_drawn = 0
 | |
| 
 | |
|         # exhaust sample pool to have up to n sample
 | |
|         while len(self.sample_pool) and num_drawn < n:
 | |
|             # select a sample from the available pool
 | |
|             idx_center = rng_integers(self.rng, len(self.sample_pool))
 | |
|             center = self.sample_pool[idx_center]
 | |
|             del self.sample_pool[idx_center]
 | |
| 
 | |
|             # generate candidates around the center sample
 | |
|             candidates = self.hypersphere_method(
 | |
|                 center, self.radius * self.radius_factor, self.ncandidates
 | |
|             )
 | |
| 
 | |
|             # keep candidates that satisfy some conditions
 | |
|             for candidate in candidates:
 | |
|                 if in_limits(candidate) and not in_neighborhood(candidate):
 | |
|                     add_sample(candidate)
 | |
| 
 | |
|                     num_drawn += 1
 | |
|                     if num_drawn >= n:
 | |
|                         break
 | |
| 
 | |
|         self.num_generated += num_drawn
 | |
|         return np.array(curr_sample)
 | |
| 
 | |
|     def fill_space(self) -> np.ndarray:
 | |
|         """Draw ``n`` samples in the interval ``[l_bounds, u_bounds]``.
 | |
| 
 | |
|         Unlike `random`, this method will try to add points until
 | |
|         the space is full. Depending on ``candidates`` (and to a lesser extent
 | |
|         other parameters), some empty areas can still be present in the sample.
 | |
| 
 | |
|         .. warning::
 | |
| 
 | |
|            This can be extremely slow in high dimensions or if the
 | |
|            ``radius`` is very small-with respect to the dimensionality.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             QMC sample.
 | |
| 
 | |
|         """
 | |
|         return self.random(np.inf)  # type: ignore[arg-type]
 | |
| 
 | |
|     def reset(self) -> "PoissonDisk":
 | |
|         """Reset the engine to base state.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         engine : PoissonDisk
 | |
|             Engine reset to its base state.
 | |
| 
 | |
|         """
 | |
|         super().reset()
 | |
|         self._initialize_grid_pool()
 | |
|         return self
 | |
| 
 | |
|     def _hypersphere_volume_sample(
 | |
|         self, center: np.ndarray, radius: DecimalNumber,
 | |
|         candidates: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         """Uniform sampling within hypersphere."""
 | |
|         # should remove samples within r/2
 | |
|         x = self.rng.standard_normal(size=(candidates, self.d))
 | |
|         ssq = np.sum(x**2, axis=1)
 | |
|         fr = radius * gammainc(self.d/2, ssq/2)**(1/self.d) / np.sqrt(ssq)
 | |
|         fr_tiled = np.tile(
 | |
|             fr.reshape(-1, 1), (1, self.d)  # type: ignore[arg-type]
 | |
|         )
 | |
|         p = center + np.multiply(x, fr_tiled)
 | |
|         return p
 | |
| 
 | |
|     def _hypersphere_surface_sample(
 | |
|         self, center: np.ndarray, radius: DecimalNumber,
 | |
|         candidates: IntNumber = 1
 | |
|     ) -> np.ndarray:
 | |
|         """Uniform sampling on the hypersphere's surface."""
 | |
|         vec = self.rng.standard_normal(size=(candidates, self.d))
 | |
|         vec /= np.linalg.norm(vec, axis=1)[:, None]
 | |
|         p = center + np.multiply(vec, radius)
 | |
|         return p
 | |
| 
 | |
| 
 | |
| class MultivariateNormalQMC:
 | |
|     r"""QMC sampling from a multivariate Normal :math:`N(\mu, \Sigma)`.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     mean : array_like (d,)
 | |
|         The mean vector. Where ``d`` is the dimension.
 | |
|     cov : array_like (d, d), optional
 | |
|         The covariance matrix. If omitted, use `cov_root` instead.
 | |
|         If both `cov` and `cov_root` are omitted, use the identity matrix.
 | |
|     cov_root : array_like (d, d'), optional
 | |
|         A root decomposition of the covariance matrix, where ``d'`` may be less
 | |
|         than ``d`` if the covariance is not full rank. If omitted, use `cov`.
 | |
|     inv_transform : bool, optional
 | |
|         If True, use inverse transform instead of Box-Muller. Default is True.
 | |
|     engine : QMCEngine, optional
 | |
|         Quasi-Monte Carlo engine sampler. If None, `Sobol` is used.
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. For an interim period, both keywords will continue to work, although
 | |
|             only one may be specified at a time. After the interim period, function
 | |
|             calls using the `seed` keyword will emit warnings. Following a
 | |
|             deprecation period, the `seed` keyword will be removed.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     >>> import matplotlib.pyplot as plt
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> dist = qmc.MultivariateNormalQMC(mean=[0, 5], cov=[[1, 0], [0, 1]])
 | |
|     >>> sample = dist.random(512)
 | |
|     >>> _ = plt.scatter(sample[:, 0], sample[:, 1])
 | |
|     >>> plt.show()
 | |
| 
 | |
|     """
 | |
| 
 | |
|     @_transition_to_rng('seed', replace_doc=False)
 | |
|     def __init__(
 | |
|             self,
 | |
|             mean: "npt.ArrayLike",
 | |
|             cov: "npt.ArrayLike | None" = None,
 | |
|             *,
 | |
|             cov_root: "npt.ArrayLike | None" = None,
 | |
|             inv_transform: bool = True,
 | |
|             engine: QMCEngine | None = None,
 | |
|             rng: SeedType = None,
 | |
|     ) -> None:
 | |
|         mean = np.asarray(np.atleast_1d(mean))
 | |
|         d = mean.shape[0]
 | |
|         if cov is not None:
 | |
|             # covariance matrix provided
 | |
|             cov = np.asarray(np.atleast_2d(cov))
 | |
|             # check for square/symmetric cov matrix and mean vector has the
 | |
|             # same d
 | |
|             if not mean.shape[0] == cov.shape[0]:
 | |
|                 raise ValueError("Dimension mismatch between mean and "
 | |
|                                  "covariance.")
 | |
|             if not np.allclose(cov, cov.transpose()):
 | |
|                 raise ValueError("Covariance matrix is not symmetric.")
 | |
|             # compute Cholesky decomp; if it fails, do the eigen decomposition
 | |
|             try:
 | |
|                 cov_root = np.linalg.cholesky(cov).transpose()
 | |
|             except np.linalg.LinAlgError:
 | |
|                 eigval, eigvec = np.linalg.eigh(cov)
 | |
|                 if not np.all(eigval >= -1.0e-8):
 | |
|                     raise ValueError("Covariance matrix not PSD.")
 | |
|                 eigval = np.clip(eigval, 0.0, None)
 | |
|                 cov_root = (eigvec * np.sqrt(eigval)).transpose()
 | |
|         elif cov_root is not None:
 | |
|             # root decomposition provided
 | |
|             cov_root = np.atleast_2d(cov_root)
 | |
|             if not mean.shape[0] == cov_root.shape[0]:
 | |
|                 raise ValueError("Dimension mismatch between mean and "
 | |
|                                  "covariance.")
 | |
|         else:
 | |
|             # corresponds to identity covariance matrix
 | |
|             cov_root = None
 | |
| 
 | |
|         self._inv_transform = inv_transform
 | |
| 
 | |
|         if not inv_transform:
 | |
|             # to apply Box-Muller, we need an even number of dimensions
 | |
|             engine_dim = 2 * math.ceil(d / 2)
 | |
|         else:
 | |
|             engine_dim = d
 | |
|         if engine is None:
 | |
|             # Need this during SPEC 7 transition to prevent `RandomState`
 | |
|             # from being passed via `rng`.
 | |
|             kwarg = "seed" if isinstance(rng, np.random.RandomState) else "rng"
 | |
|             kwargs = {kwarg: rng}
 | |
|             self.engine = Sobol(
 | |
|                 d=engine_dim, scramble=True, bits=30, **kwargs
 | |
|             )  # type: QMCEngine
 | |
|         elif isinstance(engine, QMCEngine):
 | |
|             if engine.d != engine_dim:
 | |
|                 raise ValueError("Dimension of `engine` must be consistent"
 | |
|                                  " with dimensions of mean and covariance."
 | |
|                                  " If `inv_transform` is False, it must be"
 | |
|                                  " an even number.")
 | |
|             self.engine = engine
 | |
|         else:
 | |
|             raise ValueError("`engine` must be an instance of "
 | |
|                              "`scipy.stats.qmc.QMCEngine` or `None`.")
 | |
| 
 | |
|         self._mean = mean
 | |
|         self._corr_matrix = cov_root
 | |
| 
 | |
|         self._d = d
 | |
| 
 | |
|     def random(self, n: IntNumber = 1) -> np.ndarray:
 | |
|         """Draw `n` QMC samples from the multivariate Normal.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space. Default is 1.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             Sample.
 | |
| 
 | |
|         """
 | |
|         base_samples = self._standard_normal_samples(n)
 | |
|         return self._correlate(base_samples)
 | |
| 
 | |
|     def _correlate(self, base_samples: np.ndarray) -> np.ndarray:
 | |
|         if self._corr_matrix is not None:
 | |
|             return base_samples @ self._corr_matrix + self._mean
 | |
|         else:
 | |
|             # avoid multiplying with identity here
 | |
|             return base_samples + self._mean
 | |
| 
 | |
|     def _standard_normal_samples(self, n: IntNumber = 1) -> np.ndarray:
 | |
|         """Draw `n` QMC samples from the standard Normal :math:`N(0, I_d)`.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space. Default is 1.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         sample : array_like (n, d)
 | |
|             Sample.
 | |
| 
 | |
|         """
 | |
|         # get base samples
 | |
|         samples = self.engine.random(n)
 | |
|         if self._inv_transform:
 | |
|             # apply inverse transform
 | |
|             # (values to close to 0/1 result in inf values)
 | |
|             return stats.norm.ppf(0.5 + (1 - 1e-10) * (samples - 0.5))  # type: ignore[attr-defined]  # noqa: E501
 | |
|         else:
 | |
|             # apply Box-Muller transform (note: indexes starting from 1)
 | |
|             even = np.arange(0, samples.shape[-1], 2)
 | |
|             Rs = np.sqrt(-2 * np.log(samples[:, even]))
 | |
|             thetas = 2 * math.pi * samples[:, 1 + even]
 | |
|             cos = np.cos(thetas)
 | |
|             sin = np.sin(thetas)
 | |
|             transf_samples = np.stack([Rs * cos, Rs * sin],
 | |
|                                       -1).reshape(n, -1)
 | |
|             # make sure we only return the number of dimension requested
 | |
|             return transf_samples[:, : self._d]
 | |
| 
 | |
| 
 | |
| class MultinomialQMC:
 | |
|     r"""QMC sampling from a multinomial distribution.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     pvals : array_like (k,)
 | |
|         Vector of probabilities of size ``k``, where ``k`` is the number
 | |
|         of categories. Elements must be non-negative and sum to 1.
 | |
|     n_trials : int
 | |
|         Number of trials.
 | |
|     engine : QMCEngine, optional
 | |
|         Quasi-Monte Carlo engine sampler. If None, `Sobol` is used.
 | |
|     rng : `numpy.random.Generator`, optional
 | |
|         Pseudorandom number generator state. When `rng` is None, a new
 | |
|         `numpy.random.Generator` is created using entropy from the
 | |
|         operating system. Types other than `numpy.random.Generator` are
 | |
|         passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 | |
| 
 | |
|         .. versionchanged:: 1.15.0
 | |
| 
 | |
|             As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
 | |
|             transition from use of `numpy.random.RandomState` to
 | |
|             `numpy.random.Generator`, this keyword was changed from `seed` to
 | |
|             `rng`. For an interim period, both keywords will continue to work, although
 | |
|             only one may be specified at a time. After the interim period, function
 | |
|             calls using the `seed` keyword will emit warnings. Following a
 | |
|             deprecation period, the `seed` keyword will be removed.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     Let's define 3 categories and for a given sample, the sum of the trials
 | |
|     of each category is 8. The number of trials per category is determined
 | |
|     by the `pvals` associated to each category.
 | |
|     Then, we sample this distribution 64 times.
 | |
| 
 | |
|     >>> import matplotlib.pyplot as plt
 | |
|     >>> from scipy.stats import qmc
 | |
|     >>> dist = qmc.MultinomialQMC(
 | |
|     ...     pvals=[0.2, 0.4, 0.4], n_trials=10, engine=qmc.Halton(d=1)
 | |
|     ... )
 | |
|     >>> sample = dist.random(64)
 | |
| 
 | |
|     We can plot the sample and verify that the median of number of trials
 | |
|     for each category is following the `pvals`. That would be
 | |
|     ``pvals * n_trials = [2, 4, 4]``.
 | |
| 
 | |
|     >>> fig, ax = plt.subplots()
 | |
|     >>> ax.yaxis.get_major_locator().set_params(integer=True)
 | |
|     >>> _ = ax.boxplot(sample)
 | |
|     >>> ax.set(xlabel="Categories", ylabel="Trials")
 | |
|     >>> plt.show()
 | |
| 
 | |
|     """
 | |
| 
 | |
|     @_transition_to_rng('seed', replace_doc=False)
 | |
|     def __init__(
 | |
|         self,
 | |
|         pvals: "npt.ArrayLike",
 | |
|         n_trials: IntNumber,
 | |
|         *,
 | |
|         engine: QMCEngine | None = None,
 | |
|         rng: SeedType = None,
 | |
|     ) -> None:
 | |
|         self.pvals = np.atleast_1d(np.asarray(pvals))
 | |
|         if np.min(pvals) < 0:
 | |
|             raise ValueError('Elements of pvals must be non-negative.')
 | |
|         if not np.isclose(np.sum(pvals), 1):
 | |
|             raise ValueError('Elements of pvals must sum to 1.')
 | |
|         self.n_trials = n_trials
 | |
|         if engine is None:
 | |
|             # Need this during SPEC 7 transition to prevent `RandomState`
 | |
|             # from being passed via `rng`.
 | |
|             kwarg = "seed" if isinstance(rng, np.random.RandomState) else "rng"
 | |
|             kwargs = {kwarg: rng}
 | |
|             self.engine = Sobol(
 | |
|                 d=1, scramble=True, bits=30, **kwargs
 | |
|             )  # type: QMCEngine
 | |
|         elif isinstance(engine, QMCEngine):
 | |
|             if engine.d != 1:
 | |
|                 raise ValueError("Dimension of `engine` must be 1.")
 | |
|             self.engine = engine
 | |
|         else:
 | |
|             raise ValueError("`engine` must be an instance of "
 | |
|                              "`scipy.stats.qmc.QMCEngine` or `None`.")
 | |
| 
 | |
|     def random(self, n: IntNumber = 1) -> np.ndarray:
 | |
|         """Draw `n` QMC samples from the multinomial distribution.
 | |
| 
 | |
|         Parameters
 | |
|         ----------
 | |
|         n : int, optional
 | |
|             Number of samples to generate in the parameter space. Default is 1.
 | |
| 
 | |
|         Returns
 | |
|         -------
 | |
|         samples : array_like (n, pvals)
 | |
|             Sample.
 | |
| 
 | |
|         """
 | |
|         sample = np.empty((n, len(self.pvals)))
 | |
|         for i in range(n):
 | |
|             base_draws = self.engine.random(self.n_trials).ravel()
 | |
|             p_cumulative = np.empty_like(self.pvals, dtype=float)
 | |
|             _fill_p_cumulative(np.array(self.pvals, dtype=float), p_cumulative)
 | |
|             sample_ = np.zeros_like(self.pvals, dtype=np.intp)
 | |
|             _categorize(base_draws, p_cumulative, sample_)
 | |
|             sample[i] = sample_
 | |
|         return sample
 | |
| 
 | |
| 
 | |
| def _select_optimizer(
 | |
|     optimization: Literal["random-cd", "lloyd"] | None, config: dict
 | |
| ) -> Callable | None:
 | |
|     """A factory for optimization methods."""
 | |
|     optimization_method: dict[str, Callable] = {
 | |
|         "random-cd": _random_cd,
 | |
|         "lloyd": _lloyd_centroidal_voronoi_tessellation
 | |
|     }
 | |
| 
 | |
|     optimizer: partial | None
 | |
|     if optimization is not None:
 | |
|         try:
 | |
|             optimization = optimization.lower()  # type: ignore[assignment]
 | |
|             optimizer_ = optimization_method[optimization]
 | |
|         except KeyError as exc:
 | |
|             message = (f"{optimization!r} is not a valid optimization"
 | |
|                        f" method. It must be one of"
 | |
|                        f" {set(optimization_method)!r}")
 | |
|             raise ValueError(message) from exc
 | |
| 
 | |
|         # config
 | |
|         optimizer = partial(optimizer_, **config)
 | |
|     else:
 | |
|         optimizer = None
 | |
| 
 | |
|     return optimizer
 | |
| 
 | |
| 
 | |
| def _random_cd(
 | |
|     best_sample: np.ndarray, n_iters: int, n_nochange: int, rng: GeneratorType,
 | |
|     **kwargs: dict
 | |
| ) -> np.ndarray:
 | |
|     """Optimal LHS on CD.
 | |
| 
 | |
|     Create a base LHS and do random permutations of coordinates to
 | |
|     lower the centered discrepancy.
 | |
|     Because it starts with a normal LHS, it also works with the
 | |
|     `scramble` keyword argument.
 | |
| 
 | |
|     Two stopping criterion are used to stop the algorithm: at most,
 | |
|     `n_iters` iterations are performed; or if there is no improvement
 | |
|     for `n_nochange` consecutive iterations.
 | |
|     """
 | |
|     del kwargs  # only use keywords which are defined, needed by factory
 | |
| 
 | |
|     n, d = best_sample.shape
 | |
| 
 | |
|     if d == 0 or n == 0:
 | |
|         return np.empty((n, d))
 | |
| 
 | |
|     if d == 1 or n == 1:
 | |
|         # discrepancy measures are invariant under permuting factors and runs
 | |
|         return best_sample
 | |
| 
 | |
|     best_disc = discrepancy(best_sample)
 | |
| 
 | |
|     bounds = ([0, d - 1],
 | |
|               [0, n - 1],
 | |
|               [0, n - 1])
 | |
| 
 | |
|     n_nochange_ = 0
 | |
|     n_iters_ = 0
 | |
|     while n_nochange_ < n_nochange and n_iters_ < n_iters:
 | |
|         n_iters_ += 1
 | |
| 
 | |
|         col = rng_integers(rng, *bounds[0], endpoint=True)  # type: ignore[misc]
 | |
|         row_1 = rng_integers(rng, *bounds[1], endpoint=True)  # type: ignore[misc]
 | |
|         row_2 = rng_integers(rng, *bounds[2], endpoint=True)  # type: ignore[misc]
 | |
|         disc = _perturb_discrepancy(best_sample,
 | |
|                                     row_1, row_2, col,
 | |
|                                     best_disc)
 | |
|         if disc < best_disc:
 | |
|             best_sample[row_1, col], best_sample[row_2, col] = (
 | |
|                 best_sample[row_2, col], best_sample[row_1, col])
 | |
| 
 | |
|             best_disc = disc
 | |
|             n_nochange_ = 0
 | |
|         else:
 | |
|             n_nochange_ += 1
 | |
| 
 | |
|     return best_sample
 | |
| 
 | |
| 
 | |
| def _l1_norm(sample: np.ndarray) -> float:
 | |
|     return distance.pdist(sample, 'cityblock').min()
 | |
| 
 | |
| 
 | |
| def _lloyd_iteration(
 | |
|     sample: np.ndarray,
 | |
|     decay: float,
 | |
|     qhull_options: str
 | |
| ) -> np.ndarray:
 | |
|     """Lloyd-Max algorithm iteration.
 | |
| 
 | |
|     Based on the implementation of Stéfan van der Walt:
 | |
| 
 | |
|     https://github.com/stefanv/lloyd
 | |
| 
 | |
|     which is:
 | |
| 
 | |
|         Copyright (c) 2021-04-21 Stéfan van der Walt
 | |
|         https://github.com/stefanv/lloyd
 | |
|         MIT License
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     sample : array_like (n, d)
 | |
|         The sample to iterate on.
 | |
|     decay : float
 | |
|         Relaxation decay. A positive value would move the samples toward
 | |
|         their centroid, and negative value would move them away.
 | |
|         1 would move the samples to their centroid.
 | |
|     qhull_options : str
 | |
|         Additional options to pass to Qhull. See Qhull manual
 | |
|         for details. (Default: "Qbb Qc Qz Qj Qx" for ndim > 4 and
 | |
|         "Qbb Qc Qz Qj" otherwise.)
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     sample : array_like (n, d)
 | |
|         The sample after an iteration of Lloyd's algorithm.
 | |
| 
 | |
|     """
 | |
|     new_sample = np.empty_like(sample)
 | |
| 
 | |
|     voronoi = Voronoi(sample, qhull_options=qhull_options)
 | |
| 
 | |
|     for ii, idx in enumerate(voronoi.point_region):
 | |
|         # the region is a series of indices into self.voronoi.vertices
 | |
|         # remove samples at infinity, designated by index -1
 | |
|         region = [i for i in voronoi.regions[idx] if i != -1]
 | |
| 
 | |
|         # get the vertices for this region
 | |
|         verts = voronoi.vertices[region]
 | |
| 
 | |
|         # clipping would be wrong, we need to intersect
 | |
|         # verts = np.clip(verts, 0, 1)
 | |
| 
 | |
|         # move samples towards centroids:
 | |
|         # Centroid in n-D is the mean for uniformly distributed nodes
 | |
|         # of a geometry.
 | |
|         centroid = np.mean(verts, axis=0)
 | |
|         new_sample[ii] = sample[ii] + (centroid - sample[ii]) * decay
 | |
| 
 | |
|     # only update sample to centroid within the region
 | |
|     is_valid = np.all(np.logical_and(new_sample >= 0, new_sample <= 1), axis=1)
 | |
|     sample[is_valid] = new_sample[is_valid]
 | |
| 
 | |
|     return sample
 | |
| 
 | |
| 
 | |
| def _lloyd_centroidal_voronoi_tessellation(
 | |
|     sample: "npt.ArrayLike",
 | |
|     *,
 | |
|     tol: DecimalNumber = 1e-5,
 | |
|     maxiter: IntNumber = 10,
 | |
|     qhull_options: str | None = None,
 | |
|     **kwargs: dict
 | |
| ) -> np.ndarray:
 | |
|     """Approximate Centroidal Voronoi Tessellation.
 | |
| 
 | |
|     Perturb samples in N-dimensions using Lloyd-Max algorithm.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     sample : array_like (n, d)
 | |
|         The sample to iterate on. With ``n`` the number of samples and ``d``
 | |
|         the dimension. Samples must be in :math:`[0, 1]^d`, with ``d>=2``.
 | |
|     tol : float, optional
 | |
|         Tolerance for termination. If the min of the L1-norm over the samples
 | |
|         changes less than `tol`, it stops the algorithm. Default is 1e-5.
 | |
|     maxiter : int, optional
 | |
|         Maximum number of iterations. It will stop the algorithm even if
 | |
|         `tol` is above the threshold.
 | |
|         Too many iterations tend to cluster the samples as a hypersphere.
 | |
|         Default is 10.
 | |
|     qhull_options : str, optional
 | |
|         Additional options to pass to Qhull. See Qhull manual
 | |
|         for details. (Default: "Qbb Qc Qz Qj Qx" for ndim > 4 and
 | |
|         "Qbb Qc Qz Qj" otherwise.)
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     sample : array_like (n, d)
 | |
|         The sample after being processed by Lloyd-Max algorithm.
 | |
| 
 | |
|     Notes
 | |
|     -----
 | |
|     Lloyd-Max algorithm is an iterative process with the purpose of improving
 | |
|     the dispersion of samples. For given sample: (i) compute a Voronoi
 | |
|     Tessellation; (ii) find the centroid of each Voronoi cell; (iii) move the
 | |
|     samples toward the centroid of their respective cell. See [1]_, [2]_.
 | |
| 
 | |
|     A relaxation factor is used to control how fast samples can move at each
 | |
|     iteration. This factor is starting at 2 and ending at 1 after `maxiter`
 | |
|     following an exponential decay.
 | |
| 
 | |
|     The process converges to equally spaced samples. It implies that measures
 | |
|     like the discrepancy could suffer from too many iterations. On the other
 | |
|     hand, L1 and L2 distances should improve. This is especially true with
 | |
|     QMC methods which tend to favor the discrepancy over other criteria.
 | |
| 
 | |
|     .. note::
 | |
| 
 | |
|         The current implementation does not intersect the Voronoi Tessellation
 | |
|         with the boundaries. This implies that for a low number of samples,
 | |
|         empirically below 20, no Voronoi cell is touching the boundaries.
 | |
|         Hence, samples cannot be moved close to the boundaries.
 | |
| 
 | |
|         Further improvements could consider the samples at infinity so that
 | |
|         all boundaries are segments of some Voronoi cells. This would fix
 | |
|         the computation of the centroid position.
 | |
| 
 | |
|     .. warning::
 | |
| 
 | |
|        The Voronoi Tessellation step is expensive and quickly becomes
 | |
|        intractable with dimensions as low as 10 even for a sample
 | |
|        of size as low as 1000.
 | |
| 
 | |
|     .. versionadded:: 1.9.0
 | |
| 
 | |
|     References
 | |
|     ----------
 | |
|     .. [1] Lloyd. "Least Squares Quantization in PCM".
 | |
|        IEEE Transactions on Information Theory, 1982.
 | |
|     .. [2] Max J. "Quantizing for minimum distortion".
 | |
|        IEEE Transactions on Information Theory, 1960.
 | |
| 
 | |
|     Examples
 | |
|     --------
 | |
|     >>> import numpy as np
 | |
|     >>> from scipy.spatial import distance
 | |
|     >>> from scipy.stats._qmc import _lloyd_centroidal_voronoi_tessellation
 | |
|     >>> rng = np.random.default_rng()
 | |
|     >>> sample = rng.random((128, 2))
 | |
| 
 | |
|     .. note::
 | |
| 
 | |
|         The samples need to be in :math:`[0, 1]^d`. `scipy.stats.qmc.scale`
 | |
|         can be used to scale the samples from their
 | |
|         original bounds to :math:`[0, 1]^d`. And back to their original bounds.
 | |
| 
 | |
|     Compute the quality of the sample using the L1 criterion.
 | |
| 
 | |
|     >>> def l1_norm(sample):
 | |
|     ...    return distance.pdist(sample, 'cityblock').min()
 | |
| 
 | |
|     >>> l1_norm(sample)
 | |
|     0.00161...  # random
 | |
| 
 | |
|     Now process the sample using Lloyd's algorithm and check the improvement
 | |
|     on the L1. The value should increase.
 | |
| 
 | |
|     >>> sample = _lloyd_centroidal_voronoi_tessellation(sample)
 | |
|     >>> l1_norm(sample)
 | |
|     0.0278...  # random
 | |
| 
 | |
|     """
 | |
|     del kwargs  # only use keywords which are defined, needed by factory
 | |
| 
 | |
|     sample = np.asarray(sample).copy()
 | |
| 
 | |
|     if not sample.ndim == 2:
 | |
|         raise ValueError('`sample` is not a 2D array')
 | |
| 
 | |
|     if not sample.shape[1] >= 2:
 | |
|         raise ValueError('`sample` dimension is not >= 2')
 | |
| 
 | |
|     # Checking that sample is within the hypercube
 | |
|     if (sample.max() > 1.) or (sample.min() < 0.):
 | |
|         raise ValueError('`sample` is not in unit hypercube')
 | |
| 
 | |
|     if qhull_options is None:
 | |
|         qhull_options = 'Qbb Qc Qz QJ'
 | |
| 
 | |
|         if sample.shape[1] >= 5:
 | |
|             qhull_options += ' Qx'
 | |
| 
 | |
|     # Fit an exponential to be 2 at 0 and 1 at `maxiter`.
 | |
|     # The decay is used for relaxation.
 | |
|     # analytical solution for y=exp(-maxiter/x) - 0.1
 | |
|     root = -maxiter / np.log(0.1)
 | |
|     decay = [np.exp(-x / root)+0.9 for x in range(maxiter)]
 | |
| 
 | |
|     l1_old = _l1_norm(sample=sample)
 | |
|     for i in range(maxiter):
 | |
|         sample = _lloyd_iteration(
 | |
|                 sample=sample, decay=decay[i],
 | |
|                 qhull_options=qhull_options,
 | |
|         )
 | |
| 
 | |
|         l1_new = _l1_norm(sample=sample)
 | |
| 
 | |
|         if abs(l1_new - l1_old) < tol:
 | |
|             break
 | |
|         else:
 | |
|             l1_old = l1_new
 | |
| 
 | |
|     return sample
 | |
| 
 | |
| 
 | |
| def _validate_workers(workers: IntNumber = 1) -> IntNumber:
 | |
|     """Validate `workers` based on platform and value.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     workers : int, optional
 | |
|         Number of workers to use for parallel processing. If -1 is
 | |
|         given all CPU threads are used. Default is 1.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     Workers : int
 | |
|         Number of CPU used by the algorithm
 | |
| 
 | |
|     """
 | |
|     workers = int(workers)
 | |
|     if workers == -1:
 | |
|         workers = os.cpu_count()  # type: ignore[assignment]
 | |
|         if workers is None:
 | |
|             raise NotImplementedError(
 | |
|                 "Cannot determine the number of cpus using os.cpu_count(), "
 | |
|                 "cannot use -1 for the number of workers"
 | |
|             )
 | |
|     elif workers <= 0:
 | |
|         raise ValueError(f"Invalid number of workers: {workers}, must be -1 "
 | |
|                          "or > 0")
 | |
| 
 | |
|     return workers
 | |
| 
 | |
| 
 | |
| def _validate_bounds(
 | |
|     l_bounds: "npt.ArrayLike", u_bounds: "npt.ArrayLike", d: int
 | |
| ) -> "tuple[npt.NDArray[np.generic], npt.NDArray[np.generic]]":
 | |
|     """Bounds input validation.
 | |
| 
 | |
|     Parameters
 | |
|     ----------
 | |
|     l_bounds, u_bounds : array_like (d,)
 | |
|         Lower and upper bounds.
 | |
|     d : int
 | |
|         Dimension to use for broadcasting.
 | |
| 
 | |
|     Returns
 | |
|     -------
 | |
|     l_bounds, u_bounds : array_like (d,)
 | |
|         Lower and upper bounds.
 | |
| 
 | |
|     """
 | |
|     try:
 | |
|         lower = np.broadcast_to(l_bounds, d)
 | |
|         upper = np.broadcast_to(u_bounds, d)
 | |
|     except ValueError as exc:
 | |
|         msg = ("'l_bounds' and 'u_bounds' must be broadcastable and respect"
 | |
|                " the sample dimension")
 | |
|         raise ValueError(msg) from exc
 | |
| 
 | |
|     if not np.all(lower < upper):
 | |
|         raise ValueError("Bounds are not consistent 'l_bounds' < 'u_bounds'")
 | |
| 
 | |
|     return lower, upper
 |