123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- """Algorithms to support fitting routines in seaborn plotting functions."""
- import numbers
- import numpy as np
- import warnings
- def bootstrap(*args, **kwargs):
- """Resample one or more arrays with replacement and store aggregate values.
- Positional arguments are a sequence of arrays to bootstrap along the first
- axis and pass to a summary function.
- Keyword arguments:
- n_boot : int, default 10000
- Number of iterations
- axis : int, default None
- Will pass axis to ``func`` as a keyword argument.
- units : array, default None
- Array of sampling unit IDs. When used the bootstrap resamples units
- and then observations within units instead of individual
- datapoints.
- func : string or callable, default np.mean
- Function to call on the args that are passed in. If string, tries
- to use as named method on numpy array.
- seed : Generator | SeedSequence | RandomState | int | None
- Seed for the random number generator; useful if you want
- reproducible resamples.
- Returns
- -------
- boot_dist: array
- array of bootstrapped statistic values
- """
- # Ensure list of arrays are same length
- if len(np.unique(list(map(len, args)))) > 1:
- raise ValueError("All input arrays must have the same length")
- n = len(args[0])
- # Default keyword arguments
- n_boot = kwargs.get("n_boot", 10000)
- func = kwargs.get("func", np.mean)
- axis = kwargs.get("axis", None)
- units = kwargs.get("units", None)
- random_seed = kwargs.get("random_seed", None)
- if random_seed is not None:
- msg = "`random_seed` has been renamed to `seed` and will be removed"
- warnings.warn(msg)
- seed = kwargs.get("seed", random_seed)
- if axis is None:
- func_kwargs = dict()
- else:
- func_kwargs = dict(axis=axis)
- # Initialize the resampler
- rng = _handle_random_seed(seed)
- # Coerce to arrays
- args = list(map(np.asarray, args))
- if units is not None:
- units = np.asarray(units)
- # Allow for a function that is the name of a method on an array
- if isinstance(func, str):
- def f(x):
- return getattr(x, func)()
- else:
- f = func
- # Handle numpy changes
- try:
- integers = rng.integers
- except AttributeError:
- integers = rng.randint
- # Do the bootstrap
- if units is not None:
- return _structured_bootstrap(args, n_boot, units, f,
- func_kwargs, integers)
- boot_dist = []
- for i in range(int(n_boot)):
- resampler = integers(0, n, n, dtype=np.intp) # intp is indexing dtype
- sample = [a.take(resampler, axis=0) for a in args]
- boot_dist.append(f(*sample, **func_kwargs))
- return np.array(boot_dist)
- def _structured_bootstrap(args, n_boot, units, func, func_kwargs, integers):
- """Resample units instead of datapoints."""
- unique_units = np.unique(units)
- n_units = len(unique_units)
- args = [[a[units == unit] for unit in unique_units] for a in args]
- boot_dist = []
- for i in range(int(n_boot)):
- resampler = integers(0, n_units, n_units, dtype=np.intp)
- sample = [np.take(a, resampler, axis=0) for a in args]
- lengths = map(len, sample[0])
- resampler = [integers(0, n, n, dtype=np.intp) for n in lengths]
- sample = [[c.take(r, axis=0) for c, r in zip(a, resampler)]
- for a in sample]
- sample = list(map(np.concatenate, sample))
- boot_dist.append(func(*sample, **func_kwargs))
- return np.array(boot_dist)
- def _handle_random_seed(seed=None):
- """Given a seed in one of many formats, return a random number generator.
- Generalizes across the numpy 1.17 changes, preferring newer functionality.
- """
- if isinstance(seed, np.random.RandomState):
- rng = seed
- else:
- try:
- # General interface for seeding on numpy >= 1.17
- rng = np.random.default_rng(seed)
- except AttributeError:
- # We are on numpy < 1.17, handle options ourselves
- if isinstance(seed, (numbers.Integral, np.integer)):
- rng = np.random.RandomState(seed)
- elif seed is None:
- rng = np.random.RandomState()
- else:
- err = "{} cannot be used to seed the randomn number generator"
- raise ValueError(err.format(seed))
- return rng
|