123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202 |
- import numpy as np
- import numpy.random as npr
- import pytest
- from numpy.testing import assert_array_equal
- from distutils.version import LooseVersion
- from .. import algorithms as algo
@pytest.fixture
def random():
    """Seed numpy's global RNG with a fixed value built from a constant string."""
    # The seed is the sum of the code points of the literal below.
    seed = sum(ord(char) for char in "test_algorithms")
    np.random.seed(seed)
def test_bootstrap(random):
    """Check that bootstrapping an all-ones array yields all ones."""
    n_boot = 5
    constant = np.ones(10)
    expected = np.ones(n_boot)

    # First call omits ``func``; the second asks for the median explicitly.
    for extra_kwargs in ({}, {"func": np.median}):
        result = algo.bootstrap(constant, n_boot=n_boot, **extra_kwargs)
        assert_array_equal(result, expected)
def test_bootstrap_length(random):
    """Check the length of the bootstrap output, with and without ``n_boot``."""
    sample = np.random.randn(1000)

    # When ``n_boot`` is omitted this test expects 10000 resamples.
    assert len(algo.bootstrap(sample)) == 10000

    n_boot = 100
    assert len(algo.bootstrap(sample, n_boot=n_boot)) == n_boot
def test_bootstrap_range(random):
    """Test that bootstrapping a random array stays within the data's range."""
    sample = np.random.randn(1000)
    lower = sample.min()
    upper = sample.max()

    boots = algo.bootstrap(sample)

    # No bootstrapped value may fall outside [min, max] of the input.
    assert boots.min() >= lower
    assert boots.max() <= upper
def test_bootstrap_multiarg(random):
    """Check that bootstrap accepts several input arrays at once."""
    # Ten identical rows each, so every resample has the same column maxima.
    x = np.array([[1, 10]] * 10)
    y = np.array([[5, 5]] * 10)

    def columnwise_max(x, y):
        stacked = np.vstack((x, y))
        return stacked.max(axis=0)

    expected = np.array([[5, 10], [5, 10]])
    result = algo.bootstrap(x, y, n_boot=2, func=columnwise_max)
    assert_array_equal(result, expected)
def test_bootstrap_axis(random):
    """Test axis kwarg to bootstrap function.

    Without ``axis`` the result is expected to hold one value per resample;
    with ``axis=0`` it is expected to hold one row per resample and one
    column per column of the input.
    """
    x = np.random.randn(10, 20)
    n_boot = 100

    out_default = algo.bootstrap(x, n_boot=n_boot)
    assert out_default.shape == (n_boot,)

    out_axis = algo.bootstrap(x, n_boot=n_boot, axis=0)
    # Must be an equality check: ``assert shape, expected`` would treat the
    # expected tuple as the failure message and pass for any non-empty shape.
    assert out_axis.shape == (n_boot, x.shape[1])
def test_bootstrap_seed(random):
    """Check that passing the same seed twice gives identical resamples."""
    seed = 42
    data = np.random.randn(50)

    first, second = (algo.bootstrap(data, seed=seed) for _ in range(2))
    assert_array_equal(first, second)
def test_bootstrap_ols(random):
    """Check bootstrapping of an ordinary least squares fit."""

    def ols_fit(X, y):
        # Normal-equations solution: (X'X)^-1 X'y
        gram_inverse = np.linalg.inv(X.T.dot(X))
        return gram_inverse.dot(X.T).dot(y)

    # Four random regressors plus a constant column.
    X = np.column_stack((np.random.randn(50, 4), np.ones(50)))
    w = [2, 4, 0, 3, 5]
    signal = np.dot(X, w)

    # Draw the high-noise response first, then the low-noise one.
    y_noisy = signal + np.random.randn(50) * 20
    y_lownoise = signal + np.random.randn(50)

    n_boot = 500
    boot_kws = dict(n_boot=n_boot, func=ols_fit)
    w_boot_noisy = algo.bootstrap(X, y_noisy, **boot_kws)
    w_boot_lownoise = algo.bootstrap(X, y_lownoise, **boot_kws)

    expected_shape = (n_boot, 5)
    assert w_boot_noisy.shape == expected_shape
    assert w_boot_lownoise.shape == expected_shape

    # More noise in the response should spread the bootstrapped weights more.
    assert w_boot_lownoise.std() < w_boot_noisy.std()
def test_bootstrap_units(random):
    """Check that supplying unit IDs widens the bootstrap distribution."""
    data = np.random.randn(50)

    # Ten units with five observations each, plus one random offset per unit.
    ids = np.repeat(range(10), 5)
    unit_offsets = np.random.normal(0, 2, 10)
    data_rm = data + unit_offsets[ids]

    seed = 77
    boots_orig = algo.bootstrap(data_rm, seed=seed)
    boots_rm = algo.bootstrap(data_rm, units=ids, seed=seed)

    assert boots_orig.std() < boots_rm.std()
def test_bootstrap_arglength():
    """Check that inputs of unequal length raise ValueError."""
    shorter = np.arange(5)
    longer = np.arange(10)
    with pytest.raises(ValueError):
        algo.bootstrap(shorter, longer)
def test_bootstrap_string_func():
    """Check that a function name string matches passing the numpy function."""
    x = np.random.randn(100)

    # Same seed for both calls so the resamples are comparable.
    for method_name, numpy_func in (("mean", np.mean), ("std", np.std)):
        by_name = algo.bootstrap(x, func=method_name, seed=0)
        by_callable = algo.bootstrap(x, func=numpy_func, seed=0)
        assert np.array_equal(by_name, by_callable)

    # An unknown name is expected to surface as AttributeError.
    with pytest.raises(AttributeError):
        algo.bootstrap(x, func="not_a_method_name")
def test_bootstrap_reproducibility(random):
    """Check that equal seeds reproduce results via ``seed`` and ``random_seed``."""
    data = np.random.randn(50)

    first = algo.bootstrap(data, seed=100)
    second = algo.bootstrap(data, seed=100)
    assert_array_equal(first, second)

    # Deprecated, remove when removing random_seed
    with pytest.warns(UserWarning):
        first = algo.bootstrap(data, random_seed=100)
        second = algo.bootstrap(data, random_seed=100)
        assert_array_equal(first, second)
@pytest.mark.skipif(
    LooseVersion(np.__version__) < "1.17",
    reason="Tests new numpy random functionality",
)
def test_seed_new():
    """Check the RNG type and reproducibility for each kind of seed input."""
    # pytest parametrize cannot be used here: the case table references the
    # new Generator object and related functions, which are not defined on
    # numpy versions where this test is skipped.
    cases = [
        (None, None, npr.Generator, False),
        (npr.RandomState(0), npr.RandomState(0), npr.RandomState, True),
        (npr.RandomState(0), npr.RandomState(1), npr.RandomState, False),
        (npr.default_rng(1), npr.default_rng(1), npr.Generator, True),
        (npr.default_rng(1), npr.default_rng(2), npr.Generator, False),
        (npr.SeedSequence(10), npr.SeedSequence(10), npr.Generator, True),
        (npr.SeedSequence(10), npr.SeedSequence(20), npr.Generator, False),
        (100, 100, npr.Generator, True),
        (100, 200, npr.Generator, False),
    ]

    for seed_a, seed_b, expected_type, should_match in cases:
        rng_a = algo._handle_random_seed(seed_a)
        rng_b = algo._handle_random_seed(seed_b)

        for rng in (rng_a, rng_b):
            assert isinstance(rng, expected_type)

        # One draw from each generator: equal only when the seeds agree.
        draws_equal = rng_a.uniform() == rng_b.uniform()
        assert draws_equal == should_match
@pytest.mark.skipif(
    LooseVersion(np.__version__) >= "1.17",
    reason="Tests old numpy random functionality",
)
@pytest.mark.parametrize(
    "seed1, seed2, match",
    [
        (None, None, False),
        (npr.RandomState(0), npr.RandomState(0), True),
        (npr.RandomState(0), npr.RandomState(1), False),
        (100, 100, True),
        (100, 200, False),
    ],
)
def test_seed_old(seed1, seed2, match):
    """Check that seed handling yields RandomState objects that match as expected."""
    first_rng = algo._handle_random_seed(seed1)
    second_rng = algo._handle_random_seed(seed2)

    for rng in (first_rng, second_rng):
        assert isinstance(rng, np.random.RandomState)

    # One draw from each generator: equal only when the seeds agree.
    draws_equal = first_rng.uniform() == second_rng.uniform()
    assert draws_equal == match
@pytest.mark.skipif(
    LooseVersion(np.__version__) >= "1.17",
    reason="Tests old numpy random functionality",
)
def test_bad_seed_old():
    """Check that an unusable seed value raises ValueError."""
    bad_seed = "not_a_random_seed"
    with pytest.raises(ValueError):
        algo._handle_random_seed(bad_seed)
|