123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- from datetime import timedelta
- import numpy as np
- import pytest
- from pandas.core.dtypes.generic import ABCDateOffset
- import pandas as pd
- from pandas import Series, TimedeltaIndex, timedelta_range
- import pandas._testing as tm
- from pandas.tests.base.test_ops import Ops
- from pandas.tseries.offsets import Day, Hour
class TestTimedeltaIndexOps(Ops):
    """Tests for generic index operations as they apply to ``TimedeltaIndex``.

    Built on the shared ``Ops`` test base; ``setup_method`` narrows the
    objects under test to ``TimedeltaIndex`` instances.
    """

    def setup_method(self, method):
        """Run the base setup, then keep only ``TimedeltaIndex`` objects."""
        super().setup_method(method)
        # ``self.objs`` is read right after the base setup runs, so it is
        # presumably populated there -- confirm against ``Ops``.
        self.is_valid_objs = [o for o in self.objs if isinstance(o, TimedeltaIndex)]
        self.not_valid_objs = []

    def test_ops_properties(self):
        """Run the inherited property checks for field ops and object ops."""

        def is_timedelta_index(obj):
            return isinstance(obj, TimedeltaIndex)

        self.check_ops_properties(TimedeltaIndex._field_ops, is_timedelta_index)
        self.check_ops_properties(TimedeltaIndex._object_ops, is_timedelta_index)

    def test_value_counts_unique(self):
        """``value_counts`` and ``unique`` on repeated values, with and without NaT."""
        # GH 7735
        idx = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        # The most repeated value (the last one) is expected first.
        exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                pd.NaT,
            ]
        )

        # By default NaT is not counted.
        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # With dropna=False, NaT gets its own entry.
        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        """Membership works on indexes that contain duplicate values."""
        # GH 9512
        cases = (
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ["00:01:00", "00:01:00", "00:02:00"],
            ["00:01:00", "00:01:00", "00:00:01"],
        )
        for values in cases:
            idx = TimedeltaIndex(values)
            assert idx[0] in idx

    def test_unknown_attribute(self):
        """Accessing a missing attribute on a timedelta-indexed Series raises."""
        # see gh-9680
        tdi = pd.timedelta_range(start=0, periods=10, freq="1s")
        ts = pd.Series(np.random.normal(size=10), index=tdi)
        assert "foo" not in ts.__dict__
        msg = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=msg):
            ts.foo

    def test_order(self):
        """``sort_values`` orders values, returns the indexer and tracks freq."""
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx")

        # Already-sorted input with a freq: the freq is kept when sorting
        # ascending and has ``n == -1`` when sorting descending.
        for idx in [idx1, idx2]:
            reversed_idx = idx[::-1]

            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, reversed_idx)
            assert ordered.freq == reversed_idx.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, reversed_idx)
            tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
            assert ordered.freq == reversed_idx.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )

        idx2 = TimedeltaIndex(
            ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2"
        )
        exp2 = TimedeltaIndex(
            ["1 day", "1 day", "2 day", "3 day", "5 day"], name="idx2"
        )

        # NOTE(review): a NaT-containing input is not exercised here; where
        # NaT lands after sorting is not pinned down by this test -- confirm
        # the intended position before adding such a case.

        # Unsorted input without a freq. Both inputs share the same relative
        # ordering, so the expected indexers below apply to each of them.
        for idx, expected in [(idx1, exp1), (idx2, exp2)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        """``drop_duplicates`` keeps freq on unique input; it is None after append."""
        # GH 10115
        idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        """``drop_duplicates`` agrees between Index and Series for each ``keep``."""
        # to check Index/Series compat
        base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
        # The first five entries appear twice: at positions 0-4 and 31-35.
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep="last")
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep="last")
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize(
        "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
    )
    def test_infer_freq(self, freq):
        """Rebuilding from ``asi8`` with ``freq="infer"`` recovers the freq."""
        # GH#11018
        idx = pd.timedelta_range("1", freq=freq, periods=10)
        result = pd.TimedeltaIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_shift(self):
        # Intentionally a no-op here: handled in test_arithmetic.py
        pass

    def test_repeat(self):
        """``repeat`` and ``np.repeat`` duplicate each element and drop the freq."""
        index = pd.timedelta_range("1 days", periods=2, freq="D")
        exp = pd.TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(["1 days", "NaT", "3 days"])
        exp = TimedeltaIndex(
            [
                "1 days",
                "1 days",
                "1 days",
                "NaT",
                "NaT",
                "NaT",
                "3 days",
                "3 days",
                "3 days",
            ]
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        """NaT-related attributes for indexes with and without missing values."""
        assert pd.TimedeltaIndex._na_value is pd.NaT
        assert pd.TimedeltaIndex([])._na_value is pd.NaT

        idx = pd.TimedeltaIndex(["1 days", "2 days"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = pd.TimedeltaIndex(["1 days", "NaT"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    def test_equals(self):
        """``equals`` across copies, object dtype, other containers and OOB values."""
        # GH 13107
        idx = pd.TimedeltaIndex(["1 days", "2 days", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # Same values in a different order are not equal.
        idx2 = pd.TimedeltaIndex(["2 days", "1 days", "NaT"])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we don't raise OverflowError on comparisons outside the
        # implementation range
        oob = pd.Index([timedelta(days=10 ** 6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        # FIXME: oob.apply(np.timedelta64) incorrectly overflows
        oob2 = pd.Index([np.timedelta64(x) for x in oob], dtype=object)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        """A conforming freq can be set on the backing array and reset to None."""
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Setting a non-conforming, non-fixed or unparsable freq raises."""
        # GH 20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with a non-fixed frequency
        msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "2B"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"
|