test_ops.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. from datetime import timedelta
  2. import numpy as np
  3. import pytest
  4. from pandas.core.dtypes.generic import ABCDateOffset
  5. import pandas as pd
  6. from pandas import Series, TimedeltaIndex, timedelta_range
  7. import pandas._testing as tm
  8. from pandas.tests.base.test_ops import Ops
  9. from pandas.tseries.offsets import Day, Hour
  class TestTimedeltaIndexOps(Ops):
      """Operation tests for ``TimedeltaIndex``.

      The class builds on the shared ``Ops`` test base: ``setup_method`` filters
      the objects the base class stores in ``self.objs`` and
      ``test_ops_properties`` delegates to ``check_ops_properties``.
      """

      def setup_method(self, method):
          """Keep only the ``TimedeltaIndex`` fixtures created by the base class."""
          super().setup_method(method)
          self.is_valid_objs = [
              obj for obj in self.objs if isinstance(obj, TimedeltaIndex)
          ]
          self.not_valid_objs = []

      def test_ops_properties(self):
          """Check the field and object properties declared on ``TimedeltaIndex``."""

          def is_timedelta_index(obj):
              return isinstance(obj, TimedeltaIndex)

          self.check_ops_properties(TimedeltaIndex._field_ops, is_timedelta_index)
          self.check_ops_properties(TimedeltaIndex._object_ops, is_timedelta_index)

      def test_value_counts_unique(self):
          """``value_counts`` and ``unique`` agree for Index and Series input."""
          # GH 7735
          idx = timedelta_range("1 days 09:00:00", freq="H", periods=10)
          # create repeated values, 'n'th element is repeated by n+1 times
          idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

          exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
          expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

          for obj in [idx, Series(idx)]:
              tm.assert_series_equal(obj.value_counts(), expected)

          expected = timedelta_range("1 days 09:00:00", freq="H", periods=10)
          tm.assert_index_equal(idx.unique(), expected)

          idx = TimedeltaIndex(
              [
                  "1 days 09:00:00",
                  "1 days 09:00:00",
                  "1 days 09:00:00",
                  "1 days 08:00:00",
                  "1 days 08:00:00",
                  pd.NaT,
              ]
          )

          # NaT is excluded from the counts by default ...
          exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
          expected = Series([3, 2], index=exp_idx)

          for obj in [idx, Series(idx)]:
              tm.assert_series_equal(obj.value_counts(), expected)

          # ... and counted when dropna=False is passed.
          exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
          expected = Series([3, 2, 1], index=exp_idx)

          for obj in [idx, Series(idx)]:
              tm.assert_series_equal(obj.value_counts(dropna=False), expected)

          tm.assert_index_equal(idx.unique(), exp_idx)

      def test_nonunique_contains(self):
          """Membership works on indexes that hold duplicate values."""
          # GH 9512
          for idx in map(
              TimedeltaIndex,
              (
                  [0, 1, 0],
                  [0, 0, -1],
                  [0, -1, -1],
                  ["00:01:00", "00:01:00", "00:02:00"],
                  ["00:01:00", "00:01:00", "00:00:01"],
              ),
          ):
              assert idx[0] in idx

      def test_unknown_attribute(self):
          """Accessing a missing attribute on a Series raises ``AttributeError``."""
          # see gh-9680
          tdi = pd.timedelta_range(start=0, periods=10, freq="1s")
          ts = pd.Series(np.random.normal(size=10), index=tdi)
          assert "foo" not in ts.__dict__
          msg = "'Series' object has no attribute 'foo'"
          with pytest.raises(AttributeError, match=msg):
              ts.foo

      def test_order(self):
          """``sort_values`` returns the expected order, indexer and ``freq``."""
          # GH 10295
          idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
          idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx")

          # Already-sorted indexes with a freq: the freq is kept when ascending
          # and has n == -1 when descending.
          for idx in [idx1, idx2]:
              ordered = idx.sort_values()
              tm.assert_index_equal(ordered, idx)
              assert ordered.freq == idx.freq

              ordered = idx.sort_values(ascending=False)
              expected = idx[::-1]
              tm.assert_index_equal(ordered, expected)
              assert ordered.freq == expected.freq
              assert ordered.freq.n == -1

              ordered, indexer = idx.sort_values(return_indexer=True)
              tm.assert_index_equal(ordered, idx)
              tm.assert_numpy_array_equal(
                  indexer, np.array([0, 1, 2]), check_dtype=False
              )
              assert ordered.freq == idx.freq

              ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
              tm.assert_index_equal(ordered, idx[::-1])
              assert ordered.freq == expected.freq
              assert ordered.freq.n == -1

          idx1 = TimedeltaIndex(
              ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
          )
          exp1 = TimedeltaIndex(
              ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
          )

          idx2 = TimedeltaIndex(
              ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2"
          )
          exp2 = TimedeltaIndex(
              ["1 day", "1 day", "2 day", "3 day", "5 day"], name="idx2"
          )

          # Both unsorted inputs follow the value pattern [1, 3, 5, 2, 1], so
          # they share the same expected indexers.
          # NOTE: an index containing NaT is not exercised here.
          exp_ascending = np.array([0, 4, 3, 1, 2])
          exp_descending = np.array([2, 1, 3, 4, 0])

          for idx, expected in [(idx1, exp1), (idx2, exp2)]:
              ordered = idx.sort_values()
              tm.assert_index_equal(ordered, expected)
              assert ordered.freq is None

              ordered = idx.sort_values(ascending=False)
              tm.assert_index_equal(ordered, expected[::-1])
              assert ordered.freq is None

              ordered, indexer = idx.sort_values(return_indexer=True)
              tm.assert_index_equal(ordered, expected)
              tm.assert_numpy_array_equal(indexer, exp_ascending, check_dtype=False)
              assert ordered.freq is None

              ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
              tm.assert_index_equal(ordered, expected[::-1])
              tm.assert_numpy_array_equal(indexer, exp_descending, check_dtype=False)
              assert ordered.freq is None

      def test_drop_duplicates_metadata(self):
          """``drop_duplicates`` keeps ``freq`` only when the input has one."""
          # GH 10115
          idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
          result = idx.drop_duplicates()
          tm.assert_index_equal(idx, result)
          assert idx.freq == result.freq

          idx_dup = idx.append(idx)
          assert idx_dup.freq is None  # freq is reset
          result = idx_dup.drop_duplicates()
          tm.assert_index_equal(idx, result)
          assert result.freq is None

      def test_drop_duplicates(self):
          """``drop_duplicates`` honours ``keep`` for both Index and Series."""
          # to check Index/Series compat
          base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
          idx = base.append(base[:5])

          res = idx.drop_duplicates()
          tm.assert_index_equal(res, base)
          res = Series(idx).drop_duplicates()
          tm.assert_series_equal(res, Series(base))

          res = idx.drop_duplicates(keep="last")
          exp = base[5:].append(base[:5])
          tm.assert_index_equal(res, exp)
          res = Series(idx).drop_duplicates(keep="last")
          tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

          res = idx.drop_duplicates(keep=False)
          tm.assert_index_equal(res, base[5:])
          res = Series(idx).drop_duplicates(keep=False)
          tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

      @pytest.mark.parametrize(
          "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
      )
      def test_infer_freq(self, freq):
          """``freq="infer"`` recovers the frequency from the integer values."""
          # GH#11018
          idx = pd.timedelta_range("1", freq=freq, periods=10)
          result = pd.TimedeltaIndex(idx.asi8, freq="infer")
          tm.assert_index_equal(idx, result)
          assert result.freq == freq

      def test_shift(self):
          """Intentionally empty."""
          pass  # handled in test_arithmetic.py

      def test_repeat(self):
          """``repeat`` duplicates each element and yields a result without freq."""
          index = pd.timedelta_range("1 days", periods=2, freq="D")
          exp = pd.TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
          for res in [index.repeat(2), np.repeat(index, 2)]:
              tm.assert_index_equal(res, exp)
              assert res.freq is None

          index = TimedeltaIndex(["1 days", "NaT", "3 days"])
          exp = TimedeltaIndex(
              [
                  "1 days",
                  "1 days",
                  "1 days",
                  "NaT",
                  "NaT",
                  "NaT",
                  "3 days",
                  "3 days",
                  "3 days",
              ]
          )
          for res in [index.repeat(3), np.repeat(index, 3)]:
              tm.assert_index_equal(res, exp)
              assert res.freq is None

      def test_nat(self):
          """NaT is the NA value and is reported by the NaN helper attributes."""
          assert pd.TimedeltaIndex._na_value is pd.NaT
          assert pd.TimedeltaIndex([])._na_value is pd.NaT

          idx = pd.TimedeltaIndex(["1 days", "2 days"])
          assert idx._can_hold_na

          tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
          assert idx.hasnans is False
          tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

          idx = pd.TimedeltaIndex(["1 days", "NaT"])
          assert idx._can_hold_na

          tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
          assert idx.hasnans is True
          tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

      def test_equals(self):
          """``equals`` compares values across dtypes and rejects non-Index input."""
          # GH 13107
          idx = pd.TimedeltaIndex(["1 days", "2 days", "NaT"])
          assert idx.equals(idx)
          assert idx.equals(idx.copy())
          assert idx.equals(idx.astype(object))
          assert idx.astype(object).equals(idx)
          assert idx.astype(object).equals(idx.astype(object))
          assert not idx.equals(list(idx))
          assert not idx.equals(pd.Series(idx))

          idx2 = pd.TimedeltaIndex(["2 days", "1 days", "NaT"])
          assert not idx.equals(idx2)
          assert not idx.equals(idx2.copy())
          assert not idx.equals(idx2.astype(object))
          assert not idx.astype(object).equals(idx2)
          assert not idx.astype(object).equals(idx2.astype(object))
          assert not idx.equals(list(idx2))
          assert not idx.equals(pd.Series(idx2))

          # Check that we don't raise OverflowError on comparisons outside the
          # implementation range
          oob = pd.Index([timedelta(days=10 ** 6)] * 3, dtype=object)
          assert not idx.equals(oob)
          assert not idx2.equals(oob)

          # FIXME: oob.apply(np.timedelta64) incorrectly overflows
          oob2 = pd.Index([np.timedelta64(x) for x in oob], dtype=object)
          assert not idx.equals(oob2)
          assert not idx2.equals(oob2)

      @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
      @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
      def test_freq_setter(self, values, freq):
          """Setting ``freq`` on the underlying array accepts strings and offsets."""
          # GH 20678
          idx = TimedeltaIndex(values)

          # can set to an offset, converting from string if necessary
          idx._data.freq = freq
          assert idx.freq == freq
          assert isinstance(idx.freq, ABCDateOffset)

          # can reset to None
          idx._data.freq = None
          assert idx.freq is None

      def test_freq_setter_errors(self):
          """Setting an incompatible or invalid ``freq`` raises ``ValueError``."""
          # GH 20678
          idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

          # setting with an incompatible freq
          msg = (
              "Inferred frequency 2D from passed values does not conform to "
              "passed frequency 5D"
          )
          with pytest.raises(ValueError, match=msg):
              idx._data.freq = "5D"

          # setting with a non-fixed frequency
          msg = r"<2 \* BusinessDays> is a non-fixed frequency"
          with pytest.raises(ValueError, match=msg):
              idx._data.freq = "2B"

          # setting with non-freq string
          with pytest.raises(ValueError, match="Invalid frequency"):
              idx._data.freq = "foo"