test_indexing.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. from datetime import datetime, timedelta
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import Index, Timedelta, TimedeltaIndex, notna, timedelta_range
  6. import pandas._testing as tm
  7. class TestGetItem:
  8. def test_ellipsis(self):
  9. # GH#21282
  10. idx = timedelta_range("1 day", "31 day", freq="D", name="idx")
  11. result = idx[...]
  12. assert result.equals(idx)
  13. assert result is not idx
  14. def test_getitem(self):
  15. idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx")
  16. for idx in [idx1]:
  17. result = idx[0]
  18. assert result == Timedelta("1 day")
  19. result = idx[0:5]
  20. expected = timedelta_range("1 day", "5 day", freq="D", name="idx")
  21. tm.assert_index_equal(result, expected)
  22. assert result.freq == expected.freq
  23. result = idx[0:10:2]
  24. expected = timedelta_range("1 day", "9 day", freq="2D", name="idx")
  25. tm.assert_index_equal(result, expected)
  26. assert result.freq == expected.freq
  27. result = idx[-20:-5:3]
  28. expected = timedelta_range("12 day", "24 day", freq="3D", name="idx")
  29. tm.assert_index_equal(result, expected)
  30. assert result.freq == expected.freq
  31. result = idx[4::-1]
  32. expected = TimedeltaIndex(
  33. ["5 day", "4 day", "3 day", "2 day", "1 day"], freq="-1D", name="idx"
  34. )
  35. tm.assert_index_equal(result, expected)
  36. assert result.freq == expected.freq
  37. @pytest.mark.parametrize(
  38. "key",
  39. [pd.Timestamp("1970-01-01"), pd.Timestamp("1970-01-02"), datetime(1970, 1, 1)],
  40. )
  41. def test_timestamp_invalid_key(self, key):
  42. # GH#20464
  43. tdi = pd.timedelta_range(0, periods=10)
  44. with pytest.raises(TypeError):
  45. tdi.get_loc(key)
  46. class TestWhere:
  47. def test_where_invalid_dtypes(self):
  48. tdi = timedelta_range("1 day", periods=3, freq="D", name="idx")
  49. i2 = tdi.copy()
  50. i2 = Index([pd.NaT, pd.NaT] + tdi[2:].tolist())
  51. with pytest.raises(TypeError, match="Where requires matching dtype"):
  52. tdi.where(notna(i2), i2.asi8)
  53. with pytest.raises(TypeError, match="Where requires matching dtype"):
  54. tdi.where(notna(i2), i2 + pd.Timestamp.now())
  55. with pytest.raises(TypeError, match="Where requires matching dtype"):
  56. tdi.where(notna(i2), (i2 + pd.Timestamp.now()).to_period("D"))
  class TestTake:
      """Checks for ``TimedeltaIndex.take``."""

      def test_take(self):
          """``take`` results match the expected index and freq (or ``None``)."""
          # GH 10295
          tdi = timedelta_range("1 day", "31 day", freq="D", name="idx")

          first = tdi.take([0])
          assert first == Timedelta("1 day")

          last = tdi.take([-1])
          assert last == Timedelta("31 day")

          # Evenly spaced picks: the result's freq must equal the expected freq.
          with_freq = [
              ([0, 1, 2], timedelta_range("1 day", "3 day", freq="D", name="idx")),
              ([0, 2, 4], timedelta_range("1 day", "5 day", freq="2D", name="idx")),
              ([7, 4, 1], timedelta_range("8 day", "2 day", freq="-3D", name="idx")),
          ]
          for positions, expected in with_freq:
              result = tdi.take(positions)
              tm.assert_index_equal(result, expected)
              assert result.freq == expected.freq

          # Irregular picks: the result's freq must be None.
          without_freq = [
              ([3, 2, 5], TimedeltaIndex(["4 day", "3 day", "6 day"], name="idx")),
              ([-3, 2, 5], TimedeltaIndex(["29 day", "3 day", "6 day"], name="idx")),
          ]
          for positions, expected in without_freq:
              result = tdi.take(positions)
              tm.assert_index_equal(result, expected)
              assert result.freq is None

      def test_take_invalid_kwargs(self):
          """Unknown, ``out`` and ``mode`` keyword arguments are all rejected."""
          idx = timedelta_range("1 day", "31 day", freq="D", name="idx")
          indices = [1, 6, 5, 9, 10, 13, 15, 3]

          # (exception type, message pattern, offending keyword arguments)
          rejected = [
              (
                  TypeError,
                  r"take\(\) got an unexpected keyword argument 'foo'",
                  {"foo": 2},
              ),
              (ValueError, "the 'out' parameter is not supported", {"out": indices}),
              (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
          ]
          for exc_type, msg, kwargs in rejected:
              with pytest.raises(exc_type, match=msg):
                  idx.take(indices, **kwargs)

      # TODO: This method came from test_timedelta; de-dup with version above
      def test_take2(self):
          """``take`` and list indexing give the same freq-less TimedeltaIndex."""
          tds = ["1day 02:00:00", "1 day 04:00:00", "1 day 10:00:00"]
          idx = timedelta_range(start="1d", end="2d", freq="H", name="idx")
          expected = TimedeltaIndex(tds, freq=None, name="idx")

          positions = [2, 4, 10]
          for taken in (idx.take(positions), idx[positions]):
              tm.assert_index_equal(taken, expected)
              assert isinstance(taken, TimedeltaIndex)
              assert taken.freq is None
              assert taken.name == expected.name

      def test_take_fill_value(self):
          """``fill_value`` turns ``-1`` into NaT unless ``allow_fill=False``."""
          # GH 12631
          idx = TimedeltaIndex(["1 days", "2 days", "3 days"], name="xxx")

          # (keyword arguments, expected values) for the indexer [1, 0, -1]
          valid_cases = [
              ({}, ["2 days", "1 days", "3 days"]),
              # fill_value
              ({"fill_value": True}, ["2 days", "1 days", "NaT"]),
              # allow_fill=False
              (
                  {"allow_fill": False, "fill_value": True},
                  ["2 days", "1 days", "3 days"],
              ),
          ]
          for kwargs, values in valid_cases:
              result = idx.take(np.array([1, 0, -1]), **kwargs)
              expected = TimedeltaIndex(values, name="xxx")
              tm.assert_index_equal(result, expected)

          msg = (
              "When allow_fill=True and fill_value is not None, "
              "all indices must be >= -1"
          )
          for too_negative in (-2, -5):
              with pytest.raises(ValueError, match=msg):
                  idx.take(np.array([1, 0, too_negative]), fill_value=True)

          with pytest.raises(IndexError):
              idx.take(np.array([1, -5]))
  134. class TestTimedeltaIndex:
  135. def test_insert_empty(self):
  136. # Corner case inserting with length zero doesnt raise IndexError
  137. idx = timedelta_range("1 Day", periods=3)
  138. td = idx[0]
  139. idx[:0].insert(0, td)
  140. idx[:0].insert(1, td)
  141. idx[:0].insert(-1, td)
  142. def test_insert(self):
  143. idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx")
  144. result = idx.insert(2, timedelta(days=5))
  145. exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx")
  146. tm.assert_index_equal(result, exp)
  147. # insertion of non-datetime should coerce to object index
  148. result = idx.insert(1, "inserted")
  149. expected = Index(
  150. [Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")],
  151. name="idx",
  152. )
  153. assert not isinstance(result, TimedeltaIndex)
  154. tm.assert_index_equal(result, expected)
  155. assert result.name == expected.name
  156. idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx")
  157. # preserve freq
  158. expected_0 = TimedeltaIndex(
  159. ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
  160. name="idx",
  161. freq="s",
  162. )
  163. expected_3 = TimedeltaIndex(
  164. ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"],
  165. name="idx",
  166. freq="s",
  167. )
  168. # reset freq to None
  169. expected_1_nofreq = TimedeltaIndex(
  170. ["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
  171. name="idx",
  172. freq=None,
  173. )
  174. expected_3_nofreq = TimedeltaIndex(
  175. ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"],
  176. name="idx",
  177. freq=None,
  178. )
  179. cases = [
  180. (0, Timedelta("1day"), expected_0),
  181. (-3, Timedelta("1day"), expected_0),
  182. (3, Timedelta("1day 00:00:04"), expected_3),
  183. (1, Timedelta("1day 00:00:01"), expected_1_nofreq),
  184. (3, Timedelta("1day 00:00:05"), expected_3_nofreq),
  185. ]
  186. for n, d, expected in cases:
  187. result = idx.insert(n, d)
  188. tm.assert_index_equal(result, expected)
  189. assert result.name == expected.name
  190. assert result.freq == expected.freq
  191. @pytest.mark.parametrize(
  192. "null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA]
  193. )
  194. def test_insert_nat(self, null):
  195. # GH 18295 (test missing)
  196. idx = timedelta_range("1day", "3day")
  197. result = idx.insert(1, null)
  198. expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"])
  199. tm.assert_index_equal(result, expected)
  200. def test_insert_invalid_na(self):
  201. idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx")
  202. with pytest.raises(TypeError, match="incompatible label"):
  203. idx.insert(0, np.datetime64("NaT"))
  204. def test_insert_dont_cast_strings(self):
  205. # To match DatetimeIndex and PeriodIndex behavior, dont try to
  206. # parse strings to Timedelta
  207. idx = timedelta_range("1day", "3day")
  208. result = idx.insert(0, "1 Day")
  209. assert result.dtype == object
  210. assert result[0] == "1 Day"
  211. def test_delete(self):
  212. idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx")
  213. # preserve freq
  214. expected_0 = timedelta_range(start="2 Days", periods=4, freq="D", name="idx")
  215. expected_4 = timedelta_range(start="1 Days", periods=4, freq="D", name="idx")
  216. # reset freq to None
  217. expected_1 = TimedeltaIndex(
  218. ["1 day", "3 day", "4 day", "5 day"], freq=None, name="idx"
  219. )
  220. cases = {
  221. 0: expected_0,
  222. -5: expected_0,
  223. -1: expected_4,
  224. 4: expected_4,
  225. 1: expected_1,
  226. }
  227. for n, expected in cases.items():
  228. result = idx.delete(n)
  229. tm.assert_index_equal(result, expected)
  230. assert result.name == expected.name
  231. assert result.freq == expected.freq
  232. with pytest.raises((IndexError, ValueError)):
  233. # either depending on numpy version
  234. idx.delete(5)
  235. def test_delete_slice(self):
  236. idx = timedelta_range(start="1 days", periods=10, freq="D", name="idx")
  237. # preserve freq
  238. expected_0_2 = timedelta_range(start="4 days", periods=7, freq="D", name="idx")
  239. expected_7_9 = timedelta_range(start="1 days", periods=7, freq="D", name="idx")
  240. # reset freq to None
  241. expected_3_5 = TimedeltaIndex(
  242. ["1 d", "2 d", "3 d", "7 d", "8 d", "9 d", "10d"], freq=None, name="idx"
  243. )
  244. cases = {
  245. (0, 1, 2): expected_0_2,
  246. (7, 8, 9): expected_7_9,
  247. (3, 4, 5): expected_3_5,
  248. }
  249. for n, expected in cases.items():
  250. result = idx.delete(n)
  251. tm.assert_index_equal(result, expected)
  252. assert result.name == expected.name
  253. assert result.freq == expected.freq
  254. result = idx.delete(slice(n[0], n[-1] + 1))
  255. tm.assert_index_equal(result, expected)
  256. assert result.name == expected.name
  257. assert result.freq == expected.freq
  258. def test_get_loc(self):
  259. idx = pd.to_timedelta(["0 days", "1 days", "2 days"])
  260. for method in [None, "pad", "backfill", "nearest"]:
  261. assert idx.get_loc(idx[1], method) == 1
  262. assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1
  263. assert idx.get_loc(str(idx[1]), method) == 1
  264. assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1
  265. assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1
  266. assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1
  267. with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
  268. idx.get_loc(idx[1], method="nearest", tolerance="foo")
  269. with pytest.raises(ValueError, match="tolerance size must match"):
  270. idx.get_loc(
  271. idx[1],
  272. method="nearest",
  273. tolerance=[
  274. Timedelta(0).to_timedelta64(),
  275. Timedelta(0).to_timedelta64(),
  276. ],
  277. )
  278. for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]:
  279. assert idx.get_loc("1 day 1 hour", method) == loc
  280. # GH 16909
  281. assert idx.get_loc(idx[1].to_timedelta64()) == 1
  282. # GH 16896
  283. assert idx.get_loc("0 days") == 0
  284. def test_get_loc_nat(self):
  285. tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"])
  286. assert tidx.get_loc(pd.NaT) == 1
  287. assert tidx.get_loc(None) == 1
  288. assert tidx.get_loc(float("nan")) == 1
  289. assert tidx.get_loc(np.nan) == 1
  290. def test_get_indexer(self):
  291. idx = pd.to_timedelta(["0 days", "1 days", "2 days"])
  292. tm.assert_numpy_array_equal(
  293. idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
  294. )
  295. target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
  296. tm.assert_numpy_array_equal(
  297. idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
  298. )
  299. tm.assert_numpy_array_equal(
  300. idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
  301. )
  302. tm.assert_numpy_array_equal(
  303. idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
  304. )
  305. res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour"))
  306. tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))