test_timeseries.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. from datetime import datetime, time
  2. from itertools import product
  3. import numpy as np
  4. import pytest
  5. import pytz
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. DatetimeIndex,
  10. Index,
  11. MultiIndex,
  12. Series,
  13. date_range,
  14. period_range,
  15. to_datetime,
  16. )
  17. import pandas._testing as tm
  18. import pandas.tseries.offsets as offsets
  @pytest.fixture(params=list(product((True, False), repeat=2)))
  def close_open_fixture(request):
      # Hands out each combination of two booleans in turn; test_between_time
      # unpacks the pair as (inc_start, inc_end).
      return request.param
  22. class TestDataFrameTimeSeriesMethods:
  23. def test_frame_ctor_datetime64_column(self):
  24. rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
  25. dates = np.asarray(rng)
  26. df = DataFrame({"A": np.random.randn(len(rng)), "B": dates})
  27. assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]"))
  28. def test_frame_append_datetime64_column(self):
  29. rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
  30. df = DataFrame(index=np.arange(len(rng)))
  31. df["A"] = rng
  32. assert np.issubdtype(df["A"].dtype, np.dtype("M8[ns]"))
  33. def test_frame_datetime64_pre1900_repr(self):
  34. df = DataFrame({"year": date_range("1/1/1700", periods=50, freq="A-DEC")})
  35. # it works!
  36. repr(df)
  37. def test_frame_append_datetime64_col_other_units(self):
  38. n = 100
  39. units = ["h", "m", "s", "ms", "D", "M", "Y"]
  40. ns_dtype = np.dtype("M8[ns]")
  41. for unit in units:
  42. dtype = np.dtype("M8[{unit}]".format(unit=unit))
  43. vals = np.arange(n, dtype=np.int64).view(dtype)
  44. df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
  45. df[unit] = vals
  46. ex_vals = to_datetime(vals.astype("O")).values
  47. assert df[unit].dtype == ns_dtype
  48. assert (df[unit].values == ex_vals).all()
  49. # Test insertion into existing datetime64 column
  50. df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
  51. df["dates"] = np.arange(n, dtype=np.int64).view(ns_dtype)
  52. for unit in units:
  53. dtype = np.dtype("M8[{unit}]".format(unit=unit))
  54. vals = np.arange(n, dtype=np.int64).view(dtype)
  55. tmp = df.copy()
  56. tmp["dates"] = vals
  57. ex_vals = to_datetime(vals.astype("O")).values
  58. assert (tmp["dates"].values == ex_vals).all()
  59. def test_asfreq(self, datetime_frame):
  60. offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd())
  61. rule_monthly = datetime_frame.asfreq("BM")
  62. tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"])
  63. filled = rule_monthly.asfreq("B", method="pad") # noqa
  64. # TODO: actually check that this worked.
  65. # don't forget!
  66. filled_dep = rule_monthly.asfreq("B", method="pad") # noqa
  67. # test does not blow up on length-0 DataFrame
  68. zero_length = datetime_frame.reindex([])
  69. result = zero_length.asfreq("BM")
  70. assert result is not zero_length
  71. def test_asfreq_datetimeindex(self):
  72. df = DataFrame(
  73. {"A": [1, 2, 3]},
  74. index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)],
  75. )
  76. df = df.asfreq("B")
  77. assert isinstance(df.index, DatetimeIndex)
  78. ts = df["A"].asfreq("B")
  79. assert isinstance(ts.index, DatetimeIndex)
  80. def test_asfreq_fillvalue(self):
  81. # test for fill value during upsampling, related to issue 3715
  82. # setup
  83. rng = pd.date_range("1/1/2016", periods=10, freq="2S")
  84. ts = pd.Series(np.arange(len(rng)), index=rng)
  85. df = pd.DataFrame({"one": ts})
  86. # insert pre-existing missing value
  87. df.loc["2016-01-01 00:00:08", "one"] = None
  88. actual_df = df.asfreq(freq="1S", fill_value=9.0)
  89. expected_df = df.asfreq(freq="1S").fillna(9.0)
  90. expected_df.loc["2016-01-01 00:00:08", "one"] = None
  91. tm.assert_frame_equal(expected_df, actual_df)
  92. expected_series = ts.asfreq(freq="1S").fillna(9.0)
  93. actual_series = ts.asfreq(freq="1S", fill_value=9.0)
  94. tm.assert_series_equal(expected_series, actual_series)
  @pytest.mark.parametrize(
      "data,idx,expected_first,expected_last",
      [
          ({"A": [1, 2, 3]}, [1, 1, 2], 1, 2),
          ({"A": [1, 2, 3]}, [1, 2, 2], 1, 2),
          ({"A": [1, 2, 3, 4]}, ["d", "d", "d", "d"], "d", "d"),
          ({"A": [1, np.nan, 3]}, [1, 1, 2], 1, 2),
          ({"A": [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
          ({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2),
      ],
  )
  def test_first_last_valid(
      self, float_frame, data, idx, expected_first, expected_last
  ):
      # first_valid_index / last_valid_index on: a column with NaN at both
      # ends, an empty frame, an all-NaN frame, a business-day index, and the
      # parametrized frames (which include duplicate index labels)
      n_rows = len(float_frame.index)
      values = np.random.randn(n_rows)
      values[:5] = values[-5:] = np.nan

      frame = DataFrame({"foo": values}, index=float_frame.index)
      assert frame.first_valid_index() == frame.index[5]
      assert frame.last_valid_index() == frame.index[-6]

      # GH12800
      nothing = DataFrame()
      assert nothing.last_valid_index() is None
      assert nothing.first_valid_index() is None

      # GH17400: no valid entries
      frame[:] = np.nan
      assert frame.last_valid_index() is None
      assert frame.first_valid_index() is None

      # GH20499: it preserves freq with holes
      frame.index = date_range("20110101", periods=n_rows, freq="B")
      frame.iloc[1] = 1
      frame.iloc[-2] = 1
      first_label = frame.first_valid_index()
      last_label = frame.last_valid_index()
      assert first_label == frame.index[1]
      assert last_label == frame.index[-2]
      assert first_label.freq == frame.index.freq
      assert last_label.freq == frame.index.freq

      # GH 21441
      param_frame = DataFrame(data, index=idx)
      assert expected_first == param_frame.first_valid_index()
      assert expected_last == param_frame.last_valid_index()
  @pytest.mark.parametrize("klass", [Series, DataFrame])
  def test_first_valid_index_all_nan(self, klass):
      # GH#9752 Series/DataFrame should both return None, not raise
      all_nan = klass([np.nan])
      zero_rows = all_nan.iloc[:0]
      for candidate in (all_nan, zero_rows):
          assert candidate.first_valid_index() is None
  144. def test_first_subset(self):
  145. ts = tm.makeTimeDataFrame(freq="12h")
  146. result = ts.first("10d")
  147. assert len(result) == 20
  148. ts = tm.makeTimeDataFrame(freq="D")
  149. result = ts.first("10d")
  150. assert len(result) == 10
  151. result = ts.first("3M")
  152. expected = ts[:"3/31/2000"]
  153. tm.assert_frame_equal(result, expected)
  154. result = ts.first("21D")
  155. expected = ts[:21]
  156. tm.assert_frame_equal(result, expected)
  157. result = ts[:0].first("3M")
  158. tm.assert_frame_equal(result, ts[:0])
  def test_first_raises(self):
      # GH20725
      # The default integer index is not a DatetimeIndex, so first() must
      # raise. The message is pinned so an unrelated TypeError cannot satisfy
      # the check.
      df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
      msg = "'first' only supports a DatetimeIndex index"
      with pytest.raises(TypeError, match=msg):
          df.first("1D")
  164. def test_last_subset(self):
  165. ts = tm.makeTimeDataFrame(freq="12h")
  166. result = ts.last("10d")
  167. assert len(result) == 20
  168. ts = tm.makeTimeDataFrame(nper=30, freq="D")
  169. result = ts.last("10d")
  170. assert len(result) == 10
  171. result = ts.last("21D")
  172. expected = ts["2000-01-10":]
  173. tm.assert_frame_equal(result, expected)
  174. result = ts.last("21D")
  175. expected = ts[-21:]
  176. tm.assert_frame_equal(result, expected)
  177. result = ts[:0].last("3M")
  178. tm.assert_frame_equal(result, ts[:0])
  def test_last_raises(self):
      # GH20725
      # The default integer index is not a DatetimeIndex, so last() must
      # raise. The message is pinned so an unrelated TypeError cannot satisfy
      # the check.
      df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
      msg = "'last' only supports a DatetimeIndex index"
      with pytest.raises(TypeError, match=msg):
          df.last("1D")
  184. def test_at_time(self):
  185. rng = date_range("1/1/2000", "1/5/2000", freq="5min")
  186. ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
  187. rs = ts.at_time(rng[1])
  188. assert (rs.index.hour == rng[1].hour).all()
  189. assert (rs.index.minute == rng[1].minute).all()
  190. assert (rs.index.second == rng[1].second).all()
  191. result = ts.at_time("9:30")
  192. expected = ts.at_time(time(9, 30))
  193. tm.assert_frame_equal(result, expected)
  194. result = ts.loc[time(9, 30)]
  195. expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
  196. tm.assert_frame_equal(result, expected)
  197. # midnight, everything
  198. rng = date_range("1/1/2000", "1/31/2000")
  199. ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
  200. result = ts.at_time(time(0, 0))
  201. tm.assert_frame_equal(result, ts)
  202. # time doesn't exist
  203. rng = date_range("1/1/2012", freq="23Min", periods=384)
  204. ts = DataFrame(np.random.randn(len(rng), 2), rng)
  205. rs = ts.at_time("16:00")
  206. assert len(rs) == 0
  @pytest.mark.parametrize(
      "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)]
  )
  def test_at_time_errors(self, hour):
      # GH 24043
      stamps = pd.date_range("2018", periods=3, freq="H")
      frame = pd.DataFrame(list(range(len(stamps))), index=stamps)

      if getattr(hour, "tzinfo", None) is not None:
          # a tz-aware time against this tz-naive index must be rejected
          with pytest.raises(ValueError, match="Index must be timezone"):
              frame.at_time(hour)
          return

      # every naive spelling of one o'clock selects the second row only
      tm.assert_frame_equal(frame.at_time(hour), frame.iloc[1:2])
  def test_at_time_tz(self):
      # GH 24043
      stamps = pd.date_range("2018", periods=3, freq="H", tz="US/Pacific")
      frame = pd.DataFrame(list(range(len(stamps))), index=stamps)

      # the query time carries a different zone than the index
      eastern_four = time(4, tzinfo=pytz.timezone("US/Eastern"))
      tm.assert_frame_equal(frame.at_time(eastern_four), frame.iloc[1:2])
  def test_at_time_raises(self):
      # GH20725
      # The default integer index is not a DatetimeIndex, so at_time() must
      # raise. The message is pinned (same text that
      # test_between_time_axis_raises matches) so an unrelated TypeError
      # cannot satisfy the check.
      df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
      msg = "Index must be DatetimeIndex"
      with pytest.raises(TypeError, match=msg):
          df.at_time("00:00")
  @pytest.mark.parametrize("axis", ["index", "columns", 0, 1])
  def test_at_time_axis(self, axis):
      # issue 8839
      stamps = date_range("1/1/2000", "1/5/2000", freq="5min")
      size = len(stamps)
      frame = DataFrame(np.random.randn(size, size))
      frame.index = stamps
      frame.columns = stamps

      # labels falling exactly on 09:30:00
      is_half_nine = (stamps.hour == 9) & (stamps.minute == 30) & (stamps.second == 0)
      wanted_labels = stamps[is_half_nine]

      if axis in ["index", 0]:
          expected = frame.loc[wanted_labels, :]
      elif axis in ["columns", 1]:
          expected = frame.loc[:, wanted_labels]

      tm.assert_frame_equal(frame.at_time("9:30", axis=axis), expected)
  246. def test_between_time(self, close_open_fixture):
  247. rng = date_range("1/1/2000", "1/5/2000", freq="5min")
  248. ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
  249. stime = time(0, 0)
  250. etime = time(1, 0)
  251. inc_start, inc_end = close_open_fixture
  252. filtered = ts.between_time(stime, etime, inc_start, inc_end)
  253. exp_len = 13 * 4 + 1
  254. if not inc_start:
  255. exp_len -= 5
  256. if not inc_end:
  257. exp_len -= 4
  258. assert len(filtered) == exp_len
  259. for rs in filtered.index:
  260. t = rs.time()
  261. if inc_start:
  262. assert t >= stime
  263. else:
  264. assert t > stime
  265. if inc_end:
  266. assert t <= etime
  267. else:
  268. assert t < etime
  269. result = ts.between_time("00:00", "01:00")
  270. expected = ts.between_time(stime, etime)
  271. tm.assert_frame_equal(result, expected)
  272. # across midnight
  273. rng = date_range("1/1/2000", "1/5/2000", freq="5min")
  274. ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
  275. stime = time(22, 0)
  276. etime = time(9, 0)
  277. filtered = ts.between_time(stime, etime, inc_start, inc_end)
  278. exp_len = (12 * 11 + 1) * 4 + 1
  279. if not inc_start:
  280. exp_len -= 4
  281. if not inc_end:
  282. exp_len -= 4
  283. assert len(filtered) == exp_len
  284. for rs in filtered.index:
  285. t = rs.time()
  286. if inc_start:
  287. assert (t >= stime) or (t <= etime)
  288. else:
  289. assert (t > stime) or (t <= etime)
  290. if inc_end:
  291. assert (t <= etime) or (t >= stime)
  292. else:
  293. assert (t < etime) or (t >= stime)
  def test_between_time_raises(self):
      # GH20725
      # The default integer index is not a DatetimeIndex, so between_time()
      # must raise. The message is pinned (same text that
      # test_between_time_axis_raises matches) so an unrelated TypeError
      # cannot satisfy the check.
      df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
      msg = "Index must be DatetimeIndex"
      with pytest.raises(TypeError, match=msg):
          df.between_time(start_time="00:00", end_time="12:00")
  299. def test_between_time_axis(self, axis):
  300. # issue 8839
  301. rng = date_range("1/1/2000", periods=100, freq="10min")
  302. ts = DataFrame(np.random.randn(len(rng), len(rng)))
  303. stime, etime = ("08:00:00", "09:00:00")
  304. exp_len = 7
  305. if axis in ["index", 0]:
  306. ts.index = rng
  307. assert len(ts.between_time(stime, etime)) == exp_len
  308. assert len(ts.between_time(stime, etime, axis=0)) == exp_len
  309. if axis in ["columns", 1]:
  310. ts.columns = rng
  311. selected = ts.between_time(stime, etime, axis=1).columns
  312. assert len(selected) == exp_len
  def test_between_time_axis_raises(self, axis):
      # issue 8839
      stamps = date_range("1/1/2000", periods=100, freq="10min")
      size = len(stamps)
      positions = np.arange(0, size)
      frame = DataFrame(np.random.randn(size, size), index=stamps, columns=stamps)
      start, stop = "08:00:00", "09:00:00"
      msg = "Index must be DatetimeIndex"

      if axis in ["columns", 1]:
          # rows relabelled with integers: default axis and axis=0 both fail
          frame.index = positions
          for extra in ({}, {"axis": 0}):
              with pytest.raises(TypeError, match=msg):
                  frame.between_time(start, stop, **extra)

      if axis in ["index", 0]:
          # columns relabelled with integers: axis=1 fails
          frame.columns = positions
          with pytest.raises(TypeError, match=msg):
              frame.between_time(start, stop, axis=1)
  331. def test_operation_on_NaT(self):
  332. # Both NaT and Timestamp are in DataFrame.
  333. df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]})
  334. res = df.min()
  335. exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
  336. tm.assert_series_equal(res, exp)
  337. res = df.max()
  338. exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
  339. tm.assert_series_equal(res, exp)
  340. # GH12941, only NaTs are in DataFrame.
  341. df = pd.DataFrame({"foo": [pd.NaT, pd.NaT]})
  342. res = df.min()
  343. exp = pd.Series([pd.NaT], index=["foo"])
  344. tm.assert_series_equal(res, exp)
  345. res = df.max()
  346. exp = pd.Series([pd.NaT], index=["foo"])
  347. tm.assert_series_equal(res, exp)
  348. def test_datetime_assignment_with_NaT_and_diff_time_units(self):
  349. # GH 7492
  350. data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
  351. result = pd.Series(data_ns).to_frame()
  352. result["new"] = data_ns
  353. expected = pd.DataFrame(
  354. {0: [1, None], "new": [1, None]}, dtype="datetime64[ns]"
  355. )
  356. tm.assert_frame_equal(result, expected)
  357. # OutOfBoundsDatetime error shouldn't occur
  358. data_s = np.array([1, "nat"], dtype="datetime64[s]")
  359. result["new"] = data_s
  360. expected = pd.DataFrame(
  361. {0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]"
  362. )
  363. tm.assert_frame_equal(result, expected)
  def test_frame_to_period(self):
      # DataFrame.to_period along both axes, with and without an explicit freq
      n_cols = 5

      stamps = date_range("1/1/2000", "1/1/2001")
      periods = period_range("1/1/2000", "1/1/2001")
      frame = DataFrame(np.random.randn(len(stamps), n_cols), index=stamps)
      frame["mix"] = "a"

      # rows
      expected = frame.copy()
      expected.index = periods
      tm.assert_frame_equal(frame.to_period(), expected)

      monthly = frame.to_period("M")
      tm.assert_index_equal(monthly.index, expected.index.asfreq("M"))

      # columns
      frame = frame.T
      expected = frame.copy()
      expected.columns = periods
      tm.assert_frame_equal(frame.to_period(axis=1), expected)

      monthly = frame.to_period("M", axis=1)
      tm.assert_index_equal(monthly.columns, expected.columns.asfreq("M"))

      # a frame has no third axis
      msg = "No axis named 2 for object type <class 'pandas.core.frame.DataFrame'>"
      with pytest.raises(ValueError, match=msg):
          frame.to_period(axis=2)
  @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"])
  def test_tz_convert_and_localize(self, fn):
      # Runs DataFrame.tz_localize or DataFrame.tz_convert (chosen by ``fn``)
      # against a plain DatetimeIndex, against individual MultiIndex levels,
      # and against inputs that must be rejected.
      l0 = date_range("20140701", periods=5, freq="D")
      l1 = date_range("20140701", periods=5, freq="D")

      int_idx = Index(range(5))

      if fn == "tz_convert":
          # give the conversion case tz-aware (UTC) data to start from
          l0 = l0.tz_localize("UTC")
          l1 = l1.tz_localize("UTC")

      for idx in [l0, l1]:
          l0_expected = getattr(idx, fn)("US/Pacific")
          l1_expected = getattr(idx, fn)("US/Pacific")

          df1 = DataFrame(np.ones(5), index=l0)
          df1 = getattr(df1, fn)("US/Pacific")
          tm.assert_index_equal(df1.index, l0_expected)

          # MultiIndex
          # GH7846
          df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))

          # only the requested level changes; the other one is left alone
          df3 = getattr(df2, fn)("US/Pacific", level=0)
          assert not df3.index.levels[0].equals(l0)
          tm.assert_index_equal(df3.index.levels[0], l0_expected)
          tm.assert_index_equal(df3.index.levels[1], l1)
          assert not df3.index.levels[1].equals(l1_expected)

          df3 = getattr(df2, fn)("US/Pacific", level=1)
          tm.assert_index_equal(df3.index.levels[0], l0)
          assert not df3.index.levels[0].equals(l0_expected)
          tm.assert_index_equal(df3.index.levels[1], l1_expected)
          assert not df3.index.levels[1].equals(l1)

          # mixed MultiIndex: integer level 0, datetime level 1 (built from l0)
          df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
          df5 = getattr(df4, fn)("US/Pacific", level=1)

          # check df5 itself (these assertions previously re-checked df3)
          tm.assert_index_equal(df5.index.levels[0], int_idx)
          tm.assert_index_equal(df5.index.levels[1], l0_expected)
          assert not df5.index.levels[1].equals(l0)

      # Bad Inputs (independent of ``idx``, so checked once, outside the loop;
      # frames are built outside pytest.raises so only the call under test
      # can satisfy it)

      # Not DatetimeIndex / PeriodIndex
      df = DataFrame(index=int_idx)
      with pytest.raises(TypeError, match="DatetimeIndex"):
          getattr(df, fn)("US/Pacific")

      # Requested MultiIndex level is not DatetimeIndex / PeriodIndex
      df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
      with pytest.raises(TypeError, match="DatetimeIndex"):
          getattr(df, fn)("US/Pacific", level=0)

      # Invalid level
      df = DataFrame(index=l0)
      with pytest.raises(ValueError, match="not valid"):
          getattr(df, fn)("US/Pacific", level=1)