test_datetimes.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. """
  2. Tests for DatetimeArray
  3. """
  4. import operator
  5. import numpy as np
  6. import pytest
  7. from pandas.core.dtypes.dtypes import DatetimeTZDtype
  8. import pandas as pd
  9. import pandas._testing as tm
  10. from pandas.core.arrays import DatetimeArray
  11. from pandas.core.arrays.datetimes import sequence_to_dt64ns
  12. class TestDatetimeArrayConstructor:
  13. def test_from_sequence_invalid_type(self):
  14. mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
  15. with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
  16. DatetimeArray._from_sequence(mi)
  17. def test_only_1dim_accepted(self):
  18. arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]")
  19. with pytest.raises(ValueError, match="Only 1-dimensional"):
  20. # 3-dim, we allow 2D to sneak in for ops purposes GH#29853
  21. DatetimeArray(arr.reshape(2, 2, 1))
  22. with pytest.raises(ValueError, match="Only 1-dimensional"):
  23. # 0-dim
  24. DatetimeArray(arr[[0]].squeeze())
  25. def test_freq_validation(self):
  26. # GH#24623 check that invalid instances cannot be created with the
  27. # public constructor
  28. arr = np.arange(5, dtype=np.int64) * 3600 * 10 ** 9
  29. msg = (
  30. "Inferred frequency H from passed values does not "
  31. "conform to passed frequency W-SUN"
  32. )
  33. with pytest.raises(ValueError, match=msg):
  34. DatetimeArray(arr, freq="W")
  35. @pytest.mark.parametrize(
  36. "meth",
  37. [
  38. DatetimeArray._from_sequence,
  39. sequence_to_dt64ns,
  40. pd.to_datetime,
  41. pd.DatetimeIndex,
  42. ],
  43. )
  44. def test_mixing_naive_tzaware_raises(self, meth):
  45. # GH#24569
  46. arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])
  47. msg = (
  48. "Cannot mix tz-aware with tz-naive values|"
  49. "Tz-aware datetime.datetime cannot be converted "
  50. "to datetime64 unless utc=True"
  51. )
  52. for obj in [arr, arr[::-1]]:
  53. # check that we raise regardless of whether naive is found
  54. # before aware or vice-versa
  55. with pytest.raises(ValueError, match=msg):
  56. meth(obj)
  57. def test_from_pandas_array(self):
  58. arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9
  59. result = DatetimeArray._from_sequence(arr, freq="infer")
  60. expected = pd.date_range("1970-01-01", periods=5, freq="H")._data
  61. tm.assert_datetime_array_equal(result, expected)
  62. def test_mismatched_timezone_raises(self):
  63. arr = DatetimeArray(
  64. np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"),
  65. dtype=DatetimeTZDtype(tz="US/Central"),
  66. )
  67. dtype = DatetimeTZDtype(tz="US/Eastern")
  68. with pytest.raises(TypeError, match="Timezone of the array"):
  69. DatetimeArray(arr, dtype=dtype)
  70. def test_non_array_raises(self):
  71. with pytest.raises(ValueError, match="list"):
  72. DatetimeArray([1, 2, 3])
  73. def test_other_type_raises(self):
  74. with pytest.raises(
  75. ValueError, match="The dtype of 'values' is incorrect.*bool"
  76. ):
  77. DatetimeArray(np.array([1, 2, 3], dtype="bool"))
  78. def test_incorrect_dtype_raises(self):
  79. with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
  80. DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
  81. def test_freq_infer_raises(self):
  82. with pytest.raises(ValueError, match="Frequency inference"):
  83. DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer")
  84. def test_copy(self):
  85. data = np.array([1, 2, 3], dtype="M8[ns]")
  86. arr = DatetimeArray(data, copy=False)
  87. assert arr._data is data
  88. arr = DatetimeArray(data, copy=True)
  89. assert arr._data is not data
  90. class TestDatetimeArrayComparisons:
  91. # TODO: merge this into tests/arithmetic/test_datetime64 once it is
  92. # sufficiently robust
  93. def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators):
  94. # arbitrary tz-naive DatetimeIndex
  95. opname = all_compare_operators.strip("_")
  96. op = getattr(operator, opname)
  97. dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None)
  98. arr = DatetimeArray(dti)
  99. assert arr.freq == dti.freq
  100. assert arr.tz == dti.tz
  101. right = dti
  102. expected = np.ones(len(arr), dtype=bool)
  103. if opname in ["ne", "gt", "lt"]:
  104. # for these the comparisons should be all-False
  105. expected = ~expected
  106. result = op(arr, arr)
  107. tm.assert_numpy_array_equal(result, expected)
  108. for other in [right, np.array(right)]:
  109. # TODO: add list and tuple, and object-dtype once those
  110. # are fixed in the constructor
  111. result = op(arr, other)
  112. tm.assert_numpy_array_equal(result, expected)
  113. result = op(other, arr)
  114. tm.assert_numpy_array_equal(result, expected)
  115. class TestDatetimeArray:
  116. def test_astype_to_same(self):
  117. arr = DatetimeArray._from_sequence(["2000"], tz="US/Central")
  118. result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
  119. assert result is arr
  120. @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"])
  121. @pytest.mark.parametrize(
  122. "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"]
  123. )
  124. def test_astype_copies(self, dtype, other):
  125. # https://github.com/pandas-dev/pandas/pull/32490
  126. s = pd.Series([1, 2], dtype=dtype)
  127. orig = s.copy()
  128. t = s.astype(other)
  129. t[:] = pd.NaT
  130. tm.assert_series_equal(s, orig)
  131. @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
  132. def test_astype_int(self, dtype):
  133. arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")])
  134. result = arr.astype(dtype)
  135. if np.dtype(dtype).kind == "u":
  136. expected_dtype = np.dtype("uint64")
  137. else:
  138. expected_dtype = np.dtype("int64")
  139. expected = arr.astype(expected_dtype)
  140. assert result.dtype == expected_dtype
  141. tm.assert_numpy_array_equal(result, expected)
  142. def test_tz_setter_raises(self):
  143. arr = DatetimeArray._from_sequence(["2000"], tz="US/Central")
  144. with pytest.raises(AttributeError, match="tz_localize"):
  145. arr.tz = "UTC"
  146. def test_setitem_different_tz_raises(self):
  147. data = np.array([1, 2, 3], dtype="M8[ns]")
  148. arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central"))
  149. with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
  150. arr[0] = pd.Timestamp("2000")
  151. with pytest.raises(ValueError, match="US/Central"):
  152. arr[0] = pd.Timestamp("2000", tz="US/Eastern")
  153. def test_setitem_clears_freq(self):
  154. a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
  155. a[0] = pd.Timestamp("2000", tz="US/Central")
  156. assert a.freq is None
  157. @pytest.mark.parametrize(
  158. "obj",
  159. [
  160. pd.Timestamp.now(),
  161. pd.Timestamp.now().to_datetime64(),
  162. pd.Timestamp.now().to_pydatetime(),
  163. ],
  164. )
  165. def test_setitem_objects(self, obj):
  166. # make sure we accept datetime64 and datetime in addition to Timestamp
  167. dti = pd.date_range("2000", periods=2, freq="D")
  168. arr = dti._data
  169. arr[0] = obj
  170. assert arr[0] == obj
  171. def test_repeat_preserves_tz(self):
  172. dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
  173. arr = DatetimeArray(dti)
  174. repeated = arr.repeat([1, 1])
  175. # preserves tz and values, but not freq
  176. expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype)
  177. tm.assert_equal(repeated, expected)
  178. def test_value_counts_preserves_tz(self):
  179. dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
  180. arr = DatetimeArray(dti).repeat([4, 3])
  181. result = arr.value_counts()
  182. # Note: not tm.assert_index_equal, since `freq`s do not match
  183. assert result.index.equals(dti)
  184. arr[-2] = pd.NaT
  185. result = arr.value_counts()
  186. expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]])
  187. tm.assert_series_equal(result, expected)
  188. @pytest.mark.parametrize("method", ["pad", "backfill"])
  189. def test_fillna_preserves_tz(self, method):
  190. dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
  191. arr = DatetimeArray(dti, copy=True)
  192. arr[2] = pd.NaT
  193. fill_val = dti[1] if method == "pad" else dti[3]
  194. expected = DatetimeArray._from_sequence(
  195. [dti[0], dti[1], fill_val, dti[3], dti[4]], freq=None, tz="US/Central"
  196. )
  197. result = arr.fillna(method=method)
  198. tm.assert_extension_array_equal(result, expected)
  199. # assert that arr and dti were not modified in-place
  200. assert arr[2] is pd.NaT
  201. assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central")
  202. def test_array_interface_tz(self):
  203. tz = "US/Central"
  204. data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz))
  205. result = np.asarray(data)
  206. expected = np.array(
  207. [
  208. pd.Timestamp("2017-01-01T00:00:00", tz=tz),
  209. pd.Timestamp("2017-01-02T00:00:00", tz=tz),
  210. ],
  211. dtype=object,
  212. )
  213. tm.assert_numpy_array_equal(result, expected)
  214. result = np.asarray(data, dtype=object)
  215. tm.assert_numpy_array_equal(result, expected)
  216. result = np.asarray(data, dtype="M8[ns]")
  217. expected = np.array(
  218. ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]"
  219. )
  220. tm.assert_numpy_array_equal(result, expected)
  221. def test_array_interface(self):
  222. data = DatetimeArray(pd.date_range("2017", periods=2))
  223. expected = np.array(
  224. ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]"
  225. )
  226. result = np.asarray(data)
  227. tm.assert_numpy_array_equal(result, expected)
  228. result = np.asarray(data, dtype=object)
  229. expected = np.array(
  230. [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")],
  231. dtype=object,
  232. )
  233. tm.assert_numpy_array_equal(result, expected)
  234. @pytest.mark.parametrize("index", [True, False])
  235. def test_searchsorted_different_tz(self, index):
  236. data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
  237. arr = DatetimeArray(data, freq="D").tz_localize("Asia/Tokyo")
  238. if index:
  239. arr = pd.Index(arr)
  240. expected = arr.searchsorted(arr[2])
  241. result = arr.searchsorted(arr[2].tz_convert("UTC"))
  242. assert result == expected
  243. expected = arr.searchsorted(arr[2:6])
  244. result = arr.searchsorted(arr[2:6].tz_convert("UTC"))
  245. tm.assert_equal(result, expected)
  246. @pytest.mark.parametrize("index", [True, False])
  247. def test_searchsorted_tzawareness_compat(self, index):
  248. data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
  249. arr = DatetimeArray(data, freq="D")
  250. if index:
  251. arr = pd.Index(arr)
  252. mismatch = arr.tz_localize("Asia/Tokyo")
  253. msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
  254. with pytest.raises(TypeError, match=msg):
  255. arr.searchsorted(mismatch[0])
  256. with pytest.raises(TypeError, match=msg):
  257. arr.searchsorted(mismatch)
  258. with pytest.raises(TypeError, match=msg):
  259. mismatch.searchsorted(arr[0])
  260. with pytest.raises(TypeError, match=msg):
  261. mismatch.searchsorted(arr)
  262. @pytest.mark.parametrize(
  263. "other",
  264. [
  265. 1,
  266. np.int64(1),
  267. 1.0,
  268. np.timedelta64("NaT"),
  269. pd.Timedelta(days=2),
  270. "invalid",
  271. np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9,
  272. np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10 ** 9,
  273. pd.Timestamp.now().to_period("D"),
  274. ],
  275. )
  276. @pytest.mark.parametrize(
  277. "index",
  278. [
  279. True,
  280. pytest.param(
  281. False,
  282. marks=pytest.mark.xfail(
  283. reason="Raises ValueError instead of TypeError", raises=ValueError
  284. ),
  285. ),
  286. ],
  287. )
  288. def test_searchsorted_invalid_types(self, other, index):
  289. data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
  290. arr = DatetimeArray(data, freq="D")
  291. if index:
  292. arr = pd.Index(arr)
  293. msg = "searchsorted requires compatible dtype or scalar"
  294. with pytest.raises(TypeError, match=msg):
  295. arr.searchsorted(other)
  296. class TestSequenceToDT64NS:
  297. def test_tz_dtype_mismatch_raises(self):
  298. arr = DatetimeArray._from_sequence(["2000"], tz="US/Central")
  299. with pytest.raises(TypeError, match="data is already tz-aware"):
  300. sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC"))
  301. def test_tz_dtype_matches(self):
  302. arr = DatetimeArray._from_sequence(["2000"], tz="US/Central")
  303. result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central"))
  304. tm.assert_numpy_array_equal(arr._data, result)
  305. class TestReductions:
  306. @pytest.mark.parametrize("tz", [None, "US/Central"])
  307. def test_min_max(self, tz):
  308. arr = DatetimeArray._from_sequence(
  309. [
  310. "2000-01-03",
  311. "2000-01-03",
  312. "NaT",
  313. "2000-01-02",
  314. "2000-01-05",
  315. "2000-01-04",
  316. ],
  317. tz=tz,
  318. )
  319. result = arr.min()
  320. expected = pd.Timestamp("2000-01-02", tz=tz)
  321. assert result == expected
  322. result = arr.max()
  323. expected = pd.Timestamp("2000-01-05", tz=tz)
  324. assert result == expected
  325. result = arr.min(skipna=False)
  326. assert result is pd.NaT
  327. result = arr.max(skipna=False)
  328. assert result is pd.NaT
  329. @pytest.mark.parametrize("tz", [None, "US/Central"])
  330. @pytest.mark.parametrize("skipna", [True, False])
  331. def test_min_max_empty(self, skipna, tz):
  332. arr = DatetimeArray._from_sequence([], tz=tz)
  333. result = arr.min(skipna=skipna)
  334. assert result is pd.NaT
  335. result = arr.max(skipna=skipna)
  336. assert result is pd.NaT