# test_period.py: tests for period_array and PeriodArray.
import numpy as np
import pytest

from pandas._libs.tslibs import iNaT
from pandas._libs.tslibs.period import IncompatibleFrequency
import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import PeriodDtype, registry

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import PeriodArray, period_array
# ----------------------------------------------------------------------------
# Dtype
  12. def test_registered():
  13. assert PeriodDtype in registry.dtypes
  14. result = registry.find("Period[D]")
  15. expected = PeriodDtype("D")
  16. assert result == expected
# ----------------------------------------------------------------------------
# period_array
  19. @pytest.mark.parametrize(
  20. "data, freq, expected",
  21. [
  22. ([pd.Period("2017", "D")], None, [17167]),
  23. ([pd.Period("2017", "D")], "D", [17167]),
  24. ([2017], "D", [17167]),
  25. (["2017"], "D", [17167]),
  26. ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
  27. ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
  28. (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]),
  29. (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
  30. ],
  31. )
  32. def test_period_array_ok(data, freq, expected):
  33. result = period_array(data, freq=freq).asi8
  34. expected = np.asarray(expected, dtype=np.int64)
  35. tm.assert_numpy_array_equal(result, expected)
  36. def test_period_array_readonly_object():
  37. # https://github.com/pandas-dev/pandas/issues/25403
  38. pa = period_array([pd.Period("2019-01-01")])
  39. arr = np.asarray(pa, dtype="object")
  40. arr.setflags(write=False)
  41. result = period_array(arr)
  42. tm.assert_period_array_equal(result, pa)
  43. result = pd.Series(arr)
  44. tm.assert_series_equal(result, pd.Series(pa))
  45. result = pd.DataFrame({"A": arr})
  46. tm.assert_frame_equal(result, pd.DataFrame({"A": pa}))
  47. def test_from_datetime64_freq_changes():
  48. # https://github.com/pandas-dev/pandas/issues/23438
  49. arr = pd.date_range("2017", periods=3, freq="D")
  50. result = PeriodArray._from_datetime64(arr, freq="M")
  51. expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M")
  52. tm.assert_period_array_equal(result, expected)
  53. @pytest.mark.parametrize(
  54. "data, freq, msg",
  55. [
  56. (
  57. [pd.Period("2017", "D"), pd.Period("2017", "A")],
  58. None,
  59. "Input has different freq",
  60. ),
  61. ([pd.Period("2017", "D")], "A", "Input has different freq"),
  62. ],
  63. )
  64. def test_period_array_raises(data, freq, msg):
  65. with pytest.raises(IncompatibleFrequency, match=msg):
  66. period_array(data, freq)
  67. def test_period_array_non_period_series_raies():
  68. ser = pd.Series([1, 2, 3])
  69. with pytest.raises(TypeError, match="dtype"):
  70. PeriodArray(ser, freq="D")
  71. def test_period_array_freq_mismatch():
  72. arr = period_array(["2000", "2001"], freq="D")
  73. with pytest.raises(IncompatibleFrequency, match="freq"):
  74. PeriodArray(arr, freq="M")
  75. with pytest.raises(IncompatibleFrequency, match="freq"):
  76. PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd())
  77. def test_asi8():
  78. result = period_array(["2000", "2001", None], freq="D").asi8
  79. expected = np.array([10957, 11323, iNaT])
  80. tm.assert_numpy_array_equal(result, expected)
  81. def test_take_raises():
  82. arr = period_array(["2000", "2001"], freq="D")
  83. with pytest.raises(IncompatibleFrequency, match="freq"):
  84. arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W"))
  85. with pytest.raises(ValueError, match="foo"):
  86. arr.take([0, -1], allow_fill=True, fill_value="foo")
  87. @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
  88. def test_astype(dtype):
  89. # We choose to ignore the sign and size of integers for
  90. # Period/Datetime/Timedelta astype
  91. arr = period_array(["2000", "2001", None], freq="D")
  92. result = arr.astype(dtype)
  93. if np.dtype(dtype).kind == "u":
  94. expected_dtype = np.dtype("uint64")
  95. else:
  96. expected_dtype = np.dtype("int64")
  97. expected = arr.astype(expected_dtype)
  98. assert result.dtype == expected_dtype
  99. tm.assert_numpy_array_equal(result, expected)
  100. def test_astype_copies():
  101. arr = period_array(["2000", "2001", None], freq="D")
  102. result = arr.astype(np.int64, copy=False)
  103. # Add the `.base`, since we now use `.asi8` which returns a view.
  104. # We could maybe override it in PeriodArray to return ._data directly.
  105. assert result.base is arr._data
  106. result = arr.astype(np.int64, copy=True)
  107. assert result is not arr._data
  108. tm.assert_numpy_array_equal(result, arr._data.view("i8"))
  109. def test_astype_categorical():
  110. arr = period_array(["2000", "2001", "2001", None], freq="D")
  111. result = arr.astype("category")
  112. categories = pd.PeriodIndex(["2000", "2001"], freq="D")
  113. expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
  114. tm.assert_categorical_equal(result, expected)
  115. def test_astype_period():
  116. arr = period_array(["2000", "2001", None], freq="D")
  117. result = arr.astype(PeriodDtype("M"))
  118. expected = period_array(["2000", "2001", None], freq="M")
  119. tm.assert_period_array_equal(result, expected)
  120. @pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"])
  121. def test_astype_datetime(other):
  122. arr = period_array(["2000", "2001", None], freq="D")
  123. # slice off the [ns] so that the regex matches.
  124. with pytest.raises(TypeError, match=other[:-4]):
  125. arr.astype(other)
  126. def test_fillna_raises():
  127. arr = period_array(["2000", "2001", "2002"], freq="D")
  128. with pytest.raises(ValueError, match="Length"):
  129. arr.fillna(arr[:2])
  130. def test_fillna_copies():
  131. arr = period_array(["2000", "2001", "2002"], freq="D")
  132. result = arr.fillna(pd.Period("2000", "D"))
  133. assert result is not arr
# ----------------------------------------------------------------------------
# setitem
  136. @pytest.mark.parametrize(
  137. "key, value, expected",
  138. [
  139. ([0], pd.Period("2000", "D"), [10957, 1, 2]),
  140. ([0], None, [iNaT, 1, 2]),
  141. ([0], np.nan, [iNaT, 1, 2]),
  142. ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
  143. (
  144. [0, 1, 2],
  145. [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
  146. [10957, 11323, 11688],
  147. ),
  148. ],
  149. )
  150. def test_setitem(key, value, expected):
  151. arr = PeriodArray(np.arange(3), freq="D")
  152. expected = PeriodArray(expected, freq="D")
  153. arr[key] = value
  154. tm.assert_period_array_equal(arr, expected)
  155. def test_setitem_raises_incompatible_freq():
  156. arr = PeriodArray(np.arange(3), freq="D")
  157. with pytest.raises(IncompatibleFrequency, match="freq"):
  158. arr[0] = pd.Period("2000", freq="A")
  159. other = period_array(["2000", "2001"], freq="A")
  160. with pytest.raises(IncompatibleFrequency, match="freq"):
  161. arr[[0, 1]] = other
  162. def test_setitem_raises_length():
  163. arr = PeriodArray(np.arange(3), freq="D")
  164. with pytest.raises(ValueError, match="length"):
  165. arr[[0, 1]] = [pd.Period("2000", freq="D")]
  166. def test_setitem_raises_type():
  167. arr = PeriodArray(np.arange(3), freq="D")
  168. with pytest.raises(TypeError, match="int"):
  169. arr[0] = 1
# ----------------------------------------------------------------------------
# Ops
  172. def test_sub_period():
  173. arr = period_array(["2000", "2001"], freq="D")
  174. other = pd.Period("2000", freq="M")
  175. with pytest.raises(IncompatibleFrequency, match="freq"):
  176. arr - other
# ----------------------------------------------------------------------------
# Methods
  179. @pytest.mark.parametrize(
  180. "other",
  181. [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")],
  182. )
  183. def test_where_different_freq_raises(other):
  184. ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D"))
  185. cond = np.array([True, False, True])
  186. with pytest.raises(IncompatibleFrequency, match="freq"):
  187. ser.where(cond, other)
# ----------------------------------------------------------------------------
# Printing
  190. def test_repr_small():
  191. arr = period_array(["2000", "2001"], freq="D")
  192. result = str(arr)
  193. expected = (
  194. "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
  195. )
  196. assert result == expected
  197. def test_repr_large():
  198. arr = period_array(["2000", "2001"] * 500, freq="D")
  199. result = str(arr)
  200. expected = (
  201. "<PeriodArray>\n"
  202. "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
  203. "'2000-01-01',\n"
  204. " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
  205. "'2001-01-01',\n"
  206. " ...\n"
  207. " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
  208. "'2000-01-01',\n"
  209. " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
  210. "'2001-01-01']\n"
  211. "Length: 1000, dtype: period[D]"
  212. )
  213. assert result == expected
# ----------------------------------------------------------------------------
# Reductions
  216. class TestReductions:
  217. def test_min_max(self):
  218. arr = period_array(
  219. [
  220. "2000-01-03",
  221. "2000-01-03",
  222. "NaT",
  223. "2000-01-02",
  224. "2000-01-05",
  225. "2000-01-04",
  226. ],
  227. freq="D",
  228. )
  229. result = arr.min()
  230. expected = pd.Period("2000-01-02", freq="D")
  231. assert result == expected
  232. result = arr.max()
  233. expected = pd.Period("2000-01-05", freq="D")
  234. assert result == expected
  235. result = arr.min(skipna=False)
  236. assert result is pd.NaT
  237. result = arr.max(skipna=False)
  238. assert result is pd.NaT
  239. @pytest.mark.parametrize("skipna", [True, False])
  240. def test_min_max_empty(self, skipna):
  241. arr = period_array([], freq="D")
  242. result = arr.min(skipna=skipna)
  243. assert result is pd.NaT
  244. result = arr.max(skipna=skipna)
  245. assert result is pd.NaT
# ----------------------------------------------------------------------------
# Arrow interaction
  248. pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev")
  249. @pyarrow_skip
  250. def test_arrow_extension_type():
  251. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  252. p1 = ArrowPeriodType("D")
  253. p2 = ArrowPeriodType("D")
  254. p3 = ArrowPeriodType("M")
  255. assert p1.freq == "D"
  256. assert p1 == p2
  257. assert not p1 == p3
  258. assert hash(p1) == hash(p2)
  259. assert not hash(p1) == hash(p3)
  260. @pyarrow_skip
  261. @pytest.mark.parametrize(
  262. "data, freq",
  263. [
  264. (pd.date_range("2017", periods=3), "D"),
  265. (pd.date_range("2017", periods=3, freq="A"), "A-DEC"),
  266. ],
  267. )
  268. def test_arrow_array(data, freq):
  269. import pyarrow as pa
  270. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  271. periods = period_array(data, freq=freq)
  272. result = pa.array(periods)
  273. assert isinstance(result.type, ArrowPeriodType)
  274. assert result.type.freq == freq
  275. expected = pa.array(periods.asi8, type="int64")
  276. assert result.storage.equals(expected)
  277. # convert to its storage type
  278. result = pa.array(periods, type=pa.int64())
  279. assert result.equals(expected)
  280. # unsupported conversions
  281. with pytest.raises(TypeError):
  282. pa.array(periods, type="float64")
  283. with pytest.raises(TypeError, match="different 'freq'"):
  284. pa.array(periods, type=ArrowPeriodType("T"))
  285. @pyarrow_skip
  286. def test_arrow_array_missing():
  287. import pyarrow as pa
  288. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  289. arr = PeriodArray([1, 2, 3], freq="D")
  290. arr[1] = pd.NaT
  291. result = pa.array(arr)
  292. assert isinstance(result.type, ArrowPeriodType)
  293. assert result.type.freq == "D"
  294. expected = pa.array([1, None, 3], type="int64")
  295. assert result.storage.equals(expected)
  296. @pyarrow_skip
  297. def test_arrow_table_roundtrip():
  298. import pyarrow as pa
  299. from pandas.core.arrays._arrow_utils import ArrowPeriodType
  300. arr = PeriodArray([1, 2, 3], freq="D")
  301. arr[1] = pd.NaT
  302. df = pd.DataFrame({"a": arr})
  303. table = pa.table(df)
  304. assert isinstance(table.field("a").type, ArrowPeriodType)
  305. result = table.to_pandas()
  306. assert isinstance(result["a"].dtype, PeriodDtype)
  307. tm.assert_frame_equal(result, df)
  308. table2 = pa.concat_tables([table, table])
  309. result = table2.to_pandas()
  310. expected = pd.concat([df, df], ignore_index=True)
  311. tm.assert_frame_equal(result, expected)