# test_timedeltas.py (9.2 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.core.arrays import TimedeltaArray
  6. class TestTimedeltaArrayConstructor:
  7. def test_only_1dim_accepted(self):
  8. # GH#25282
  9. arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]")
  10. with pytest.raises(ValueError, match="Only 1-dimensional"):
  11. # 3-dim, we allow 2D to sneak in for ops purposes GH#29853
  12. TimedeltaArray(arr.reshape(2, 2, 1))
  13. with pytest.raises(ValueError, match="Only 1-dimensional"):
  14. # 0-dim
  15. TimedeltaArray(arr[[0]].squeeze())
  16. def test_freq_validation(self):
  17. # ensure that the public constructor cannot create an invalid instance
  18. arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10 ** 9
  19. msg = (
  20. "Inferred frequency None from passed values does not "
  21. "conform to passed frequency D"
  22. )
  23. with pytest.raises(ValueError, match=msg):
  24. TimedeltaArray(arr.view("timedelta64[ns]"), freq="D")
  25. def test_non_array_raises(self):
  26. with pytest.raises(ValueError, match="list"):
  27. TimedeltaArray([1, 2, 3])
  28. def test_other_type_raises(self):
  29. with pytest.raises(ValueError, match="dtype bool cannot be converted"):
  30. TimedeltaArray(np.array([1, 2, 3], dtype="bool"))
  31. def test_incorrect_dtype_raises(self):
  32. # TODO: why TypeError for 'category' but ValueError for i8?
  33. with pytest.raises(
  34. ValueError, match=r"category cannot be converted to timedelta64\[ns\]"
  35. ):
  36. TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
  37. with pytest.raises(
  38. ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]",
  39. ):
  40. TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64"))
  41. def test_copy(self):
  42. data = np.array([1, 2, 3], dtype="m8[ns]")
  43. arr = TimedeltaArray(data, copy=False)
  44. assert arr._data is data
  45. arr = TimedeltaArray(data, copy=True)
  46. assert arr._data is not data
  47. assert arr._data.base is not data
  48. class TestTimedeltaArray:
  49. def test_np_sum(self):
  50. # GH#25282
  51. vals = np.arange(5, dtype=np.int64).view("m8[h]").astype("m8[ns]")
  52. arr = TimedeltaArray(vals)
  53. result = np.sum(arr)
  54. assert result == vals.sum()
  55. result = np.sum(pd.TimedeltaIndex(arr))
  56. assert result == vals.sum()
  57. def test_from_sequence_dtype(self):
  58. msg = "dtype .*object.* cannot be converted to timedelta64"
  59. with pytest.raises(ValueError, match=msg):
  60. TimedeltaArray._from_sequence([], dtype=object)
  61. def test_abs(self):
  62. vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
  63. arr = TimedeltaArray(vals)
  64. evals = np.array([3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
  65. expected = TimedeltaArray(evals)
  66. result = abs(arr)
  67. tm.assert_timedelta_array_equal(result, expected)
  68. def test_neg(self):
  69. vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
  70. arr = TimedeltaArray(vals)
  71. evals = np.array([3600 * 10 ** 9, "NaT", -7200 * 10 ** 9], dtype="m8[ns]")
  72. expected = TimedeltaArray(evals)
  73. result = -arr
  74. tm.assert_timedelta_array_equal(result, expected)
  75. def test_neg_freq(self):
  76. tdi = pd.timedelta_range("2 Days", periods=4, freq="H")
  77. arr = TimedeltaArray(tdi, freq=tdi.freq)
  78. expected = TimedeltaArray(-tdi._data, freq=-tdi.freq)
  79. result = -arr
  80. tm.assert_timedelta_array_equal(result, expected)
  81. @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
  82. def test_astype_int(self, dtype):
  83. arr = TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")])
  84. result = arr.astype(dtype)
  85. if np.dtype(dtype).kind == "u":
  86. expected_dtype = np.dtype("uint64")
  87. else:
  88. expected_dtype = np.dtype("int64")
  89. expected = arr.astype(expected_dtype)
  90. assert result.dtype == expected_dtype
  91. tm.assert_numpy_array_equal(result, expected)
  92. def test_setitem_clears_freq(self):
  93. a = TimedeltaArray(pd.timedelta_range("1H", periods=2, freq="H"))
  94. a[0] = pd.Timedelta("1H")
  95. assert a.freq is None
  96. @pytest.mark.parametrize(
  97. "obj",
  98. [
  99. pd.Timedelta(seconds=1),
  100. pd.Timedelta(seconds=1).to_timedelta64(),
  101. pd.Timedelta(seconds=1).to_pytimedelta(),
  102. ],
  103. )
  104. def test_setitem_objects(self, obj):
  105. # make sure we accept timedelta64 and timedelta in addition to Timedelta
  106. tdi = pd.timedelta_range("2 Days", periods=4, freq="H")
  107. arr = TimedeltaArray(tdi, freq=tdi.freq)
  108. arr[0] = obj
  109. assert arr[0] == pd.Timedelta(seconds=1)
  110. @pytest.mark.parametrize(
  111. "other",
  112. [
  113. 1,
  114. np.int64(1),
  115. 1.0,
  116. np.datetime64("NaT"),
  117. pd.Timestamp.now(),
  118. "invalid",
  119. np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9,
  120. (np.arange(10) * 24 * 3600 * 10 ** 9).view("datetime64[ns]"),
  121. pd.Timestamp.now().to_period("D"),
  122. ],
  123. )
  124. @pytest.mark.parametrize(
  125. "index",
  126. [
  127. True,
  128. pytest.param(
  129. False,
  130. marks=pytest.mark.xfail(
  131. reason="Raises ValueError instead of TypeError", raises=ValueError
  132. ),
  133. ),
  134. ],
  135. )
  136. def test_searchsorted_invalid_types(self, other, index):
  137. data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
  138. arr = TimedeltaArray(data, freq="D")
  139. if index:
  140. arr = pd.Index(arr)
  141. msg = "searchsorted requires compatible dtype or scalar"
  142. with pytest.raises(TypeError, match=msg):
  143. arr.searchsorted(other)
  144. class TestReductions:
  145. @pytest.mark.parametrize("name", ["sum", "std", "min", "max", "median"])
  146. @pytest.mark.parametrize("skipna", [True, False])
  147. def test_reductions_empty(self, name, skipna):
  148. tdi = pd.TimedeltaIndex([])
  149. arr = tdi.array
  150. result = getattr(tdi, name)(skipna=skipna)
  151. assert result is pd.NaT
  152. result = getattr(arr, name)(skipna=skipna)
  153. assert result is pd.NaT
  154. def test_min_max(self):
  155. arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"])
  156. result = arr.min()
  157. expected = pd.Timedelta("2H")
  158. assert result == expected
  159. result = arr.max()
  160. expected = pd.Timedelta("5H")
  161. assert result == expected
  162. result = arr.min(skipna=False)
  163. assert result is pd.NaT
  164. result = arr.max(skipna=False)
  165. assert result is pd.NaT
  166. def test_sum(self):
  167. tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"])
  168. arr = tdi.array
  169. result = arr.sum(skipna=True)
  170. expected = pd.Timedelta(hours=17)
  171. assert isinstance(result, pd.Timedelta)
  172. assert result == expected
  173. result = tdi.sum(skipna=True)
  174. assert isinstance(result, pd.Timedelta)
  175. assert result == expected
  176. result = arr.sum(skipna=False)
  177. assert result is pd.NaT
  178. result = tdi.sum(skipna=False)
  179. assert result is pd.NaT
  180. result = arr.sum(min_count=9)
  181. assert result is pd.NaT
  182. result = tdi.sum(min_count=9)
  183. assert result is pd.NaT
  184. result = arr.sum(min_count=1)
  185. assert isinstance(result, pd.Timedelta)
  186. assert result == expected
  187. result = tdi.sum(min_count=1)
  188. assert isinstance(result, pd.Timedelta)
  189. assert result == expected
  190. def test_npsum(self):
  191. # GH#25335 np.sum should return a Timedelta, not timedelta64
  192. tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"])
  193. arr = tdi.array
  194. result = np.sum(tdi)
  195. expected = pd.Timedelta(hours=17)
  196. assert isinstance(result, pd.Timedelta)
  197. assert result == expected
  198. result = np.sum(arr)
  199. assert isinstance(result, pd.Timedelta)
  200. assert result == expected
  201. def test_std(self):
  202. tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"])
  203. arr = tdi.array
  204. result = arr.std(skipna=True)
  205. expected = pd.Timedelta(hours=2)
  206. assert isinstance(result, pd.Timedelta)
  207. assert result == expected
  208. result = tdi.std(skipna=True)
  209. assert isinstance(result, pd.Timedelta)
  210. assert result == expected
  211. result = arr.std(skipna=False)
  212. assert result is pd.NaT
  213. result = tdi.std(skipna=False)
  214. assert result is pd.NaT
  215. def test_median(self):
  216. tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])
  217. arr = tdi.array
  218. result = arr.median(skipna=True)
  219. expected = pd.Timedelta(hours=2)
  220. assert isinstance(result, pd.Timedelta)
  221. assert result == expected
  222. result = tdi.median(skipna=True)
  223. assert isinstance(result, pd.Timedelta)
  224. assert result == expected
  225. result = arr.std(skipna=False)
  226. assert result is pd.NaT
  227. result = tdi.std(skipna=False)
  228. assert result is pd.NaT