test_analytics.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. import numpy as np
  2. import pytest
  3. from pandas.compat.numpy import _np_version_under1p17
  4. import pandas as pd
  5. from pandas import Index, MultiIndex, date_range, period_range
  6. import pandas._testing as tm
  7. def test_shift(idx):
  8. # GH8083 test the base class for shift
  9. msg = "Not supported for type MultiIndex"
  10. with pytest.raises(NotImplementedError, match=msg):
  11. idx.shift(1)
  12. with pytest.raises(NotImplementedError, match=msg):
  13. idx.shift(1, 2)
  14. def test_groupby(idx):
  15. groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
  16. labels = idx.tolist()
  17. exp = {1: labels[:3], 2: labels[3:]}
  18. tm.assert_dict_equal(groups, exp)
  19. # GH5620
  20. groups = idx.groupby(idx)
  21. exp = {key: [key] for key in idx}
  22. tm.assert_dict_equal(groups, exp)
  23. def test_truncate():
  24. major_axis = Index(list(range(4)))
  25. minor_axis = Index(list(range(2)))
  26. major_codes = np.array([0, 0, 1, 2, 3, 3])
  27. minor_codes = np.array([0, 1, 0, 1, 0, 1])
  28. index = MultiIndex(
  29. levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
  30. )
  31. result = index.truncate(before=1)
  32. assert "foo" not in result.levels[0]
  33. assert 1 in result.levels[0]
  34. result = index.truncate(after=1)
  35. assert 2 not in result.levels[0]
  36. assert 1 in result.levels[0]
  37. result = index.truncate(before=1, after=2)
  38. assert len(result.levels[0]) == 2
  39. msg = "after < before"
  40. with pytest.raises(ValueError, match=msg):
  41. index.truncate(3, 1)
  42. def test_where():
  43. i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
  44. msg = r"\.where is not supported for MultiIndex operations"
  45. with pytest.raises(NotImplementedError, match=msg):
  46. i.where(True)
  47. @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
  48. def test_where_array_like(klass):
  49. i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
  50. cond = [False, True]
  51. msg = r"\.where is not supported for MultiIndex operations"
  52. with pytest.raises(NotImplementedError, match=msg):
  53. i.where(klass(cond))
  54. # TODO: reshape
  55. def test_reorder_levels(idx):
  56. # this blows up
  57. with pytest.raises(IndexError, match="^Too many levels"):
  58. idx.reorder_levels([2, 1, 0])
  59. def test_numpy_repeat():
  60. reps = 2
  61. numbers = [1, 2, 3]
  62. names = np.array(["foo", "bar"])
  63. m = MultiIndex.from_product([numbers, names], names=names)
  64. expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
  65. tm.assert_index_equal(np.repeat(m, reps), expected)
  66. msg = "the 'axis' parameter is not supported"
  67. with pytest.raises(ValueError, match=msg):
  68. np.repeat(m, reps, axis=1)
  69. def test_append_mixed_dtypes():
  70. # GH 13660
  71. dti = date_range("2011-01-01", freq="M", periods=3)
  72. dti_tz = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern")
  73. pi = period_range("2011-01", freq="M", periods=3)
  74. mi = MultiIndex.from_arrays(
  75. [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
  76. )
  77. assert mi.nlevels == 6
  78. res = mi.append(mi)
  79. exp = MultiIndex.from_arrays(
  80. [
  81. [1, 2, 3, 1, 2, 3],
  82. [1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
  83. ["a", "b", "c", "a", "b", "c"],
  84. dti.append(dti),
  85. dti_tz.append(dti_tz),
  86. pi.append(pi),
  87. ]
  88. )
  89. tm.assert_index_equal(res, exp)
  90. other = MultiIndex.from_arrays(
  91. [
  92. ["x", "y", "z"],
  93. ["x", "y", "z"],
  94. ["x", "y", "z"],
  95. ["x", "y", "z"],
  96. ["x", "y", "z"],
  97. ["x", "y", "z"],
  98. ]
  99. )
  100. res = mi.append(other)
  101. exp = MultiIndex.from_arrays(
  102. [
  103. [1, 2, 3, "x", "y", "z"],
  104. [1.1, np.nan, 3.3, "x", "y", "z"],
  105. ["a", "b", "c", "x", "y", "z"],
  106. dti.append(pd.Index(["x", "y", "z"])),
  107. dti_tz.append(pd.Index(["x", "y", "z"])),
  108. pi.append(pd.Index(["x", "y", "z"])),
  109. ]
  110. )
  111. tm.assert_index_equal(res, exp)
  112. def test_take(idx):
  113. indexer = [4, 3, 0, 2]
  114. result = idx.take(indexer)
  115. expected = idx[indexer]
  116. assert result.equals(expected)
  117. # TODO: Remove Commented Code
  118. # if not isinstance(idx,
  119. # (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
  120. # GH 10791
  121. msg = "'MultiIndex' object has no attribute 'freq'"
  122. with pytest.raises(AttributeError, match=msg):
  123. idx.freq
  124. def test_take_invalid_kwargs(idx):
  125. idx = idx
  126. indices = [1, 2]
  127. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  128. with pytest.raises(TypeError, match=msg):
  129. idx.take(indices, foo=2)
  130. msg = "the 'out' parameter is not supported"
  131. with pytest.raises(ValueError, match=msg):
  132. idx.take(indices, out=indices)
  133. msg = "the 'mode' parameter is not supported"
  134. with pytest.raises(ValueError, match=msg):
  135. idx.take(indices, mode="clip")
  136. def test_take_fill_value():
  137. # GH 12631
  138. vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
  139. idx = pd.MultiIndex.from_product(vals, names=["str", "dt"])
  140. result = idx.take(np.array([1, 0, -1]))
  141. exp_vals = [
  142. ("A", pd.Timestamp("2011-01-02")),
  143. ("A", pd.Timestamp("2011-01-01")),
  144. ("B", pd.Timestamp("2011-01-02")),
  145. ]
  146. expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
  147. tm.assert_index_equal(result, expected)
  148. # fill_value
  149. result = idx.take(np.array([1, 0, -1]), fill_value=True)
  150. exp_vals = [
  151. ("A", pd.Timestamp("2011-01-02")),
  152. ("A", pd.Timestamp("2011-01-01")),
  153. (np.nan, pd.NaT),
  154. ]
  155. expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
  156. tm.assert_index_equal(result, expected)
  157. # allow_fill=False
  158. result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
  159. exp_vals = [
  160. ("A", pd.Timestamp("2011-01-02")),
  161. ("A", pd.Timestamp("2011-01-01")),
  162. ("B", pd.Timestamp("2011-01-02")),
  163. ]
  164. expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
  165. tm.assert_index_equal(result, expected)
  166. msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
  167. with pytest.raises(ValueError, match=msg):
  168. idx.take(np.array([1, 0, -2]), fill_value=True)
  169. with pytest.raises(ValueError, match=msg):
  170. idx.take(np.array([1, 0, -5]), fill_value=True)
  171. msg = "index -5 is out of bounds for( axis 0 with)? size 4"
  172. with pytest.raises(IndexError, match=msg):
  173. idx.take(np.array([1, -5]))
  174. def test_iter(idx):
  175. result = list(idx)
  176. expected = [
  177. ("foo", "one"),
  178. ("foo", "two"),
  179. ("bar", "one"),
  180. ("baz", "two"),
  181. ("qux", "one"),
  182. ("qux", "two"),
  183. ]
  184. assert result == expected
  185. def test_sub(idx):
  186. first = idx
  187. # - now raises (previously was set op difference)
  188. msg = "cannot perform __sub__ with this index type: MultiIndex"
  189. with pytest.raises(TypeError, match=msg):
  190. first - idx[-3:]
  191. with pytest.raises(TypeError, match=msg):
  192. idx[-3:] - first
  193. with pytest.raises(TypeError, match=msg):
  194. idx[-3:] - first.tolist()
  195. msg = "cannot perform __rsub__ with this index type: MultiIndex"
  196. with pytest.raises(TypeError, match=msg):
  197. first.tolist() - idx[-3:]
  198. def test_map(idx):
  199. # callable
  200. index = idx
  201. # we don't infer UInt64
  202. if isinstance(index, pd.UInt64Index):
  203. expected = index.astype("int64")
  204. else:
  205. expected = index
  206. result = index.map(lambda x: x)
  207. tm.assert_index_equal(result, expected)
  208. @pytest.mark.parametrize(
  209. "mapper",
  210. [
  211. lambda values, idx: {i: e for e, i in zip(values, idx)},
  212. lambda values, idx: pd.Series(values, idx),
  213. ],
  214. )
  215. def test_map_dictlike(idx, mapper):
  216. if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)):
  217. pytest.skip(f"skipping tests for {type(idx)}")
  218. identity = mapper(idx.values, idx)
  219. # we don't infer to UInt64 for a dict
  220. if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict):
  221. expected = idx.astype("int64")
  222. else:
  223. expected = idx
  224. result = idx.map(identity)
  225. tm.assert_index_equal(result, expected)
  226. # empty mappable
  227. expected = pd.Index([np.nan] * len(idx))
  228. result = idx.map(mapper(expected, idx))
  229. tm.assert_index_equal(result, expected)
  230. @pytest.mark.parametrize(
  231. "func",
  232. [
  233. np.exp,
  234. np.exp2,
  235. np.expm1,
  236. np.log,
  237. np.log2,
  238. np.log10,
  239. np.log1p,
  240. np.sqrt,
  241. np.sin,
  242. np.cos,
  243. np.tan,
  244. np.arcsin,
  245. np.arccos,
  246. np.arctan,
  247. np.sinh,
  248. np.cosh,
  249. np.tanh,
  250. np.arcsinh,
  251. np.arccosh,
  252. np.arctanh,
  253. np.deg2rad,
  254. np.rad2deg,
  255. ],
  256. ids=lambda func: func.__name__,
  257. )
  258. def test_numpy_ufuncs(idx, func):
  259. # test ufuncs of numpy. see:
  260. # http://docs.scipy.org/doc/numpy/reference/ufuncs.html
  261. if _np_version_under1p17:
  262. expected_exception = AttributeError
  263. msg = f"'tuple' object has no attribute '{func.__name__}'"
  264. else:
  265. expected_exception = TypeError
  266. msg = (
  267. "loop of ufunc does not support argument 0 of type tuple which"
  268. f" has no callable {func.__name__} method"
  269. )
  270. with pytest.raises(expected_exception, match=msg):
  271. func(idx)
  272. @pytest.mark.parametrize(
  273. "func",
  274. [np.isfinite, np.isinf, np.isnan, np.signbit],
  275. ids=lambda func: func.__name__,
  276. )
  277. def test_numpy_type_funcs(idx, func):
  278. msg = (
  279. f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
  280. "could not be safely coerced to any supported types according to "
  281. "the casting rule ''safe''"
  282. )
  283. with pytest.raises(TypeError, match=msg):
  284. func(idx)