test_drop.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import numpy as np
  2. import pytest
  3. from pandas.errors import PerformanceWarning
  4. import pandas as pd
  5. from pandas import Index, MultiIndex
  6. import pandas._testing as tm
  7. def test_drop(idx):
  8. dropped = idx.drop([("foo", "two"), ("qux", "one")])
  9. index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
  10. dropped2 = idx.drop(index)
  11. expected = idx[[0, 2, 3, 5]]
  12. tm.assert_index_equal(dropped, expected)
  13. tm.assert_index_equal(dropped2, expected)
  14. dropped = idx.drop(["bar"])
  15. expected = idx[[0, 1, 3, 4, 5]]
  16. tm.assert_index_equal(dropped, expected)
  17. dropped = idx.drop("foo")
  18. expected = idx[[2, 3, 4, 5]]
  19. tm.assert_index_equal(dropped, expected)
  20. index = MultiIndex.from_tuples([("bar", "two")])
  21. with pytest.raises(KeyError, match=r"^10$"):
  22. idx.drop([("bar", "two")])
  23. with pytest.raises(KeyError, match=r"^10$"):
  24. idx.drop(index)
  25. with pytest.raises(KeyError, match=r"^'two'$"):
  26. idx.drop(["foo", "two"])
  27. # partially correct argument
  28. mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
  29. with pytest.raises(KeyError, match=r"^10$"):
  30. idx.drop(mixed_index)
  31. # error='ignore'
  32. dropped = idx.drop(index, errors="ignore")
  33. expected = idx[[0, 1, 2, 3, 4, 5]]
  34. tm.assert_index_equal(dropped, expected)
  35. dropped = idx.drop(mixed_index, errors="ignore")
  36. expected = idx[[0, 1, 2, 3, 5]]
  37. tm.assert_index_equal(dropped, expected)
  38. dropped = idx.drop(["foo", "two"], errors="ignore")
  39. expected = idx[[2, 3, 4, 5]]
  40. tm.assert_index_equal(dropped, expected)
  41. # mixed partial / full drop
  42. dropped = idx.drop(["foo", ("qux", "one")])
  43. expected = idx[[2, 3, 5]]
  44. tm.assert_index_equal(dropped, expected)
  45. # mixed partial / full drop / error='ignore'
  46. mixed_index = ["foo", ("qux", "one"), "two"]
  47. with pytest.raises(KeyError, match=r"^'two'$"):
  48. idx.drop(mixed_index)
  49. dropped = idx.drop(mixed_index, errors="ignore")
  50. expected = idx[[2, 3, 5]]
  51. tm.assert_index_equal(dropped, expected)
  52. def test_droplevel_with_names(idx):
  53. index = idx[idx.get_loc("foo")]
  54. dropped = index.droplevel(0)
  55. assert dropped.name == "second"
  56. index = MultiIndex(
  57. levels=[Index(range(4)), Index(range(4)), Index(range(4))],
  58. codes=[
  59. np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  60. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  61. np.array([1, 0, 1, 1, 0, 0, 1, 0]),
  62. ],
  63. names=["one", "two", "three"],
  64. )
  65. dropped = index.droplevel(0)
  66. assert dropped.names == ("two", "three")
  67. dropped = index.droplevel("two")
  68. expected = index.droplevel(1)
  69. assert dropped.equals(expected)
  70. def test_droplevel_list():
  71. index = MultiIndex(
  72. levels=[Index(range(4)), Index(range(4)), Index(range(4))],
  73. codes=[
  74. np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  75. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  76. np.array([1, 0, 1, 1, 0, 0, 1, 0]),
  77. ],
  78. names=["one", "two", "three"],
  79. )
  80. dropped = index[:2].droplevel(["three", "one"])
  81. expected = index[:2].droplevel(2).droplevel(0)
  82. assert dropped.equals(expected)
  83. dropped = index[:2].droplevel([])
  84. expected = index[:2]
  85. assert dropped.equals(expected)
  86. msg = (
  87. "Cannot remove 3 levels from an index with 3 levels: "
  88. "at least one level must be left"
  89. )
  90. with pytest.raises(ValueError, match=msg):
  91. index[:2].droplevel(["one", "two", "three"])
  92. with pytest.raises(KeyError, match="'Level four not found'"):
  93. index[:2].droplevel(["one", "four"])
  94. def test_drop_not_lexsorted():
  95. # GH 12078
  96. # define the lexsorted version of the multi-index
  97. tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
  98. lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
  99. assert lexsorted_mi.is_lexsorted()
  100. # and the not-lexsorted version
  101. df = pd.DataFrame(
  102. columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
  103. )
  104. df = df.pivot_table(index="a", columns=["b", "c"], values="d")
  105. df = df.reset_index()
  106. not_lexsorted_mi = df.columns
  107. assert not not_lexsorted_mi.is_lexsorted()
  108. # compare the results
  109. tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
  110. with tm.assert_produces_warning(PerformanceWarning):
  111. tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
  112. @pytest.mark.parametrize(
  113. "msg,labels,level",
  114. [
  115. (r"labels \[4\] not found in level", 4, "a"),
  116. (r"labels \[7\] not found in level", 7, "b"),
  117. ],
  118. )
  119. def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level):
  120. # GH 8594
  121. mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"])
  122. s = pd.Series([10, 20, 30], index=mi)
  123. df = pd.DataFrame([10, 20, 30], index=mi)
  124. with pytest.raises(KeyError, match=msg):
  125. s.drop(labels, level=level)
  126. with pytest.raises(KeyError, match=msg):
  127. df.drop(labels, level=level)
  128. @pytest.mark.parametrize("labels,level", [(4, "a"), (7, "b")])
  129. def test_drop_errors_ignore(labels, level):
  130. # GH 8594
  131. mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"])
  132. s = pd.Series([10, 20, 30], index=mi)
  133. df = pd.DataFrame([10, 20, 30], index=mi)
  134. expected_s = s.drop(labels, level=level, errors="ignore")
  135. tm.assert_series_equal(s, expected_s)
  136. expected_df = df.drop(labels, level=level, errors="ignore")
  137. tm.assert_frame_equal(df, expected_df)
  138. def test_drop_with_non_unique_datetime_index_and_invalid_keys():
  139. # GH 30399
  140. # define dataframe with unique datetime index
  141. df = pd.DataFrame(
  142. np.random.randn(5, 3),
  143. columns=["a", "b", "c"],
  144. index=pd.date_range("2012", freq="H", periods=5),
  145. )
  146. # create dataframe with non-unique datetime index
  147. df = df.iloc[[0, 2, 2, 3]].copy()
  148. with pytest.raises(KeyError, match="not found in axis"):
  149. df.drop(["a", "b"]) # Dropping with labels not exist in the index