# test_missing.py
import numpy as np
import pytest

from pandas._libs.tslib import iNaT

import pandas as pd
from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index
import pandas._testing as tm
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
  8. def test_fillna(idx):
  9. # GH 11343
  10. # TODO: Remove or Refactor. Not Implemented for MultiIndex
  11. for name, index in [("idx", idx)]:
  12. if len(index) == 0:
  13. pass
  14. elif isinstance(index, MultiIndex):
  15. idx = index.copy()
  16. msg = "isna is not defined for MultiIndex"
  17. with pytest.raises(NotImplementedError, match=msg):
  18. idx.fillna(idx[0])
  19. else:
  20. idx = index.copy()
  21. result = idx.fillna(idx[0])
  22. tm.assert_index_equal(result, idx)
  23. assert result is not idx
  24. msg = "'value' must be a scalar, passed: "
  25. with pytest.raises(TypeError, match=msg):
  26. idx.fillna([idx[0]])
  27. idx = index.copy()
  28. values = idx.values
  29. if isinstance(index, DatetimeIndexOpsMixin):
  30. values[1] = iNaT
  31. elif isinstance(index, (Int64Index, UInt64Index)):
  32. continue
  33. else:
  34. values[1] = np.nan
  35. if isinstance(index, PeriodIndex):
  36. idx = type(index)(values, freq=index.freq)
  37. else:
  38. idx = type(index)(values)
  39. expected = np.array([False] * len(idx), dtype=bool)
  40. expected[1] = True
  41. tm.assert_numpy_array_equal(idx._isnan, expected)
  42. assert idx.hasnans is True
  43. def test_dropna():
  44. # GH 6194
  45. idx = pd.MultiIndex.from_arrays(
  46. [
  47. [1, np.nan, 3, np.nan, 5],
  48. [1, 2, np.nan, np.nan, 5],
  49. ["a", "b", "c", np.nan, "e"],
  50. ]
  51. )
  52. exp = pd.MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
  53. tm.assert_index_equal(idx.dropna(), exp)
  54. tm.assert_index_equal(idx.dropna(how="any"), exp)
  55. exp = pd.MultiIndex.from_arrays(
  56. [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
  57. )
  58. tm.assert_index_equal(idx.dropna(how="all"), exp)
  59. msg = "invalid how option: xxx"
  60. with pytest.raises(ValueError, match=msg):
  61. idx.dropna(how="xxx")
  62. # GH26408
  63. # test if missing values are dropped for multiindex constructed
  64. # from codes and values
  65. idx = MultiIndex(
  66. levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
  67. codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
  68. )
  69. expected = MultiIndex.from_arrays([["128", 2], ["128", 2]])
  70. tm.assert_index_equal(idx.dropna(), expected)
  71. tm.assert_index_equal(idx.dropna(how="any"), expected)
  72. expected = MultiIndex.from_arrays(
  73. [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
  74. )
  75. tm.assert_index_equal(idx.dropna(how="all"), expected)
  76. def test_nulls(idx):
  77. # this is really a smoke test for the methods
  78. # as these are adequately tested for function elsewhere
  79. msg = "isna is not defined for MultiIndex"
  80. with pytest.raises(NotImplementedError, match=msg):
  81. idx.isna()
  82. @pytest.mark.xfail(reason="isna is not defined for MultiIndex")
  83. def test_hasnans_isnans(idx):
  84. # GH 11343, added tests for hasnans / isnans
  85. index = idx.copy()
  86. # cases in indices doesn't include NaN
  87. expected = np.array([False] * len(index), dtype=bool)
  88. tm.assert_numpy_array_equal(index._isnan, expected)
  89. assert index.hasnans is False
  90. index = idx.copy()
  91. values = index.values
  92. values[1] = np.nan
  93. index = type(idx)(values)
  94. expected = np.array([False] * len(index), dtype=bool)
  95. expected[1] = True
  96. tm.assert_numpy_array_equal(index._isnan, expected)
  97. assert index.hasnans is True
  98. def test_nan_stays_float():
  99. # GH 7031
  100. idx0 = pd.MultiIndex(
  101. levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]
  102. )
  103. idx1 = pd.MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])
  104. idxm = idx0.join(idx1, how="outer")
  105. assert pd.isna(idx0.get_level_values(1)).all()
  106. # the following failed in 0.14.1
  107. assert pd.isna(idxm.get_level_values(1)[:-1]).all()
  108. df0 = pd.DataFrame([[1, 2]], index=idx0)
  109. df1 = pd.DataFrame([[3, 4]], index=idx1)
  110. dfm = df0 - df1
  111. assert pd.isna(df0.index.get_level_values(1)).all()
  112. # the following failed in 0.14.1
  113. assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()