test_setops.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. """
  2. The tests in this package are to ensure the proper resultant dtypes of
  3. set operations.
  4. """
  5. import itertools as it
  6. import numpy as np
  7. import pytest
  8. from pandas.core.dtypes.common import is_dtype_equal
  9. import pandas as pd
  10. from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index
  11. import pandas._testing as tm
  12. from pandas.api.types import pandas_dtype
  13. from pandas.tests.indexes.conftest import indices_dict
  14. COMPATIBLE_INCONSISTENT_PAIRS = {
  15. (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex),
  16. (Float64Index, Int64Index): (tm.makeFloatIndex, tm.makeIntIndex),
  17. (Float64Index, RangeIndex): (tm.makeFloatIndex, tm.makeIntIndex),
  18. (Float64Index, UInt64Index): (tm.makeFloatIndex, tm.makeUIntIndex),
  19. }
  20. @pytest.fixture(params=it.combinations(indices_dict, 2), ids="-".join)
  21. def index_pair(request):
  22. """
  23. Create all combinations of 2 index types.
  24. """
  25. return indices_dict[request.param[0]], indices_dict[request.param[1]]
  26. def test_union_same_types(indices):
  27. # Union with a non-unique, non-monotonic index raises error
  28. # Only needed for bool index factory
  29. idx1 = indices.sort_values()
  30. idx2 = indices.sort_values()
  31. assert idx1.union(idx2).dtype == idx1.dtype
  32. def test_union_different_types(index_pair):
  33. # GH 23525
  34. idx1, idx2 = index_pair
  35. type_pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x)))
  36. if type_pair in COMPATIBLE_INCONSISTENT_PAIRS:
  37. pytest.xfail("This test only considers non compatible indexes.")
  38. if any(isinstance(idx, pd.MultiIndex) for idx in index_pair):
  39. pytest.xfail("This test doesn't consider multiindixes.")
  40. if is_dtype_equal(idx1.dtype, idx2.dtype):
  41. pytest.xfail("This test only considers non matching dtypes.")
  42. # A union with a CategoricalIndex (even as dtype('O')) and a
  43. # non-CategoricalIndex can only be made if both indices are monotonic.
  44. # This is true before this PR as well.
  45. # Union with a non-unique, non-monotonic index raises error
  46. # This applies to the boolean index
  47. idx1 = idx1.sort_values()
  48. idx2 = idx2.sort_values()
  49. assert idx1.union(idx2).dtype == np.dtype("O")
  50. assert idx2.union(idx1).dtype == np.dtype("O")
  51. @pytest.mark.parametrize("idx_fact1,idx_fact2", COMPATIBLE_INCONSISTENT_PAIRS.values())
  52. def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):
  53. # GH 23525
  54. idx1 = idx_fact1(10)
  55. idx2 = idx_fact2(20)
  56. res1 = idx1.union(idx2)
  57. res2 = idx2.union(idx1)
  58. assert res1.dtype in (idx1.dtype, idx2.dtype)
  59. assert res2.dtype in (idx1.dtype, idx2.dtype)
  60. @pytest.mark.parametrize(
  61. "left, right, expected",
  62. [
  63. ("int64", "int64", "int64"),
  64. ("int64", "uint64", "object"),
  65. ("int64", "float64", "float64"),
  66. ("uint64", "float64", "float64"),
  67. ("uint64", "uint64", "uint64"),
  68. ("float64", "float64", "float64"),
  69. ("datetime64[ns]", "int64", "object"),
  70. ("datetime64[ns]", "uint64", "object"),
  71. ("datetime64[ns]", "float64", "object"),
  72. ("datetime64[ns, CET]", "int64", "object"),
  73. ("datetime64[ns, CET]", "uint64", "object"),
  74. ("datetime64[ns, CET]", "float64", "object"),
  75. ("Period[D]", "int64", "object"),
  76. ("Period[D]", "uint64", "object"),
  77. ("Period[D]", "float64", "object"),
  78. ],
  79. )
  80. def test_union_dtypes(left, right, expected):
  81. left = pandas_dtype(left)
  82. right = pandas_dtype(right)
  83. a = pd.Index([], dtype=left)
  84. b = pd.Index([], dtype=right)
  85. result = (a | b).dtype
  86. assert result == expected