test_conversion.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import DataFrame, MultiIndex, date_range
  5. import pandas._testing as tm
  6. def test_tolist(idx):
  7. result = idx.tolist()
  8. exp = list(idx.values)
  9. assert result == exp
  10. def test_to_numpy(idx):
  11. result = idx.to_numpy()
  12. exp = idx.values
  13. tm.assert_numpy_array_equal(result, exp)
  14. def test_to_frame():
  15. tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
  16. index = MultiIndex.from_tuples(tuples)
  17. result = index.to_frame(index=False)
  18. expected = DataFrame(tuples)
  19. tm.assert_frame_equal(result, expected)
  20. result = index.to_frame()
  21. expected.index = index
  22. tm.assert_frame_equal(result, expected)
  23. tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
  24. index = MultiIndex.from_tuples(tuples, names=["first", "second"])
  25. result = index.to_frame(index=False)
  26. expected = DataFrame(tuples)
  27. expected.columns = ["first", "second"]
  28. tm.assert_frame_equal(result, expected)
  29. result = index.to_frame()
  30. expected.index = index
  31. tm.assert_frame_equal(result, expected)
  32. # See GH-22580
  33. index = MultiIndex.from_tuples(tuples)
  34. result = index.to_frame(index=False, name=["first", "second"])
  35. expected = DataFrame(tuples)
  36. expected.columns = ["first", "second"]
  37. tm.assert_frame_equal(result, expected)
  38. result = index.to_frame(name=["first", "second"])
  39. expected.index = index
  40. expected.columns = ["first", "second"]
  41. tm.assert_frame_equal(result, expected)
  42. msg = "'name' must be a list / sequence of column names."
  43. with pytest.raises(TypeError, match=msg):
  44. index.to_frame(name="first")
  45. msg = "'name' should have same length as number of levels on index."
  46. with pytest.raises(ValueError, match=msg):
  47. index.to_frame(name=["first"])
  48. # Tests for datetime index
  49. index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
  50. result = index.to_frame(index=False)
  51. expected = DataFrame(
  52. {
  53. 0: np.repeat(np.arange(5, dtype="int64"), 3),
  54. 1: np.tile(pd.date_range("20130101", periods=3), 5),
  55. }
  56. )
  57. tm.assert_frame_equal(result, expected)
  58. result = index.to_frame()
  59. expected.index = index
  60. tm.assert_frame_equal(result, expected)
  61. # See GH-22580
  62. result = index.to_frame(index=False, name=["first", "second"])
  63. expected = DataFrame(
  64. {
  65. "first": np.repeat(np.arange(5, dtype="int64"), 3),
  66. "second": np.tile(pd.date_range("20130101", periods=3), 5),
  67. }
  68. )
  69. tm.assert_frame_equal(result, expected)
  70. result = index.to_frame(name=["first", "second"])
  71. expected.index = index
  72. tm.assert_frame_equal(result, expected)
  73. def test_to_frame_dtype_fidelity():
  74. # GH 22420
  75. mi = pd.MultiIndex.from_arrays(
  76. [
  77. pd.date_range("19910905", periods=6, tz="US/Eastern"),
  78. [1, 1, 1, 2, 2, 2],
  79. pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
  80. ["x", "x", "y", "z", "x", "y"],
  81. ],
  82. names=["dates", "a", "b", "c"],
  83. )
  84. original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
  85. expected_df = pd.DataFrame(
  86. {
  87. "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
  88. "a": [1, 1, 1, 2, 2, 2],
  89. "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
  90. "c": ["x", "x", "y", "z", "x", "y"],
  91. }
  92. )
  93. df = mi.to_frame(index=False)
  94. df_dtypes = df.dtypes.to_dict()
  95. tm.assert_frame_equal(df, expected_df)
  96. assert original_dtypes == df_dtypes
  97. def test_to_frame_resulting_column_order():
  98. # GH 22420
  99. expected = ["z", 0, "a"]
  100. mi = pd.MultiIndex.from_arrays(
  101. [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
  102. )
  103. result = mi.to_frame().columns.tolist()
  104. assert result == expected
  105. def test_roundtrip_pickle_with_tz():
  106. return # FIXME: this can't be right?
  107. # GH 8367
  108. # round-trip of timezone
  109. index = MultiIndex.from_product(
  110. [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")],
  111. names=["one", "two", "three"],
  112. )
  113. unpickled = tm.round_trip_pickle(index)
  114. assert index.equal_levels(unpickled)
  115. def test_pickle(indices):
  116. return # FIXME: this can't be right?
  117. unpickled = tm.round_trip_pickle(indices)
  118. assert indices.equals(unpickled)
  119. original_name, indices.name = indices.name, "foo"
  120. unpickled = tm.round_trip_pickle(indices)
  121. assert indices.equals(unpickled)
  122. indices.name = original_name
  123. def test_to_series(idx):
  124. # assert that we are creating a copy of the index
  125. s = idx.to_series()
  126. assert s.values is not idx.values
  127. assert s.index is not idx
  128. assert s.name == idx.name
  129. def test_to_series_with_arguments(idx):
  130. # GH18699
  131. # index kwarg
  132. s = idx.to_series(index=idx)
  133. assert s.values is not idx.values
  134. assert s.index is idx
  135. assert s.name == idx.name
  136. # name kwarg
  137. idx = idx
  138. s = idx.to_series(name="__test")
  139. assert s.values is not idx.values
  140. assert s.index is not idx
  141. assert s.name != idx.name
  142. def test_to_flat_index(idx):
  143. expected = pd.Index(
  144. (
  145. ("foo", "one"),
  146. ("foo", "two"),
  147. ("bar", "one"),
  148. ("baz", "two"),
  149. ("qux", "one"),
  150. ("qux", "two"),
  151. ),
  152. tupleize_cols=False,
  153. )
  154. result = idx.to_flat_index()
  155. tm.assert_index_equal(result, expected)