test_subclass.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import DataFrame, Index, MultiIndex, Series
  5. import pandas._testing as tm
  6. class TestDataFrameSubclassing:
  7. def test_frame_subclassing_and_slicing(self):
  8. # Subclass frame and ensure it returns the right class on slicing it
  9. # In reference to PR 9632
  10. class CustomSeries(Series):
  11. @property
  12. def _constructor(self):
  13. return CustomSeries
  14. def custom_series_function(self):
  15. return "OK"
  16. class CustomDataFrame(DataFrame):
  17. """
  18. Subclasses pandas DF, fills DF with simulation results, adds some
  19. custom plotting functions.
  20. """
  21. def __init__(self, *args, **kw):
  22. super().__init__(*args, **kw)
  23. @property
  24. def _constructor(self):
  25. return CustomDataFrame
  26. _constructor_sliced = CustomSeries
  27. def custom_frame_function(self):
  28. return "OK"
  29. data = {"col1": range(10), "col2": range(10)}
  30. cdf = CustomDataFrame(data)
  31. # Did we get back our own DF class?
  32. assert isinstance(cdf, CustomDataFrame)
  33. # Do we get back our own Series class after selecting a column?
  34. cdf_series = cdf.col1
  35. assert isinstance(cdf_series, CustomSeries)
  36. assert cdf_series.custom_series_function() == "OK"
  37. # Do we get back our own DF class after slicing row-wise?
  38. cdf_rows = cdf[1:5]
  39. assert isinstance(cdf_rows, CustomDataFrame)
  40. assert cdf_rows.custom_frame_function() == "OK"
  41. # Make sure sliced part of multi-index frame is custom class
  42. mcol = pd.MultiIndex.from_tuples([("A", "A"), ("A", "B")])
  43. cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol)
  44. assert isinstance(cdf_multi["A"], CustomDataFrame)
  45. mcol = pd.MultiIndex.from_tuples([("A", ""), ("B", "")])
  46. cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol)
  47. assert isinstance(cdf_multi2["A"], CustomSeries)
  48. def test_dataframe_metadata(self):
  49. df = tm.SubclassedDataFrame(
  50. {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"]
  51. )
  52. df.testattr = "XXX"
  53. assert df.testattr == "XXX"
  54. assert df[["X"]].testattr == "XXX"
  55. assert df.loc[["a", "b"], :].testattr == "XXX"
  56. assert df.iloc[[0, 1], :].testattr == "XXX"
  57. # see gh-9776
  58. assert df.iloc[0:1, :].testattr == "XXX"
  59. # see gh-10553
  60. unpickled = tm.round_trip_pickle(df)
  61. tm.assert_frame_equal(df, unpickled)
  62. assert df._metadata == unpickled._metadata
  63. assert df.testattr == unpickled.testattr
  64. def test_indexing_sliced(self):
  65. # GH 11559
  66. df = tm.SubclassedDataFrame(
  67. {"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["a", "b", "c"]
  68. )
  69. res = df.loc[:, "X"]
  70. exp = tm.SubclassedSeries([1, 2, 3], index=list("abc"), name="X")
  71. tm.assert_series_equal(res, exp)
  72. assert isinstance(res, tm.SubclassedSeries)
  73. res = df.iloc[:, 1]
  74. exp = tm.SubclassedSeries([4, 5, 6], index=list("abc"), name="Y")
  75. tm.assert_series_equal(res, exp)
  76. assert isinstance(res, tm.SubclassedSeries)
  77. res = df.loc[:, "Z"]
  78. exp = tm.SubclassedSeries([7, 8, 9], index=list("abc"), name="Z")
  79. tm.assert_series_equal(res, exp)
  80. assert isinstance(res, tm.SubclassedSeries)
  81. res = df.loc["a", :]
  82. exp = tm.SubclassedSeries([1, 4, 7], index=list("XYZ"), name="a")
  83. tm.assert_series_equal(res, exp)
  84. assert isinstance(res, tm.SubclassedSeries)
  85. res = df.iloc[1, :]
  86. exp = tm.SubclassedSeries([2, 5, 8], index=list("XYZ"), name="b")
  87. tm.assert_series_equal(res, exp)
  88. assert isinstance(res, tm.SubclassedSeries)
  89. res = df.loc["c", :]
  90. exp = tm.SubclassedSeries([3, 6, 9], index=list("XYZ"), name="c")
  91. tm.assert_series_equal(res, exp)
  92. assert isinstance(res, tm.SubclassedSeries)
  93. def test_subclass_attr_err_propagation(self):
  94. # GH 11808
  95. class A(DataFrame):
  96. @property
  97. def bar(self):
  98. return self.i_dont_exist
  99. with pytest.raises(AttributeError, match=".*i_dont_exist.*"):
  100. A().bar
  101. def test_subclass_align(self):
  102. # GH 12983
  103. df1 = tm.SubclassedDataFrame(
  104. {"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")
  105. )
  106. df2 = tm.SubclassedDataFrame(
  107. {"c": [1, 2, 4], "d": [1, 2, 4]}, index=list("ABD")
  108. )
  109. res1, res2 = df1.align(df2, axis=0)
  110. exp1 = tm.SubclassedDataFrame(
  111. {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]},
  112. index=list("ABCDE"),
  113. )
  114. exp2 = tm.SubclassedDataFrame(
  115. {"c": [1, 2, np.nan, 4, np.nan], "d": [1, 2, np.nan, 4, np.nan]},
  116. index=list("ABCDE"),
  117. )
  118. assert isinstance(res1, tm.SubclassedDataFrame)
  119. tm.assert_frame_equal(res1, exp1)
  120. assert isinstance(res2, tm.SubclassedDataFrame)
  121. tm.assert_frame_equal(res2, exp2)
  122. res1, res2 = df1.a.align(df2.c)
  123. assert isinstance(res1, tm.SubclassedSeries)
  124. tm.assert_series_equal(res1, exp1.a)
  125. assert isinstance(res2, tm.SubclassedSeries)
  126. tm.assert_series_equal(res2, exp2.c)
  127. def test_subclass_align_combinations(self):
  128. # GH 12983
  129. df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE"))
  130. s = tm.SubclassedSeries([1, 2, 4], index=list("ABD"), name="x")
  131. # frame + series
  132. res1, res2 = df.align(s, axis=0)
  133. exp1 = pd.DataFrame(
  134. {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]},
  135. index=list("ABCDE"),
  136. )
  137. # name is lost when
  138. exp2 = pd.Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x")
  139. assert isinstance(res1, tm.SubclassedDataFrame)
  140. tm.assert_frame_equal(res1, exp1)
  141. assert isinstance(res2, tm.SubclassedSeries)
  142. tm.assert_series_equal(res2, exp2)
  143. # series + frame
  144. res1, res2 = s.align(df)
  145. assert isinstance(res1, tm.SubclassedSeries)
  146. tm.assert_series_equal(res1, exp2)
  147. assert isinstance(res2, tm.SubclassedDataFrame)
  148. tm.assert_frame_equal(res2, exp1)
  149. def test_subclass_iterrows(self):
  150. # GH 13977
  151. df = tm.SubclassedDataFrame({"a": [1]})
  152. for i, row in df.iterrows():
  153. assert isinstance(row, tm.SubclassedSeries)
  154. tm.assert_series_equal(row, df.loc[i])
  155. def test_subclass_stack(self):
  156. # GH 15564
  157. df = tm.SubclassedDataFrame(
  158. [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  159. index=["a", "b", "c"],
  160. columns=["X", "Y", "Z"],
  161. )
  162. res = df.stack()
  163. exp = tm.SubclassedSeries(
  164. [1, 2, 3, 4, 5, 6, 7, 8, 9], index=[list("aaabbbccc"), list("XYZXYZXYZ")]
  165. )
  166. tm.assert_series_equal(res, exp)
  167. def test_subclass_stack_multi(self):
  168. # GH 15564
  169. df = tm.SubclassedDataFrame(
  170. [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
  171. index=MultiIndex.from_tuples(
  172. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  173. ),
  174. columns=MultiIndex.from_tuples(
  175. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  176. ),
  177. )
  178. exp = tm.SubclassedDataFrame(
  179. [
  180. [10, 12],
  181. [11, 13],
  182. [20, 22],
  183. [21, 23],
  184. [30, 32],
  185. [31, 33],
  186. [40, 42],
  187. [41, 43],
  188. ],
  189. index=MultiIndex.from_tuples(
  190. list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))),
  191. names=["aaa", "ccc", "yyy"],
  192. ),
  193. columns=Index(["W", "X"], name="www"),
  194. )
  195. res = df.stack()
  196. tm.assert_frame_equal(res, exp)
  197. res = df.stack("yyy")
  198. tm.assert_frame_equal(res, exp)
  199. exp = tm.SubclassedDataFrame(
  200. [
  201. [10, 11],
  202. [12, 13],
  203. [20, 21],
  204. [22, 23],
  205. [30, 31],
  206. [32, 33],
  207. [40, 41],
  208. [42, 43],
  209. ],
  210. index=MultiIndex.from_tuples(
  211. list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))),
  212. names=["aaa", "ccc", "www"],
  213. ),
  214. columns=Index(["y", "z"], name="yyy"),
  215. )
  216. res = df.stack("www")
  217. tm.assert_frame_equal(res, exp)
  218. def test_subclass_stack_multi_mixed(self):
  219. # GH 15564
  220. df = tm.SubclassedDataFrame(
  221. [
  222. [10, 11, 12.0, 13.0],
  223. [20, 21, 22.0, 23.0],
  224. [30, 31, 32.0, 33.0],
  225. [40, 41, 42.0, 43.0],
  226. ],
  227. index=MultiIndex.from_tuples(
  228. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  229. ),
  230. columns=MultiIndex.from_tuples(
  231. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  232. ),
  233. )
  234. exp = tm.SubclassedDataFrame(
  235. [
  236. [10, 12.0],
  237. [11, 13.0],
  238. [20, 22.0],
  239. [21, 23.0],
  240. [30, 32.0],
  241. [31, 33.0],
  242. [40, 42.0],
  243. [41, 43.0],
  244. ],
  245. index=MultiIndex.from_tuples(
  246. list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))),
  247. names=["aaa", "ccc", "yyy"],
  248. ),
  249. columns=Index(["W", "X"], name="www"),
  250. )
  251. res = df.stack()
  252. tm.assert_frame_equal(res, exp)
  253. res = df.stack("yyy")
  254. tm.assert_frame_equal(res, exp)
  255. exp = tm.SubclassedDataFrame(
  256. [
  257. [10.0, 11.0],
  258. [12.0, 13.0],
  259. [20.0, 21.0],
  260. [22.0, 23.0],
  261. [30.0, 31.0],
  262. [32.0, 33.0],
  263. [40.0, 41.0],
  264. [42.0, 43.0],
  265. ],
  266. index=MultiIndex.from_tuples(
  267. list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))),
  268. names=["aaa", "ccc", "www"],
  269. ),
  270. columns=Index(["y", "z"], name="yyy"),
  271. )
  272. res = df.stack("www")
  273. tm.assert_frame_equal(res, exp)
  274. def test_subclass_unstack(self):
  275. # GH 15564
  276. df = tm.SubclassedDataFrame(
  277. [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  278. index=["a", "b", "c"],
  279. columns=["X", "Y", "Z"],
  280. )
  281. res = df.unstack()
  282. exp = tm.SubclassedSeries(
  283. [1, 4, 7, 2, 5, 8, 3, 6, 9], index=[list("XXXYYYZZZ"), list("abcabcabc")]
  284. )
  285. tm.assert_series_equal(res, exp)
  286. def test_subclass_unstack_multi(self):
  287. # GH 15564
  288. df = tm.SubclassedDataFrame(
  289. [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
  290. index=MultiIndex.from_tuples(
  291. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  292. ),
  293. columns=MultiIndex.from_tuples(
  294. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  295. ),
  296. )
  297. exp = tm.SubclassedDataFrame(
  298. [[10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]],
  299. index=Index(["A", "B"], name="aaa"),
  300. columns=MultiIndex.from_tuples(
  301. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))),
  302. names=["www", "yyy", "ccc"],
  303. ),
  304. )
  305. res = df.unstack()
  306. tm.assert_frame_equal(res, exp)
  307. res = df.unstack("ccc")
  308. tm.assert_frame_equal(res, exp)
  309. exp = tm.SubclassedDataFrame(
  310. [[10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]],
  311. index=Index(["c", "d"], name="ccc"),
  312. columns=MultiIndex.from_tuples(
  313. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))),
  314. names=["www", "yyy", "aaa"],
  315. ),
  316. )
  317. res = df.unstack("aaa")
  318. tm.assert_frame_equal(res, exp)
  319. def test_subclass_unstack_multi_mixed(self):
  320. # GH 15564
  321. df = tm.SubclassedDataFrame(
  322. [
  323. [10, 11, 12.0, 13.0],
  324. [20, 21, 22.0, 23.0],
  325. [30, 31, 32.0, 33.0],
  326. [40, 41, 42.0, 43.0],
  327. ],
  328. index=MultiIndex.from_tuples(
  329. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  330. ),
  331. columns=MultiIndex.from_tuples(
  332. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  333. ),
  334. )
  335. exp = tm.SubclassedDataFrame(
  336. [
  337. [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0],
  338. [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0],
  339. ],
  340. index=Index(["A", "B"], name="aaa"),
  341. columns=MultiIndex.from_tuples(
  342. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))),
  343. names=["www", "yyy", "ccc"],
  344. ),
  345. )
  346. res = df.unstack()
  347. tm.assert_frame_equal(res, exp)
  348. res = df.unstack("ccc")
  349. tm.assert_frame_equal(res, exp)
  350. exp = tm.SubclassedDataFrame(
  351. [
  352. [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0],
  353. [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0],
  354. ],
  355. index=Index(["c", "d"], name="ccc"),
  356. columns=MultiIndex.from_tuples(
  357. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))),
  358. names=["www", "yyy", "aaa"],
  359. ),
  360. )
  361. res = df.unstack("aaa")
  362. tm.assert_frame_equal(res, exp)
  363. def test_subclass_pivot(self):
  364. # GH 15564
  365. df = tm.SubclassedDataFrame(
  366. {
  367. "index": ["A", "B", "C", "C", "B", "A"],
  368. "columns": ["One", "One", "One", "Two", "Two", "Two"],
  369. "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
  370. }
  371. )
  372. pivoted = df.pivot(index="index", columns="columns", values="values")
  373. expected = tm.SubclassedDataFrame(
  374. {
  375. "One": {"A": 1.0, "B": 2.0, "C": 3.0},
  376. "Two": {"A": 1.0, "B": 2.0, "C": 3.0},
  377. }
  378. )
  379. expected.index.name, expected.columns.name = "index", "columns"
  380. tm.assert_frame_equal(pivoted, expected)
  381. def test_subclassed_melt(self):
  382. # GH 15564
  383. cheese = tm.SubclassedDataFrame(
  384. {
  385. "first": ["John", "Mary"],
  386. "last": ["Doe", "Bo"],
  387. "height": [5.5, 6.0],
  388. "weight": [130, 150],
  389. }
  390. )
  391. melted = pd.melt(cheese, id_vars=["first", "last"])
  392. expected = tm.SubclassedDataFrame(
  393. [
  394. ["John", "Doe", "height", 5.5],
  395. ["Mary", "Bo", "height", 6.0],
  396. ["John", "Doe", "weight", 130],
  397. ["Mary", "Bo", "weight", 150],
  398. ],
  399. columns=["first", "last", "variable", "value"],
  400. )
  401. tm.assert_frame_equal(melted, expected)
  402. def test_subclassed_wide_to_long(self):
  403. # GH 9762
  404. np.random.seed(123)
  405. x = np.random.randn(3)
  406. df = tm.SubclassedDataFrame(
  407. {
  408. "A1970": {0: "a", 1: "b", 2: "c"},
  409. "A1980": {0: "d", 1: "e", 2: "f"},
  410. "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
  411. "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
  412. "X": dict(zip(range(3), x)),
  413. }
  414. )
  415. df["id"] = df.index
  416. exp_data = {
  417. "X": x.tolist() + x.tolist(),
  418. "A": ["a", "b", "c", "d", "e", "f"],
  419. "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
  420. "year": [1970, 1970, 1970, 1980, 1980, 1980],
  421. "id": [0, 1, 2, 0, 1, 2],
  422. }
  423. expected = tm.SubclassedDataFrame(exp_data)
  424. expected = expected.set_index(["id", "year"])[["X", "A", "B"]]
  425. long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year")
  426. tm.assert_frame_equal(long_frame, expected)
  427. def test_subclassed_apply(self):
  428. # GH 19822
  429. def check_row_subclass(row):
  430. assert isinstance(row, tm.SubclassedSeries)
  431. def strech(row):
  432. if row["variable"] == "height":
  433. row["value"] += 0.5
  434. return row
  435. df = tm.SubclassedDataFrame(
  436. [
  437. ["John", "Doe", "height", 5.5],
  438. ["Mary", "Bo", "height", 6.0],
  439. ["John", "Doe", "weight", 130],
  440. ["Mary", "Bo", "weight", 150],
  441. ],
  442. columns=["first", "last", "variable", "value"],
  443. )
  444. df.apply(lambda x: check_row_subclass(x))
  445. df.apply(lambda x: check_row_subclass(x), axis=1)
  446. expected = tm.SubclassedDataFrame(
  447. [
  448. ["John", "Doe", "height", 6.0],
  449. ["Mary", "Bo", "height", 6.5],
  450. ["John", "Doe", "weight", 130],
  451. ["Mary", "Bo", "weight", 150],
  452. ],
  453. columns=["first", "last", "variable", "value"],
  454. )
  455. result = df.apply(lambda x: strech(x), axis=1)
  456. assert isinstance(result, tm.SubclassedDataFrame)
  457. tm.assert_frame_equal(result, expected)
  458. expected = tm.SubclassedDataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
  459. result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1)
  460. assert isinstance(result, tm.SubclassedDataFrame)
  461. tm.assert_frame_equal(result, expected)
  462. result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
  463. assert isinstance(result, tm.SubclassedDataFrame)
  464. tm.assert_frame_equal(result, expected)
  465. expected = tm.SubclassedSeries([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
  466. result = df.apply(lambda x: [1, 2, 3], axis=1)
  467. assert not isinstance(result, tm.SubclassedDataFrame)
  468. tm.assert_series_equal(result, expected)