test_hist_method.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. # coding: utf-8
  2. """ Test cases for .hist method """
  3. import numpy as np
  4. from numpy.random import randn
  5. import pytest
  6. import pandas.util._test_decorators as td
  7. from pandas import DataFrame, Series
  8. import pandas._testing as tm
  9. from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
  10. @td.skip_if_no_mpl
  11. class TestSeriesPlots(TestPlotBase):
  12. def setup_method(self, method):
  13. TestPlotBase.setup_method(self, method)
  14. import matplotlib as mpl
  15. mpl.rcdefaults()
  16. self.ts = tm.makeTimeSeries()
  17. self.ts.name = "ts"
  18. @pytest.mark.slow
  19. def test_hist_legacy(self):
  20. _check_plot_works(self.ts.hist)
  21. _check_plot_works(self.ts.hist, grid=False)
  22. _check_plot_works(self.ts.hist, figsize=(8, 10))
  23. # _check_plot_works adds an ax so catch warning. see GH #13188
  24. with tm.assert_produces_warning(UserWarning):
  25. _check_plot_works(self.ts.hist, by=self.ts.index.month)
  26. with tm.assert_produces_warning(UserWarning):
  27. _check_plot_works(self.ts.hist, by=self.ts.index.month, bins=5)
  28. fig, ax = self.plt.subplots(1, 1)
  29. _check_plot_works(self.ts.hist, ax=ax)
  30. _check_plot_works(self.ts.hist, ax=ax, figure=fig)
  31. _check_plot_works(self.ts.hist, figure=fig)
  32. tm.close()
  33. fig, (ax1, ax2) = self.plt.subplots(1, 2)
  34. _check_plot_works(self.ts.hist, figure=fig, ax=ax1)
  35. _check_plot_works(self.ts.hist, figure=fig, ax=ax2)
  36. with pytest.raises(ValueError):
  37. self.ts.hist(by=self.ts.index, figure=fig)
  38. @pytest.mark.slow
  39. def test_hist_bins_legacy(self):
  40. df = DataFrame(np.random.randn(10, 2))
  41. ax = df.hist(bins=2)[0][0]
  42. assert len(ax.patches) == 2
  43. @pytest.mark.slow
  44. def test_hist_layout(self):
  45. df = self.hist_df
  46. with pytest.raises(ValueError):
  47. df.height.hist(layout=(1, 1))
  48. with pytest.raises(ValueError):
  49. df.height.hist(layout=[1, 1])
  50. @pytest.mark.slow
  51. def test_hist_layout_with_by(self):
  52. df = self.hist_df
  53. # _check_plot_works adds an `ax` kwarg to the method call
  54. # so we get a warning about an axis being cleared, even
  55. # though we don't explicing pass one, see GH #13188
  56. with tm.assert_produces_warning(UserWarning):
  57. axes = _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1))
  58. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  59. with tm.assert_produces_warning(UserWarning):
  60. axes = _check_plot_works(df.height.hist, by=df.gender, layout=(3, -1))
  61. self._check_axes_shape(axes, axes_num=2, layout=(3, 1))
  62. with tm.assert_produces_warning(UserWarning):
  63. axes = _check_plot_works(df.height.hist, by=df.category, layout=(4, 1))
  64. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  65. with tm.assert_produces_warning(UserWarning):
  66. axes = _check_plot_works(df.height.hist, by=df.category, layout=(2, -1))
  67. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  68. with tm.assert_produces_warning(UserWarning):
  69. axes = _check_plot_works(df.height.hist, by=df.category, layout=(3, -1))
  70. self._check_axes_shape(axes, axes_num=4, layout=(3, 2))
  71. with tm.assert_produces_warning(UserWarning):
  72. axes = _check_plot_works(df.height.hist, by=df.category, layout=(-1, 4))
  73. self._check_axes_shape(axes, axes_num=4, layout=(1, 4))
  74. with tm.assert_produces_warning(UserWarning):
  75. axes = _check_plot_works(df.height.hist, by=df.classroom, layout=(2, 2))
  76. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  77. axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7))
  78. self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7))
  79. @pytest.mark.slow
  80. def test_hist_no_overlap(self):
  81. from matplotlib.pyplot import subplot, gcf
  82. x = Series(randn(2))
  83. y = Series(randn(2))
  84. subplot(121)
  85. x.hist()
  86. subplot(122)
  87. y.hist()
  88. fig = gcf()
  89. axes = fig.axes
  90. assert len(axes) == 2
  91. @pytest.mark.slow
  92. def test_hist_by_no_extra_plots(self):
  93. df = self.hist_df
  94. axes = df.height.hist(by=df.gender) # noqa
  95. assert len(self.plt.get_fignums()) == 1
  96. @pytest.mark.slow
  97. def test_plot_fails_when_ax_differs_from_figure(self):
  98. from pylab import figure
  99. fig1 = figure()
  100. fig2 = figure()
  101. ax1 = fig1.add_subplot(111)
  102. with pytest.raises(AssertionError):
  103. self.ts.hist(ax=ax1, figure=fig2)
  104. @td.skip_if_no_mpl
  105. class TestDataFramePlots(TestPlotBase):
  106. @pytest.mark.slow
  107. def test_hist_df_legacy(self):
  108. from matplotlib.patches import Rectangle
  109. with tm.assert_produces_warning(UserWarning):
  110. _check_plot_works(self.hist_df.hist)
  111. # make sure layout is handled
  112. df = DataFrame(randn(100, 3))
  113. with tm.assert_produces_warning(UserWarning):
  114. axes = _check_plot_works(df.hist, grid=False)
  115. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  116. assert not axes[1, 1].get_visible()
  117. df = DataFrame(randn(100, 1))
  118. _check_plot_works(df.hist)
  119. # make sure layout is handled
  120. df = DataFrame(randn(100, 6))
  121. with tm.assert_produces_warning(UserWarning):
  122. axes = _check_plot_works(df.hist, layout=(4, 2))
  123. self._check_axes_shape(axes, axes_num=6, layout=(4, 2))
  124. # make sure sharex, sharey is handled
  125. with tm.assert_produces_warning(UserWarning):
  126. _check_plot_works(df.hist, sharex=True, sharey=True)
  127. # handle figsize arg
  128. with tm.assert_produces_warning(UserWarning):
  129. _check_plot_works(df.hist, figsize=(8, 10))
  130. # check bins argument
  131. with tm.assert_produces_warning(UserWarning):
  132. _check_plot_works(df.hist, bins=5)
  133. # make sure xlabelsize and xrot are handled
  134. ser = df[0]
  135. xf, yf = 20, 18
  136. xrot, yrot = 30, 40
  137. axes = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  138. self._check_ticks_props(
  139. axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
  140. )
  141. xf, yf = 20, 18
  142. xrot, yrot = 30, 40
  143. axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  144. self._check_ticks_props(
  145. axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
  146. )
  147. tm.close()
  148. ax = ser.hist(cumulative=True, bins=4, density=True)
  149. # height of last bin (index 5) must be 1.0
  150. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  151. tm.assert_almost_equal(rects[-1].get_height(), 1.0)
  152. tm.close()
  153. ax = ser.hist(log=True)
  154. # scale of y must be 'log'
  155. self._check_ax_scales(ax, yaxis="log")
  156. tm.close()
  157. # propagate attr exception from matplotlib.Axes.hist
  158. with pytest.raises(AttributeError):
  159. ser.hist(foo="bar")
  160. @pytest.mark.slow
  161. def test_hist_non_numerical_raises(self):
  162. # gh-10444
  163. df = DataFrame(np.random.rand(10, 2))
  164. df_o = df.astype(np.object)
  165. msg = "hist method requires numerical columns, nothing to plot."
  166. with pytest.raises(ValueError, match=msg):
  167. df_o.hist()
  168. @pytest.mark.slow
  169. def test_hist_layout(self):
  170. df = DataFrame(randn(100, 3))
  171. layout_to_expected_size = (
  172. {"layout": None, "expected_size": (2, 2)}, # default is 2x2
  173. {"layout": (2, 2), "expected_size": (2, 2)},
  174. {"layout": (4, 1), "expected_size": (4, 1)},
  175. {"layout": (1, 4), "expected_size": (1, 4)},
  176. {"layout": (3, 3), "expected_size": (3, 3)},
  177. {"layout": (-1, 4), "expected_size": (1, 4)},
  178. {"layout": (4, -1), "expected_size": (4, 1)},
  179. {"layout": (-1, 2), "expected_size": (2, 2)},
  180. {"layout": (2, -1), "expected_size": (2, 2)},
  181. )
  182. for layout_test in layout_to_expected_size:
  183. axes = df.hist(layout=layout_test["layout"])
  184. expected = layout_test["expected_size"]
  185. self._check_axes_shape(axes, axes_num=3, layout=expected)
  186. # layout too small for all 4 plots
  187. with pytest.raises(ValueError):
  188. df.hist(layout=(1, 1))
  189. # invalid format for layout
  190. with pytest.raises(ValueError):
  191. df.hist(layout=(1,))
  192. with pytest.raises(ValueError):
  193. df.hist(layout=(-1, -1))
  194. @pytest.mark.slow
  195. # GH 9351
  196. def test_tight_layout(self):
  197. df = DataFrame(randn(100, 3))
  198. _check_plot_works(df.hist)
  199. self.plt.tight_layout()
  200. tm.close()
  201. def test_hist_subplot_xrot(self):
  202. # GH 30288
  203. df = DataFrame(
  204. {
  205. "length": [1.5, 0.5, 1.2, 0.9, 3],
  206. "animal": ["pig", "rabbit", "pig", "pig", "rabbit"],
  207. }
  208. )
  209. axes = _check_plot_works(
  210. df.hist,
  211. filterwarnings="always",
  212. column="length",
  213. by="animal",
  214. bins=5,
  215. xrot=0,
  216. )
  217. self._check_ticks_props(axes, xrot=0)
  218. @td.skip_if_no_mpl
  219. class TestDataFrameGroupByPlots(TestPlotBase):
  220. @pytest.mark.slow
  221. def test_grouped_hist_legacy(self):
  222. from matplotlib.patches import Rectangle
  223. from pandas.plotting._matplotlib.hist import _grouped_hist
  224. df = DataFrame(randn(500, 2), columns=["A", "B"])
  225. df["C"] = np.random.randint(0, 4, 500)
  226. df["D"] = ["X"] * 500
  227. axes = _grouped_hist(df.A, by=df.C)
  228. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  229. tm.close()
  230. axes = df.hist(by=df.C)
  231. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  232. tm.close()
  233. # group by a key with single value
  234. axes = df.hist(by="D", rot=30)
  235. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  236. self._check_ticks_props(axes, xrot=30)
  237. tm.close()
  238. # make sure kwargs to hist are handled
  239. xf, yf = 20, 18
  240. xrot, yrot = 30, 40
  241. axes = _grouped_hist(
  242. df.A,
  243. by=df.C,
  244. cumulative=True,
  245. bins=4,
  246. xlabelsize=xf,
  247. xrot=xrot,
  248. ylabelsize=yf,
  249. yrot=yrot,
  250. density=True,
  251. )
  252. # height of last bin (index 5) must be 1.0
  253. for ax in axes.ravel():
  254. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  255. height = rects[-1].get_height()
  256. tm.assert_almost_equal(height, 1.0)
  257. self._check_ticks_props(
  258. axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
  259. )
  260. tm.close()
  261. axes = _grouped_hist(df.A, by=df.C, log=True)
  262. # scale of y must be 'log'
  263. self._check_ax_scales(axes, yaxis="log")
  264. tm.close()
  265. # propagate attr exception from matplotlib.Axes.hist
  266. with pytest.raises(AttributeError):
  267. _grouped_hist(df.A, by=df.C, foo="bar")
  268. msg = "Specify figure size by tuple instead"
  269. with pytest.raises(ValueError, match=msg):
  270. df.hist(by="C", figsize="default")
  271. @pytest.mark.slow
  272. def test_grouped_hist_legacy2(self):
  273. n = 10
  274. weight = Series(np.random.normal(166, 20, size=n))
  275. height = Series(np.random.normal(60, 10, size=n))
  276. with tm.RNGContext(42):
  277. gender_int = np.random.choice([0, 1], size=n)
  278. df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int})
  279. gb = df_int.groupby("gender")
  280. axes = gb.hist()
  281. assert len(axes) == 2
  282. assert len(self.plt.get_fignums()) == 2
  283. tm.close()
  284. @pytest.mark.slow
  285. def test_grouped_hist_layout(self):
  286. df = self.hist_df
  287. msg = "Layout of 1x1 must be larger than required size 2"
  288. with pytest.raises(ValueError, match=msg):
  289. df.hist(column="weight", by=df.gender, layout=(1, 1))
  290. msg = "Layout of 1x3 must be larger than required size 4"
  291. with pytest.raises(ValueError, match=msg):
  292. df.hist(column="height", by=df.category, layout=(1, 3))
  293. msg = "At least one dimension of layout must be positive"
  294. with pytest.raises(ValueError, match=msg):
  295. df.hist(column="height", by=df.category, layout=(-1, -1))
  296. with tm.assert_produces_warning(UserWarning):
  297. axes = _check_plot_works(
  298. df.hist, column="height", by=df.gender, layout=(2, 1)
  299. )
  300. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  301. with tm.assert_produces_warning(UserWarning):
  302. axes = _check_plot_works(
  303. df.hist, column="height", by=df.gender, layout=(2, -1)
  304. )
  305. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  306. axes = df.hist(column="height", by=df.category, layout=(4, 1))
  307. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  308. axes = df.hist(column="height", by=df.category, layout=(-1, 1))
  309. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  310. axes = df.hist(column="height", by=df.category, layout=(4, 2), figsize=(12, 8))
  311. self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 8))
  312. tm.close()
  313. # GH 6769
  314. with tm.assert_produces_warning(UserWarning):
  315. axes = _check_plot_works(
  316. df.hist, column="height", by="classroom", layout=(2, 2)
  317. )
  318. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  319. # without column
  320. with tm.assert_produces_warning(UserWarning):
  321. axes = _check_plot_works(df.hist, by="classroom")
  322. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  323. axes = df.hist(by="gender", layout=(3, 5))
  324. self._check_axes_shape(axes, axes_num=2, layout=(3, 5))
  325. axes = df.hist(column=["height", "weight", "category"])
  326. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  327. @pytest.mark.slow
  328. def test_grouped_hist_multiple_axes(self):
  329. # GH 6970, GH 7069
  330. df = self.hist_df
  331. fig, axes = self.plt.subplots(2, 3)
  332. returned = df.hist(column=["height", "weight", "category"], ax=axes[0])
  333. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  334. tm.assert_numpy_array_equal(returned, axes[0])
  335. assert returned[0].figure is fig
  336. returned = df.hist(by="classroom", ax=axes[1])
  337. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  338. tm.assert_numpy_array_equal(returned, axes[1])
  339. assert returned[0].figure is fig
  340. with pytest.raises(ValueError):
  341. fig, axes = self.plt.subplots(2, 3)
  342. # pass different number of axes from required
  343. axes = df.hist(column="height", ax=axes)
  344. @pytest.mark.slow
  345. def test_axis_share_x(self):
  346. df = self.hist_df
  347. # GH4089
  348. ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True)
  349. # share x
  350. assert ax1._shared_x_axes.joined(ax1, ax2)
  351. assert ax2._shared_x_axes.joined(ax1, ax2)
  352. # don't share y
  353. assert not ax1._shared_y_axes.joined(ax1, ax2)
  354. assert not ax2._shared_y_axes.joined(ax1, ax2)
  355. @pytest.mark.slow
  356. def test_axis_share_y(self):
  357. df = self.hist_df
  358. ax1, ax2 = df.hist(column="height", by=df.gender, sharey=True)
  359. # share y
  360. assert ax1._shared_y_axes.joined(ax1, ax2)
  361. assert ax2._shared_y_axes.joined(ax1, ax2)
  362. # don't share x
  363. assert not ax1._shared_x_axes.joined(ax1, ax2)
  364. assert not ax2._shared_x_axes.joined(ax1, ax2)
  365. @pytest.mark.slow
  366. def test_axis_share_xy(self):
  367. df = self.hist_df
  368. ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True, sharey=True)
  369. # share both x and y
  370. assert ax1._shared_x_axes.joined(ax1, ax2)
  371. assert ax2._shared_x_axes.joined(ax1, ax2)
  372. assert ax1._shared_y_axes.joined(ax1, ax2)
  373. assert ax2._shared_y_axes.joined(ax1, ax2)