test_boxplot_method.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. # coding: utf-8
  2. import itertools
  3. import string
  4. import numpy as np
  5. from numpy import random
  6. import pytest
  7. import pandas.util._test_decorators as td
  8. from pandas import DataFrame, MultiIndex, Series, date_range, timedelta_range
  9. import pandas._testing as tm
  10. from pandas.tests.plotting.common import TestPlotBase, _check_plot_works
  11. import pandas.plotting as plotting
  12. """ Test cases for .boxplot method """
  13. @td.skip_if_no_mpl
  14. class TestDataFramePlots(TestPlotBase):
  15. @pytest.mark.slow
  16. def test_boxplot_legacy1(self):
  17. df = DataFrame(
  18. np.random.randn(6, 4),
  19. index=list(string.ascii_letters[:6]),
  20. columns=["one", "two", "three", "four"],
  21. )
  22. df["indic"] = ["foo", "bar"] * 3
  23. df["indic2"] = ["foo", "bar", "foo"] * 2
  24. _check_plot_works(df.boxplot, return_type="dict")
  25. _check_plot_works(df.boxplot, column=["one", "two"], return_type="dict")
  26. # _check_plot_works adds an ax so catch warning. see GH #13188
  27. with tm.assert_produces_warning(UserWarning):
  28. _check_plot_works(df.boxplot, column=["one", "two"], by="indic")
  29. _check_plot_works(df.boxplot, column="one", by=["indic", "indic2"])
  30. with tm.assert_produces_warning(UserWarning):
  31. _check_plot_works(df.boxplot, by="indic")
  32. with tm.assert_produces_warning(UserWarning):
  33. _check_plot_works(df.boxplot, by=["indic", "indic2"])
  34. _check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict")
  35. _check_plot_works(df.boxplot, notch=1, return_type="dict")
  36. with tm.assert_produces_warning(UserWarning):
  37. _check_plot_works(df.boxplot, by="indic", notch=1)
  38. @pytest.mark.slow
  39. def test_boxplot_legacy2(self):
  40. df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
  41. df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
  42. df["Y"] = Series(["A"] * 10)
  43. with tm.assert_produces_warning(UserWarning):
  44. _check_plot_works(df.boxplot, by="X")
  45. # When ax is supplied and required number of axes is 1,
  46. # passed ax should be used:
  47. fig, ax = self.plt.subplots()
  48. axes = df.boxplot("Col1", by="X", ax=ax)
  49. ax_axes = ax.axes
  50. assert ax_axes is axes
  51. fig, ax = self.plt.subplots()
  52. axes = df.groupby("Y").boxplot(ax=ax, return_type="axes")
  53. ax_axes = ax.axes
  54. assert ax_axes is axes["A"]
  55. # Multiple columns with an ax argument should use same figure
  56. fig, ax = self.plt.subplots()
  57. with tm.assert_produces_warning(UserWarning):
  58. axes = df.boxplot(
  59. column=["Col1", "Col2"], by="X", ax=ax, return_type="axes"
  60. )
  61. assert axes["Col1"].get_figure() is fig
  62. # When by is None, check that all relevant lines are present in the
  63. # dict
  64. fig, ax = self.plt.subplots()
  65. d = df.boxplot(ax=ax, return_type="dict")
  66. lines = list(itertools.chain.from_iterable(d.values()))
  67. assert len(ax.get_lines()) == len(lines)
  68. @pytest.mark.slow
  69. def test_boxplot_return_type_none(self):
  70. # GH 12216; return_type=None & by=None -> axes
  71. result = self.hist_df.boxplot()
  72. assert isinstance(result, self.plt.Axes)
  73. @pytest.mark.slow
  74. def test_boxplot_return_type_legacy(self):
  75. # API change in https://github.com/pandas-dev/pandas/pull/7096
  76. import matplotlib as mpl # noqa
  77. df = DataFrame(
  78. np.random.randn(6, 4),
  79. index=list(string.ascii_letters[:6]),
  80. columns=["one", "two", "three", "four"],
  81. )
  82. with pytest.raises(ValueError):
  83. df.boxplot(return_type="NOTATYPE")
  84. result = df.boxplot()
  85. self._check_box_return_type(result, "axes")
  86. with tm.assert_produces_warning(False):
  87. result = df.boxplot(return_type="dict")
  88. self._check_box_return_type(result, "dict")
  89. with tm.assert_produces_warning(False):
  90. result = df.boxplot(return_type="axes")
  91. self._check_box_return_type(result, "axes")
  92. with tm.assert_produces_warning(False):
  93. result = df.boxplot(return_type="both")
  94. self._check_box_return_type(result, "both")
  95. @pytest.mark.slow
  96. def test_boxplot_axis_limits(self):
  97. def _check_ax_limits(col, ax):
  98. y_min, y_max = ax.get_ylim()
  99. assert y_min <= col.min()
  100. assert y_max >= col.max()
  101. df = self.hist_df.copy()
  102. df["age"] = np.random.randint(1, 20, df.shape[0])
  103. # One full row
  104. height_ax, weight_ax = df.boxplot(["height", "weight"], by="category")
  105. _check_ax_limits(df["height"], height_ax)
  106. _check_ax_limits(df["weight"], weight_ax)
  107. assert weight_ax._sharey == height_ax
  108. # Two rows, one partial
  109. p = df.boxplot(["height", "weight", "age"], by="category")
  110. height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0]
  111. dummy_ax = p[1, 1]
  112. _check_ax_limits(df["height"], height_ax)
  113. _check_ax_limits(df["weight"], weight_ax)
  114. _check_ax_limits(df["age"], age_ax)
  115. assert weight_ax._sharey == height_ax
  116. assert age_ax._sharey == height_ax
  117. assert dummy_ax._sharey is None
  118. @pytest.mark.slow
  119. def test_boxplot_empty_column(self):
  120. df = DataFrame(np.random.randn(20, 4))
  121. df.loc[:, 0] = np.nan
  122. _check_plot_works(df.boxplot, return_type="axes")
  123. @pytest.mark.slow
  124. def test_figsize(self):
  125. df = DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"])
  126. result = df.boxplot(return_type="axes", figsize=(12, 8))
  127. assert result.figure.bbox_inches.width == 12
  128. assert result.figure.bbox_inches.height == 8
  129. def test_fontsize(self):
  130. df = DataFrame({"a": [1, 2, 3, 4, 5, 6]})
  131. self._check_ticks_props(
  132. df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16
  133. )
  134. def test_boxplot_numeric_data(self):
  135. # GH 22799
  136. df = DataFrame(
  137. {
  138. "a": date_range("2012-01-01", periods=100),
  139. "b": np.random.randn(100),
  140. "c": np.random.randn(100) + 2,
  141. "d": date_range("2012-01-01", periods=100).astype(str),
  142. "e": date_range("2012-01-01", periods=100, tz="UTC"),
  143. "f": timedelta_range("1 days", periods=100),
  144. }
  145. )
  146. ax = df.plot(kind="box")
  147. assert [x.get_text() for x in ax.get_xticklabels()] == ["b", "c"]
  148. @pytest.mark.parametrize(
  149. "colors_kwd, expected",
  150. [
  151. (
  152. dict(boxes="r", whiskers="b", medians="g", caps="c"),
  153. dict(boxes="r", whiskers="b", medians="g", caps="c"),
  154. ),
  155. (dict(boxes="r"), dict(boxes="r")),
  156. ("r", dict(boxes="r", whiskers="r", medians="r", caps="r")),
  157. ],
  158. )
  159. def test_color_kwd(self, colors_kwd, expected):
  160. # GH: 26214
  161. df = DataFrame(random.rand(10, 2))
  162. result = df.boxplot(color=colors_kwd, return_type="dict")
  163. for k, v in expected.items():
  164. assert result[k][0].get_color() == v
  165. @pytest.mark.parametrize(
  166. "dict_colors, msg",
  167. [(dict(boxes="r", invalid_key="r"), "invalid key 'invalid_key'")],
  168. )
  169. def test_color_kwd_errors(self, dict_colors, msg):
  170. # GH: 26214
  171. df = DataFrame(random.rand(10, 2))
  172. with pytest.raises(ValueError, match=msg):
  173. df.boxplot(color=dict_colors, return_type="dict")
  174. @td.skip_if_no_mpl
  175. class TestDataFrameGroupByPlots(TestPlotBase):
  176. @pytest.mark.slow
  177. def test_boxplot_legacy1(self):
  178. grouped = self.hist_df.groupby(by="gender")
  179. with tm.assert_produces_warning(UserWarning):
  180. axes = _check_plot_works(grouped.boxplot, return_type="axes")
  181. self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2))
  182. axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
  183. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  184. @pytest.mark.slow
  185. def test_boxplot_legacy2(self):
  186. tuples = zip(string.ascii_letters[:10], range(10))
  187. df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
  188. grouped = df.groupby(level=1)
  189. with tm.assert_produces_warning(UserWarning):
  190. axes = _check_plot_works(grouped.boxplot, return_type="axes")
  191. self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3))
  192. axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
  193. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  194. @pytest.mark.slow
  195. def test_boxplot_legacy3(self):
  196. tuples = zip(string.ascii_letters[:10], range(10))
  197. df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
  198. grouped = df.unstack(level=1).groupby(level=0, axis=1)
  199. with tm.assert_produces_warning(UserWarning):
  200. axes = _check_plot_works(grouped.boxplot, return_type="axes")
  201. self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2))
  202. axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
  203. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  204. @pytest.mark.slow
  205. def test_grouped_plot_fignums(self):
  206. n = 10
  207. weight = Series(np.random.normal(166, 20, size=n))
  208. height = Series(np.random.normal(60, 10, size=n))
  209. with tm.RNGContext(42):
  210. gender = np.random.choice(["male", "female"], size=n)
  211. df = DataFrame({"height": height, "weight": weight, "gender": gender})
  212. gb = df.groupby("gender")
  213. res = gb.plot()
  214. assert len(self.plt.get_fignums()) == 2
  215. assert len(res) == 2
  216. tm.close()
  217. res = gb.boxplot(return_type="axes")
  218. assert len(self.plt.get_fignums()) == 1
  219. assert len(res) == 2
  220. tm.close()
  221. # now works with GH 5610 as gender is excluded
  222. res = df.groupby("gender").hist()
  223. tm.close()
  224. @pytest.mark.slow
  225. def test_grouped_box_return_type(self):
  226. df = self.hist_df
  227. # old style: return_type=None
  228. result = df.boxplot(by="gender")
  229. assert isinstance(result, np.ndarray)
  230. self._check_box_return_type(
  231. result, None, expected_keys=["height", "weight", "category"]
  232. )
  233. # now for groupby
  234. result = df.groupby("gender").boxplot(return_type="dict")
  235. self._check_box_return_type(result, "dict", expected_keys=["Male", "Female"])
  236. columns2 = "X B C D A G Y N Q O".split()
  237. df2 = DataFrame(random.randn(50, 10), columns=columns2)
  238. categories2 = "A B C D E F G H I J".split()
  239. df2["category"] = categories2 * 5
  240. for t in ["dict", "axes", "both"]:
  241. returned = df.groupby("classroom").boxplot(return_type=t)
  242. self._check_box_return_type(returned, t, expected_keys=["A", "B", "C"])
  243. returned = df.boxplot(by="classroom", return_type=t)
  244. self._check_box_return_type(
  245. returned, t, expected_keys=["height", "weight", "category"]
  246. )
  247. returned = df2.groupby("category").boxplot(return_type=t)
  248. self._check_box_return_type(returned, t, expected_keys=categories2)
  249. returned = df2.boxplot(by="category", return_type=t)
  250. self._check_box_return_type(returned, t, expected_keys=columns2)
  251. @pytest.mark.slow
  252. def test_grouped_box_layout(self):
  253. df = self.hist_df
  254. msg = "Layout of 1x1 must be larger than required size 2"
  255. with pytest.raises(ValueError, match=msg):
  256. df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1))
  257. msg = "The 'layout' keyword is not supported when 'by' is None"
  258. with pytest.raises(ValueError, match=msg):
  259. df.boxplot(
  260. column=["height", "weight", "category"],
  261. layout=(2, 1),
  262. return_type="dict",
  263. )
  264. msg = "At least one dimension of layout must be positive"
  265. with pytest.raises(ValueError, match=msg):
  266. df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1))
  267. # _check_plot_works adds an ax so catch warning. see GH #13188
  268. with tm.assert_produces_warning(UserWarning):
  269. box = _check_plot_works(
  270. df.groupby("gender").boxplot, column="height", return_type="dict"
  271. )
  272. self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2))
  273. with tm.assert_produces_warning(UserWarning):
  274. box = _check_plot_works(
  275. df.groupby("category").boxplot, column="height", return_type="dict"
  276. )
  277. self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))
  278. # GH 6769
  279. with tm.assert_produces_warning(UserWarning):
  280. box = _check_plot_works(
  281. df.groupby("classroom").boxplot, column="height", return_type="dict"
  282. )
  283. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
  284. # GH 5897
  285. axes = df.boxplot(
  286. column=["height", "weight", "category"], by="gender", return_type="axes"
  287. )
  288. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
  289. for ax in [axes["height"]]:
  290. self._check_visible(ax.get_xticklabels(), visible=False)
  291. self._check_visible([ax.xaxis.get_label()], visible=False)
  292. for ax in [axes["weight"], axes["category"]]:
  293. self._check_visible(ax.get_xticklabels())
  294. self._check_visible([ax.xaxis.get_label()])
  295. box = df.groupby("classroom").boxplot(
  296. column=["height", "weight", "category"], return_type="dict"
  297. )
  298. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
  299. with tm.assert_produces_warning(UserWarning):
  300. box = _check_plot_works(
  301. df.groupby("category").boxplot,
  302. column="height",
  303. layout=(3, 2),
  304. return_type="dict",
  305. )
  306. self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))
  307. with tm.assert_produces_warning(UserWarning):
  308. box = _check_plot_works(
  309. df.groupby("category").boxplot,
  310. column="height",
  311. layout=(3, -1),
  312. return_type="dict",
  313. )
  314. self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))
  315. box = df.boxplot(
  316. column=["height", "weight", "category"], by="gender", layout=(4, 1)
  317. )
  318. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(4, 1))
  319. box = df.boxplot(
  320. column=["height", "weight", "category"], by="gender", layout=(-1, 1)
  321. )
  322. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1))
  323. box = df.groupby("classroom").boxplot(
  324. column=["height", "weight", "category"], layout=(1, 4), return_type="dict"
  325. )
  326. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4))
  327. box = df.groupby("classroom").boxplot( # noqa
  328. column=["height", "weight", "category"], layout=(1, -1), return_type="dict"
  329. )
  330. self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3))
  331. @pytest.mark.slow
  332. def test_grouped_box_multiple_axes(self):
  333. # GH 6970, GH 7069
  334. df = self.hist_df
  335. # check warning to ignore sharex / sharey
  336. # this check should be done in the first function which
  337. # passes multiple axes to plot, hist or boxplot
  338. # location should be changed if other test is added
  339. # which has earlier alphabetical order
  340. with tm.assert_produces_warning(UserWarning):
  341. fig, axes = self.plt.subplots(2, 2)
  342. df.groupby("category").boxplot(column="height", return_type="axes", ax=axes)
  343. self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))
  344. fig, axes = self.plt.subplots(2, 3)
  345. with tm.assert_produces_warning(UserWarning):
  346. returned = df.boxplot(
  347. column=["height", "weight", "category"],
  348. by="gender",
  349. return_type="axes",
  350. ax=axes[0],
  351. )
  352. returned = np.array(list(returned.values))
  353. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  354. tm.assert_numpy_array_equal(returned, axes[0])
  355. assert returned[0].figure is fig
  356. # draw on second row
  357. with tm.assert_produces_warning(UserWarning):
  358. returned = df.groupby("classroom").boxplot(
  359. column=["height", "weight", "category"], return_type="axes", ax=axes[1]
  360. )
  361. returned = np.array(list(returned.values))
  362. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  363. tm.assert_numpy_array_equal(returned, axes[1])
  364. assert returned[0].figure is fig
  365. with pytest.raises(ValueError):
  366. fig, axes = self.plt.subplots(2, 3)
  367. # pass different number of axes from required
  368. with tm.assert_produces_warning(UserWarning):
  369. axes = df.groupby("classroom").boxplot(ax=axes)
  370. def test_fontsize(self):
  371. df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]})
  372. self._check_ticks_props(
  373. df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16
  374. )