# _misc.py
from contextlib import contextmanager

from pandas.plotting._core import _get_plot_backend
  3. def table(ax, data, rowLabels=None, colLabels=None, **kwargs):
  4. """
  5. Helper function to convert DataFrame and Series to matplotlib.table.
  6. Parameters
  7. ----------
  8. ax : Matplotlib axes object
  9. data : DataFrame or Series
  10. Data for table contents.
  11. **kwargs
  12. Keyword arguments to be passed to matplotlib.table.table.
  13. If `rowLabels` or `colLabels` is not specified, data index or column
  14. name will be used.
  15. Returns
  16. -------
  17. matplotlib table object
  18. """
  19. plot_backend = _get_plot_backend("matplotlib")
  20. return plot_backend.table(
  21. ax=ax, data=data, rowLabels=None, colLabels=None, **kwargs
  22. )
  23. def register():
  24. """
  25. Register Pandas Formatters and Converters with matplotlib.
  26. This function modifies the global ``matplotlib.units.registry``
  27. dictionary. Pandas adds custom converters for
  28. * pd.Timestamp
  29. * pd.Period
  30. * np.datetime64
  31. * datetime.datetime
  32. * datetime.date
  33. * datetime.time
  34. See Also
  35. --------
  36. deregister_matplotlib_converters
  37. """
  38. plot_backend = _get_plot_backend("matplotlib")
  39. plot_backend.register()
  40. def deregister():
  41. """
  42. Remove pandas' formatters and converters.
  43. Removes the custom converters added by :func:`register`. This
  44. attempts to set the state of the registry back to the state before
  45. pandas registered its own units. Converters for pandas' own types like
  46. Timestamp and Period are removed completely. Converters for types
  47. pandas overwrites, like ``datetime.datetime``, are restored to their
  48. original value.
  49. See Also
  50. --------
  51. register_matplotlib_converters
  52. """
  53. plot_backend = _get_plot_backend("matplotlib")
  54. plot_backend.deregister()
  55. def scatter_matrix(
  56. frame,
  57. alpha=0.5,
  58. figsize=None,
  59. ax=None,
  60. grid=False,
  61. diagonal="hist",
  62. marker=".",
  63. density_kwds=None,
  64. hist_kwds=None,
  65. range_padding=0.05,
  66. **kwargs,
  67. ):
  68. """
  69. Draw a matrix of scatter plots.
  70. Parameters
  71. ----------
  72. frame : DataFrame
  73. alpha : float, optional
  74. Amount of transparency applied.
  75. figsize : (float,float), optional
  76. A tuple (width, height) in inches.
  77. ax : Matplotlib axis object, optional
  78. grid : bool, optional
  79. Setting this to True will show the grid.
  80. diagonal : {'hist', 'kde'}
  81. Pick between 'kde' and 'hist' for either Kernel Density Estimation or
  82. Histogram plot in the diagonal.
  83. marker : str, optional
  84. Matplotlib marker type, default '.'.
  85. density_kwds : keywords
  86. Keyword arguments to be passed to kernel density estimate plot.
  87. hist_kwds : keywords
  88. Keyword arguments to be passed to hist function.
  89. range_padding : float, default 0.05
  90. Relative extension of axis range in x and y with respect to
  91. (x_max - x_min) or (y_max - y_min).
  92. **kwargs
  93. Keyword arguments to be passed to scatter function.
  94. Returns
  95. -------
  96. numpy.ndarray
  97. A matrix of scatter plots.
  98. Examples
  99. --------
  100. >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
  101. >>> scatter_matrix(df, alpha=0.2)
  102. """
  103. plot_backend = _get_plot_backend("matplotlib")
  104. return plot_backend.scatter_matrix(
  105. frame=frame,
  106. alpha=alpha,
  107. figsize=figsize,
  108. ax=ax,
  109. grid=grid,
  110. diagonal=diagonal,
  111. marker=marker,
  112. density_kwds=density_kwds,
  113. hist_kwds=hist_kwds,
  114. range_padding=range_padding,
  115. **kwargs,
  116. )
  117. def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
  118. """
  119. Plot a multidimensional dataset in 2D.
  120. Each Series in the DataFrame is represented as a evenly distributed
  121. slice on a circle. Each data point is rendered in the circle according to
  122. the value on each Series. Highly correlated `Series` in the `DataFrame`
  123. are placed closer on the unit circle.
  124. RadViz allow to project a N-dimensional data set into a 2D space where the
  125. influence of each dimension can be interpreted as a balance between the
  126. influence of all dimensions.
  127. More info available at the `original article
  128. <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
  129. describing RadViz.
  130. Parameters
  131. ----------
  132. frame : `DataFrame`
  133. Pandas object holding the data.
  134. class_column : str
  135. Column name containing the name of the data point category.
  136. ax : :class:`matplotlib.axes.Axes`, optional
  137. A plot instance to which to add the information.
  138. color : list[str] or tuple[str], optional
  139. Assign a color to each category. Example: ['blue', 'green'].
  140. colormap : str or :class:`matplotlib.colors.Colormap`, default None
  141. Colormap to select colors from. If string, load colormap with that
  142. name from matplotlib.
  143. **kwds
  144. Options to pass to matplotlib scatter plotting method.
  145. Returns
  146. -------
  147. class:`matplotlib.axes.Axes`
  148. See Also
  149. --------
  150. plotting.andrews_curves : Plot clustering visualization.
  151. Examples
  152. --------
  153. .. plot::
  154. :context: close-figs
  155. >>> df = pd.DataFrame({
  156. ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6,
  157. ... 6.7, 4.6],
  158. ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2,
  159. ... 3.3, 3.6],
  160. ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4,
  161. ... 5.7, 1.0],
  162. ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2,
  163. ... 2.1, 0.2],
  164. ... 'Category': ['virginica', 'virginica', 'setosa',
  165. ... 'virginica', 'virginica', 'versicolor',
  166. ... 'versicolor', 'setosa', 'virginica',
  167. ... 'setosa']
  168. ... })
  169. >>> rad_viz = pd.plotting.radviz(df, 'Category') # doctest: +SKIP
  170. """
  171. plot_backend = _get_plot_backend("matplotlib")
  172. return plot_backend.radviz(
  173. frame=frame,
  174. class_column=class_column,
  175. ax=ax,
  176. color=color,
  177. colormap=colormap,
  178. **kwds,
  179. )
  180. def andrews_curves(
  181. frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs
  182. ):
  183. """
  184. Generate a matplotlib plot of Andrews curves, for visualising clusters of
  185. multivariate data.
  186. Andrews curves have the functional form:
  187. f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
  188. x_4 sin(2t) + x_5 cos(2t) + ...
  189. Where x coefficients correspond to the values of each dimension and t is
  190. linearly spaced between -pi and +pi. Each row of frame then corresponds to
  191. a single curve.
  192. Parameters
  193. ----------
  194. frame : DataFrame
  195. Data to be plotted, preferably normalized to (0.0, 1.0).
  196. class_column : Name of the column containing class names
  197. ax : matplotlib axes object, default None
  198. samples : Number of points to plot in each curve
  199. color : list or tuple, optional
  200. Colors to use for the different classes.
  201. colormap : str or matplotlib colormap object, default None
  202. Colormap to select colors from. If string, load colormap with that name
  203. from matplotlib.
  204. **kwargs
  205. Options to pass to matplotlib plotting method.
  206. Returns
  207. -------
  208. class:`matplotlip.axis.Axes`
  209. """
  210. plot_backend = _get_plot_backend("matplotlib")
  211. return plot_backend.andrews_curves(
  212. frame=frame,
  213. class_column=class_column,
  214. ax=ax,
  215. samples=samples,
  216. color=color,
  217. colormap=colormap,
  218. **kwargs,
  219. )
  220. def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
  221. """
  222. Bootstrap plot on mean, median and mid-range statistics.
  223. The bootstrap plot is used to estimate the uncertainty of a statistic
  224. by relaying on random sampling with replacement [1]_. This function will
  225. generate bootstrapping plots for mean, median and mid-range statistics
  226. for the given number of samples of the given size.
  227. .. [1] "Bootstrapping (statistics)" in \
  228. https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29
  229. Parameters
  230. ----------
  231. series : pandas.Series
  232. Pandas Series from where to get the samplings for the bootstrapping.
  233. fig : matplotlib.figure.Figure, default None
  234. If given, it will use the `fig` reference for plotting instead of
  235. creating a new one with default parameters.
  236. size : int, default 50
  237. Number of data points to consider during each sampling. It must be
  238. greater or equal than the length of the `series`.
  239. samples : int, default 500
  240. Number of times the bootstrap procedure is performed.
  241. **kwds
  242. Options to pass to matplotlib plotting method.
  243. Returns
  244. -------
  245. matplotlib.figure.Figure
  246. Matplotlib figure.
  247. See Also
  248. --------
  249. DataFrame.plot : Basic plotting for DataFrame objects.
  250. Series.plot : Basic plotting for Series objects.
  251. Examples
  252. --------
  253. .. plot::
  254. :context: close-figs
  255. >>> s = pd.Series(np.random.uniform(size=100))
  256. >>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP
  257. """
  258. plot_backend = _get_plot_backend("matplotlib")
  259. return plot_backend.bootstrap_plot(
  260. series=series, fig=fig, size=size, samples=samples, **kwds
  261. )
  262. def parallel_coordinates(
  263. frame,
  264. class_column,
  265. cols=None,
  266. ax=None,
  267. color=None,
  268. use_columns=False,
  269. xticks=None,
  270. colormap=None,
  271. axvlines=True,
  272. axvlines_kwds=None,
  273. sort_labels=False,
  274. **kwargs,
  275. ):
  276. """
  277. Parallel coordinates plotting.
  278. Parameters
  279. ----------
  280. frame : DataFrame
  281. class_column : str
  282. Column name containing class names.
  283. cols : list, optional
  284. A list of column names to use.
  285. ax : matplotlib.axis, optional
  286. Matplotlib axis object.
  287. color : list or tuple, optional
  288. Colors to use for the different classes.
  289. use_columns : bool, optional
  290. If true, columns will be used as xticks.
  291. xticks : list or tuple, optional
  292. A list of values to use for xticks.
  293. colormap : str or matplotlib colormap, default None
  294. Colormap to use for line colors.
  295. axvlines : bool, optional
  296. If true, vertical lines will be added at each xtick.
  297. axvlines_kwds : keywords, optional
  298. Options to be passed to axvline method for vertical lines.
  299. sort_labels : bool, default False
  300. Sort class_column labels, useful when assigning colors.
  301. **kwargs
  302. Options to pass to matplotlib plotting method.
  303. Returns
  304. -------
  305. class:`matplotlib.axis.Axes`
  306. Examples
  307. --------
  308. >>> from matplotlib import pyplot as plt
  309. >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
  310. '/pandas/tests/data/csv/iris.csv')
  311. >>> pd.plotting.parallel_coordinates(
  312. df, 'Name',
  313. color=('#556270', '#4ECDC4', '#C7F464'))
  314. >>> plt.show()
  315. """
  316. plot_backend = _get_plot_backend("matplotlib")
  317. return plot_backend.parallel_coordinates(
  318. frame=frame,
  319. class_column=class_column,
  320. cols=cols,
  321. ax=ax,
  322. color=color,
  323. use_columns=use_columns,
  324. xticks=xticks,
  325. colormap=colormap,
  326. axvlines=axvlines,
  327. axvlines_kwds=axvlines_kwds,
  328. sort_labels=sort_labels,
  329. **kwargs,
  330. )
  331. def lag_plot(series, lag=1, ax=None, **kwds):
  332. """
  333. Lag plot for time series.
  334. Parameters
  335. ----------
  336. series : Time series
  337. lag : lag of the scatter plot, default 1
  338. ax : Matplotlib axis object, optional
  339. **kwds
  340. Matplotlib scatter method keyword arguments.
  341. Returns
  342. -------
  343. class:`matplotlib.axis.Axes`
  344. """
  345. plot_backend = _get_plot_backend("matplotlib")
  346. return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds)
  347. def autocorrelation_plot(series, ax=None, **kwargs):
  348. """
  349. Autocorrelation plot for time series.
  350. Parameters
  351. ----------
  352. series : Time series
  353. ax : Matplotlib axis object, optional
  354. **kwargs
  355. Options to pass to matplotlib plotting method.
  356. Returns
  357. -------
  358. class:`matplotlib.axis.Axes`
  359. """
  360. plot_backend = _get_plot_backend("matplotlib")
  361. return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs)
  362. class _Options(dict):
  363. """
  364. Stores pandas plotting options.
  365. Allows for parameter aliasing so you can just use parameter names that are
  366. the same as the plot function parameters, but is stored in a canonical
  367. format that makes it easy to breakdown into groups later.
  368. """
  369. # alias so the names are same as plotting method parameter names
  370. _ALIASES = {"x_compat": "xaxis.compat"}
  371. _DEFAULT_KEYS = ["xaxis.compat"]
  372. def __init__(self, deprecated=False):
  373. self._deprecated = deprecated
  374. super().__setitem__("xaxis.compat", False)
  375. def __getitem__(self, key):
  376. key = self._get_canonical_key(key)
  377. if key not in self:
  378. raise ValueError(f"{key} is not a valid pandas plotting option")
  379. return super().__getitem__(key)
  380. def __setitem__(self, key, value):
  381. key = self._get_canonical_key(key)
  382. return super().__setitem__(key, value)
  383. def __delitem__(self, key):
  384. key = self._get_canonical_key(key)
  385. if key in self._DEFAULT_KEYS:
  386. raise ValueError(f"Cannot remove default parameter {key}")
  387. return super().__delitem__(key)
  388. def __contains__(self, key) -> bool:
  389. key = self._get_canonical_key(key)
  390. return super().__contains__(key)
  391. def reset(self):
  392. """
  393. Reset the option store to its initial state
  394. Returns
  395. -------
  396. None
  397. """
  398. self.__init__()
  399. def _get_canonical_key(self, key):
  400. return self._ALIASES.get(key, key)
  401. @contextmanager
  402. def use(self, key, value):
  403. """
  404. Temporarily set a parameter value using the with statement.
  405. Aliasing allowed.
  406. """
  407. old_value = self[key]
  408. try:
  409. self[key] = value
  410. yield self
  411. finally:
  412. self[key] = old_value
  413. plot_params = _Options()