1
0

format.py 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992
  1. """
  2. Internal module for formatting output data in csv, html,
  3. and latex files. This module also applies to display formatting.
  4. """
  5. from contextlib import contextmanager
  6. from datetime import tzinfo
  7. import decimal
  8. from functools import partial
  9. from io import StringIO
  10. import math
  11. import re
  12. from shutil import get_terminal_size
  13. from typing import (
  14. IO,
  15. TYPE_CHECKING,
  16. Any,
  17. Callable,
  18. Dict,
  19. Iterable,
  20. List,
  21. Mapping,
  22. Optional,
  23. Sequence,
  24. Tuple,
  25. Type,
  26. Union,
  27. cast,
  28. )
  29. from unicodedata import east_asian_width
  30. import numpy as np
  31. from pandas._config.config import get_option, set_option
  32. from pandas._libs import lib
  33. from pandas._libs.missing import NA
  34. from pandas._libs.tslib import format_array_from_datetime
  35. from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
  36. from pandas._libs.tslibs.nattype import NaTType
  37. from pandas._typing import FilePathOrBuffer
  38. from pandas.errors import AbstractMethodError
  39. from pandas.core.dtypes.common import (
  40. is_categorical_dtype,
  41. is_complex_dtype,
  42. is_datetime64_dtype,
  43. is_datetime64tz_dtype,
  44. is_extension_array_dtype,
  45. is_float,
  46. is_float_dtype,
  47. is_integer,
  48. is_integer_dtype,
  49. is_list_like,
  50. is_numeric_dtype,
  51. is_scalar,
  52. is_timedelta64_dtype,
  53. )
  54. from pandas.core.dtypes.generic import (
  55. ABCIndexClass,
  56. ABCMultiIndex,
  57. ABCSeries,
  58. ABCSparseArray,
  59. )
  60. from pandas.core.dtypes.missing import isna, notna
  61. from pandas.core.arrays.datetimes import DatetimeArray
  62. from pandas.core.arrays.timedeltas import TimedeltaArray
  63. from pandas.core.base import PandasObject
  64. import pandas.core.common as com
  65. from pandas.core.indexes.api import Index, ensure_index
  66. from pandas.core.indexes.datetimes import DatetimeIndex
  67. from pandas.core.indexes.timedeltas import TimedeltaIndex
  68. from pandas.io.common import stringify_path
  69. from pandas.io.formats.printing import adjoin, justify, pprint_thing
  70. if TYPE_CHECKING:
  71. from pandas import Series, DataFrame, Categorical
  72. formatters_type = Union[
  73. List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
  74. ]
  75. float_format_type = Union[str, Callable, "EngFormatter"]
  76. common_docstring = """
  77. Parameters
  78. ----------
  79. buf : str, Path or StringIO-like, optional, default None
  80. Buffer to write to. If None, the output is returned as a string.
  81. columns : sequence, optional, default None
  82. The subset of columns to write. Writes all columns by default.
  83. col_space : %(col_space_type)s, optional
  84. %(col_space)s.
  85. header : %(header_type)s, optional
  86. %(header)s.
  87. index : bool, optional, default True
  88. Whether to print index (row) labels.
  89. na_rep : str, optional, default 'NaN'
  90. String representation of NAN to use.
  91. formatters : list, tuple or dict of one-param. functions, optional
  92. Formatter functions to apply to columns' elements by position or
  93. name.
  94. The result of each function must be a unicode string.
  95. List/tuple must be of length equal to the number of columns.
  96. float_format : one-parameter function, optional, default None
  97. Formatter function to apply to columns' elements if they are
  98. floats. The result of this function must be a unicode string.
  99. sparsify : bool, optional, default True
  100. Set to False for a DataFrame with a hierarchical index to print
  101. every multiindex key at each row.
  102. index_names : bool, optional, default True
  103. Prints the names of the indexes.
  104. justify : str, default None
  105. How to justify the column labels. If None uses the option from
  106. the print configuration (controlled by set_option), 'right' out
  107. of the box. Valid values are
  108. * left
  109. * right
  110. * center
  111. * justify
  112. * justify-all
  113. * start
  114. * end
  115. * inherit
  116. * match-parent
  117. * initial
  118. * unset.
  119. max_rows : int, optional
  120. Maximum number of rows to display in the console.
  121. min_rows : int, optional
  122. The number of rows to display in the console in a truncated repr
  123. (when number of rows is above `max_rows`).
  124. max_cols : int, optional
  125. Maximum number of columns to display in the console.
  126. show_dimensions : bool, default False
  127. Display DataFrame dimensions (number of rows by number of columns).
  128. decimal : str, default '.'
  129. Character recognized as decimal separator, e.g. ',' in Europe.
  130. """
  131. _VALID_JUSTIFY_PARAMETERS = (
  132. "left",
  133. "right",
  134. "center",
  135. "justify",
  136. "justify-all",
  137. "start",
  138. "end",
  139. "inherit",
  140. "match-parent",
  141. "initial",
  142. "unset",
  143. )
  144. return_docstring = """
  145. Returns
  146. -------
  147. str or None
  148. If buf is None, returns the result as a string. Otherwise returns
  149. None.
  150. """
  151. class CategoricalFormatter:
  152. def __init__(
  153. self,
  154. categorical: "Categorical",
  155. buf: Optional[IO[str]] = None,
  156. length: bool = True,
  157. na_rep: str = "NaN",
  158. footer: bool = True,
  159. ):
  160. self.categorical = categorical
  161. self.buf = buf if buf is not None else StringIO("")
  162. self.na_rep = na_rep
  163. self.length = length
  164. self.footer = footer
  165. def _get_footer(self) -> str:
  166. footer = ""
  167. if self.length:
  168. if footer:
  169. footer += ", "
  170. footer += "Length: {length}".format(length=len(self.categorical))
  171. level_info = self.categorical._repr_categories_info()
  172. # Levels are added in a newline
  173. if footer:
  174. footer += "\n"
  175. footer += level_info
  176. return str(footer)
  177. def _get_formatted_values(self) -> List[str]:
  178. return format_array(
  179. self.categorical._internal_get_values(),
  180. None,
  181. float_format=None,
  182. na_rep=self.na_rep,
  183. )
  184. def to_string(self) -> str:
  185. categorical = self.categorical
  186. if len(categorical) == 0:
  187. if self.footer:
  188. return self._get_footer()
  189. else:
  190. return ""
  191. fmt_values = self._get_formatted_values()
  192. fmt_values = ["{i}".format(i=i) for i in fmt_values]
  193. fmt_values = [i.strip() for i in fmt_values]
  194. values = ", ".join(fmt_values)
  195. result = ["[" + values + "]"]
  196. if self.footer:
  197. footer = self._get_footer()
  198. if footer:
  199. result.append(footer)
  200. return str("\n".join(result))
  201. class SeriesFormatter:
  202. def __init__(
  203. self,
  204. series: "Series",
  205. buf: Optional[IO[str]] = None,
  206. length: Union[bool, str] = True,
  207. header: bool = True,
  208. index: bool = True,
  209. na_rep: str = "NaN",
  210. name: bool = False,
  211. float_format: Optional[str] = None,
  212. dtype: bool = True,
  213. max_rows: Optional[int] = None,
  214. min_rows: Optional[int] = None,
  215. ):
  216. self.series = series
  217. self.buf = buf if buf is not None else StringIO()
  218. self.name = name
  219. self.na_rep = na_rep
  220. self.header = header
  221. self.length = length
  222. self.index = index
  223. self.max_rows = max_rows
  224. self.min_rows = min_rows
  225. if float_format is None:
  226. float_format = get_option("display.float_format")
  227. self.float_format = float_format
  228. self.dtype = dtype
  229. self.adj = _get_adjustment()
  230. self._chk_truncate()
  231. def _chk_truncate(self) -> None:
  232. from pandas.core.reshape.concat import concat
  233. self.tr_row_num: Optional[int]
  234. min_rows = self.min_rows
  235. max_rows = self.max_rows
  236. # truncation determined by max_rows, actual truncated number of rows
  237. # used below by min_rows
  238. truncate_v = max_rows and (len(self.series) > max_rows)
  239. series = self.series
  240. if truncate_v:
  241. max_rows = cast(int, max_rows)
  242. if min_rows:
  243. # if min_rows is set (not None or 0), set max_rows to minimum
  244. # of both
  245. max_rows = min(min_rows, max_rows)
  246. if max_rows == 1:
  247. row_num = max_rows
  248. series = series.iloc[:max_rows]
  249. else:
  250. row_num = max_rows // 2
  251. series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
  252. self.tr_row_num = row_num
  253. else:
  254. self.tr_row_num = None
  255. self.tr_series = series
  256. self.truncate_v = truncate_v
  257. def _get_footer(self) -> str:
  258. name = self.series.name
  259. footer = ""
  260. if getattr(self.series.index, "freq", None) is not None:
  261. footer += "Freq: {freq}".format(freq=self.series.index.freqstr)
  262. if self.name is not False and name is not None:
  263. if footer:
  264. footer += ", "
  265. series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n"))
  266. footer += (
  267. ("Name: {sname}".format(sname=series_name)) if name is not None else ""
  268. )
  269. if self.length is True or (self.length == "truncate" and self.truncate_v):
  270. if footer:
  271. footer += ", "
  272. footer += "Length: {length}".format(length=len(self.series))
  273. if self.dtype is not False and self.dtype is not None:
  274. name = getattr(self.tr_series.dtype, "name", None)
  275. if name:
  276. if footer:
  277. footer += ", "
  278. footer += "dtype: {typ}".format(typ=pprint_thing(name))
  279. # level infos are added to the end and in a new line, like it is done
  280. # for Categoricals
  281. if is_categorical_dtype(self.tr_series.dtype):
  282. level_info = self.tr_series._values._repr_categories_info()
  283. if footer:
  284. footer += "\n"
  285. footer += level_info
  286. return str(footer)
  287. def _get_formatted_index(self) -> Tuple[List[str], bool]:
  288. index = self.tr_series.index
  289. is_multi = isinstance(index, ABCMultiIndex)
  290. if is_multi:
  291. have_header = any(name for name in index.names)
  292. fmt_index = index.format(names=True)
  293. else:
  294. have_header = index.name is not None
  295. fmt_index = index.format(name=True)
  296. return fmt_index, have_header
  297. def _get_formatted_values(self) -> List[str]:
  298. return format_array(
  299. self.tr_series._values,
  300. None,
  301. float_format=self.float_format,
  302. na_rep=self.na_rep,
  303. )
  304. def to_string(self) -> str:
  305. series = self.tr_series
  306. footer = self._get_footer()
  307. if len(series) == 0:
  308. return "{name}([], {footer})".format(
  309. name=type(self.series).__name__, footer=footer
  310. )
  311. fmt_index, have_header = self._get_formatted_index()
  312. fmt_values = self._get_formatted_values()
  313. if self.truncate_v:
  314. n_header_rows = 0
  315. row_num = self.tr_row_num
  316. row_num = cast(int, row_num)
  317. width = self.adj.len(fmt_values[row_num - 1])
  318. if width > 3:
  319. dot_str = "..."
  320. else:
  321. dot_str = ".."
  322. # Series uses mode=center because it has single value columns
  323. # DataFrame uses mode=left
  324. dot_str = self.adj.justify([dot_str], width, mode="center")[0]
  325. fmt_values.insert(row_num + n_header_rows, dot_str)
  326. fmt_index.insert(row_num + 1, "")
  327. if self.index:
  328. result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
  329. else:
  330. result = self.adj.adjoin(3, fmt_values)
  331. if self.header and have_header:
  332. result = fmt_index[0] + "\n" + result
  333. if footer:
  334. result += "\n" + footer
  335. return str("".join(result))
  336. class TextAdjustment:
  337. def __init__(self):
  338. self.encoding = get_option("display.encoding")
  339. def len(self, text: str) -> int:
  340. return len(text)
  341. def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]:
  342. return justify(texts, max_len, mode=mode)
  343. def adjoin(self, space: int, *lists, **kwargs) -> str:
  344. return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs)
  345. class EastAsianTextAdjustment(TextAdjustment):
  346. def __init__(self):
  347. super().__init__()
  348. if get_option("display.unicode.ambiguous_as_wide"):
  349. self.ambiguous_width = 2
  350. else:
  351. self.ambiguous_width = 1
  352. # Definition of East Asian Width
  353. # http://unicode.org/reports/tr11/
  354. # Ambiguous width can be changed by option
  355. self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}
  356. def len(self, text: str) -> int:
  357. """
  358. Calculate display width considering unicode East Asian Width
  359. """
  360. if not isinstance(text, str):
  361. return len(text)
  362. return sum(
  363. self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text
  364. )
  365. def justify(
  366. self, texts: Iterable[str], max_len: int, mode: str = "right"
  367. ) -> List[str]:
  368. # re-calculate padding space per str considering East Asian Width
  369. def _get_pad(t):
  370. return max_len - self.len(t) + len(t)
  371. if mode == "left":
  372. return [x.ljust(_get_pad(x)) for x in texts]
  373. elif mode == "center":
  374. return [x.center(_get_pad(x)) for x in texts]
  375. else:
  376. return [x.rjust(_get_pad(x)) for x in texts]
  377. def _get_adjustment() -> TextAdjustment:
  378. use_east_asian_width = get_option("display.unicode.east_asian_width")
  379. if use_east_asian_width:
  380. return EastAsianTextAdjustment()
  381. else:
  382. return TextAdjustment()
  383. class TableFormatter:
  384. show_dimensions: Union[bool, str]
  385. is_truncated: bool
  386. formatters: formatters_type
  387. columns: Index
  388. @property
  389. def should_show_dimensions(self) -> bool:
  390. return self.show_dimensions is True or (
  391. self.show_dimensions == "truncate" and self.is_truncated
  392. )
  393. def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]:
  394. if isinstance(self.formatters, (list, tuple)):
  395. if is_integer(i):
  396. i = cast(int, i)
  397. return self.formatters[i]
  398. else:
  399. return None
  400. else:
  401. if is_integer(i) and i not in self.columns:
  402. i = self.columns[i]
  403. return self.formatters.get(i, None)
  404. @contextmanager
  405. def get_buffer(
  406. self, buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None
  407. ):
  408. """
  409. Context manager to open, yield and close buffer for filenames or Path-like
  410. objects, otherwise yield buf unchanged.
  411. """
  412. if buf is not None:
  413. buf = stringify_path(buf)
  414. else:
  415. buf = StringIO()
  416. if encoding is None:
  417. encoding = "utf-8"
  418. elif not isinstance(buf, str):
  419. raise ValueError("buf is not a file name and encoding is specified.")
  420. if hasattr(buf, "write"):
  421. yield buf
  422. elif isinstance(buf, str):
  423. with open(buf, "w", encoding=encoding, newline="") as f:
  424. # GH#30034 open instead of codecs.open prevents a file leak
  425. # if we have an invalid encoding argument.
  426. # newline="" is needed to roundtrip correctly on
  427. # windows test_to_latex_filename
  428. yield f
  429. else:
  430. raise TypeError("buf is not a file name and it has no write method")
  431. def write_result(self, buf: IO[str]) -> None:
  432. """
  433. Write the result of serialization to buf.
  434. """
  435. raise AbstractMethodError(self)
  436. def get_result(
  437. self,
  438. buf: Optional[FilePathOrBuffer[str]] = None,
  439. encoding: Optional[str] = None,
  440. ) -> Optional[str]:
  441. """
  442. Perform serialization. Write to buf or return as string if buf is None.
  443. """
  444. with self.get_buffer(buf, encoding=encoding) as f:
  445. self.write_result(buf=f)
  446. if buf is None:
  447. return f.getvalue()
  448. return None
  449. class DataFrameFormatter(TableFormatter):
  450. """
  451. Render a DataFrame
  452. self.to_string() : console-friendly tabular output
  453. self.to_html() : html table
  454. self.to_latex() : LaTeX tabular environment table
  455. """
  456. __doc__ = __doc__ if __doc__ else ""
  457. __doc__ += common_docstring + return_docstring
  458. def __init__(
  459. self,
  460. frame: "DataFrame",
  461. columns: Optional[Sequence[str]] = None,
  462. col_space: Optional[Union[str, int]] = None,
  463. header: Union[bool, Sequence[str]] = True,
  464. index: bool = True,
  465. na_rep: str = "NaN",
  466. formatters: Optional[formatters_type] = None,
  467. justify: Optional[str] = None,
  468. float_format: Optional[float_format_type] = None,
  469. sparsify: Optional[bool] = None,
  470. index_names: bool = True,
  471. line_width: Optional[int] = None,
  472. max_rows: Optional[int] = None,
  473. min_rows: Optional[int] = None,
  474. max_cols: Optional[int] = None,
  475. show_dimensions: Union[bool, str] = False,
  476. decimal: str = ".",
  477. table_id: Optional[str] = None,
  478. render_links: bool = False,
  479. bold_rows: bool = False,
  480. escape: bool = True,
  481. ):
  482. self.frame = frame
  483. self.show_index_names = index_names
  484. if sparsify is None:
  485. sparsify = get_option("display.multi_sparse")
  486. self.sparsify = sparsify
  487. self.float_format = float_format
  488. if formatters is None:
  489. self.formatters = {}
  490. elif len(frame.columns) == len(formatters) or isinstance(formatters, dict):
  491. self.formatters = formatters
  492. else:
  493. raise ValueError(
  494. (
  495. "Formatters length({flen}) should match "
  496. "DataFrame number of columns({dlen})"
  497. ).format(flen=len(formatters), dlen=len(frame.columns))
  498. )
  499. self.na_rep = na_rep
  500. self.decimal = decimal
  501. self.col_space = col_space
  502. self.header = header
  503. self.index = index
  504. self.line_width = line_width
  505. self.max_rows = max_rows
  506. self.min_rows = min_rows
  507. self.max_cols = max_cols
  508. self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame))
  509. self.show_dimensions = show_dimensions
  510. self.table_id = table_id
  511. self.render_links = render_links
  512. if justify is None:
  513. self.justify = get_option("display.colheader_justify")
  514. else:
  515. self.justify = justify
  516. self.bold_rows = bold_rows
  517. self.escape = escape
  518. if columns is not None:
  519. self.columns = ensure_index(columns)
  520. self.frame = self.frame[self.columns]
  521. else:
  522. self.columns = frame.columns
  523. self._chk_truncate()
  524. self.adj = _get_adjustment()
  525. def _chk_truncate(self) -> None:
  526. """
  527. Checks whether the frame should be truncated. If so, slices
  528. the frame up.
  529. """
  530. from pandas.core.reshape.concat import concat
  531. # Cut the data to the information actually printed
  532. max_cols = self.max_cols
  533. max_rows = self.max_rows
  534. self.max_rows_adj: Optional[int]
  535. max_rows_adj: Optional[int]
  536. if max_cols == 0 or max_rows == 0: # assume we are in the terminal
  537. (w, h) = get_terminal_size()
  538. self.w = w
  539. self.h = h
  540. if self.max_rows == 0:
  541. dot_row = 1
  542. prompt_row = 1
  543. if self.show_dimensions:
  544. show_dimension_rows = 3
  545. # assume we only get here if self.header is boolean.
  546. # i.e. not to_latex() where self.header may be List[str]
  547. self.header = cast(bool, self.header)
  548. n_add_rows = self.header + dot_row + show_dimension_rows + prompt_row
  549. # rows available to fill with actual data
  550. max_rows_adj = self.h - n_add_rows
  551. self.max_rows_adj = max_rows_adj
  552. # Format only rows and columns that could potentially fit the
  553. # screen
  554. if max_cols == 0 and len(self.frame.columns) > w:
  555. max_cols = w
  556. if max_rows == 0 and len(self.frame) > h:
  557. max_rows = h
  558. if not hasattr(self, "max_rows_adj"):
  559. if max_rows:
  560. if (len(self.frame) > max_rows) and self.min_rows:
  561. # if truncated, set max_rows showed to min_rows
  562. max_rows = min(self.min_rows, max_rows)
  563. self.max_rows_adj = max_rows
  564. if not hasattr(self, "max_cols_adj"):
  565. self.max_cols_adj = max_cols
  566. max_cols_adj = self.max_cols_adj
  567. max_rows_adj = self.max_rows_adj
  568. truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj)
  569. truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj)
  570. frame = self.frame
  571. if truncate_h:
  572. # cast here since if truncate_h is True, max_cols_adj is not None
  573. max_cols_adj = cast(int, max_cols_adj)
  574. if max_cols_adj == 0:
  575. col_num = len(frame.columns)
  576. elif max_cols_adj == 1:
  577. max_cols = cast(int, max_cols)
  578. frame = frame.iloc[:, :max_cols]
  579. col_num = max_cols
  580. else:
  581. col_num = max_cols_adj // 2
  582. frame = concat(
  583. (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1
  584. )
  585. # truncate formatter
  586. if isinstance(self.formatters, (list, tuple)):
  587. truncate_fmt = self.formatters
  588. self.formatters = [
  589. *truncate_fmt[:col_num],
  590. *truncate_fmt[-col_num:],
  591. ]
  592. self.tr_col_num = col_num
  593. if truncate_v:
  594. # cast here since if truncate_v is True, max_rows_adj is not None
  595. max_rows_adj = cast(int, max_rows_adj)
  596. if max_rows_adj == 1:
  597. row_num = max_rows
  598. frame = frame.iloc[:max_rows, :]
  599. else:
  600. row_num = max_rows_adj // 2
  601. frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :]))
  602. self.tr_row_num = row_num
  603. else:
  604. self.tr_row_num = None
  605. self.tr_frame = frame
  606. self.truncate_h = truncate_h
  607. self.truncate_v = truncate_v
  608. self.is_truncated = bool(self.truncate_h or self.truncate_v)
  609. def _to_str_columns(self) -> List[List[str]]:
  610. """
  611. Render a DataFrame to a list of columns (as lists of strings).
  612. """
  613. # this method is not used by to_html where self.col_space
  614. # could be a string so safe to cast
  615. self.col_space = cast(int, self.col_space)
  616. frame = self.tr_frame
  617. # may include levels names also
  618. str_index = self._get_formatted_index(frame)
  619. if not is_list_like(self.header) and not self.header:
  620. stringified = []
  621. for i, c in enumerate(frame):
  622. fmt_values = self._format_col(i)
  623. fmt_values = _make_fixed_width(
  624. fmt_values,
  625. self.justify,
  626. minimum=(self.col_space or 0),
  627. adj=self.adj,
  628. )
  629. stringified.append(fmt_values)
  630. else:
  631. if is_list_like(self.header):
  632. # cast here since can't be bool if is_list_like
  633. self.header = cast(List[str], self.header)
  634. if len(self.header) != len(self.columns):
  635. raise ValueError(
  636. (
  637. "Writing {ncols} cols but got {nalias} "
  638. "aliases".format(
  639. ncols=len(self.columns), nalias=len(self.header)
  640. )
  641. )
  642. )
  643. str_columns = [[label] for label in self.header]
  644. else:
  645. str_columns = self._get_formatted_column_labels(frame)
  646. if self.show_row_idx_names:
  647. for x in str_columns:
  648. x.append("")
  649. stringified = []
  650. for i, c in enumerate(frame):
  651. cheader = str_columns[i]
  652. header_colwidth = max(
  653. self.col_space or 0, *(self.adj.len(x) for x in cheader)
  654. )
  655. fmt_values = self._format_col(i)
  656. fmt_values = _make_fixed_width(
  657. fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
  658. )
  659. max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
  660. cheader = self.adj.justify(cheader, max_len, mode=self.justify)
  661. stringified.append(cheader + fmt_values)
  662. strcols = stringified
  663. if self.index:
  664. strcols.insert(0, str_index)
  665. # Add ... to signal truncated
  666. truncate_h = self.truncate_h
  667. truncate_v = self.truncate_v
  668. if truncate_h:
  669. col_num = self.tr_col_num
  670. strcols.insert(self.tr_col_num + 1, [" ..."] * (len(str_index)))
  671. if truncate_v:
  672. n_header_rows = len(str_index) - len(frame)
  673. row_num = self.tr_row_num
  674. # cast here since if truncate_v is True, self.tr_row_num is not None
  675. row_num = cast(int, row_num)
  676. for ix, col in enumerate(strcols):
  677. # infer from above row
  678. cwidth = self.adj.len(strcols[ix][row_num])
  679. is_dot_col = False
  680. if truncate_h:
  681. is_dot_col = ix == col_num + 1
  682. if cwidth > 3 or is_dot_col:
  683. my_str = "..."
  684. else:
  685. my_str = ".."
  686. if ix == 0:
  687. dot_mode = "left"
  688. elif is_dot_col:
  689. cwidth = 4
  690. dot_mode = "right"
  691. else:
  692. dot_mode = "right"
  693. dot_str = self.adj.justify([my_str], cwidth, mode=dot_mode)[0]
  694. strcols[ix].insert(row_num + n_header_rows, dot_str)
  695. return strcols
  696. def write_result(self, buf: IO[str]) -> None:
  697. """
  698. Render a DataFrame to a console-friendly tabular output.
  699. """
  700. from pandas import Series
  701. frame = self.frame
  702. if len(frame.columns) == 0 or len(frame.index) == 0:
  703. info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format(
  704. name=type(self.frame).__name__,
  705. col=pprint_thing(frame.columns),
  706. idx=pprint_thing(frame.index),
  707. )
  708. text = info_line
  709. else:
  710. strcols = self._to_str_columns()
  711. if self.line_width is None: # no need to wrap around just print
  712. # the whole frame
  713. text = self.adj.adjoin(1, *strcols)
  714. elif (
  715. not isinstance(self.max_cols, int) or self.max_cols > 0
  716. ): # need to wrap around
  717. text = self._join_multiline(*strcols)
  718. else: # max_cols == 0. Try to fit frame to terminal
  719. lines = self.adj.adjoin(1, *strcols).split("\n")
  720. max_len = Series(lines).str.len().max()
  721. # plus truncate dot col
  722. dif = max_len - self.w
  723. # '+ 1' to avoid too wide repr (GH PR #17023)
  724. adj_dif = dif + 1
  725. col_lens = Series([Series(ele).apply(len).max() for ele in strcols])
  726. n_cols = len(col_lens)
  727. counter = 0
  728. while adj_dif > 0 and n_cols > 1:
  729. counter += 1
  730. mid = int(round(n_cols / 2.0))
  731. mid_ix = col_lens.index[mid]
  732. col_len = col_lens[mid_ix]
  733. # adjoin adds one
  734. adj_dif -= col_len + 1
  735. col_lens = col_lens.drop(mid_ix)
  736. n_cols = len(col_lens)
  737. # subtract index column
  738. max_cols_adj = n_cols - self.index
  739. # GH-21180. Ensure that we print at least two.
  740. max_cols_adj = max(max_cols_adj, 2)
  741. self.max_cols_adj = max_cols_adj
  742. # Call again _chk_truncate to cut frame appropriately
  743. # and then generate string representation
  744. self._chk_truncate()
  745. strcols = self._to_str_columns()
  746. text = self.adj.adjoin(1, *strcols)
  747. buf.writelines(text)
  748. if self.should_show_dimensions:
  749. buf.write(
  750. "\n\n[{nrows} rows x {ncols} columns]".format(
  751. nrows=len(frame), ncols=len(frame.columns)
  752. )
  753. )
  754. def _join_multiline(self, *args) -> str:
  755. lwidth = self.line_width
  756. adjoin_width = 1
  757. strcols = list(args)
  758. if self.index:
  759. idx = strcols.pop(0)
  760. lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width
  761. col_widths = [
  762. np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
  763. for col in strcols
  764. ]
  765. assert lwidth is not None
  766. col_bins = _binify(col_widths, lwidth)
  767. nbins = len(col_bins)
  768. if self.truncate_v:
  769. # cast here since if truncate_v is True, max_rows_adj is not None
  770. self.max_rows_adj = cast(int, self.max_rows_adj)
  771. nrows = self.max_rows_adj + 1
  772. else:
  773. nrows = len(self.frame)
  774. str_lst = []
  775. st = 0
  776. for i, ed in enumerate(col_bins):
  777. row = strcols[st:ed]
  778. if self.index:
  779. row.insert(0, idx)
  780. if nbins > 1:
  781. if ed <= len(strcols) and i < nbins - 1:
  782. row.append([" \\"] + [" "] * (nrows - 1))
  783. else:
  784. row.append([" "] * nrows)
  785. str_lst.append(self.adj.adjoin(adjoin_width, *row))
  786. st = ed
  787. return "\n\n".join(str_lst)
  788. def to_string(
  789. self,
  790. buf: Optional[FilePathOrBuffer[str]] = None,
  791. encoding: Optional[str] = None,
  792. ) -> Optional[str]:
  793. return self.get_result(buf=buf, encoding=encoding)
  794. def to_latex(
  795. self,
  796. buf: Optional[FilePathOrBuffer[str]] = None,
  797. column_format: Optional[str] = None,
  798. longtable: bool = False,
  799. encoding: Optional[str] = None,
  800. multicolumn: bool = False,
  801. multicolumn_format: Optional[str] = None,
  802. multirow: bool = False,
  803. caption: Optional[str] = None,
  804. label: Optional[str] = None,
  805. ) -> Optional[str]:
  806. """
  807. Render a DataFrame to a LaTeX tabular/longtable environment output.
  808. """
  809. from pandas.io.formats.latex import LatexFormatter
  810. return LatexFormatter(
  811. self,
  812. column_format=column_format,
  813. longtable=longtable,
  814. multicolumn=multicolumn,
  815. multicolumn_format=multicolumn_format,
  816. multirow=multirow,
  817. caption=caption,
  818. label=label,
  819. ).get_result(buf=buf, encoding=encoding)
  820. def _format_col(self, i: int) -> List[str]:
  821. frame = self.tr_frame
  822. formatter = self._get_formatter(i)
  823. return format_array(
  824. frame.iloc[:, i]._values,
  825. formatter,
  826. float_format=self.float_format,
  827. na_rep=self.na_rep,
  828. space=self.col_space,
  829. decimal=self.decimal,
  830. )
  831. def to_html(
  832. self,
  833. buf: Optional[FilePathOrBuffer[str]] = None,
  834. encoding: Optional[str] = None,
  835. classes: Optional[Union[str, List, Tuple]] = None,
  836. notebook: bool = False,
  837. border: Optional[int] = None,
  838. ) -> Optional[str]:
  839. """
  840. Render a DataFrame to a html table.
  841. Parameters
  842. ----------
  843. classes : str or list-like
  844. classes to include in the `class` attribute of the opening
  845. ``<table>`` tag, in addition to the default "dataframe".
  846. notebook : {True, False}, optional, default False
  847. Whether the generated HTML is for IPython Notebook.
  848. border : int
  849. A ``border=border`` attribute is included in the opening
  850. ``<table>`` tag. Default ``pd.options.display.html.border``.
  851. """
  852. from pandas.io.formats.html import HTMLFormatter, NotebookFormatter
  853. Klass = NotebookFormatter if notebook else HTMLFormatter
  854. return Klass(self, classes=classes, border=border).get_result(
  855. buf=buf, encoding=encoding
  856. )
def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]:
    """
    Build the header strings for every column of ``frame``.

    Returns one list per column. For a MultiIndex the inner list is built
    by transposing the per-level labels back to per-column order; for a
    flat index it holds a single string. A label gets a leading space when
    the column has no formatter and its dtype (taken from ``self.frame``)
    is numeric.
    """
    from pandas.core.indexes.multi import _sparsify

    columns = frame.columns

    if isinstance(columns, ABCMultiIndex):
        # presumably one sequence of labels per level -- TODO confirm
        fmt_columns = columns.format(sparsify=False, adjoin=False)
        # Transpose so that each entry is the tuple of labels of one column.
        fmt_columns = list(zip(*fmt_columns))
        dtypes = self.frame.dtypes._values

        # if we have a Float level, they don't use leading space at all
        # NOTE(review): ``is_floating`` is referenced without being called;
        # if it is a method rather than a property, every element is a
        # truthy bound method and this is True whenever there is at least
        # one level -- confirm the intent before changing it.
        restrict_formatting = any(l.is_floating for l in columns.levels)
        # Keyed by the formatted label tuple, so duplicate labels share one
        # entry (the last one wins).
        need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

        def space_format(x, y):
            # x: label tuple of the column, y: one label of that tuple.
            if (
                y not in self.formatters
                and need_leadsp[x]
                and not restrict_formatting
            ):
                return " " + y
            return y

        # Apply the spacing rule, then transpose back to one tuple per level.
        str_columns = list(
            zip(*[[space_format(x, y) for y in x] for x in fmt_columns])
        )
        if self.sparsify and len(str_columns):
            str_columns = _sparsify(str_columns)

        # Transpose once more: one list of labels per column.
        str_columns = [list(x) for x in zip(*str_columns)]
    else:
        fmt_columns = columns.format()
        dtypes = self.frame.dtypes
        # Keyed by formatted label; duplicate labels share one entry.
        need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
        str_columns = [
            [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
            for i, (col, x) in enumerate(zip(columns, fmt_columns))
        ]
    # self.str_columns = str_columns
    return str_columns
  891. @property
  892. def has_index_names(self) -> bool:
  893. return _has_names(self.frame.index)
  894. @property
  895. def has_column_names(self) -> bool:
  896. return _has_names(self.frame.columns)
  897. @property
  898. def show_row_idx_names(self) -> bool:
  899. return all((self.has_index_names, self.index, self.show_index_names))
  900. @property
  901. def show_col_idx_names(self) -> bool:
  902. return all((self.has_column_names, self.show_index_names, self.header))
  903. def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
  904. # Note: this is only used by to_string() and to_latex(), not by
  905. # to_html(). so safe to cast col_space here.
  906. self.col_space = cast(int, self.col_space)
  907. index = frame.index
  908. columns = frame.columns
  909. fmt = self._get_formatter("__index__")
  910. if isinstance(index, ABCMultiIndex):
  911. fmt_index = index.format(
  912. sparsify=self.sparsify,
  913. adjoin=False,
  914. names=self.show_row_idx_names,
  915. formatter=fmt,
  916. )
  917. else:
  918. fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]
  919. fmt_index = [
  920. tuple(
  921. _make_fixed_width(
  922. list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj
  923. )
  924. )
  925. for x in fmt_index
  926. ]
  927. adjoined = self.adj.adjoin(1, *fmt_index).split("\n")
  928. # empty space for columns
  929. if self.show_col_idx_names:
  930. col_header = ["{x}".format(x=x) for x in self._get_column_name_list()]
  931. else:
  932. col_header = [""] * columns.nlevels
  933. if self.header:
  934. return col_header + adjoined
  935. else:
  936. return adjoined
  937. def _get_column_name_list(self) -> List[str]:
  938. names: List[str] = []
  939. columns = self.frame.columns
  940. if isinstance(columns, ABCMultiIndex):
  941. names.extend("" if name is None else name for name in columns.names)
  942. else:
  943. names.append("" if columns.name is None else columns.name)
  944. return names
  945. # ----------------------------------------------------------------------
  946. # Array formatters
  947. def format_array(
  948. values: Any,
  949. formatter: Optional[Callable],
  950. float_format: Optional[float_format_type] = None,
  951. na_rep: str = "NaN",
  952. digits: Optional[int] = None,
  953. space: Optional[Union[str, int]] = None,
  954. justify: str = "right",
  955. decimal: str = ".",
  956. leading_space: Optional[bool] = None,
  957. ) -> List[str]:
  958. """
  959. Format an array for printing.
  960. Parameters
  961. ----------
  962. values
  963. formatter
  964. float_format
  965. na_rep
  966. digits
  967. space
  968. justify
  969. decimal
  970. leading_space : bool, optional
  971. Whether the array should be formatted with a leading space.
  972. When an array as a column of a Series or DataFrame, we do want
  973. the leading space to pad between columns.
  974. When formatting an Index subclass
  975. (e.g. IntervalIndex._format_native_types), we don't want the
  976. leading space since it should be left-aligned.
  977. Returns
  978. -------
  979. List[str]
  980. """
  981. fmt_klass: Type[GenericArrayFormatter]
  982. if is_datetime64_dtype(values.dtype):
  983. fmt_klass = Datetime64Formatter
  984. elif is_datetime64tz_dtype(values):
  985. fmt_klass = Datetime64TZFormatter
  986. elif is_timedelta64_dtype(values.dtype):
  987. fmt_klass = Timedelta64Formatter
  988. elif is_extension_array_dtype(values.dtype):
  989. fmt_klass = ExtensionArrayFormatter
  990. elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype):
  991. fmt_klass = FloatArrayFormatter
  992. elif is_integer_dtype(values.dtype):
  993. fmt_klass = IntArrayFormatter
  994. else:
  995. fmt_klass = GenericArrayFormatter
  996. if space is None:
  997. space = get_option("display.column_space")
  998. if float_format is None:
  999. float_format = get_option("display.float_format")
  1000. if digits is None:
  1001. digits = get_option("display.precision")
  1002. fmt_obj = fmt_klass(
  1003. values,
  1004. digits=digits,
  1005. na_rep=na_rep,
  1006. float_format=float_format,
  1007. formatter=formatter,
  1008. space=space,
  1009. justify=justify,
  1010. decimal=decimal,
  1011. leading_space=leading_space,
  1012. )
  1013. return fmt_obj.get_result()
class GenericArrayFormatter:
    """
    Base formatter that turns an array of values into a list of strings.

    ``get_result`` is the entry point; it calls ``_format_strings`` (which
    subclasses override) and justifies the result to a common width.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaN",
        space: Union[str, int] = 12,
        float_format: Optional[float_format_type] = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: Optional[int] = None,
        fixed_width: bool = True,
        leading_space: Optional[bool] = None,
    ) -> None:
        # All arguments are stored unchanged; they are interpreted by
        # ``_format_strings`` of this class or of a subclass.
        self.values = values
        self.digits = digits
        self.na_rep = na_rep
        self.space = space
        self.formatter = formatter
        self.float_format = float_format
        self.justify = justify
        self.decimal = decimal
        self.quoting = quoting
        self.fixed_width = fixed_width
        self.leading_space = leading_space

    def get_result(self) -> List[str]:
        """Format the values and justify them with ``_make_fixed_width``."""
        fmt_values = self._format_strings()
        return _make_fixed_width(fmt_values, self.justify)

    def _format_strings(self) -> List[str]:
        """
        Format every value individually and return the strings in order.

        Non-missing floats go through the float format; everything else
        goes through ``self.formatter`` (or ``pprint_thing``), with missing
        scalars replaced by a NA representation.
        """
        # Resolve the float format: instance setting, then the display
        # option, then a "%g"-like default at the configured precision.
        if self.float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                fmt_str = "{{x: .{prec:d}g}}".format(
                    prec=get_option("display.precision")
                )
                float_format = lambda x: fmt_str.format(x=x)
        else:
            float_format = self.float_format

        formatter = (
            self.formatter
            if self.formatter is not None
            else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n")))
        )

        def _format(x):
            if self.na_rep is not None and is_scalar(x) and isna(x):
                try:
                    # try block for np.isnat specifically
                    # determine na_rep if x is None or NaT-like
                    if x is None:
                        return "None"
                    elif x is NA:
                        return str(NA)
                    elif x is NaT or np.isnat(x):
                        return "NaT"
                except (TypeError, ValueError):
                    # np.isnat only handles datetime or timedelta objects
                    pass
                # Any other missing scalar uses the configured placeholder.
                return self.na_rep
            elif isinstance(x, PandasObject):
                return "{x}".format(x=x)
            else:
                # object dtype
                return "{x}".format(x=formatter(x))

        # Unwrap containers to the underlying array before iterating.
        vals = self.values
        if isinstance(vals, Index):
            vals = vals._values
        elif isinstance(vals, ABCSparseArray):
            vals = vals.values

        # Mask of entries that are floats and not missing.
        is_float_type = lib.map_infer(vals, is_float) & notna(vals)
        leading_space = self.leading_space
        if leading_space is None:
            # Default: pad non-floats only when floats are present.
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            if not is_float_type[i] and leading_space:
                fmt_values.append(" {v}".format(v=_format(v)))
            elif is_float_type[i]:
                fmt_values.append(float_format(v))
            else:
                if leading_space is False:
                    # False specifically, so that the default is
                    # to include a space if we get here.
                    tpl = "{v}"
                else:
                    tpl = " {v}"
                fmt_values.append(tpl.format(v=_format(v)))

        return fmt_values
class FloatArrayFormatter(GenericArrayFormatter):
    """
    Formatter for float and complex arrays.

    Handles the float format, the decimal mark, the chop threshold and the
    fallback to exponent notation implemented in ``get_result_as_array``.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            self.fixed_width = False
            if callable(self.float_format):
                # A callable float_format is promoted to the formatter.
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: Optional[float_format_type] = None,
        threshold: Optional[Union[float, int]] = None,
    ) -> Callable:
        """
        Return a function to be applied on each value to format it.

        Parameters
        ----------
        float_format : callable, optional
            Called as ``float_format(value=v)``; defaults to
            ``self.float_format``. When falsy, ``str`` is used.
        threshold : float or int, optional
            When given, values whose absolute value does not exceed it are
            formatted as ``0.0``.
        """

        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                return float_format(value=v) if notna(v) else self.na_rep

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            def decimal_formatter(v):
                # Only the first "." is replaced by the decimal mark.
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        if self.formatter is not None:
            return np.array([self.formatter(x) for x in self.values])

        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            if self.justify == "left":
                na_rep = " " + self.na_rep
            else:
                na_rep = self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            mask = isna(values)
            if hasattr(values, "to_dense"):  # sparse numpy ndarray
                values = values.to_dense()
            # Work on an object copy: missing slots get na_rep, the rest
            # are overwritten with their formatted strings.
            values = np.array(values, dtype="object")
            values[mask] = na_rep
            imask = (~mask).ravel()
            values.flat[imask] = np.array(
                [formatter(val) for val in values.ravel()[imask]]
            )

            if self.fixed_width:
                if is_complex:
                    result = _trim_zeros_complex(values, na_rep)
                else:
                    result = _trim_zeros_float(values, na_rep)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: Optional[float_format_type]
        if self.float_format is None:
            if self.fixed_width:
                float_format = partial(
                    "{value: .{digits:d}f}".format, digits=self.digits
                )
            else:
                float_format = self.float_format
        else:
            # A string float_format is applied with the "%" operator.
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more than 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            # Second pass: reformat everything in exponent notation.
            float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> List[str]:
        """Return the formatted values as a list of strings."""
        # shortcut
        if self.formatter is not None:
            return [self.formatter(x) for x in self.values]

        return list(self.get_result_as_array())
  1232. class IntArrayFormatter(GenericArrayFormatter):
  1233. def _format_strings(self) -> List[str]:
  1234. formatter = self.formatter or (lambda x: "{x: d}".format(x=x))
  1235. fmt_values = [formatter(x) for x in self.values]
  1236. return fmt_values
  1237. class Datetime64Formatter(GenericArrayFormatter):
  1238. def __init__(
  1239. self,
  1240. values: Union[np.ndarray, "Series", DatetimeIndex, DatetimeArray],
  1241. nat_rep: str = "NaT",
  1242. date_format: None = None,
  1243. **kwargs,
  1244. ):
  1245. super().__init__(values, **kwargs)
  1246. self.nat_rep = nat_rep
  1247. self.date_format = date_format
  1248. def _format_strings(self) -> List[str]:
  1249. """ we by definition have DO NOT have a TZ """
  1250. values = self.values
  1251. if not isinstance(values, DatetimeIndex):
  1252. values = DatetimeIndex(values)
  1253. if self.formatter is not None and callable(self.formatter):
  1254. return [self.formatter(x) for x in values]
  1255. fmt_values = format_array_from_datetime(
  1256. values.asi8.ravel(),
  1257. format=_get_format_datetime64_from_values(values, self.date_format),
  1258. na_rep=self.nat_rep,
  1259. ).reshape(values.shape)
  1260. return fmt_values.tolist()
  1261. class ExtensionArrayFormatter(GenericArrayFormatter):
  1262. def _format_strings(self) -> List[str]:
  1263. values = self.values
  1264. if isinstance(values, (ABCIndexClass, ABCSeries)):
  1265. values = values._values
  1266. formatter = values._formatter(boxed=True)
  1267. if is_categorical_dtype(values.dtype):
  1268. # Categorical is special for now, so that we can preserve tzinfo
  1269. array = values._internal_get_values()
  1270. else:
  1271. array = np.asarray(values)
  1272. fmt_values = format_array(
  1273. array,
  1274. formatter,
  1275. float_format=self.float_format,
  1276. na_rep=self.na_rep,
  1277. digits=self.digits,
  1278. space=self.space,
  1279. justify=self.justify,
  1280. leading_space=self.leading_space,
  1281. )
  1282. return fmt_values
  1283. def format_percentiles(
  1284. percentiles: Union[
  1285. np.ndarray, List[Union[int, float]], List[float], List[Union[str, float]]
  1286. ]
  1287. ) -> List[str]:
  1288. """
  1289. Outputs rounded and formatted percentiles.
  1290. Parameters
  1291. ----------
  1292. percentiles : list-like, containing floats from interval [0,1]
  1293. Returns
  1294. -------
  1295. formatted : list of strings
  1296. Notes
  1297. -----
  1298. Rounding precision is chosen so that: (1) if any two elements of
  1299. ``percentiles`` differ, they remain different after rounding
  1300. (2) no entry is *rounded* to 0% or 100%.
  1301. Any non-integer is always rounded to at least 1 decimal place.
  1302. Examples
  1303. --------
  1304. Keeps all entries different after rounding:
  1305. >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
  1306. ['1.999%', '2.001%', '50%', '66.667%', '99.99%']
  1307. No element is rounded to 0% or 100% (unless already equal to it).
  1308. Duplicates are allowed:
  1309. >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
  1310. ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
  1311. """
  1312. percentiles = np.asarray(percentiles)
  1313. # It checks for np.NaN as well
  1314. with np.errstate(invalid="ignore"):
  1315. if (
  1316. not is_numeric_dtype(percentiles)
  1317. or not np.all(percentiles >= 0)
  1318. or not np.all(percentiles <= 1)
  1319. ):
  1320. raise ValueError("percentiles should all be in the interval [0,1]")
  1321. percentiles = 100 * percentiles
  1322. int_idx = np.isclose(percentiles.astype(int), percentiles)
  1323. if np.all(int_idx):
  1324. out = percentiles.astype(int).astype(str)
  1325. return [i + "%" for i in out]
  1326. unique_pcts = np.unique(percentiles)
  1327. to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
  1328. to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None
  1329. # Least precision that keeps percentiles unique after rounding
  1330. prec = -np.floor(
  1331. np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
  1332. ).astype(int)
  1333. prec = max(1, prec)
  1334. out = np.empty_like(percentiles, dtype=object)
  1335. out[int_idx] = percentiles[int_idx].astype(int).astype(str)
  1336. out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
  1337. return [i + "%" for i in out]
  1338. def _is_dates_only(
  1339. values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex]
  1340. ) -> bool:
  1341. # return a boolean if we are only dates (and don't have a timezone)
  1342. assert values.ndim == 1
  1343. values = DatetimeIndex(values)
  1344. if values.tz is not None:
  1345. return False
  1346. values_int = values.asi8
  1347. consider_values = values_int != iNaT
  1348. one_day_nanos = 86400 * 1e9
  1349. even_days = (
  1350. np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0
  1351. )
  1352. if even_days:
  1353. return True
  1354. return False
  1355. def _format_datetime64(
  1356. x: Union[NaTType, Timestamp], tz: Optional[tzinfo] = None, nat_rep: str = "NaT"
  1357. ) -> str:
  1358. if x is None or (is_scalar(x) and isna(x)):
  1359. return nat_rep
  1360. if tz is not None or not isinstance(x, Timestamp):
  1361. if getattr(x, "tzinfo", None) is not None:
  1362. x = Timestamp(x).tz_convert(tz)
  1363. else:
  1364. x = Timestamp(x).tz_localize(tz)
  1365. return str(x)
  1366. def _format_datetime64_dateonly(
  1367. x: Union[NaTType, Timestamp], nat_rep: str = "NaT", date_format: None = None
  1368. ) -> str:
  1369. if x is None or (is_scalar(x) and isna(x)):
  1370. return nat_rep
  1371. if not isinstance(x, Timestamp):
  1372. x = Timestamp(x)
  1373. if date_format:
  1374. return x.strftime(date_format)
  1375. else:
  1376. return x._date_repr
  1377. def _get_format_datetime64(
  1378. is_dates_only: bool, nat_rep: str = "NaT", date_format: None = None
  1379. ) -> Callable:
  1380. if is_dates_only:
  1381. return lambda x, tz=None: _format_datetime64_dateonly(
  1382. x, nat_rep=nat_rep, date_format=date_format
  1383. )
  1384. else:
  1385. return lambda x, tz=None: _format_datetime64(x, tz=tz, nat_rep=nat_rep)
  1386. def _get_format_datetime64_from_values(
  1387. values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str]
  1388. ) -> Optional[str]:
  1389. """ given values and a date_format, return a string format """
  1390. if isinstance(values, np.ndarray) and values.ndim > 1:
  1391. # We don't actually care about the order of values, and DatetimeIndex
  1392. # only accepts 1D values
  1393. values = values.ravel()
  1394. is_dates_only = _is_dates_only(values)
  1395. if is_dates_only:
  1396. return date_format or "%Y-%m-%d"
  1397. return date_format
  1398. class Datetime64TZFormatter(Datetime64Formatter):
  1399. def _format_strings(self) -> List[str]:
  1400. """ we by definition have a TZ """
  1401. values = self.values.astype(object)
  1402. is_dates_only = _is_dates_only(values)
  1403. formatter = self.formatter or _get_format_datetime64(
  1404. is_dates_only, date_format=self.date_format
  1405. )
  1406. fmt_values = [formatter(x) for x in values]
  1407. return fmt_values
  1408. class Timedelta64Formatter(GenericArrayFormatter):
  1409. def __init__(
  1410. self,
  1411. values: Union[np.ndarray, TimedeltaIndex],
  1412. nat_rep: str = "NaT",
  1413. box: bool = False,
  1414. **kwargs,
  1415. ):
  1416. super().__init__(values, **kwargs)
  1417. self.nat_rep = nat_rep
  1418. self.box = box
  1419. def _format_strings(self) -> List[str]:
  1420. formatter = self.formatter or _get_format_timedelta64(
  1421. self.values, nat_rep=self.nat_rep, box=self.box
  1422. )
  1423. return [formatter(x) for x in self.values]
  1424. def _get_format_timedelta64(
  1425. values: Union[np.ndarray, TimedeltaIndex, TimedeltaArray],
  1426. nat_rep: str = "NaT",
  1427. box: bool = False,
  1428. ) -> Callable:
  1429. """
  1430. Return a formatter function for a range of timedeltas.
  1431. These will all have the same format argument
  1432. If box, then show the return in quotes
  1433. """
  1434. values_int = values.astype(np.int64)
  1435. consider_values = values_int != iNaT
  1436. one_day_nanos = 86400 * 1e9
  1437. even_days = (
  1438. np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0
  1439. )
  1440. all_sub_day = (
  1441. np.logical_and(consider_values, np.abs(values_int) >= one_day_nanos).sum() == 0
  1442. )
  1443. if even_days:
  1444. format = None
  1445. elif all_sub_day:
  1446. format = "sub_day"
  1447. else:
  1448. format = "long"
  1449. def _formatter(x):
  1450. if x is None or (is_scalar(x) and isna(x)):
  1451. return nat_rep
  1452. if not isinstance(x, Timedelta):
  1453. x = Timedelta(x)
  1454. result = x._repr_base(format=format)
  1455. if box:
  1456. result = "'{res}'".format(res=result)
  1457. return result
  1458. return _formatter
  1459. def _make_fixed_width(
  1460. strings: List[str],
  1461. justify: str = "right",
  1462. minimum: Optional[int] = None,
  1463. adj: Optional[TextAdjustment] = None,
  1464. ) -> List[str]:
  1465. if len(strings) == 0 or justify == "all":
  1466. return strings
  1467. if adj is None:
  1468. adj = _get_adjustment()
  1469. max_len = max(adj.len(x) for x in strings)
  1470. if minimum is not None:
  1471. max_len = max(minimum, max_len)
  1472. conf_max = get_option("display.max_colwidth")
  1473. if conf_max is not None and max_len > conf_max:
  1474. max_len = conf_max
  1475. def just(x):
  1476. if conf_max is not None:
  1477. if (conf_max > 3) & (adj.len(x) > max_len):
  1478. x = x[: max_len - 3] + "..."
  1479. return x
  1480. strings = [just(x) for x in strings]
  1481. result = adj.justify(strings, max_len, mode=justify)
  1482. return result
  1483. def _trim_zeros_complex(str_complexes: np.ndarray, na_rep: str = "NaN") -> List[str]:
  1484. """
  1485. Separates the real and imaginary parts from the complex number, and
  1486. executes the _trim_zeros_float method on each of those.
  1487. """
  1488. return [
  1489. "".join(_trim_zeros_float(re.split(r"([j+-])", x), na_rep))
  1490. for x in str_complexes
  1491. ]
  1492. def _trim_zeros_float(
  1493. str_floats: Union[np.ndarray, List[str]], na_rep: str = "NaN"
  1494. ) -> List[str]:
  1495. """
  1496. Trims zeros, leaving just one before the decimal points if need be.
  1497. """
  1498. trimmed = str_floats
  1499. def _is_number(x):
  1500. return x != na_rep and not x.endswith("inf")
  1501. def _cond(values):
  1502. finite = [x for x in values if _is_number(x)]
  1503. return (
  1504. len(finite) > 0
  1505. and all(x.endswith("0") for x in finite)
  1506. and not (any(("e" in x) or ("E" in x) for x in finite))
  1507. )
  1508. while _cond(trimmed):
  1509. trimmed = [x[:-1] if _is_number(x) else x for x in trimmed]
  1510. # leave one 0 after the decimal points if need be.
  1511. return [x + "0" if x.endswith(".") and _is_number(x) else x for x in trimmed]
  1512. def _has_names(index: Index) -> bool:
  1513. if isinstance(index, ABCMultiIndex):
  1514. return com.any_not_none(*index.names)
  1515. else:
  1516. return index.name is not None
  1517. class EngFormatter:
  1518. """
  1519. Formats float values according to engineering format.
  1520. Based on matplotlib.ticker.EngFormatter
  1521. """
  1522. # The SI engineering prefixes
  1523. ENG_PREFIXES = {
  1524. -24: "y",
  1525. -21: "z",
  1526. -18: "a",
  1527. -15: "f",
  1528. -12: "p",
  1529. -9: "n",
  1530. -6: "u",
  1531. -3: "m",
  1532. 0: "",
  1533. 3: "k",
  1534. 6: "M",
  1535. 9: "G",
  1536. 12: "T",
  1537. 15: "P",
  1538. 18: "E",
  1539. 21: "Z",
  1540. 24: "Y",
  1541. }
  1542. def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False):
  1543. self.accuracy = accuracy
  1544. self.use_eng_prefix = use_eng_prefix
  1545. def __call__(self, num: Union[int, float]) -> str:
  1546. """ Formats a number in engineering notation, appending a letter
  1547. representing the power of 1000 of the original number. Some examples:
  1548. >>> format_eng(0) # for self.accuracy = 0
  1549. ' 0'
  1550. >>> format_eng(1000000) # for self.accuracy = 1,
  1551. # self.use_eng_prefix = True
  1552. ' 1.0M'
  1553. >>> format_eng("-1e-6") # for self.accuracy = 2
  1554. # self.use_eng_prefix = False
  1555. '-1.00E-06'
  1556. @param num: the value to represent
  1557. @type num: either a numeric value or a string that can be converted to
  1558. a numeric value (as per decimal.Decimal constructor)
  1559. @return: engineering formatted string
  1560. """
  1561. dnum = decimal.Decimal(str(num))
  1562. if decimal.Decimal.is_nan(dnum):
  1563. return "NaN"
  1564. if decimal.Decimal.is_infinite(dnum):
  1565. return "inf"
  1566. sign = 1
  1567. if dnum < 0: # pragma: no cover
  1568. sign = -1
  1569. dnum = -dnum
  1570. if dnum != 0:
  1571. pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
  1572. else:
  1573. pow10 = decimal.Decimal(0)
  1574. pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
  1575. pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
  1576. int_pow10 = int(pow10)
  1577. if self.use_eng_prefix:
  1578. prefix = self.ENG_PREFIXES[int_pow10]
  1579. else:
  1580. if int_pow10 < 0:
  1581. prefix = "E-{pow10:02d}".format(pow10=-int_pow10)
  1582. else:
  1583. prefix = "E+{pow10:02d}".format(pow10=int_pow10)
  1584. mant = sign * dnum / (10 ** pow10)
  1585. if self.accuracy is None: # pragma: no cover
  1586. format_str = "{mant: g}{prefix}"
  1587. else:
  1588. format_str = "{{mant: .{acc:d}f}}{{prefix}}".format(acc=self.accuracy)
  1589. formatted = format_str.format(mant=mant, prefix=prefix)
  1590. return formatted
  1591. def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
  1592. """
  1593. Alter default behavior on how float is formatted in DataFrame.
  1594. Format float in engineering format. By accuracy, we mean the number of
  1595. decimal digits after the floating point.
  1596. See also EngFormatter.
  1597. """
  1598. set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix))
  1599. set_option("display.column_space", max(12, accuracy + 9))
  1600. def _binify(cols: List[int], line_width: int) -> List[int]:
  1601. adjoin_width = 1
  1602. bins = []
  1603. curr_width = 0
  1604. i_last_column = len(cols) - 1
  1605. for i, w in enumerate(cols):
  1606. w_adjoined = w + adjoin_width
  1607. curr_width += w_adjoined
  1608. if i_last_column == i:
  1609. wrap = curr_width + 1 > line_width and i > 0
  1610. else:
  1611. wrap = curr_width + 2 > line_width and i > 0
  1612. if wrap:
  1613. bins.append(i)
  1614. curr_width = w_adjoined
  1615. bins.append(len(cols))
  1616. return bins
  1617. def get_level_lengths(
  1618. levels: Any, sentinel: Union[bool, object, str] = ""
  1619. ) -> List[Dict[int, int]]:
  1620. """For each index in each level the function returns lengths of indexes.
  1621. Parameters
  1622. ----------
  1623. levels : list of lists
  1624. List of values on for level.
  1625. sentinel : string, optional
  1626. Value which states that no new index starts on there.
  1627. Returns
  1628. -------
  1629. Returns list of maps. For each level returns map of indexes (key is index
  1630. in row and value is length of index).
  1631. """
  1632. if len(levels) == 0:
  1633. return []
  1634. control = [True] * len(levels[0])
  1635. result = []
  1636. for level in levels:
  1637. last_index = 0
  1638. lengths = {}
  1639. for i, key in enumerate(level):
  1640. if control[i] and key == sentinel:
  1641. pass
  1642. else:
  1643. control[i] = False
  1644. lengths[last_index] = i - last_index
  1645. last_index = i
  1646. lengths[last_index] = len(level) - last_index
  1647. result.append(lengths)
  1648. return result
  1649. def buffer_put_lines(buf: IO[str], lines: List[str]) -> None:
  1650. """
  1651. Appends lines to a buffer.
  1652. Parameters
  1653. ----------
  1654. buf
  1655. The buffer to write to
  1656. lines
  1657. The lines to append.
  1658. """
  1659. if any(isinstance(x, str) for x in lines):
  1660. lines = [str(x) for x in lines]
  1661. buf.write("\n".join(lines))