printing.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. """
  2. Printing tools.
  3. """
  4. import sys
  5. from typing import (
  6. Any,
  7. Callable,
  8. Iterable,
  9. List,
  10. Mapping,
  11. Optional,
  12. Sequence,
  13. Tuple,
  14. Union,
  15. )
  16. from pandas._config import get_option
  17. from pandas.core.dtypes.inference import is_sequence
  18. EscapeChars = Union[Mapping[str, str], Iterable[str]]
  19. def adjoin(space: int, *lists: List[str], **kwargs) -> str:
  20. """
  21. Glues together two sets of strings using the amount of space requested.
  22. The idea is to prettify.
  23. ----------
  24. space : int
  25. number of spaces for padding
  26. lists : str
  27. list of str which being joined
  28. strlen : callable
  29. function used to calculate the length of each str. Needed for unicode
  30. handling.
  31. justfunc : callable
  32. function used to justify str. Needed for unicode handling.
  33. """
  34. strlen = kwargs.pop("strlen", len)
  35. justfunc = kwargs.pop("justfunc", justify)
  36. out_lines = []
  37. newLists = []
  38. lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
  39. # not the last one
  40. lengths.append(max(map(len, lists[-1])))
  41. maxLen = max(map(len, lists))
  42. for i, lst in enumerate(lists):
  43. nl = justfunc(lst, lengths[i], mode="left")
  44. nl.extend([" " * lengths[i]] * (maxLen - len(lst)))
  45. newLists.append(nl)
  46. toJoin = zip(*newLists)
  47. for lines in toJoin:
  48. out_lines.append("".join(lines))
  49. return "\n".join(out_lines)
  50. def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> List[str]:
  51. """
  52. Perform ljust, center, rjust against string or list-like
  53. """
  54. if mode == "left":
  55. return [x.ljust(max_len) for x in texts]
  56. elif mode == "center":
  57. return [x.center(max_len) for x in texts]
  58. else:
  59. return [x.rjust(max_len) for x in texts]
  60. # Unicode consolidation
  61. # ---------------------
  62. #
  63. # pprinting utility functions for generating Unicode text or
  64. # bytes(3.x)/str(2.x) representations of objects.
  65. # Try to use these as much as possible rather than rolling your own.
  66. #
  67. # When to use
  68. # -----------
  69. #
  70. # 1) If you're writing code internal to pandas (no I/O directly involved),
  71. # use pprint_thing().
  72. #
  73. # It will always return unicode text which can be handled by other
  74. # parts of the package without breakage.
  75. #
  76. # 2) if you need to write something out to file, use
  77. # pprint_thing_encoded(encoding).
  78. #
  79. # If no encoding is specified, it defaults to utf-8. Since encoding pure
  80. # ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
  81. # working with straight ascii.
  82. def _pprint_seq(
  83. seq: Sequence, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds
  84. ) -> str:
  85. """
  86. internal. pprinter for iterables. you should probably use pprint_thing()
  87. rather then calling this directly.
  88. bounds length of printed sequence, depending on options
  89. """
  90. if isinstance(seq, set):
  91. fmt = "{{{body}}}"
  92. else:
  93. fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
  94. if max_seq_items is False:
  95. nitems = len(seq)
  96. else:
  97. nitems = max_seq_items or get_option("max_seq_items") or len(seq)
  98. s = iter(seq)
  99. # handle sets, no slicing
  100. r = [
  101. pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
  102. for i in range(min(nitems, len(seq)))
  103. ]
  104. body = ", ".join(r)
  105. if nitems < len(seq):
  106. body += ", ..."
  107. elif isinstance(seq, tuple) and len(seq) == 1:
  108. body += ","
  109. return fmt.format(body=body)
  110. def _pprint_dict(
  111. seq: Mapping, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds
  112. ) -> str:
  113. """
  114. internal. pprinter for iterables. you should probably use pprint_thing()
  115. rather then calling this directly.
  116. """
  117. fmt = "{{{things}}}"
  118. pairs = []
  119. pfmt = "{key}: {val}"
  120. if max_seq_items is False:
  121. nitems = len(seq)
  122. else:
  123. nitems = max_seq_items or get_option("max_seq_items") or len(seq)
  124. for k, v in list(seq.items())[:nitems]:
  125. pairs.append(
  126. pfmt.format(
  127. key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
  128. val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
  129. )
  130. )
  131. if nitems < len(seq):
  132. return fmt.format(things=", ".join(pairs) + ", ...")
  133. else:
  134. return fmt.format(things=", ".join(pairs))
  135. def pprint_thing(
  136. thing: Any,
  137. _nest_lvl: int = 0,
  138. escape_chars: Optional[EscapeChars] = None,
  139. default_escapes: bool = False,
  140. quote_strings: bool = False,
  141. max_seq_items: Optional[int] = None,
  142. ) -> str:
  143. """
  144. This function is the sanctioned way of converting objects
  145. to a string representation and properly handles nested sequences.
  146. Parameters
  147. ----------
  148. thing : anything to be formatted
  149. _nest_lvl : internal use only. pprint_thing() is mutually-recursive
  150. with pprint_sequence, this argument is used to keep track of the
  151. current nesting level, and limit it.
  152. escape_chars : list or dict, optional
  153. Characters to escape. If a dict is passed the values are the
  154. replacements
  155. default_escapes : bool, default False
  156. Whether the input escape characters replaces or adds to the defaults
  157. max_seq_items : int or None, default None
  158. Pass through to other pretty printers to limit sequence printing
  159. Returns
  160. -------
  161. str
  162. """
  163. def as_escaped_string(
  164. thing: Any, escape_chars: Optional[EscapeChars] = escape_chars
  165. ) -> str:
  166. translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
  167. if isinstance(escape_chars, dict):
  168. if default_escapes:
  169. translate.update(escape_chars)
  170. else:
  171. translate = escape_chars
  172. escape_chars = list(escape_chars.keys())
  173. else:
  174. escape_chars = escape_chars or tuple()
  175. result = str(thing)
  176. for c in escape_chars:
  177. result = result.replace(c, translate[c])
  178. return result
  179. if hasattr(thing, "__next__"):
  180. return str(thing)
  181. elif isinstance(thing, dict) and _nest_lvl < get_option(
  182. "display.pprint_nest_depth"
  183. ):
  184. result = _pprint_dict(
  185. thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
  186. )
  187. elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
  188. result = _pprint_seq(
  189. thing,
  190. _nest_lvl,
  191. escape_chars=escape_chars,
  192. quote_strings=quote_strings,
  193. max_seq_items=max_seq_items,
  194. )
  195. elif isinstance(thing, str) and quote_strings:
  196. result = "'{thing}'".format(thing=as_escaped_string(thing))
  197. else:
  198. result = as_escaped_string(thing)
  199. return result
  200. def pprint_thing_encoded(
  201. object, encoding: str = "utf-8", errors: str = "replace"
  202. ) -> bytes:
  203. value = pprint_thing(object) # get unicode representation of object
  204. return value.encode(encoding, errors)
  205. def _enable_data_resource_formatter(enable: bool) -> None:
  206. if "IPython" not in sys.modules:
  207. # definitely not in IPython
  208. return
  209. from IPython import get_ipython
  210. ip = get_ipython()
  211. if ip is None:
  212. # still not in IPython
  213. return
  214. formatters = ip.display_formatter.formatters
  215. mimetype = "application/vnd.dataresource+json"
  216. if enable:
  217. if mimetype not in formatters:
  218. # define tableschema formatter
  219. from IPython.core.formatters import BaseFormatter
  220. class TableSchemaFormatter(BaseFormatter):
  221. print_method = "_repr_data_resource_"
  222. _return_type = (dict,)
  223. # register it:
  224. formatters[mimetype] = TableSchemaFormatter()
  225. # enable it if it's been disabled:
  226. formatters[mimetype].enabled = True
  227. else:
  228. # unregister tableschema mime-type
  229. if mimetype in formatters:
  230. formatters[mimetype].enabled = False
  231. default_pprint = lambda x, max_seq_items=None: pprint_thing(
  232. x, escape_chars=("\t", "\r", "\n"), quote_strings=True, max_seq_items=max_seq_items
  233. )
  234. def format_object_summary(
  235. obj,
  236. formatter: Callable,
  237. is_justify: bool = True,
  238. name: Optional[str] = None,
  239. indent_for_name: bool = True,
  240. line_break_each_value: bool = False,
  241. ) -> str:
  242. """
  243. Return the formatted obj as a unicode string
  244. Parameters
  245. ----------
  246. obj : object
  247. must be iterable and support __getitem__
  248. formatter : callable
  249. string formatter for an element
  250. is_justify : boolean
  251. should justify the display
  252. name : name, optional
  253. defaults to the class name of the obj
  254. indent_for_name : bool, default True
  255. Whether subsequent lines should be be indented to
  256. align with the name.
  257. line_break_each_value : bool, default False
  258. If True, inserts a line break for each value of ``obj``.
  259. If False, only break lines when the a line of values gets wider
  260. than the display width.
  261. .. versionadded:: 0.25.0
  262. Returns
  263. -------
  264. summary string
  265. """
  266. from pandas.io.formats.console import get_console_size
  267. from pandas.io.formats.format import _get_adjustment
  268. display_width, _ = get_console_size()
  269. if display_width is None:
  270. display_width = get_option("display.width") or 80
  271. if name is None:
  272. name = type(obj).__name__
  273. if indent_for_name:
  274. name_len = len(name)
  275. space1 = f'\n{(" " * (name_len + 1))}'
  276. space2 = f'\n{(" " * (name_len + 2))}'
  277. else:
  278. space1 = "\n"
  279. space2 = "\n " # space for the opening '['
  280. n = len(obj)
  281. if line_break_each_value:
  282. # If we want to vertically align on each value of obj, we need to
  283. # separate values by a line break and indent the values
  284. sep = ",\n " + " " * len(name)
  285. else:
  286. sep = ","
  287. max_seq_items = get_option("display.max_seq_items") or n
  288. # are we a truncated display
  289. is_truncated = n > max_seq_items
  290. # adj can optionally handle unicode eastern asian width
  291. adj = _get_adjustment()
  292. def _extend_line(
  293. s: str, line: str, value: str, display_width: int, next_line_prefix: str
  294. ) -> Tuple[str, str]:
  295. if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
  296. s += line.rstrip()
  297. line = next_line_prefix
  298. line += value
  299. return s, line
  300. def best_len(values: List[str]) -> int:
  301. if values:
  302. return max(adj.len(x) for x in values)
  303. else:
  304. return 0
  305. close = ", "
  306. if n == 0:
  307. summary = f"[]{close}"
  308. elif n == 1 and not line_break_each_value:
  309. first = formatter(obj[0])
  310. summary = f"[{first}]{close}"
  311. elif n == 2 and not line_break_each_value:
  312. first = formatter(obj[0])
  313. last = formatter(obj[-1])
  314. summary = f"[{first}, {last}]{close}"
  315. else:
  316. if n > max_seq_items:
  317. n = min(max_seq_items // 2, 10)
  318. head = [formatter(x) for x in obj[:n]]
  319. tail = [formatter(x) for x in obj[-n:]]
  320. else:
  321. head = []
  322. tail = [formatter(x) for x in obj]
  323. # adjust all values to max length if needed
  324. if is_justify:
  325. if line_break_each_value:
  326. # Justify each string in the values of head and tail, so the
  327. # strings will right align when head and tail are stacked
  328. # vertically.
  329. head, tail = _justify(head, tail)
  330. elif is_truncated or not (
  331. len(", ".join(head)) < display_width
  332. and len(", ".join(tail)) < display_width
  333. ):
  334. # Each string in head and tail should align with each other
  335. max_length = max(best_len(head), best_len(tail))
  336. head = [x.rjust(max_length) for x in head]
  337. tail = [x.rjust(max_length) for x in tail]
  338. # If we are not truncated and we are only a single
  339. # line, then don't justify
  340. if line_break_each_value:
  341. # Now head and tail are of type List[Tuple[str]]. Below we
  342. # convert them into List[str], so there will be one string per
  343. # value. Also truncate items horizontally if wider than
  344. # max_space
  345. max_space = display_width - len(space2)
  346. value = tail[0]
  347. for max_items in reversed(range(1, len(value) + 1)):
  348. pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
  349. if len(pprinted_seq) < max_space:
  350. break
  351. head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
  352. tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
  353. summary = ""
  354. line = space2
  355. for max_items in range(len(head)):
  356. word = head[max_items] + sep + " "
  357. summary, line = _extend_line(summary, line, word, display_width, space2)
  358. if is_truncated:
  359. # remove trailing space of last line
  360. summary += line.rstrip() + space2 + "..."
  361. line = space2
  362. for max_items in range(len(tail) - 1):
  363. word = tail[max_items] + sep + " "
  364. summary, line = _extend_line(summary, line, word, display_width, space2)
  365. # last value: no sep added + 1 space of width used for trailing ','
  366. summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
  367. summary += line
  368. # right now close is either '' or ', '
  369. # Now we want to include the ']', but not the maybe space.
  370. close = "]" + close.rstrip(" ")
  371. summary += close
  372. if len(summary) > (display_width) or line_break_each_value:
  373. summary += space1
  374. else: # one row
  375. summary += " "
  376. # remove initial space
  377. summary = "[" + summary[len(space2) :]
  378. return summary
  379. def _justify(
  380. head: List[Sequence[str]], tail: List[Sequence[str]]
  381. ) -> Tuple[List[Tuple[str, ...]], List[Tuple[str, ...]]]:
  382. """
  383. Justify items in head and tail, so they are right-aligned when stacked.
  384. Parameters
  385. ----------
  386. head : list-like of list-likes of strings
  387. tail : list-like of list-likes of strings
  388. Returns
  389. -------
  390. tuple of list of tuples of strings
  391. Same as head and tail, but items are right aligned when stacked
  392. vertically.
  393. Examples
  394. --------
  395. >>> _justify([['a', 'b']], [['abc', 'abcd']])
  396. ([(' a', ' b')], [('abc', 'abcd')])
  397. """
  398. combined = head + tail
  399. # For each position for the sequences in ``combined``,
  400. # find the length of the largest string.
  401. max_length = [0] * len(combined[0])
  402. for inner_seq in combined:
  403. length = [len(item) for item in inner_seq]
  404. max_length = [max(x, y) for x, y in zip(max_length, length)]
  405. # justify each item in each list-like in head and tail using max_length
  406. head = [
  407. tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
  408. ]
  409. tail = [
  410. tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
  411. ]
  412. # https://github.com/python/mypy/issues/4975
  413. # error: Incompatible return value type (got "Tuple[List[Sequence[str]],
  414. # List[Sequence[str]]]", expected "Tuple[List[Tuple[str, ...]],
  415. # List[Tuple[str, ...]]]")
  416. return head, tail # type: ignore
  417. def format_object_attrs(
  418. obj: Sequence, include_dtype: bool = True
  419. ) -> List[Tuple[str, Union[str, int]]]:
  420. """
  421. Return a list of tuples of the (attr, formatted_value)
  422. for common attrs, including dtype, name, length
  423. Parameters
  424. ----------
  425. obj : object
  426. must be iterable
  427. include_dtype : bool
  428. If False, dtype won't be in the returned list
  429. Returns
  430. -------
  431. list of 2-tuple
  432. """
  433. attrs: List[Tuple[str, Union[str, int]]] = []
  434. if hasattr(obj, "dtype") and include_dtype:
  435. # error: "Sequence[Any]" has no attribute "dtype"
  436. attrs.append(("dtype", f"'{obj.dtype}'")) # type: ignore
  437. if getattr(obj, "name", None) is not None:
  438. # error: "Sequence[Any]" has no attribute "name"
  439. attrs.append(("name", default_pprint(obj.name))) # type: ignore
  440. # error: "Sequence[Any]" has no attribute "names"
  441. elif getattr(obj, "names", None) is not None and any(obj.names): # type: ignore
  442. # error: "Sequence[Any]" has no attribute "names"
  443. attrs.append(("names", default_pprint(obj.names))) # type: ignore
  444. max_seq_items = get_option("display.max_seq_items") or len(obj)
  445. if len(obj) > max_seq_items:
  446. attrs.append(("length", len(obj)))
  447. return attrs