_util.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. from pandas.compat._optional import import_optional_dependency
  2. from pandas.core.dtypes.common import is_integer, is_list_like
  3. _writers = {}
  4. def register_writer(klass):
  5. """
  6. Add engine to the excel writer registry.io.excel.
  7. You must use this method to integrate with ``to_excel``.
  8. Parameters
  9. ----------
  10. klass : ExcelWriter
  11. """
  12. if not callable(klass):
  13. raise ValueError("Can only register callables as engines")
  14. engine_name = klass.engine
  15. _writers[engine_name] = klass
  16. def _get_default_writer(ext):
  17. """
  18. Return the default writer for the given extension.
  19. Parameters
  20. ----------
  21. ext : str
  22. The excel file extension for which to get the default engine.
  23. Returns
  24. -------
  25. str
  26. The default engine for the extension.
  27. """
  28. _default_writers = {"xlsx": "openpyxl", "xlsm": "openpyxl", "xls": "xlwt"}
  29. xlsxwriter = import_optional_dependency(
  30. "xlsxwriter", raise_on_missing=False, on_version="warn"
  31. )
  32. if xlsxwriter:
  33. _default_writers["xlsx"] = "xlsxwriter"
  34. return _default_writers[ext]
  35. def get_writer(engine_name):
  36. try:
  37. return _writers[engine_name]
  38. except KeyError:
  39. raise ValueError(f"No Excel writer '{engine_name}'")
  40. def _excel2num(x):
  41. """
  42. Convert Excel column name like 'AB' to 0-based column index.
  43. Parameters
  44. ----------
  45. x : str
  46. The Excel column name to convert to a 0-based column index.
  47. Returns
  48. -------
  49. num : int
  50. The column index corresponding to the name.
  51. Raises
  52. ------
  53. ValueError
  54. Part of the Excel column name was invalid.
  55. """
  56. index = 0
  57. for c in x.upper().strip():
  58. cp = ord(c)
  59. if cp < ord("A") or cp > ord("Z"):
  60. raise ValueError(f"Invalid column name: {x}")
  61. index = index * 26 + cp - ord("A") + 1
  62. return index - 1
  63. def _range2cols(areas):
  64. """
  65. Convert comma separated list of column names and ranges to indices.
  66. Parameters
  67. ----------
  68. areas : str
  69. A string containing a sequence of column ranges (or areas).
  70. Returns
  71. -------
  72. cols : list
  73. A list of 0-based column indices.
  74. Examples
  75. --------
  76. >>> _range2cols('A:E')
  77. [0, 1, 2, 3, 4]
  78. >>> _range2cols('A,C,Z:AB')
  79. [0, 2, 25, 26, 27]
  80. """
  81. cols = []
  82. for rng in areas.split(","):
  83. if ":" in rng:
  84. rng = rng.split(":")
  85. cols.extend(range(_excel2num(rng[0]), _excel2num(rng[1]) + 1))
  86. else:
  87. cols.append(_excel2num(rng))
  88. return cols
  89. def _maybe_convert_usecols(usecols):
  90. """
  91. Convert `usecols` into a compatible format for parsing in `parsers.py`.
  92. Parameters
  93. ----------
  94. usecols : object
  95. The use-columns object to potentially convert.
  96. Returns
  97. -------
  98. converted : object
  99. The compatible format of `usecols`.
  100. """
  101. if usecols is None:
  102. return usecols
  103. if is_integer(usecols):
  104. raise ValueError(
  105. "Passing an integer for `usecols` is no longer supported. "
  106. "Please pass in a list of int from 0 to `usecols` "
  107. "inclusive instead."
  108. )
  109. if isinstance(usecols, str):
  110. return _range2cols(usecols)
  111. return usecols
  112. def _validate_freeze_panes(freeze_panes):
  113. if freeze_panes is not None:
  114. if len(freeze_panes) == 2 and all(
  115. isinstance(item, int) for item in freeze_panes
  116. ):
  117. return True
  118. raise ValueError(
  119. "freeze_panes must be of form (row, column) "
  120. "where row and column are integers"
  121. )
  122. # freeze_panes wasn't specified, return False so it won't be applied
  123. # to output sheet
  124. return False
  125. def _trim_excel_header(row):
  126. # trim header row so auto-index inference works
  127. # xlrd uses '' , openpyxl None
  128. while len(row) > 0 and (row[0] == "" or row[0] is None):
  129. row = row[1:]
  130. return row
  131. def _fill_mi_header(row, control_row):
  132. """Forward fill blank entries in row but only inside the same parent index.
  133. Used for creating headers in Multiindex.
  134. Parameters
  135. ----------
  136. row : list
  137. List of items in a single row.
  138. control_row : list of bool
  139. Helps to determine if particular column is in same parent index as the
  140. previous value. Used to stop propagation of empty cells between
  141. different indexes.
  142. Returns
  143. -------
  144. Returns changed row and control_row
  145. """
  146. last = row[0]
  147. for i in range(1, len(row)):
  148. if not control_row[i]:
  149. last = row[i]
  150. if row[i] == "" or row[i] is None:
  151. row[i] = last
  152. else:
  153. control_row[i] = False
  154. last = row[i]
  155. return row, control_row
  156. def _pop_header_name(row, index_col):
  157. """
  158. Pop the header name for MultiIndex parsing.
  159. Parameters
  160. ----------
  161. row : list
  162. The data row to parse for the header name.
  163. index_col : int, list
  164. The index columns for our data. Assumed to be non-null.
  165. Returns
  166. -------
  167. header_name : str
  168. The extracted header name.
  169. trimmed_row : list
  170. The original data row with the header name removed.
  171. """
  172. # Pop out header name and fill w/blank.
  173. i = index_col if not is_list_like(index_col) else max(index_col)
  174. header_name = row[i]
  175. header_name = None if header_name == "" else header_name
  176. return header_name, row[:i] + [""] + row[i + 1 :]