_openpyxl.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. from typing import List
  2. import numpy as np
  3. from pandas._typing import FilePathOrBuffer, Scalar
  4. from pandas.compat._optional import import_optional_dependency
  5. from pandas.io.excel._base import ExcelWriter, _BaseExcelReader
  6. from pandas.io.excel._util import _validate_freeze_panes
  7. class _OpenpyxlWriter(ExcelWriter):
  8. engine = "openpyxl"
  9. supported_extensions = (".xlsx", ".xlsm")
  10. def __init__(self, path, engine=None, mode="w", **engine_kwargs):
  11. # Use the openpyxl module as the Excel writer.
  12. from openpyxl.workbook import Workbook
  13. super().__init__(path, mode=mode, **engine_kwargs)
  14. if self.mode == "a": # Load from existing workbook
  15. from openpyxl import load_workbook
  16. book = load_workbook(self.path)
  17. self.book = book
  18. else:
  19. # Create workbook object with default optimized_write=True.
  20. self.book = Workbook()
  21. if self.book.worksheets:
  22. try:
  23. self.book.remove(self.book.worksheets[0])
  24. except AttributeError:
  25. # compat - for openpyxl <= 2.4
  26. self.book.remove_sheet(self.book.worksheets[0])
  27. def save(self):
  28. """
  29. Save workbook to disk.
  30. """
  31. return self.book.save(self.path)
  32. @classmethod
  33. def _convert_to_style(cls, style_dict):
  34. """
  35. Converts a style_dict to an openpyxl style object.
  36. Parameters
  37. ----------
  38. style_dict : style dictionary to convert
  39. """
  40. from openpyxl.style import Style
  41. xls_style = Style()
  42. for key, value in style_dict.items():
  43. for nk, nv in value.items():
  44. if key == "borders":
  45. (
  46. xls_style.borders.__getattribute__(nk).__setattr__(
  47. "border_style", nv
  48. )
  49. )
  50. else:
  51. xls_style.__getattribute__(key).__setattr__(nk, nv)
  52. return xls_style
  53. @classmethod
  54. def _convert_to_style_kwargs(cls, style_dict):
  55. """
  56. Convert a style_dict to a set of kwargs suitable for initializing
  57. or updating-on-copy an openpyxl v2 style object.
  58. Parameters
  59. ----------
  60. style_dict : dict
  61. A dict with zero or more of the following keys (or their synonyms).
  62. 'font'
  63. 'fill'
  64. 'border' ('borders')
  65. 'alignment'
  66. 'number_format'
  67. 'protection'
  68. Returns
  69. -------
  70. style_kwargs : dict
  71. A dict with the same, normalized keys as ``style_dict`` but each
  72. value has been replaced with a native openpyxl style object of the
  73. appropriate class.
  74. """
  75. _style_key_map = {"borders": "border"}
  76. style_kwargs = {}
  77. for k, v in style_dict.items():
  78. if k in _style_key_map:
  79. k = _style_key_map[k]
  80. _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
  81. new_v = _conv_to_x(v)
  82. if new_v:
  83. style_kwargs[k] = new_v
  84. return style_kwargs
  85. @classmethod
  86. def _convert_to_color(cls, color_spec):
  87. """
  88. Convert ``color_spec`` to an openpyxl v2 Color object.
  89. Parameters
  90. ----------
  91. color_spec : str, dict
  92. A 32-bit ARGB hex string, or a dict with zero or more of the
  93. following keys.
  94. 'rgb'
  95. 'indexed'
  96. 'auto'
  97. 'theme'
  98. 'tint'
  99. 'index'
  100. 'type'
  101. Returns
  102. -------
  103. color : openpyxl.styles.Color
  104. """
  105. from openpyxl.styles import Color
  106. if isinstance(color_spec, str):
  107. return Color(color_spec)
  108. else:
  109. return Color(**color_spec)
  110. @classmethod
  111. def _convert_to_font(cls, font_dict):
  112. """
  113. Convert ``font_dict`` to an openpyxl v2 Font object.
  114. Parameters
  115. ----------
  116. font_dict : dict
  117. A dict with zero or more of the following keys (or their synonyms).
  118. 'name'
  119. 'size' ('sz')
  120. 'bold' ('b')
  121. 'italic' ('i')
  122. 'underline' ('u')
  123. 'strikethrough' ('strike')
  124. 'color'
  125. 'vertAlign' ('vertalign')
  126. 'charset'
  127. 'scheme'
  128. 'family'
  129. 'outline'
  130. 'shadow'
  131. 'condense'
  132. Returns
  133. -------
  134. font : openpyxl.styles.Font
  135. """
  136. from openpyxl.styles import Font
  137. _font_key_map = {
  138. "sz": "size",
  139. "b": "bold",
  140. "i": "italic",
  141. "u": "underline",
  142. "strike": "strikethrough",
  143. "vertalign": "vertAlign",
  144. }
  145. font_kwargs = {}
  146. for k, v in font_dict.items():
  147. if k in _font_key_map:
  148. k = _font_key_map[k]
  149. if k == "color":
  150. v = cls._convert_to_color(v)
  151. font_kwargs[k] = v
  152. return Font(**font_kwargs)
  153. @classmethod
  154. def _convert_to_stop(cls, stop_seq):
  155. """
  156. Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
  157. suitable for initializing the ``GradientFill`` ``stop`` parameter.
  158. Parameters
  159. ----------
  160. stop_seq : iterable
  161. An iterable that yields objects suitable for consumption by
  162. ``_convert_to_color``.
  163. Returns
  164. -------
  165. stop : list of openpyxl.styles.Color
  166. """
  167. return map(cls._convert_to_color, stop_seq)
  168. @classmethod
  169. def _convert_to_fill(cls, fill_dict):
  170. """
  171. Convert ``fill_dict`` to an openpyxl v2 Fill object.
  172. Parameters
  173. ----------
  174. fill_dict : dict
  175. A dict with one or more of the following keys (or their synonyms),
  176. 'fill_type' ('patternType', 'patterntype')
  177. 'start_color' ('fgColor', 'fgcolor')
  178. 'end_color' ('bgColor', 'bgcolor')
  179. or one or more of the following keys (or their synonyms).
  180. 'type' ('fill_type')
  181. 'degree'
  182. 'left'
  183. 'right'
  184. 'top'
  185. 'bottom'
  186. 'stop'
  187. Returns
  188. -------
  189. fill : openpyxl.styles.Fill
  190. """
  191. from openpyxl.styles import PatternFill, GradientFill
  192. _pattern_fill_key_map = {
  193. "patternType": "fill_type",
  194. "patterntype": "fill_type",
  195. "fgColor": "start_color",
  196. "fgcolor": "start_color",
  197. "bgColor": "end_color",
  198. "bgcolor": "end_color",
  199. }
  200. _gradient_fill_key_map = {"fill_type": "type"}
  201. pfill_kwargs = {}
  202. gfill_kwargs = {}
  203. for k, v in fill_dict.items():
  204. pk = gk = None
  205. if k in _pattern_fill_key_map:
  206. pk = _pattern_fill_key_map[k]
  207. if k in _gradient_fill_key_map:
  208. gk = _gradient_fill_key_map[k]
  209. if pk in ["start_color", "end_color"]:
  210. v = cls._convert_to_color(v)
  211. if gk == "stop":
  212. v = cls._convert_to_stop(v)
  213. if pk:
  214. pfill_kwargs[pk] = v
  215. elif gk:
  216. gfill_kwargs[gk] = v
  217. else:
  218. pfill_kwargs[k] = v
  219. gfill_kwargs[k] = v
  220. try:
  221. return PatternFill(**pfill_kwargs)
  222. except TypeError:
  223. return GradientFill(**gfill_kwargs)
  224. @classmethod
  225. def _convert_to_side(cls, side_spec):
  226. """
  227. Convert ``side_spec`` to an openpyxl v2 Side object.
  228. Parameters
  229. ----------
  230. side_spec : str, dict
  231. A string specifying the border style, or a dict with zero or more
  232. of the following keys (or their synonyms).
  233. 'style' ('border_style')
  234. 'color'
  235. Returns
  236. -------
  237. side : openpyxl.styles.Side
  238. """
  239. from openpyxl.styles import Side
  240. _side_key_map = {"border_style": "style"}
  241. if isinstance(side_spec, str):
  242. return Side(style=side_spec)
  243. side_kwargs = {}
  244. for k, v in side_spec.items():
  245. if k in _side_key_map:
  246. k = _side_key_map[k]
  247. if k == "color":
  248. v = cls._convert_to_color(v)
  249. side_kwargs[k] = v
  250. return Side(**side_kwargs)
  251. @classmethod
  252. def _convert_to_border(cls, border_dict):
  253. """
  254. Convert ``border_dict`` to an openpyxl v2 Border object.
  255. Parameters
  256. ----------
  257. border_dict : dict
  258. A dict with zero or more of the following keys (or their synonyms).
  259. 'left'
  260. 'right'
  261. 'top'
  262. 'bottom'
  263. 'diagonal'
  264. 'diagonal_direction'
  265. 'vertical'
  266. 'horizontal'
  267. 'diagonalUp' ('diagonalup')
  268. 'diagonalDown' ('diagonaldown')
  269. 'outline'
  270. Returns
  271. -------
  272. border : openpyxl.styles.Border
  273. """
  274. from openpyxl.styles import Border
  275. _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
  276. border_kwargs = {}
  277. for k, v in border_dict.items():
  278. if k in _border_key_map:
  279. k = _border_key_map[k]
  280. if k == "color":
  281. v = cls._convert_to_color(v)
  282. if k in ["left", "right", "top", "bottom", "diagonal"]:
  283. v = cls._convert_to_side(v)
  284. border_kwargs[k] = v
  285. return Border(**border_kwargs)
  286. @classmethod
  287. def _convert_to_alignment(cls, alignment_dict):
  288. """
  289. Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
  290. Parameters
  291. ----------
  292. alignment_dict : dict
  293. A dict with zero or more of the following keys (or their synonyms).
  294. 'horizontal'
  295. 'vertical'
  296. 'text_rotation'
  297. 'wrap_text'
  298. 'shrink_to_fit'
  299. 'indent'
  300. Returns
  301. -------
  302. alignment : openpyxl.styles.Alignment
  303. """
  304. from openpyxl.styles import Alignment
  305. return Alignment(**alignment_dict)
  306. @classmethod
  307. def _convert_to_number_format(cls, number_format_dict):
  308. """
  309. Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
  310. initializer.
  311. Parameters
  312. ----------
  313. number_format_dict : dict
  314. A dict with zero or more of the following keys.
  315. 'format_code' : str
  316. Returns
  317. -------
  318. number_format : str
  319. """
  320. return number_format_dict["format_code"]
  321. @classmethod
  322. def _convert_to_protection(cls, protection_dict):
  323. """
  324. Convert ``protection_dict`` to an openpyxl v2 Protection object.
  325. Parameters
  326. ----------
  327. protection_dict : dict
  328. A dict with zero or more of the following keys.
  329. 'locked'
  330. 'hidden'
  331. Returns
  332. -------
  333. """
  334. from openpyxl.styles import Protection
  335. return Protection(**protection_dict)
  336. def write_cells(
  337. self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None
  338. ):
  339. # Write the frame cells using openpyxl.
  340. sheet_name = self._get_sheet_name(sheet_name)
  341. _style_cache = {}
  342. if sheet_name in self.sheets:
  343. wks = self.sheets[sheet_name]
  344. else:
  345. wks = self.book.create_sheet()
  346. wks.title = sheet_name
  347. self.sheets[sheet_name] = wks
  348. if _validate_freeze_panes(freeze_panes):
  349. wks.freeze_panes = wks.cell(
  350. row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
  351. )
  352. for cell in cells:
  353. xcell = wks.cell(
  354. row=startrow + cell.row + 1, column=startcol + cell.col + 1
  355. )
  356. xcell.value, fmt = self._value_with_fmt(cell.val)
  357. if fmt:
  358. xcell.number_format = fmt
  359. style_kwargs = {}
  360. if cell.style:
  361. key = str(cell.style)
  362. style_kwargs = _style_cache.get(key)
  363. if style_kwargs is None:
  364. style_kwargs = self._convert_to_style_kwargs(cell.style)
  365. _style_cache[key] = style_kwargs
  366. if style_kwargs:
  367. for k, v in style_kwargs.items():
  368. setattr(xcell, k, v)
  369. if cell.mergestart is not None and cell.mergeend is not None:
  370. wks.merge_cells(
  371. start_row=startrow + cell.row + 1,
  372. start_column=startcol + cell.col + 1,
  373. end_column=startcol + cell.mergeend + 1,
  374. end_row=startrow + cell.mergestart + 1,
  375. )
  376. # When cells are merged only the top-left cell is preserved
  377. # The behaviour of the other cells in a merged range is
  378. # undefined
  379. if style_kwargs:
  380. first_row = startrow + cell.row + 1
  381. last_row = startrow + cell.mergestart + 1
  382. first_col = startcol + cell.col + 1
  383. last_col = startcol + cell.mergeend + 1
  384. for row in range(first_row, last_row + 1):
  385. for col in range(first_col, last_col + 1):
  386. if row == first_row and col == first_col:
  387. # Ignore first cell. It is already handled.
  388. continue
  389. xcell = wks.cell(column=col, row=row)
  390. for k, v in style_kwargs.items():
  391. setattr(xcell, k, v)
  392. class _OpenpyxlReader(_BaseExcelReader):
  393. def __init__(self, filepath_or_buffer: FilePathOrBuffer) -> None:
  394. """Reader using openpyxl engine.
  395. Parameters
  396. ----------
  397. filepath_or_buffer : string, path object or Workbook
  398. Object to be parsed.
  399. """
  400. import_optional_dependency("openpyxl")
  401. super().__init__(filepath_or_buffer)
  402. @property
  403. def _workbook_class(self):
  404. from openpyxl import Workbook
  405. return Workbook
  406. def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
  407. from openpyxl import load_workbook
  408. return load_workbook(
  409. filepath_or_buffer, read_only=True, data_only=True, keep_links=False
  410. )
  411. def close(self):
  412. # https://stackoverflow.com/questions/31416842/
  413. # openpyxl-does-not-close-excel-workbook-in-read-only-mode
  414. self.book.close()
  415. @property
  416. def sheet_names(self) -> List[str]:
  417. return self.book.sheetnames
  418. def get_sheet_by_name(self, name: str):
  419. return self.book[name]
  420. def get_sheet_by_index(self, index: int):
  421. return self.book.worksheets[index]
  422. def _convert_cell(self, cell, convert_float: bool) -> Scalar:
  423. # TODO: replace with openpyxl constants
  424. if cell.is_date:
  425. return cell.value
  426. elif cell.data_type == "e":
  427. return np.nan
  428. elif cell.data_type == "b":
  429. return bool(cell.value)
  430. elif cell.value is None:
  431. return "" # compat with xlrd
  432. elif cell.data_type == "n":
  433. # GH5394
  434. if convert_float:
  435. val = int(cell.value)
  436. if val == cell.value:
  437. return val
  438. else:
  439. return float(cell.value)
  440. return cell.value
  441. def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
  442. data: List[List[Scalar]] = []
  443. for row in sheet.rows:
  444. data.append([self._convert_cell(cell, convert_float) for cell in row])
  445. return data