_pyxlsb.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. from typing import List
  2. from pandas._typing import FilePathOrBuffer, Scalar
  3. from pandas.compat._optional import import_optional_dependency
  4. from pandas.io.excel._base import _BaseExcelReader
  class _PyxlsbReader(_BaseExcelReader):
      """Excel reader that uses the optional ``pyxlsb`` package to read xlsb files."""

      def __init__(self, filepath_or_buffer: FilePathOrBuffer):
          """
          Reader using pyxlsb engine.

          Parameters
          ----------
          filepath_or_buffer : str, path object, or Workbook
              Object to be parsed.
          """
          import_optional_dependency("pyxlsb")
          # This will call load_workbook on the filepath or buffer
          # and set the result to the book attribute.
          super().__init__(filepath_or_buffer)

      @property
      def _workbook_class(self):
          """Return the pyxlsb ``Workbook`` class."""
          # Imported inside the property so pyxlsb is only needed when used.
          from pyxlsb import Workbook

          return Workbook

      def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
          """
          Open the workbook with pyxlsb.

          Parameters
          ----------
          filepath_or_buffer : str, path object, or Workbook
              Passed unchanged to ``pyxlsb.open_workbook``.

          Returns
          -------
          object
              Whatever ``pyxlsb.open_workbook`` returns.
          """
          from pyxlsb import open_workbook

          # TODO: hack in buffer capability
          # This might need some modifications to the Pyxlsb library
          # Actual work for opening it is in xlsbpackage.py, line 20-ish
          return open_workbook(filepath_or_buffer)

      @property
      def sheet_names(self) -> List[str]:
          """Return the ``sheets`` attribute of the opened workbook."""
          return self.book.sheets

      def get_sheet_by_name(self, name: str):
          """Return the sheet that the workbook's ``get_sheet`` gives for ``name``."""
          return self.book.get_sheet(name)

      def get_sheet_by_index(self, index: int):
          """Return the sheet at zero-based position ``index``."""
          # pyxlsb sheets are indexed from 1 onwards
          # There's a fix for this in the source, but the pypi package doesn't have it
          return self.book.get_sheet(index + 1)

      def _convert_cell(self, cell, convert_float: bool) -> Scalar:
          """
          Convert a single pyxlsb cell to a scalar.

          Parameters
          ----------
          cell : object
              Cell whose raw value is stored in its ``v`` attribute.
          convert_float : bool
              If True, float values that are whole numbers are returned as ``int``.

          Returns
          -------
          scalar
              ``""`` when the cell is empty, otherwise the (possibly converted)
              cell value.
          """
          # TODO: there is no way to distinguish between floats and datetimes in pyxlsb
          # This means that there is no way to read datetime types from an xlsb file yet
          value = cell.v
          if value is None:
              return ""  # Prevents non-named columns from not showing up as Unnamed: i
          if convert_float and isinstance(value, float):
              # float.is_integer() is False for NaN and +/-inf; calling int() on
              # those values would raise ValueError/OverflowError instead.
              if value.is_integer():
                  return int(value)
              return float(value)
          return value

      def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
          """
          Read every cell of ``sheet`` into a list of rows.

          Parameters
          ----------
          sheet : object
              Sheet as returned by ``get_sheet_by_name`` / ``get_sheet_by_index``.
          convert_float : bool
              Forwarded to ``_convert_cell`` for every cell.

          Returns
          -------
          list of list of scalar
              One inner list per row yielded by ``sheet.rows``.
          """
          # NOTE(review): sparse=False presumably makes pyxlsb yield empty rows as
          # well, keeping row positions aligned - confirm against the pyxlsb docs.
          return [
              [self._convert_cell(c, convert_float) for c in r]
              for r in sheet.rows(sparse=False)
          ]