_xlrd.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. from datetime import time
  2. import numpy as np
  3. from pandas.compat._optional import import_optional_dependency
  4. from pandas.io.excel._base import _BaseExcelReader
  5. class _XlrdReader(_BaseExcelReader):
  6. def __init__(self, filepath_or_buffer):
  7. """Reader using xlrd engine.
  8. Parameters
  9. ----------
  10. filepath_or_buffer : string, path object or Workbook
  11. Object to be parsed.
  12. """
  13. err_msg = "Install xlrd >= 1.0.0 for Excel support"
  14. import_optional_dependency("xlrd", extra=err_msg)
  15. super().__init__(filepath_or_buffer)
  16. @property
  17. def _workbook_class(self):
  18. from xlrd import Book
  19. return Book
  20. def load_workbook(self, filepath_or_buffer):
  21. from xlrd import open_workbook
  22. if hasattr(filepath_or_buffer, "read"):
  23. data = filepath_or_buffer.read()
  24. return open_workbook(file_contents=data)
  25. else:
  26. return open_workbook(filepath_or_buffer)
  27. @property
  28. def sheet_names(self):
  29. return self.book.sheet_names()
  30. def get_sheet_by_name(self, name):
  31. return self.book.sheet_by_name(name)
  32. def get_sheet_by_index(self, index):
  33. return self.book.sheet_by_index(index)
  34. def get_sheet_data(self, sheet, convert_float):
  35. from xlrd import (
  36. xldate,
  37. XL_CELL_DATE,
  38. XL_CELL_ERROR,
  39. XL_CELL_BOOLEAN,
  40. XL_CELL_NUMBER,
  41. )
  42. epoch1904 = self.book.datemode
  43. def _parse_cell(cell_contents, cell_typ):
  44. """converts the contents of the cell into a pandas
  45. appropriate object"""
  46. if cell_typ == XL_CELL_DATE:
  47. # Use the newer xlrd datetime handling.
  48. try:
  49. cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904)
  50. except OverflowError:
  51. return cell_contents
  52. # Excel doesn't distinguish between dates and time,
  53. # so we treat dates on the epoch as times only.
  54. # Also, Excel supports 1900 and 1904 epochs.
  55. year = (cell_contents.timetuple())[0:3]
  56. if (not epoch1904 and year == (1899, 12, 31)) or (
  57. epoch1904 and year == (1904, 1, 1)
  58. ):
  59. cell_contents = time(
  60. cell_contents.hour,
  61. cell_contents.minute,
  62. cell_contents.second,
  63. cell_contents.microsecond,
  64. )
  65. elif cell_typ == XL_CELL_ERROR:
  66. cell_contents = np.nan
  67. elif cell_typ == XL_CELL_BOOLEAN:
  68. cell_contents = bool(cell_contents)
  69. elif convert_float and cell_typ == XL_CELL_NUMBER:
  70. # GH5394 - Excel 'numbers' are always floats
  71. # it's a minimal perf hit and less surprising
  72. val = int(cell_contents)
  73. if val == cell_contents:
  74. cell_contents = val
  75. return cell_contents
  76. data = []
  77. for i in range(sheet.nrows):
  78. row = [
  79. _parse_cell(value, typ)
  80. for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
  81. ]
  82. data.append(row)
  83. return data