pickle_compat.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. """
  2. Support pre-0.12 series pickle compatibility.
  3. """
  4. import copy
  5. import pickle as pkl
  6. from typing import TYPE_CHECKING, Optional
  7. import warnings
  8. from pandas import Index
  9. if TYPE_CHECKING:
  10. from pandas import Series, DataFrame
  11. def load_reduce(self):
  12. stack = self.stack
  13. args = stack.pop()
  14. func = stack[-1]
  15. if len(args) and type(args[0]) is type:
  16. n = args[0].__name__ # noqa
  17. try:
  18. stack[-1] = func(*args)
  19. return
  20. except TypeError as err:
  21. # If we have a deprecated function,
  22. # try to replace and try again.
  23. msg = "_reconstruct: First argument must be a sub-type of ndarray"
  24. if msg in str(err):
  25. try:
  26. cls = args[0]
  27. stack[-1] = object.__new__(cls)
  28. return
  29. except TypeError:
  30. pass
  31. raise
  32. _sparse_msg = """\
  33. Loading a saved '{cls}' as a {new} with sparse values.
  34. '{cls}' is now removed. You should re-save this dataset in its new format.
  35. """
  36. class _LoadSparseSeries:
  37. # To load a SparseSeries as a Series[Sparse]
  38. # https://github.com/python/mypy/issues/1020
  39. # error: Incompatible return type for "__new__" (returns "Series", but must return
  40. # a subtype of "_LoadSparseSeries")
  41. def __new__(cls) -> "Series": # type: ignore
  42. from pandas import Series
  43. warnings.warn(
  44. _sparse_msg.format(cls="SparseSeries", new="Series"),
  45. FutureWarning,
  46. stacklevel=6,
  47. )
  48. return Series(dtype=object)
  49. class _LoadSparseFrame:
  50. # To load a SparseDataFrame as a DataFrame[Sparse]
  51. # https://github.com/python/mypy/issues/1020
  52. # error: Incompatible return type for "__new__" (returns "DataFrame", but must
  53. # return a subtype of "_LoadSparseFrame")
  54. def __new__(cls) -> "DataFrame": # type: ignore
  55. from pandas import DataFrame
  56. warnings.warn(
  57. _sparse_msg.format(cls="SparseDataFrame", new="DataFrame"),
  58. FutureWarning,
  59. stacklevel=6,
  60. )
  61. return DataFrame()
  62. # If classes are moved, provide compat here.
  63. _class_locations_map = {
  64. ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
  65. # 15477
  66. ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
  67. ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
  68. ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
  69. # 10890
  70. ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
  71. ("pandas.sparse.series", "SparseTimeSeries"): (
  72. "pandas.core.sparse.series",
  73. "SparseSeries",
  74. ),
  75. # 12588, extensions moving
  76. ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
  77. ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
  78. # 18543 moving period
  79. ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
  80. ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
  81. # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
  82. ("pandas.tslib", "__nat_unpickle"): (
  83. "pandas._libs.tslibs.nattype",
  84. "__nat_unpickle",
  85. ),
  86. ("pandas._libs.tslib", "__nat_unpickle"): (
  87. "pandas._libs.tslibs.nattype",
  88. "__nat_unpickle",
  89. ),
  90. # 15998 top-level dirs moving
  91. ("pandas.sparse.array", "SparseArray"): (
  92. "pandas.core.arrays.sparse",
  93. "SparseArray",
  94. ),
  95. ("pandas.sparse.series", "SparseSeries"): (
  96. "pandas.compat.pickle_compat",
  97. "_LoadSparseSeries",
  98. ),
  99. ("pandas.sparse.frame", "SparseDataFrame"): (
  100. "pandas.core.sparse.frame",
  101. "_LoadSparseFrame",
  102. ),
  103. ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
  104. ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
  105. ("pandas.indexes.numeric", "Int64Index"): (
  106. "pandas.core.indexes.numeric",
  107. "Int64Index",
  108. ),
  109. ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
  110. ("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
  111. ("pandas.tseries.index", "_new_DatetimeIndex"): (
  112. "pandas.core.indexes.datetimes",
  113. "_new_DatetimeIndex",
  114. ),
  115. ("pandas.tseries.index", "DatetimeIndex"): (
  116. "pandas.core.indexes.datetimes",
  117. "DatetimeIndex",
  118. ),
  119. ("pandas.tseries.period", "PeriodIndex"): (
  120. "pandas.core.indexes.period",
  121. "PeriodIndex",
  122. ),
  123. # 19269, arrays moving
  124. ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
  125. # 19939, add timedeltaindex, float64index compat from 15998 move
  126. ("pandas.tseries.tdi", "TimedeltaIndex"): (
  127. "pandas.core.indexes.timedeltas",
  128. "TimedeltaIndex",
  129. ),
  130. ("pandas.indexes.numeric", "Float64Index"): (
  131. "pandas.core.indexes.numeric",
  132. "Float64Index",
  133. ),
  134. ("pandas.core.sparse.series", "SparseSeries"): (
  135. "pandas.compat.pickle_compat",
  136. "_LoadSparseSeries",
  137. ),
  138. ("pandas.core.sparse.frame", "SparseDataFrame"): (
  139. "pandas.compat.pickle_compat",
  140. "_LoadSparseFrame",
  141. ),
  142. }
  143. # our Unpickler sub-class to override methods and some dispatcher
  144. # functions for compat and uses a non-public class of the pickle module.
  145. # error: Name 'pkl._Unpickler' is not defined
  146. class Unpickler(pkl._Unpickler): # type: ignore
  147. def find_class(self, module, name):
  148. # override superclass
  149. key = (module, name)
  150. module, name = _class_locations_map.get(key, key)
  151. return super().find_class(module, name)
  152. Unpickler.dispatch = copy.copy(Unpickler.dispatch)
  153. Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
  154. def load_newobj(self):
  155. args = self.stack.pop()
  156. cls = self.stack[-1]
  157. # compat
  158. if issubclass(cls, Index):
  159. obj = object.__new__(cls)
  160. else:
  161. obj = cls.__new__(cls, *args)
  162. self.stack[-1] = obj
# Register load_newobj as the handler for the NEWOBJ opcode.
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
  164. def load_newobj_ex(self):
  165. kwargs = self.stack.pop()
  166. args = self.stack.pop()
  167. cls = self.stack.pop()
  168. # compat
  169. if issubclass(cls, Index):
  170. obj = object.__new__(cls)
  171. else:
  172. obj = cls.__new__(cls, *args, **kwargs)
  173. self.append(obj)
  174. try:
  175. Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
  176. except (AttributeError, KeyError):
  177. pass
  178. def load(fh, encoding: Optional[str] = None, is_verbose: bool = False):
  179. """
  180. Load a pickle, with a provided encoding,
  181. Parameters
  182. ----------
  183. fh : a filelike object
  184. encoding : an optional encoding
  185. is_verbose : show exception output
  186. """
  187. try:
  188. fh.seek(0)
  189. if encoding is not None:
  190. up = Unpickler(fh, encoding=encoding)
  191. else:
  192. up = Unpickler(fh)
  193. up.is_verbose = is_verbose
  194. return up.load()
  195. except (ValueError, TypeError):
  196. raise