pickle.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. """ pickle compat """
  2. import pickle
  3. from typing import Any, Optional
  4. import warnings
  5. from pandas._typing import FilePathOrBuffer
  6. from pandas.compat import pickle_compat as pc
  7. from pandas.io.common import get_filepath_or_buffer, get_handle
  def to_pickle(
      obj: Any,
      filepath_or_buffer: FilePathOrBuffer,
      compression: Optional[str] = "infer",
      protocol: int = pickle.HIGHEST_PROTOCOL,
  ):
      """
      Pickle (serialize) object to file.

      Parameters
      ----------
      obj : any object
          Any python object.
      filepath_or_buffer : str, path object or file-like object
          File path, URL, or buffer where the pickled object will be stored.

          .. versionchanged:: 1.0.0
             Accept URL. URL has to be of S3 or GCS.
      compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
          If 'infer' and 'filepath_or_buffer' is path-like, then detect
          compression from the following extensions: '.gz', '.bz2', '.zip',
          or '.xz' (otherwise no compression). If 'infer' and
          'filepath_or_buffer' is not path-like, then use None
          (= no compression).
      protocol : int
          Int which indicates which protocol should be used by the pickler,
          default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
          values for this parameter depend on the version of Python. For Python
          2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
          For Python >= 3.4, 4 is a valid value. A negative value for the
          protocol parameter is equivalent to setting its value to
          HIGHEST_PROTOCOL.

          .. [1] https://docs.python.org/3/library/pickle.html
          .. versionadded:: 0.21.0

      See Also
      --------
      read_pickle : Load pickled pandas object (or any object) from file.
      DataFrame.to_hdf : Write DataFrame to an HDF5 file.
      DataFrame.to_sql : Write DataFrame to a SQL database.
      DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

      Examples
      --------
      >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
      >>> original_df
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9
      >>> pd.to_pickle(original_df, "./dummy.pkl")

      >>> unpickled_df = pd.read_pickle("./dummy.pkl")
      >>> unpickled_df
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9

      >>> import os
      >>> os.remove("./dummy.pkl")
      """
      # Normalise the protocol before anything is opened: a failing comparison
      # (e.g. a non-integer ``protocol``) must not leave an open handle behind.
      if protocol < 0:
          protocol = pickle.HIGHEST_PROTOCOL

      fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
          filepath_or_buffer, compression=compression, mode="wb"
      )
      # 'infer' is only meaningful when the resolved target is still a string;
      # for anything else fall back to writing uncompressed.
      if not isinstance(fp_or_buf, str) and compression == "infer":
          compression = None
      f, fh = get_handle(fp_or_buf, "wb", compression=compression, is_text=False)
      try:
          # Serialise completely in memory, then issue one single write.
          # NOTE(review): presumably some handle types returned by get_handle
          # do not tolerate many small writes -- confirm before switching to
          # pickle.dump(obj, f).
          f.write(pickle.dumps(obj, protocol=protocol))
      finally:
          f.close()
          for _f in fh:
              _f.close()
          if should_close:
              try:
                  fp_or_buf.close()
              except ValueError:
                  # Best-effort close of the resolved buffer.
                  pass
  def _rewind_for_retry(handle) -> None:
      """Best-effort reset of ``handle`` to its start before it is re-read."""
      try:
          handle.seek(0)
      except (AttributeError, OSError, ValueError):
          # No ``seek`` attribute, a non-seekable stream (io.UnsupportedOperation
          # derives from both OSError and ValueError) or a closed handle: leave
          # the position untouched so the retry behaves as it did without the
          # rewind.
          pass


  def read_pickle(
      filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] = "infer"
  ):
      """
      Load pickled pandas object (or any object) from file.

      .. warning::
         Loading pickled data received from untrusted sources can be
         unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

      Parameters
      ----------
      filepath_or_buffer : str, path object or file-like object
          File path, URL, or buffer where the pickled object will be loaded from.

          .. versionchanged:: 1.0.0
             Accept URL. URL is not limited to S3 and GCS.
      compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
          If 'infer' and 'filepath_or_buffer' is path-like, then detect
          compression from the following extensions: '.gz', '.bz2', '.zip',
          or '.xz' (otherwise no compression). If 'infer' and
          'filepath_or_buffer' is not path-like, then use None
          (= no decompression).

      Returns
      -------
      unpickled : same type as object stored in file

      See Also
      --------
      DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
      Series.to_pickle : Pickle (serialize) Series object to file.
      read_hdf : Read HDF5 file into a DataFrame.
      read_sql : Read SQL query or database table into a DataFrame.
      read_parquet : Load a parquet object, returning a DataFrame.

      Notes
      -----
      read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3.

      Examples
      --------
      >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
      >>> original_df
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9
      >>> pd.to_pickle(original_df, "./dummy.pkl")

      >>> unpickled_df = pd.read_pickle("./dummy.pkl")
      >>> unpickled_df
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9

      >>> import os
      >>> os.remove("./dummy.pkl")
      """
      fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
          filepath_or_buffer, compression=compression
      )
      # 'infer' is only meaningful when the resolved source is still a string;
      # for anything else fall back to reading without decompression.
      if not isinstance(fp_or_buf, str) and compression == "infer":
          compression = None
      f, fh = get_handle(fp_or_buf, "rb", compression=compression, is_text=False)

      # 1) try standard library Pickle
      # 2) try pickle_compat (older pandas version) to handle subclass changes
      # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError
      try:
          excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError)
          try:
              with warnings.catch_warnings(record=True):
                  # We want to silence any warnings about, e.g. moved modules.
                  warnings.simplefilter("ignore", Warning)
                  return pickle.load(f)
          except excs_to_catch:
              # e.g.
              #  "No module named 'pandas.core.sparse.series'"
              #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
              # The failed attempt may already have consumed part of the
              # stream, so start the retry from the beginning again.
              _rewind_for_retry(f)
              return pc.load(f, encoding=None)
      except UnicodeDecodeError:
          # e.g. can occur for files written in py27; see GH#28645 and GH#31988
          # Reached from either attempt above; rewind before the final retry.
          _rewind_for_retry(f)
          return pc.load(f, encoding="latin-1")
      finally:
          # Runs on every exit path, including the early returns above.
          f.close()
          for _f in fh:
              _f.close()
          if should_close:
              try:
                  fp_or_buf.close()
              except ValueError:
                  # Best-effort close of the resolved buffer.
                  pass