feather_format.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. """ feather-format compat """
  2. from pandas.compat._optional import import_optional_dependency
  3. from pandas import DataFrame, Int64Index, RangeIndex
  4. from pandas.io.common import stringify_path
  5. def to_feather(df: DataFrame, path):
  6. """
  7. Write a DataFrame to the feather-format
  8. Parameters
  9. ----------
  10. df : DataFrame
  11. path : string file path, or file-like object
  12. """
  13. import_optional_dependency("pyarrow")
  14. from pyarrow import feather
  15. path = stringify_path(path)
  16. if not isinstance(df, DataFrame):
  17. raise ValueError("feather only support IO with DataFrames")
  18. valid_types = {"string", "unicode"}
  19. # validate index
  20. # --------------
  21. # validate that we have only a default index
  22. # raise on anything else as we don't serialize the index
  23. if not isinstance(df.index, Int64Index):
  24. typ = type(df.index)
  25. raise ValueError(
  26. f"feather does not support serializing {typ} "
  27. "for the index; you can .reset_index() "
  28. "to make the index into column(s)"
  29. )
  30. if not df.index.equals(RangeIndex.from_range(range(len(df)))):
  31. raise ValueError(
  32. "feather does not support serializing a "
  33. "non-default index for the index; you "
  34. "can .reset_index() to make the index "
  35. "into column(s)"
  36. )
  37. if df.index.name is not None:
  38. raise ValueError(
  39. "feather does not serialize index meta-data on a default index"
  40. )
  41. # validate columns
  42. # ----------------
  43. # must have value column names (strings only)
  44. if df.columns.inferred_type not in valid_types:
  45. raise ValueError("feather must have string column names")
  46. feather.write_feather(df, path)
  47. def read_feather(path, columns=None, use_threads: bool = True):
  48. """
  49. Load a feather-format object from the file path.
  50. Parameters
  51. ----------
  52. path : str, path object or file-like object
  53. Any valid string path is acceptable. The string could be a URL. Valid
  54. URL schemes include http, ftp, s3, and file. For file URLs, a host is
  55. expected. A local file could be:
  56. ``file://localhost/path/to/table.feather``.
  57. If you want to pass in a path object, pandas accepts any
  58. ``os.PathLike``.
  59. By file-like object, we refer to objects with a ``read()`` method,
  60. such as a file handler (e.g. via builtin ``open`` function)
  61. or ``StringIO``.
  62. columns : sequence, default None
  63. If not provided, all columns are read.
  64. .. versionadded:: 0.24.0
  65. use_threads : bool, default True
  66. Whether to parallelize reading using multiple threads.
  67. .. versionadded:: 0.24.0
  68. Returns
  69. -------
  70. type of object stored in file
  71. """
  72. import_optional_dependency("pyarrow")
  73. from pyarrow import feather
  74. path = stringify_path(path)
  75. return feather.read_feather(path, columns=columns, use_threads=bool(use_threads))