sasreader.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. """
  2. Read SAS sas7bdat or xport files.
  3. """
  4. from pandas.io.common import stringify_path
  5. def read_sas(
  6. filepath_or_buffer,
  7. format=None,
  8. index=None,
  9. encoding=None,
  10. chunksize=None,
  11. iterator=False,
  12. ):
  13. """
  14. Read SAS files stored as either XPORT or SAS7BDAT format files.
  15. Parameters
  16. ----------
  17. filepath_or_buffer : str, path object or file-like object
  18. Any valid string path is acceptable. The string could be a URL. Valid
  19. URL schemes include http, ftp, s3, and file. For file URLs, a host is
  20. expected. A local file could be:
  21. ``file://localhost/path/to/table.sas``.
  22. If you want to pass in a path object, pandas accepts any
  23. ``os.PathLike``.
  24. By file-like object, we refer to objects with a ``read()`` method,
  25. such as a file handler (e.g. via builtin ``open`` function)
  26. or ``StringIO``.
  27. format : str {'xport', 'sas7bdat'} or None
  28. If None, file format is inferred from file extension. If 'xport' or
  29. 'sas7bdat', uses the corresponding format.
  30. index : identifier of index column, defaults to None
  31. Identifier of column that should be used as index of the DataFrame.
  32. encoding : str, default is None
  33. Encoding for text data. If None, text data are stored as raw bytes.
  34. chunksize : int
  35. Read file `chunksize` lines at a time, returns iterator.
  36. iterator : bool, defaults to False
  37. If True, returns an iterator for reading the file incrementally.
  38. Returns
  39. -------
  40. DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
  41. or XportReader
  42. """
  43. if format is None:
  44. buffer_error_msg = (
  45. "If this is a buffer object rather "
  46. "than a string name, you must specify "
  47. "a format string"
  48. )
  49. filepath_or_buffer = stringify_path(filepath_or_buffer)
  50. if not isinstance(filepath_or_buffer, str):
  51. raise ValueError(buffer_error_msg)
  52. fname = filepath_or_buffer.lower()
  53. if fname.endswith(".xpt"):
  54. format = "xport"
  55. elif fname.endswith(".sas7bdat"):
  56. format = "sas7bdat"
  57. else:
  58. raise ValueError("unable to infer format of SAS file")
  59. if format.lower() == "xport":
  60. from pandas.io.sas.sas_xport import XportReader
  61. reader = XportReader(
  62. filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
  63. )
  64. elif format.lower() == "sas7bdat":
  65. from pandas.io.sas.sas7bdat import SAS7BDATReader
  66. reader = SAS7BDATReader(
  67. filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
  68. )
  69. else:
  70. raise ValueError("unknown SAS format")
  71. if iterator or chunksize:
  72. return reader
  73. data = reader.read()
  74. reader.close()
  75. return data