spss.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. from pathlib import Path
  2. from typing import Optional, Sequence, Union
  3. from pandas.compat._optional import import_optional_dependency
  4. from pandas.core.dtypes.inference import is_list_like
  5. from pandas.core.api import DataFrame
  6. def read_spss(
  7. path: Union[str, Path],
  8. usecols: Optional[Sequence[str]] = None,
  9. convert_categoricals: bool = True,
  10. ) -> DataFrame:
  11. """
  12. Load an SPSS file from the file path, returning a DataFrame.
  13. .. versionadded:: 0.25.0
  14. Parameters
  15. ----------
  16. path : string or Path
  17. File path.
  18. usecols : list-like, optional
  19. Return a subset of the columns. If None, return all columns.
  20. convert_categoricals : bool, default is True
  21. Convert categorical columns into pd.Categorical.
  22. Returns
  23. -------
  24. DataFrame
  25. """
  26. pyreadstat = import_optional_dependency("pyreadstat")
  27. if usecols is not None:
  28. if not is_list_like(usecols):
  29. raise TypeError("usecols must be list-like.")
  30. else:
  31. usecols = list(usecols) # pyreadstat requires a list
  32. df, _ = pyreadstat.read_sav(
  33. path, usecols=usecols, apply_value_formats=convert_categoricals
  34. )
  35. return df