s3.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. """ s3 support for remote file interactivity """
  2. from typing import IO, Any, Optional, Tuple
  3. from urllib.parse import urlparse as parse_url
  4. from pandas._typing import FilePathOrBuffer
  5. from pandas.compat._optional import import_optional_dependency
  6. s3fs = import_optional_dependency(
  7. "s3fs", extra="The s3fs package is required to handle s3 files."
  8. )
  9. def _strip_schema(url):
  10. """Returns the url without the s3:// part"""
  11. result = parse_url(url, allow_fragments=False)
  12. return result.netloc + result.path
  13. def get_file_and_filesystem(
  14. filepath_or_buffer: FilePathOrBuffer, mode: Optional[str] = None
  15. ) -> Tuple[IO, Any]:
  16. from botocore.exceptions import NoCredentialsError
  17. if mode is None:
  18. mode = "rb"
  19. fs = s3fs.S3FileSystem(anon=False)
  20. try:
  21. file = fs.open(_strip_schema(filepath_or_buffer), mode)
  22. except (FileNotFoundError, NoCredentialsError):
  23. # boto3 has troubles when trying to access a public file
  24. # when credentialed...
  25. # An OSError is raised if you have credentials, but they
  26. # aren't valid for that bucket.
  27. # A NoCredentialsError is raised if you don't have creds
  28. # for that bucket.
  29. fs = s3fs.S3FileSystem(anon=True)
  30. file = fs.open(_strip_schema(filepath_or_buffer), mode)
  31. return file, fs
  32. def get_filepath_or_buffer(
  33. filepath_or_buffer: FilePathOrBuffer,
  34. encoding: Optional[str] = None,
  35. compression: Optional[str] = None,
  36. mode: Optional[str] = None,
  37. ) -> Tuple[IO, Optional[str], Optional[str], bool]:
  38. file, _fs = get_file_and_filesystem(filepath_or_buffer, mode=mode)
  39. return file, None, compression, True