gbq.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. """ Google BigQuery support """
  2. from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
  3. from pandas.compat._optional import import_optional_dependency
  4. if TYPE_CHECKING:
  5. from pandas import DataFrame
  6. def _try_import():
  7. # since pandas is a dependency of pandas-gbq
  8. # we need to import on first use
  9. msg = (
  10. "pandas-gbq is required to load data from Google BigQuery. "
  11. "See the docs: https://pandas-gbq.readthedocs.io."
  12. )
  13. pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg)
  14. return pandas_gbq
  15. def read_gbq(
  16. query: str,
  17. project_id: Optional[str] = None,
  18. index_col: Optional[str] = None,
  19. col_order: Optional[List[str]] = None,
  20. reauth: bool = False,
  21. auth_local_webserver: bool = False,
  22. dialect: Optional[str] = None,
  23. location: Optional[str] = None,
  24. configuration: Optional[Dict[str, Any]] = None,
  25. credentials=None,
  26. use_bqstorage_api: Optional[bool] = None,
  27. private_key=None,
  28. verbose=None,
  29. progress_bar_type: Optional[str] = None,
  30. ) -> "DataFrame":
  31. """
  32. Load data from Google BigQuery.
  33. This function requires the `pandas-gbq package
  34. <https://pandas-gbq.readthedocs.io>`__.
  35. See the `How to authenticate with Google BigQuery
  36. <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
  37. guide for authentication instructions.
  38. Parameters
  39. ----------
  40. query : str
  41. SQL-Like Query to return data values.
  42. project_id : str, optional
  43. Google BigQuery Account project ID. Optional when available from
  44. the environment.
  45. index_col : str, optional
  46. Name of result column to use for index in results DataFrame.
  47. col_order : list(str), optional
  48. List of BigQuery column names in the desired order for results
  49. DataFrame.
  50. reauth : bool, default False
  51. Force Google BigQuery to re-authenticate the user. This is useful
  52. if multiple accounts are used.
  53. auth_local_webserver : bool, default False
  54. Use the `local webserver flow`_ instead of the `console flow`_
  55. when getting user credentials.
  56. .. _local webserver flow:
  57. http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
  58. .. _console flow:
  59. http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
  60. *New in version 0.2.0 of pandas-gbq*.
  61. dialect : str, default 'legacy'
  62. Note: The default value is changing to 'standard' in a future version.
  63. SQL syntax dialect to use. Value can be one of:
  64. ``'legacy'``
  65. Use BigQuery's legacy SQL dialect. For more information see
  66. `BigQuery Legacy SQL Reference
  67. <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
  68. ``'standard'``
  69. Use BigQuery's standard SQL, which is
  70. compliant with the SQL 2011 standard. For more information
  71. see `BigQuery Standard SQL Reference
  72. <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
  73. .. versionchanged:: 0.24.0
  74. location : str, optional
  75. Location where the query job should run. See the `BigQuery locations
  76. documentation
  77. <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
  78. list of available locations. The location must match that of any
  79. datasets used in the query.
  80. *New in version 0.5.0 of pandas-gbq*.
  81. configuration : dict, optional
  82. Query config parameters for job processing.
  83. For example:
  84. configuration = {'query': {'useQueryCache': False}}
  85. For more information see `BigQuery REST API Reference
  86. <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
  87. credentials : google.auth.credentials.Credentials, optional
  88. Credentials for accessing Google APIs. Use this parameter to override
  89. default credentials, such as to use Compute Engine
  90. :class:`google.auth.compute_engine.Credentials` or Service Account
  91. :class:`google.oauth2.service_account.Credentials` directly.
  92. *New in version 0.8.0 of pandas-gbq*.
  93. .. versionadded:: 0.24.0
  94. use_bqstorage_api : bool, default False
  95. Use the `BigQuery Storage API
  96. <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
  97. download query results quickly, but at an increased cost. To use this
  98. API, first `enable it in the Cloud Console
  99. <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
  100. You must also have the `bigquery.readsessions.create
  101. <https://cloud.google.com/bigquery/docs/access-control#roles>`__
  102. permission on the project you are billing queries to.
  103. This feature requires version 0.10.0 or later of the ``pandas-gbq``
  104. package. It also requires the ``google-cloud-bigquery-storage`` and
  105. ``fastavro`` packages.
  106. .. versionadded:: 0.25.0
  107. progress_bar_type : Optional, str
  108. If set, use the `tqdm <https://tqdm.github.io/>`__ library to
  109. display a progress bar while the data downloads. Install the
  110. ``tqdm`` package to use this feature.
  111. Possible values of ``progress_bar_type`` include:
  112. ``None``
  113. No progress bar.
  114. ``'tqdm'``
  115. Use the :func:`tqdm.tqdm` function to print a progress bar
  116. to :data:`sys.stderr`.
  117. ``'tqdm_notebook'``
  118. Use the :func:`tqdm.tqdm_notebook` function to display a
  119. progress bar as a Jupyter notebook widget.
  120. ``'tqdm_gui'``
  121. Use the :func:`tqdm.tqdm_gui` function to display a
  122. progress bar as a graphical dialog box.
  123. Note that his feature requires version 0.12.0 or later of the
  124. ``pandas-gbq`` package. And it requires the ``tqdm`` package. Slightly
  125. different than ``pandas-gbq``, here the default is ``None``.
  126. .. versionadded:: 1.0.0
  127. Returns
  128. -------
  129. df: DataFrame
  130. DataFrame representing results of query.
  131. See Also
  132. --------
  133. pandas_gbq.read_gbq : This function in the pandas-gbq library.
  134. DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
  135. """
  136. pandas_gbq = _try_import()
  137. kwargs: Dict[str, Union[str, bool]] = {}
  138. # START: new kwargs. Don't populate unless explicitly set.
  139. if use_bqstorage_api is not None:
  140. kwargs["use_bqstorage_api"] = use_bqstorage_api
  141. if progress_bar_type is not None:
  142. kwargs["progress_bar_type"] = progress_bar_type
  143. # END: new kwargs
  144. return pandas_gbq.read_gbq(
  145. query,
  146. project_id=project_id,
  147. index_col=index_col,
  148. col_order=col_order,
  149. reauth=reauth,
  150. auth_local_webserver=auth_local_webserver,
  151. dialect=dialect,
  152. location=location,
  153. configuration=configuration,
  154. credentials=credentials,
  155. **kwargs,
  156. )
  157. def to_gbq(
  158. dataframe: "DataFrame",
  159. destination_table: str,
  160. project_id: Optional[str] = None,
  161. chunksize: Optional[int] = None,
  162. reauth: bool = False,
  163. if_exists: str = "fail",
  164. auth_local_webserver: bool = False,
  165. table_schema: Optional[List[Dict[str, str]]] = None,
  166. location: Optional[str] = None,
  167. progress_bar: bool = True,
  168. credentials=None,
  169. verbose=None,
  170. private_key=None,
  171. ) -> None:
  172. pandas_gbq = _try_import()
  173. pandas_gbq.to_gbq(
  174. dataframe,
  175. destination_table,
  176. project_id=project_id,
  177. chunksize=chunksize,
  178. reauth=reauth,
  179. if_exists=if_exists,
  180. auth_local_webserver=auth_local_webserver,
  181. table_schema=table_schema,
  182. location=location,
  183. progress_bar=progress_bar,
  184. credentials=credentials,
  185. verbose=verbose,
  186. private_key=private_key,
  187. )