__init__.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. # flake8: noqa
  2. __docformat__ = "restructuredtext"
  3. # Let users know if they're missing any of our hard dependencies
  4. hard_dependencies = ("numpy", "pytz", "dateutil")
  5. missing_dependencies = []
  6. for dependency in hard_dependencies:
  7. try:
  8. __import__(dependency)
  9. except ImportError as e:
  10. missing_dependencies.append(f"{dependency}: {e}")
  11. if missing_dependencies:
  12. raise ImportError(
  13. "Unable to import required dependencies:\n" + "\n".join(missing_dependencies)
  14. )
  15. del hard_dependencies, dependency, missing_dependencies
  16. # numpy compat
  17. from pandas.compat.numpy import (
  18. _np_version_under1p14,
  19. _np_version_under1p15,
  20. _np_version_under1p16,
  21. _np_version_under1p17,
  22. _np_version_under1p18,
  23. _is_numpy_dev,
  24. )
  25. try:
  26. from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
  27. except ImportError as e: # pragma: no cover
  28. # hack but overkill to use re
  29. module = str(e).replace("cannot import name ", "")
  30. raise ImportError(
  31. f"C extension: {module} not built. If you want to import "
  32. "pandas from the source directory, you may need to run "
  33. "'python setup.py build_ext --inplace --force' to build "
  34. "the C extensions first."
  35. )
  36. from pandas._config import (
  37. get_option,
  38. set_option,
  39. reset_option,
  40. describe_option,
  41. option_context,
  42. options,
  43. )
  44. # let init-time option registration happen
  45. import pandas.core.config_init
  46. from pandas.core.api import (
  47. # dtype
  48. Int8Dtype,
  49. Int16Dtype,
  50. Int32Dtype,
  51. Int64Dtype,
  52. UInt8Dtype,
  53. UInt16Dtype,
  54. UInt32Dtype,
  55. UInt64Dtype,
  56. CategoricalDtype,
  57. PeriodDtype,
  58. IntervalDtype,
  59. DatetimeTZDtype,
  60. StringDtype,
  61. BooleanDtype,
  62. # missing
  63. NA,
  64. isna,
  65. isnull,
  66. notna,
  67. notnull,
  68. # indexes
  69. Index,
  70. CategoricalIndex,
  71. Int64Index,
  72. UInt64Index,
  73. RangeIndex,
  74. Float64Index,
  75. MultiIndex,
  76. IntervalIndex,
  77. TimedeltaIndex,
  78. DatetimeIndex,
  79. PeriodIndex,
  80. IndexSlice,
  81. # tseries
  82. NaT,
  83. Period,
  84. period_range,
  85. Timedelta,
  86. timedelta_range,
  87. Timestamp,
  88. date_range,
  89. bdate_range,
  90. Interval,
  91. interval_range,
  92. DateOffset,
  93. # conversion
  94. to_numeric,
  95. to_datetime,
  96. to_timedelta,
  97. # misc
  98. Grouper,
  99. factorize,
  100. unique,
  101. value_counts,
  102. NamedAgg,
  103. array,
  104. Categorical,
  105. set_eng_float_format,
  106. Series,
  107. DataFrame,
  108. )
  109. from pandas.core.arrays.sparse import SparseDtype
  110. from pandas.tseries.api import infer_freq
  111. from pandas.tseries import offsets
  112. from pandas.core.computation.api import eval
  113. from pandas.core.reshape.api import (
  114. concat,
  115. lreshape,
  116. melt,
  117. wide_to_long,
  118. merge,
  119. merge_asof,
  120. merge_ordered,
  121. crosstab,
  122. pivot,
  123. pivot_table,
  124. get_dummies,
  125. cut,
  126. qcut,
  127. )
  128. import pandas.api
  129. from pandas.util._print_versions import show_versions
  130. from pandas.io.api import (
  131. # excel
  132. ExcelFile,
  133. ExcelWriter,
  134. read_excel,
  135. # parsers
  136. read_csv,
  137. read_fwf,
  138. read_table,
  139. # pickle
  140. read_pickle,
  141. to_pickle,
  142. # pytables
  143. HDFStore,
  144. read_hdf,
  145. # sql
  146. read_sql,
  147. read_sql_query,
  148. read_sql_table,
  149. # misc
  150. read_clipboard,
  151. read_parquet,
  152. read_orc,
  153. read_feather,
  154. read_gbq,
  155. read_html,
  156. read_json,
  157. read_stata,
  158. read_sas,
  159. read_spss,
  160. )
  161. from pandas.io.json import _json_normalize as json_normalize
  162. from pandas.util._tester import test
  163. import pandas.testing
  164. import pandas.arrays
  165. # use the closest tagged version if possible
  166. from ._version import get_versions
  167. v = get_versions()
  168. __version__ = v.get("closest-tag", v["version"])
  169. __git_version__ = v.get("full-revisionid")
  170. del get_versions, v
  171. # GH 27101
  172. # TODO: remove Panel compat in 1.0
  173. if pandas.compat.PY37:
  174. def __getattr__(name):
  175. import warnings
  176. if name == "Panel":
  177. warnings.warn(
  178. "The Panel class is removed from pandas. Accessing it "
  179. "from the top-level namespace will also be removed in "
  180. "the next version",
  181. FutureWarning,
  182. stacklevel=2,
  183. )
  184. class Panel:
  185. pass
  186. return Panel
  187. elif name == "datetime":
  188. warnings.warn(
  189. "The pandas.datetime class is deprecated "
  190. "and will be removed from pandas in a future version. "
  191. "Import from datetime module instead.",
  192. FutureWarning,
  193. stacklevel=2,
  194. )
  195. from datetime import datetime as dt
  196. return dt
  197. elif name == "np":
  198. warnings.warn(
  199. "The pandas.np module is deprecated "
  200. "and will be removed from pandas in a future version. "
  201. "Import numpy directly instead",
  202. FutureWarning,
  203. stacklevel=2,
  204. )
  205. import numpy as np
  206. return np
  207. elif name in {"SparseSeries", "SparseDataFrame"}:
  208. warnings.warn(
  209. f"The {name} class is removed from pandas. Accessing it from "
  210. "the top-level namespace will also be removed in the next "
  211. "version",
  212. FutureWarning,
  213. stacklevel=2,
  214. )
  215. return type(name, (), {})
  216. elif name == "SparseArray":
  217. warnings.warn(
  218. "The pandas.SparseArray class is deprecated "
  219. "and will be removed from pandas in a future version. "
  220. "Use pandas.arrays.SparseArray instead.",
  221. FutureWarning,
  222. stacklevel=2,
  223. )
  224. from pandas.core.arrays.sparse import SparseArray as _SparseArray
  225. return _SparseArray
  226. raise AttributeError(f"module 'pandas' has no attribute '{name}'")
  227. else:
  228. class Panel:
  229. pass
  230. class SparseDataFrame:
  231. pass
  232. class SparseSeries:
  233. pass
  234. class __numpy:
  235. def __init__(self):
  236. import numpy as np
  237. import warnings
  238. self.np = np
  239. self.warnings = warnings
  240. def __getattr__(self, item):
  241. self.warnings.warn(
  242. "The pandas.np module is deprecated "
  243. "and will be removed from pandas in a future version. "
  244. "Import numpy directly instead",
  245. FutureWarning,
  246. stacklevel=2,
  247. )
  248. try:
  249. return getattr(self.np, item)
  250. except AttributeError:
  251. raise AttributeError(f"module numpy has no attribute {item}")
  252. np = __numpy()
  253. class __Datetime(type):
  254. from datetime import datetime as dt
  255. datetime = dt
  256. def __getattr__(cls, item):
  257. cls.emit_warning()
  258. try:
  259. return getattr(cls.datetime, item)
  260. except AttributeError:
  261. raise AttributeError(f"module datetime has no attribute {item}")
  262. def __instancecheck__(cls, other):
  263. return isinstance(other, cls.datetime)
  264. class __DatetimeSub(metaclass=__Datetime):
  265. def emit_warning(dummy=0):
  266. import warnings
  267. warnings.warn(
  268. "The pandas.datetime class is deprecated "
  269. "and will be removed from pandas in a future version. "
  270. "Import from datetime instead.",
  271. FutureWarning,
  272. stacklevel=3,
  273. )
  274. def __new__(cls, *args, **kwargs):
  275. cls.emit_warning()
  276. from datetime import datetime as dt
  277. return dt(*args, **kwargs)
  278. datetime = __DatetimeSub
  279. class __SparseArray(type):
  280. from pandas.core.arrays.sparse import SparseArray as sa
  281. SparseArray = sa
  282. def __instancecheck__(cls, other):
  283. return isinstance(other, cls.SparseArray)
  284. class __SparseArraySub(metaclass=__SparseArray):
  285. def emit_warning(dummy=0):
  286. import warnings
  287. warnings.warn(
  288. "The pandas.SparseArray class is deprecated "
  289. "and will be removed from pandas in a future version. "
  290. "Use pandas.arrays.SparseArray instead.",
  291. FutureWarning,
  292. stacklevel=3,
  293. )
  294. def __new__(cls, *args, **kwargs):
  295. cls.emit_warning()
  296. from pandas.core.arrays.sparse import SparseArray as sa
  297. return sa(*args, **kwargs)
  298. SparseArray = __SparseArraySub
  299. # module level doc-string
  300. __doc__ = """
  301. pandas - a powerful data analysis and manipulation library for Python
  302. =====================================================================
  303. **pandas** is a Python package providing fast, flexible, and expressive data
  304. structures designed to make working with "relational" or "labeled" data both
  305. easy and intuitive. It aims to be the fundamental high-level building block for
  306. doing practical, **real world** data analysis in Python. Additionally, it has
  307. the broader goal of becoming **the most powerful and flexible open source data
  308. analysis / manipulation tool available in any language**. It is already well on
  309. its way toward this goal.
  310. Main Features
  311. -------------
  312. Here are just a few of the things that pandas does well:
  313. - Easy handling of missing data in floating point as well as non-floating
  314. point data.
  315. - Size mutability: columns can be inserted and deleted from DataFrame and
  316. higher dimensional objects
  317. - Automatic and explicit data alignment: objects can be explicitly aligned
  318. to a set of labels, or the user can simply ignore the labels and let
  319. `Series`, `DataFrame`, etc. automatically align the data for you in
  320. computations.
  321. - Powerful, flexible group by functionality to perform split-apply-combine
  322. operations on data sets, for both aggregating and transforming data.
  323. - Make it easy to convert ragged, differently-indexed data in other Python
  324. and NumPy data structures into DataFrame objects.
  325. - Intelligent label-based slicing, fancy indexing, and subsetting of large
  326. data sets.
  327. - Intuitive merging and joining data sets.
  328. - Flexible reshaping and pivoting of data sets.
  329. - Hierarchical labeling of axes (possible to have multiple labels per tick).
  330. - Robust IO tools for loading data from flat files (CSV and delimited),
  331. Excel files, databases, and saving/loading data from the ultrafast HDF5
  332. format.
  333. - Time series-specific functionality: date range generation and frequency
  334. conversion, moving window statistics, date shifting and lagging.
  335. """