indexing.py 81 KB


  1. from typing import Hashable, List, Tuple, Union
  2. import numpy as np
  3. from pandas._libs.indexing import _NDFrameIndexerBase
  4. from pandas._libs.lib import item_from_zerodim
  5. from pandas.errors import AbstractMethodError
  6. from pandas.util._decorators import Appender
  7. from pandas.core.dtypes.common import (
  8. is_float,
  9. is_integer,
  10. is_iterator,
  11. is_list_like,
  12. is_numeric_dtype,
  13. is_object_dtype,
  14. is_scalar,
  15. is_sequence,
  16. )
  17. from pandas.core.dtypes.concat import concat_compat
  18. from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries
  19. from pandas.core.dtypes.missing import _infer_fill_value, isna
  20. import pandas.core.common as com
  21. from pandas.core.indexers import (
  22. check_array_indexer,
  23. is_list_like_indexer,
  24. length_of_indexer,
  25. )
  26. from pandas.core.indexes.api import Index, InvalidIndexError
# "null slice": ``slice(None, None)``, i.e. the slice object produced by a
# bare ``:``.
_NS = slice(None, None)
# the public IndexSlicerMaker
class _IndexSlice:
    """
    Create an object to more easily perform multi-index slicing.

    See Also
    --------
    MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

    Notes
    -----
    See :ref:`Defined Levels <advanced.shown_levels>`
    for further info on slicing a MultiIndex.

    Examples
    --------
    >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
    >>> columns = ['foo', 'bar']
    >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
    ...                     index=midx, columns=columns)

    Using the default slice command:

    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11

    Using the IndexSlice class for a more intuitive command:

    >>> idx = pd.IndexSlice
    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11
    """

    def __getitem__(self, arg):
        # Hand back whatever the subscription syntax produced (a slice, a
        # tuple of slices, ...), unchanged.
        return arg


# Module-level instance of ``_IndexSlice``; subscripting it returns the
# subscript itself.
IndexSlice = _IndexSlice()
  65. class IndexingError(Exception):
  66. pass
class IndexingMixin:
    """Mixin for adding .loc/.iloc/.at/.iat to DataFrames and Series.
    """

    @property
    def iloc(self) -> "_iLocIndexer":
        """
        Purely integer-location based indexing for selection by position.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Allowed inputs are:

        - An integer, e.g. ``5``.
        - A list or array of integers, e.g. ``[4, 3, 0]``.
        - A slice object with ints, e.g. ``1:7``.
        - A boolean array.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).
          This is useful in method chains, when you don't have a reference to the
          calling object, but would like to base your selection on some value.

        ``.iloc`` will raise ``IndexError`` if a requested indexer is
        out-of-bounds, except *slice* indexers which allow out-of-bounds
        indexing (this conforms with python/numpy *slice* semantics).

        See more at :ref:`Selection by Position <indexing.integer>`.

        See Also
        --------
        DataFrame.iat : Fast integer location scalar accessor.
        DataFrame.loc : Purely label-location based indexer for selection by label.
        Series.iloc : Purely integer-location based indexing for
                       selection by position.

        Examples
        --------
        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
        >>> df = pd.DataFrame(mydict)
        >>> df
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        **Indexing just the rows**

        With a scalar integer.

        >>> type(df.iloc[0])
        <class 'pandas.core.series.Series'>
        >>> df.iloc[0]
        a    1
        b    2
        c    3
        d    4
        Name: 0, dtype: int64

        With a list of integers.

        >>> df.iloc[[0]]
           a  b  c  d
        0  1  2  3  4
        >>> type(df.iloc[[0]])
        <class 'pandas.core.frame.DataFrame'>

        >>> df.iloc[[0, 1]]
             a    b    c    d
        0    1    2    3    4
        1  100  200  300  400

        With a `slice` object.

        >>> df.iloc[:3]
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        With a boolean mask the same length as the index.

        >>> df.iloc[[True, False, True]]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        With a callable, useful in method chains. The `x` passed
        to the ``lambda`` is the DataFrame being sliced. This selects
        the rows whose index label is even.

        >>> df.iloc[lambda x: x.index % 2 == 0]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        **Indexing both axes**

        You can mix the indexer types for the index and columns. Use ``:`` to
        select the entire axis.

        With scalar integers.

        >>> df.iloc[0, 1]
        2

        With lists of integers.

        >>> df.iloc[[0, 2], [1, 3]]
              b     d
        0     2     4
        2  2000  4000

        With `slice` objects.

        >>> df.iloc[1:3, 0:3]
              a     b     c
        1   100   200   300
        2  1000  2000  3000

        With a boolean array whose length matches the columns.

        >>> df.iloc[:, [True, False, True, False]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000

        With a callable function that expects the Series or DataFrame.

        >>> df.iloc[:, lambda df: [0, 2]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000
        """
        return _iLocIndexer("iloc", self)

    @property
    def loc(self) -> "_LocIndexer":
        """
        Access a group of rows and columns by label(s) or a boolean array.

        ``.loc[]`` is primarily label based, but may also be used with a
        boolean array.

        Allowed inputs are:

        - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
          interpreted as a *label* of the index, and **never** as an
          integer position along the index).
        - A list or array of labels, e.g. ``['a', 'b', 'c']``.
        - A slice object with labels, e.g. ``'a':'f'``.

          .. warning:: Note that contrary to usual python slices, **both** the
              start and the stop are included

        - A boolean array of the same length as the axis being sliced,
          e.g. ``[True, False, True]``.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above)

        See more at :ref:`Selection by Label <indexing.label>`

        Raises
        ------
        KeyError
            If any items are not found.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.iloc : Access group of rows and columns by integer position(s).
        DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
            Series/DataFrame.
        Series.loc : Access group of values using labels.

        Examples
        --------
        **Getting values**

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...      index=['cobra', 'viper', 'sidewinder'],
        ...      columns=['max_speed', 'shield'])
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4       5
        sidewinder          7       8

        Single label. Note this returns the row as a Series.

        >>> df.loc['viper']
        max_speed    4
        shield       5
        Name: viper, dtype: int64

        List of labels. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[['viper', 'sidewinder']]
                    max_speed  shield
        viper               4       5
        sidewinder          7       8

        Single label for row and column

        >>> df.loc['cobra', 'shield']
        2

        Slice with labels for row and single label for column. As mentioned
        above, note that both the start and stop of the slice are included.

        >>> df.loc['cobra':'viper', 'max_speed']
        cobra    1
        viper    4
        Name: max_speed, dtype: int64

        Boolean list with the same length as the row axis

        >>> df.loc[[False, False, True]]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series

        >>> df.loc[df['shield'] > 6]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series with column labels specified

        >>> df.loc[df['shield'] > 6, ['max_speed']]
                    max_speed
        sidewinder          7

        Callable that returns a boolean Series

        >>> df.loc[lambda df: df['shield'] == 8]
                    max_speed  shield
        sidewinder          7       8

        **Setting values**

        Set value for all items matching the list of labels

        >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4      50
        sidewinder          7      50

        Set value for an entire row

        >>> df.loc['cobra'] = 10
        >>> df
                    max_speed  shield
        cobra              10      10
        viper               4      50
        sidewinder          7      50

        Set value for an entire column

        >>> df.loc[:, 'max_speed'] = 30
        >>> df
                    max_speed  shield
        cobra              30      10
        viper              30      50
        sidewinder         30      50

        Set value for rows matching callable condition

        >>> df.loc[df['shield'] > 35] = 0
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       0
        sidewinder          0       0

        **Getting values on a DataFrame with an index that has integer labels**

        Another example using integers for the index

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...      index=[7, 8, 9], columns=['max_speed', 'shield'])
        >>> df
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        Slice with integer labels for rows. As mentioned above, note that both
        the start and stop of the slice are included.

        >>> df.loc[7:9]
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        **Getting values with a MultiIndex**

        A number of examples using a DataFrame with a MultiIndex

        >>> tuples = [
        ...    ('cobra', 'mark i'), ('cobra', 'mark ii'),
        ...    ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
        ...    ('viper', 'mark ii'), ('viper', 'mark iii')
        ... ]
        >>> index = pd.MultiIndex.from_tuples(tuples)
        >>> values = [[12, 2], [0, 4], [10, 20],
        ...         [1, 4], [7, 1], [16, 36]]
        >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
        >>> df
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Single label. Note this returns a DataFrame with a single index.

        >>> df.loc['cobra']
                 max_speed  shield
        mark i          12       2
        mark ii          0       4

        Single index tuple. Note this returns a Series.

        >>> df.loc[('cobra', 'mark ii')]
        max_speed    0
        shield       4
        Name: (cobra, mark ii), dtype: int64

        Single label for row and column. Similar to passing in a tuple, this
        returns a Series.

        >>> df.loc['cobra', 'mark i']
        max_speed    12
        shield        2
        Name: (cobra, mark i), dtype: int64

        Single tuple. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[[('cobra', 'mark ii')]]
                       max_speed  shield
        cobra mark ii          0       4

        Single tuple for the index with a single label for the column

        >>> df.loc[('cobra', 'mark i'), 'shield']
        2

        Slice from index tuple to single label

        >>> df.loc[('cobra', 'mark i'):'viper']
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Slice from index tuple to index tuple

        >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
                            max_speed  shield
        cobra      mark i          12       2
                   mark ii          0       4
        sidewinder mark i          10      20
                   mark ii          1       4
        viper      mark ii          7       1
        """
        return _LocIndexer("loc", self)

    @property
    def at(self) -> "_AtIndexer":
        """
        Access a single value for a row/column label pair.

        Similar to ``loc``, in that both provide label-based lookups. Use
        ``at`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        KeyError
            If 'label' does not exist in DataFrame.

        See Also
        --------
        DataFrame.iat : Access a single value for a row/column pair by integer
            position.
        DataFrame.loc : Access a group of rows and columns by label(s).
        Series.at : Access a single value using a label.

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   index=[4, 5, 6], columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        4   0   2   3
        5   0   4   1
        6  10  20  30

        Get value at specified row/column pair

        >>> df.at[4, 'B']
        2

        Set value at specified row/column pair

        >>> df.at[4, 'B'] = 10
        >>> df.at[4, 'B']
        10

        Get value within a Series

        >>> df.loc[5].at['B']
        4
        """
        return _AtIndexer("at", self)

    @property
    def iat(self) -> "_iAtIndexer":
        """
        Access a single value for a row/column pair by integer position.

        Similar to ``iloc``, in that both provide integer-based lookups. Use
        ``iat`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        IndexError
            When integer position is out of bounds.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer position(s).

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        0   0   2   3
        1   0   4   1
        2  10  20  30

        Get value at specified row/column pair

        >>> df.iat[1, 2]
        1

        Set value at specified row/column pair

        >>> df.iat[1, 2] = 10
        >>> df.iat[1, 2]
        10

        Get value within a series

        >>> df.loc[0].iat[1]
        2
        """
        return _iAtIndexer("iat", self)
  432. class _NDFrameIndexer(_NDFrameIndexerBase):
  433. _valid_types: str
  434. axis = None
  435. def __call__(self, axis=None):
  436. # we need to return a copy of ourselves
  437. new_self = type(self)(self.name, self.obj)
  438. if axis is not None:
  439. axis = self.obj._get_axis_number(axis)
  440. new_self.axis = axis
  441. return new_self
  442. # TODO: remove once geopandas no longer needs this
  443. def __getitem__(self, key):
  444. # Used in ix and downstream in geopandas _CoordinateIndexer
  445. if type(key) is tuple:
  446. # Note: we check the type exactly instead of with isinstance
  447. # because NamedTuple is checked separately.
  448. key = tuple(com.apply_if_callable(x, self.obj) for x in key)
  449. try:
  450. values = self.obj._get_value(*key)
  451. except (KeyError, TypeError, InvalidIndexError, AttributeError):
  452. # TypeError occurs here if the key has non-hashable entries,
  453. # generally slice or list.
  454. # TODO(ix): most/all of the TypeError cases here are for ix,
  455. # so this check can be removed once ix is removed.
  456. # The InvalidIndexError is only catched for compatibility
  457. # with geopandas, see
  458. # https://github.com/pandas-dev/pandas/issues/27258
  459. # TODO: The AttributeError is for IntervalIndex which
  460. # incorrectly implements get_value, see
  461. # https://github.com/pandas-dev/pandas/issues/27865
  462. pass
  463. else:
  464. if is_scalar(values):
  465. return values
  466. return self._getitem_tuple(key)
  467. else:
  468. # we by definition only have the 0th axis
  469. axis = self.axis or 0
  470. key = com.apply_if_callable(key, self.obj)
  471. return self._getitem_axis(key, axis=axis)
  472. def _get_label(self, label, axis: int):
  473. if self.ndim == 1:
  474. # for perf reasons we want to try _xs first
  475. # as its basically direct indexing
  476. # but will fail when the index is not present
  477. # see GH5667
  478. return self.obj._xs(label, axis=axis)
  479. elif isinstance(label, tuple) and isinstance(label[axis], slice):
  480. raise IndexingError("no slices here, handle elsewhere")
  481. return self.obj._xs(label, axis=axis)
  482. def _get_loc(self, key: int, axis: int):
  483. return self.obj._ixs(key, axis=axis)
  484. def _slice(self, obj, axis: int, kind=None):
  485. return self.obj._slice(obj, axis=axis, kind=kind)
  486. def _get_setitem_indexer(self, key):
  487. if self.axis is not None:
  488. return self._convert_tuple(key)
  489. ax = self.obj._get_axis(0)
  490. if isinstance(ax, ABCMultiIndex) and self.name != "iloc":
  491. try:
  492. return ax.get_loc(key)
  493. except (TypeError, KeyError, InvalidIndexError):
  494. # TypeError e.g. passed a bool
  495. pass
  496. if isinstance(key, tuple):
  497. try:
  498. return self._convert_tuple(key)
  499. except IndexingError:
  500. pass
  501. if isinstance(key, range):
  502. return list(key)
  503. axis = self.axis or 0
  504. try:
  505. return self._convert_to_indexer(key, axis=axis)
  506. except TypeError as e:
  507. # invalid indexer type vs 'other' indexing errors
  508. if "cannot do" in str(e):
  509. raise
  510. raise IndexingError(key)
  511. def __setitem__(self, key, value):
  512. if isinstance(key, tuple):
  513. key = tuple(com.apply_if_callable(x, self.obj) for x in key)
  514. else:
  515. key = com.apply_if_callable(key, self.obj)
  516. indexer = self._get_setitem_indexer(key)
  517. self._setitem_with_indexer(indexer, value)
  518. def _validate_key(self, key, axis: int):
  519. """
  520. Ensure that key is valid for current indexer.
  521. Parameters
  522. ----------
  523. key : scalar, slice or list-like
  524. Key requested.
  525. axis : int
  526. Dimension on which the indexing is being made.
  527. Raises
  528. ------
  529. TypeError
  530. If the key (or some element of it) has wrong type.
  531. IndexError
  532. If the key (or some element of it) is out of bounds.
  533. KeyError
  534. If the key was not found.
  535. """
  536. raise AbstractMethodError(self)
  537. def _has_valid_tuple(self, key: Tuple):
  538. """
  539. Check the key for valid keys across my indexer.
  540. """
  541. for i, k in enumerate(key):
  542. if i >= self.ndim:
  543. raise IndexingError("Too many indexers")
  544. try:
  545. self._validate_key(k, i)
  546. except ValueError:
  547. raise ValueError(
  548. "Location based indexing can only have "
  549. f"[{self._valid_types}] types"
  550. )
  551. def _is_nested_tuple_indexer(self, tup: Tuple) -> bool:
  552. """
  553. Returns
  554. -------
  555. bool
  556. """
  557. if any(isinstance(ax, ABCMultiIndex) for ax in self.obj.axes):
  558. return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
  559. return False
  560. def _convert_tuple(self, key):
  561. keyidx = []
  562. if self.axis is not None:
  563. axis = self.obj._get_axis_number(self.axis)
  564. for i in range(self.ndim):
  565. if i == axis:
  566. keyidx.append(self._convert_to_indexer(key, axis=axis))
  567. else:
  568. keyidx.append(slice(None))
  569. else:
  570. for i, k in enumerate(key):
  571. if i >= self.ndim:
  572. raise IndexingError("Too many indexers")
  573. idx = self._convert_to_indexer(k, axis=i)
  574. keyidx.append(idx)
  575. return tuple(keyidx)
  576. def _convert_scalar_indexer(self, key, axis: int):
  577. # if we are accessing via lowered dim, use the last dim
  578. ax = self.obj._get_axis(min(axis, self.ndim - 1))
  579. # a scalar
  580. return ax._convert_scalar_indexer(key, kind=self.name)
  581. def _convert_slice_indexer(self, key: slice, axis: int):
  582. # if we are accessing via lowered dim, use the last dim
  583. ax = self.obj._get_axis(min(axis, self.ndim - 1))
  584. return ax._convert_slice_indexer(key, kind=self.name)
  585. def _has_valid_setitem_indexer(self, indexer) -> bool:
  586. return True
  587. def _has_valid_positional_setitem_indexer(self, indexer) -> bool:
  588. """
  589. Validate that a positional indexer cannot enlarge its target
  590. will raise if needed, does not modify the indexer externally.
  591. Returns
  592. -------
  593. bool
  594. """
  595. if isinstance(indexer, dict):
  596. raise IndexError(f"{self.name} cannot enlarge its target object")
  597. else:
  598. if not isinstance(indexer, tuple):
  599. indexer = _tuplify(self.ndim, indexer)
  600. for ax, i in zip(self.obj.axes, indexer):
  601. if isinstance(i, slice):
  602. # should check the stop slice?
  603. pass
  604. elif is_list_like_indexer(i):
  605. # should check the elements?
  606. pass
  607. elif is_integer(i):
  608. if i >= len(ax):
  609. raise IndexError(
  610. f"{self.name} cannot enlarge its target object"
  611. )
  612. elif isinstance(i, dict):
  613. raise IndexError(f"{self.name} cannot enlarge its target object")
  614. return True
    def _setitem_with_indexer(self, indexer, value):
        """
        Set ``value`` into ``self.obj`` at the location(s) given by ``indexer``.

        Dict entries in ``indexer`` are treated as "missing" markers (decoded
        with ``convert_missing_indexer``): the corresponding axis is extended
        with the new key before the value is set.  The assignment itself
        is done either item by item along the info axis (the "split path")
        or with a single call to ``self.obj._data.setitem``.

        Parameters
        ----------
        indexer : tuple, dict or single-axis indexer
            Already-converted indexer, e.g. as produced by
            ``_get_setitem_indexer``.
        value : object
            Value(s) to set.  Series, DataFrame and dict values are aligned
            on the single-call path; on the split path a Series is aligned
            up front and a DataFrame is aligned per item.

        Returns
        -------
        ``self.obj`` on the enlargement paths that return early, the result
        of ``_setitem_with_indexer_missing`` for a missing non-tuple
        indexer, otherwise ``None``.

        Raises
        ------
        ValueError
            If a scalar is set on a frame with an empty non-info axis, or if
            the number of values does not match the number of targeted
            keys/positions.
        """
        self._has_valid_setitem_indexer(indexer)

        # also has the side effect of consolidating in-place
        from pandas import Series

        info_axis = self.obj._info_axis_number

        # maybe partial set
        take_split_path = self.obj._is_mixed_type

        # if there is only one block/type, still have to take split path
        # unless the block is one-dimensional or it can hold the value
        if not take_split_path and self.obj._data.blocks:
            (blk,) = self.obj._data.blocks
            if 1 < blk.ndim:  # in case of dict, keys are indices
                val = list(value.values()) if isinstance(value, dict) else value
                take_split_path = not blk._can_hold_element(val)

        # if we have any multi-indexes that have non-trivial slices
        # (not null slices) then we must take the split path, xref
        # GH 10360, GH 27841
        if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
            for i, ax in zip(indexer, self.obj.axes):
                if isinstance(ax, ABCMultiIndex) and not (
                    is_integer(i) or com.is_null_slice(i)
                ):
                    take_split_path = True
                    break

        if isinstance(indexer, tuple):
            # Rebuild the indexer, replacing each dict ("missing key") entry
            # by the position of the key after the axis has been extended.
            nindexer = []
            for i, idx in enumerate(indexer):
                if isinstance(idx, dict):

                    # reindex the axis to the new value
                    # and set inplace
                    key, _ = convert_missing_indexer(idx)

                    # if this is the items axes, then take the main missing
                    # path first
                    # this correctly sets the dtype and avoids cache issues
                    # essentially this separates out the block that is needed
                    # to possibly be modified
                    if self.ndim > 1 and i == self.obj._info_axis_number:

                        # add the new item, and set the value
                        # must have all defined axes if we have a scalar
                        # or a list-like on the non-info axes if we have a
                        # list-like
                        len_non_info_axes = (
                            len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i
                        )
                        if any(not l for l in len_non_info_axes):
                            if not is_list_like_indexer(value):
                                raise ValueError(
                                    "cannot set a frame with no "
                                    "defined index and a scalar"
                                )
                            self.obj[key] = value
                            return self.obj

                        # add a new item with the dtype setup
                        self.obj[key] = _infer_fill_value(value)

                        # The key now exists, so convert the indexer again
                        # and redo the set against the enlarged object.
                        new_indexer = convert_from_missing_indexer_tuple(
                            indexer, self.obj.axes
                        )
                        self._setitem_with_indexer(new_indexer, value)

                        return self.obj

                    # reindex the axis
                    # make sure to clear the cache because we are
                    # just replacing the block manager here
                    # so the object is the same
                    index = self.obj._get_axis(i)
                    labels = index.insert(len(index), key)
                    self.obj._data = self.obj.reindex(labels, axis=i)._data
                    self.obj._maybe_update_cacher(clear=True)
                    self.obj._is_copy = None

                    nindexer.append(labels.get_loc(key))

                else:
                    nindexer.append(idx)

            indexer = tuple(nindexer)
        else:

            indexer, missing = convert_missing_indexer(indexer)

            if missing:
                return self._setitem_with_indexer_missing(indexer, value)

        # set
        item_labels = self.obj._get_axis(info_axis)

        # align and set the values
        if take_split_path:
            # Above we only set take_split_path to True for 2D cases
            assert self.ndim == 2
            assert info_axis == 1

            if not isinstance(indexer, tuple):
                indexer = _tuplify(self.ndim, indexer)

            if isinstance(value, ABCSeries):
                value = self._align_series(indexer, value)

            # Labels on the info axis that are being written to.
            info_idx = indexer[info_axis]
            if is_integer(info_idx):
                info_idx = [info_idx]
            labels = item_labels[info_idx]

            # if we have a partial multiindex, then need to adjust the plane
            # indexer here
            if len(labels) == 1 and isinstance(
                self.obj[labels[0]].axes[0], ABCMultiIndex
            ):
                item = labels[0]
                obj = self.obj[item]
                index = obj.index
                idx = indexer[:info_axis][0]

                plane_indexer = tuple([idx]) + indexer[info_axis + 1 :]
                lplane_indexer = length_of_indexer(plane_indexer[0], index)

                # require that we are setting the right number of values that
                # we are indexing
                if (
                    is_list_like_indexer(value)
                    and np.iterable(value)
                    and lplane_indexer != len(value)
                ):

                    if len(obj[idx]) != len(value):
                        raise ValueError(
                            "cannot set using a multi-index "
                            "selection indexer with a different "
                            "length than the value"
                        )

                    # make sure we have an ndarray
                    value = getattr(value, "values", value).ravel()

                    # we can directly set the series here
                    # as we select a slice indexer on the mi
                    if isinstance(idx, slice):
                        idx = index._convert_slice_indexer(idx)
                    obj._consolidate_inplace()
                    obj = obj.copy()
                    obj._data = obj._data.setitem(indexer=tuple([idx]), value=value)
                    self.obj[item] = obj
                    return

            # non-mi
            else:
                plane_indexer = indexer[:info_axis] + indexer[info_axis + 1 :]
                plane_axis = self.obj.axes[:info_axis][0]
                lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis)

            def setter(item, v):
                # Write ``v`` into the single item ``item`` of self.obj,
                # using the plane indexer computed above.
                s = self.obj[item]
                pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

                # perform the equivalent of a setitem on the info axis
                # as we have a null slice or a slice with full bounds
                # which means essentially reassign to the columns of a
                # multi-dim object
                # GH6149 (null slice), GH10408 (full bounds)
                if isinstance(pi, tuple) and all(
                    com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj))
                    for idx in pi
                ):
                    s = v
                else:
                    # set the item, possibly having a dtype change
                    s._consolidate_inplace()
                    s = s.copy()
                    s._data = s._data.setitem(indexer=pi, value=v)
                    s._maybe_update_cacher(clear=True)

                # reset the sliced object if unique
                self.obj[item] = s

            # we need an iterable, with a ndim of at least 1
            # eg. don't pass through np.array(0)
            if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:

                # we have an equal len Frame
                if isinstance(value, ABCDataFrame):
                    sub_indexer = list(indexer)
                    multiindex_indexer = isinstance(labels, ABCMultiIndex)

                    for item in labels:
                        if item in value:
                            sub_indexer[info_axis] = item
                            v = self._align_series(
                                tuple(sub_indexer), value[item], multiindex_indexer
                            )
                        else:
                            # Item absent from the value frame: set NaN.
                            v = np.nan

                        setter(item, v)

                # we have an equal len ndarray/convertible to our labels
                # hasattr first, to avoid coercing to ndarray without reason.
                # But we may be relying on the ndarray coercion to check ndim.
                # Why not just convert to an ndarray earlier on if needed?
                elif np.ndim(value) == 2:

                    # note that this coerces the dtype if we are mixed
                    # GH 7551
                    value = np.array(value, dtype=object)
                    if len(labels) != value.shape[1]:
                        raise ValueError(
                            "Must have equal len keys and value "
                            "when setting with an ndarray"
                        )

                    for i, item in enumerate(labels):

                        # setting with a list, recoerces
                        setter(item, value[:, i].tolist())

                # we have an equal len list/ndarray
                elif _can_do_equal_len(
                    labels, value, plane_indexer, lplane_indexer, self.obj
                ):
                    setter(labels[0], value)

                # per label values
                else:

                    if len(labels) != len(value):
                        raise ValueError(
                            "Must have equal len keys and value "
                            "when setting with an iterable"
                        )

                    for item, v in zip(labels, value):
                        setter(item, v)
            else:

                # scalar
                for item in labels:
                    setter(item, value)

        else:
            if isinstance(indexer, tuple):
                indexer = maybe_convert_ix(*indexer)

                # if we are setting on the info axis ONLY
                # set using those methods to avoid block-splitting
                # logic here
                if (
                    len(indexer) > info_axis
                    and is_integer(indexer[info_axis])
                    and all(
                        com.is_null_slice(idx)
                        for i, idx in enumerate(indexer)
                        if i != info_axis
                    )
                    and item_labels.is_unique
                ):
                    self.obj[item_labels[indexer[info_axis]]] = value
                    return

            if isinstance(value, (ABCSeries, dict)):
                # TODO(EA): ExtensionBlock.setitem this causes issues with
                # setting for extensionarrays that store dicts. Need to decide
                # if it's worth supporting that.
                value = self._align_series(indexer, Series(value))

            elif isinstance(value, ABCDataFrame):
                value = self._align_frame(indexer, value)

            # check for chained assignment
            self.obj._check_is_chained_assignment_possible()

            # actually do the set
            self.obj._consolidate_inplace()
            self.obj._data = self.obj._data.setitem(indexer=indexer, value=value)
            self.obj._maybe_update_cacher(clear=True)
    def _setitem_with_indexer_missing(self, indexer, value):
        """
        Insert new row(s) or column(s) into the Series or DataFrame.

        Parameters
        ----------
        indexer : label
            The new key; it is appended to the index.
        value : object
            Value for the new entry.  For a two-dimensional object a Series
            is reindexed to the columns, and a list-like must have one
            element per column.

        Returns
        -------
        ``self.obj`` after enlargement for one- and two-dimensional objects.
        For a one-dimensional object whose unique index already contains the
        (coerced) key, the result of ``_setitem_with_indexer`` instead.

        Raises
        ------
        ValueError
            For a two-dimensional object without columns, or when a list-like
            ``value`` does not have one element per column.
        """
        from pandas import Series

        # reindex the axis to the new value
        # and set inplace
        if self.ndim == 1:
            index = self.obj.index
            new_index = index.insert(len(index), indexer)

            # we have a coerced indexer, e.g. a float
            # that matches in an Int64Index, so
            # we will not create a duplicate index, rather
            # index to that element
            # e.g. 0.0 -> 0
            # GH#12246
            if index.is_unique:
                new_indexer = index.get_indexer([new_index[-1]])
                if (new_indexer != -1).any():
                    return self._setitem_with_indexer(new_indexer, value)

            # this preserves dtype of the value
            new_values = Series([value])._values
            if len(self.obj._values):
                # GH#22717 handle casting compatibility that np.concatenate
                #  does incorrectly
                new_values = concat_compat([self.obj._values, new_values])
            # Swap in the data of a newly constructed object so that
            # ``self.obj`` itself stays the same object.
            self.obj._data = self.obj._constructor(
                new_values, index=new_index, name=self.obj.name
            )._data
            self.obj._maybe_update_cacher(clear=True)
            return self.obj

        elif self.ndim == 2:

            if not len(self.obj.columns):
                # no columns and scalar
                raise ValueError("cannot set a frame with no defined columns")

            if isinstance(value, ABCSeries):
                # append a Series
                value = value.reindex(index=self.obj.columns, copy=True)
                value.name = indexer

            else:
                # a list-list
                if is_list_like_indexer(value):
                    # must have conforming columns
                    if len(value) != len(self.obj.columns):
                        raise ValueError("cannot set a row with mismatched columns")

                # Scalars and conforming list-likes alike become a row
                # Series named after the new key.
                value = Series(value, index=self.obj.columns, name=indexer)

            self.obj._data = self.obj.append(value)._data
            self.obj._maybe_update_cacher(clear=True)
            return self.obj
  897. def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer: bool = False):
  898. """
  899. Parameters
  900. ----------
  901. indexer : tuple, slice, scalar
  902. Indexer used to get the locations that will be set to `ser`.
  903. ser : pd.Series
  904. Values to assign to the locations specified by `indexer`.
  905. multiindex_indexer : boolean, optional
  906. Defaults to False. Should be set to True if `indexer` was from
  907. a `pd.MultiIndex`, to avoid unnecessary broadcasting.
  908. Returns
  909. -------
  910. `np.array` of `ser` broadcast to the appropriate shape for assignment
  911. to the locations selected by `indexer`
  912. """
  913. if isinstance(indexer, (slice, np.ndarray, list, Index)):
  914. indexer = tuple([indexer])
  915. if isinstance(indexer, tuple):
  916. # flatten np.ndarray indexers
  917. def ravel(i):
  918. return i.ravel() if isinstance(i, np.ndarray) else i
  919. indexer = tuple(map(ravel, indexer))
  920. aligners = [not com.is_null_slice(idx) for idx in indexer]
  921. sum_aligners = sum(aligners)
  922. single_aligner = sum_aligners == 1
  923. is_frame = self.ndim == 2
  924. obj = self.obj
  925. # are we a single alignable value on a non-primary
  926. # dim (e.g. panel: 1,2, or frame: 0) ?
  927. # hence need to align to a single axis dimension
  928. # rather that find all valid dims
  929. # frame
  930. if is_frame:
  931. single_aligner = single_aligner and aligners[0]
  932. # we have a frame, with multiple indexers on both axes; and a
  933. # series, so need to broadcast (see GH5206)
  934. if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
  935. ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values
  936. # single indexer
  937. if len(indexer) > 1 and not multiindex_indexer:
  938. len_indexer = len(indexer[1])
  939. ser = np.tile(ser, len_indexer).reshape(len_indexer, -1).T
  940. return ser
  941. for i, idx in enumerate(indexer):
  942. ax = obj.axes[i]
  943. # multiple aligners (or null slices)
  944. if is_sequence(idx) or isinstance(idx, slice):
  945. if single_aligner and com.is_null_slice(idx):
  946. continue
  947. new_ix = ax[idx]
  948. if not is_list_like_indexer(new_ix):
  949. new_ix = Index([new_ix])
  950. else:
  951. new_ix = Index(new_ix)
  952. if ser.index.equals(new_ix) or not len(new_ix):
  953. return ser._values.copy()
  954. return ser.reindex(new_ix)._values
  955. # 2 dims
  956. elif single_aligner:
  957. # reindex along index
  958. ax = self.obj.axes[1]
  959. if ser.index.equals(ax) or not len(ax):
  960. return ser._values.copy()
  961. return ser.reindex(ax)._values
  962. elif is_scalar(indexer):
  963. ax = self.obj._get_axis(1)
  964. if ser.index.equals(ax):
  965. return ser._values.copy()
  966. return ser.reindex(ax)._values
  967. raise ValueError("Incompatible indexer with Series")
  968. def _align_frame(self, indexer, df: ABCDataFrame):
  969. is_frame = self.ndim == 2
  970. if isinstance(indexer, tuple):
  971. idx, cols = None, None
  972. sindexers = []
  973. for i, ix in enumerate(indexer):
  974. ax = self.obj.axes[i]
  975. if is_sequence(ix) or isinstance(ix, slice):
  976. if isinstance(ix, np.ndarray):
  977. ix = ix.ravel()
  978. if idx is None:
  979. idx = ax[ix]
  980. elif cols is None:
  981. cols = ax[ix]
  982. else:
  983. break
  984. else:
  985. sindexers.append(i)
  986. if idx is not None and cols is not None:
  987. if df.index.equals(idx) and df.columns.equals(cols):
  988. val = df.copy()._values
  989. else:
  990. val = df.reindex(idx, columns=cols)._values
  991. return val
  992. elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
  993. ax = self.obj.index[indexer]
  994. if df.index.equals(ax):
  995. val = df.copy()._values
  996. else:
  997. # we have a multi-index and are trying to align
  998. # with a particular, level GH3738
  999. if (
  1000. isinstance(ax, ABCMultiIndex)
  1001. and isinstance(df.index, ABCMultiIndex)
  1002. and ax.nlevels != df.index.nlevels
  1003. ):
  1004. raise TypeError(
  1005. "cannot align on a multi-index with out "
  1006. "specifying the join levels"
  1007. )
  1008. val = df.reindex(index=ax)._values
  1009. return val
  1010. raise ValueError("Incompatible indexer with DataFrame")
  def _getitem_tuple(self, tup: Tuple):
      """
      Resolve a tuple key, one entry per axis.

      ``_getitem_lowerdim`` is tried first; when it raises ``IndexingError``
      the tuple is validated and then applied either through ``_multi_take``
      or axis by axis.
      """
      try:
          return self._getitem_lowerdim(tup)
      except IndexingError:
          # not a lower-dimensional lookup; continue with the general path
          pass

      # no multi-index, so validate all of the indexers
      self._has_valid_tuple(tup)

      # ugly hack for GH #836
      if self._multi_take_opportunity(tup):
          return self._multi_take(tup)

      # no shortcut available: apply each non-null key on its own axis
      result = self.obj
      for axis_no, axis_key in enumerate(tup):
          if not com.is_null_slice(axis_key):
              sub_indexer = getattr(result, self.name)
              result = sub_indexer._getitem_axis(axis_key, axis=axis_no)

      return result
  1028. def _multi_take_opportunity(self, tup: Tuple) -> bool:
  1029. """
  1030. Check whether there is the possibility to use ``_multi_take``.
  1031. Currently the limit is that all axes being indexed, must be indexed with
  1032. list-likes.
  1033. Parameters
  1034. ----------
  1035. tup : tuple
  1036. Tuple of indexers, one per axis.
  1037. Returns
  1038. -------
  1039. bool
  1040. Whether the current indexing,
  1041. can be passed through `_multi_take`.
  1042. """
  1043. if not all(is_list_like_indexer(x) for x in tup):
  1044. return False
  1045. # just too complicated
  1046. if any(com.is_bool_indexer(x) for x in tup):
  1047. return False
  1048. return True
  def _multi_take(self, tup: Tuple):
      """
      Build the indexers for every key in ``tup`` and reindex in one call.

      All axes are handled by a single ``_reindex_with_indexers`` call
      rather than one operation per dimension.

      Parameters
      ----------
      tup : tuple
          Tuple of indexers, one per axis.

      Returns
      -------
      values: same type as the object being indexed
      """
      # GH 836
      target = self.obj
      per_axis = {}
      for axis_key, axis_name in zip(tup, target._AXIS_ORDERS):
          per_axis[axis_name] = self._get_listlike_indexer(axis_key, axis_name)
      return target._reindex_with_indexers(per_axis, copy=True, allow_dups=True)
  1070. def _convert_for_reindex(self, key, axis: int):
  1071. return key
  def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
      """
      Try to look ``tup`` up as a single label on a MultiIndex axis.

      Returns
      -------
      object or None
          The result of ``_get_label``; ``None`` when that lookup raised a
          ``TypeError`` or a ``KeyError`` that is not re-raised.

      Raises
      ------
      KeyError
          Re-raised when ``tup`` has more entries than ``self.ndim`` but no
          more than the number of levels of ``self.obj.index``.
      """
      # we have an axis0 multi-index, handle or raise
      axis = self.axis or 0
      try:
          # fast path for series or for tup devoid of slices
          return self._get_label(tup, axis=axis)
      except TypeError:
          # slices are unhashable
          pass
      except KeyError as err:
          # raise KeyError if number of indexers match
          # else IndexingError will be raised
          n_keys = len(tup)
          if self.ndim < n_keys <= self.obj.index.nlevels:
              raise err

      return None
  def _getitem_lowerdim(self, tup: Tuple):
      """
      Resolve ``tup`` by taking a section for its first label-like (or
      tuple) entry and indexing that section with the remaining entries.

      Raises
      ------
      IndexingError
          If ``tup`` has more entries than ``self.ndim``, or if no entry is
          label-like or a tuple.
      """
      # we can directly get the axis result since the axis is specified
      if self.axis is not None:
          axis = self.obj._get_axis_number(self.axis)
          return self._getitem_axis(tup, axis=axis)

      # we may have a nested tuples indexer here
      if self._is_nested_tuple_indexer(tup):
          return self._getitem_nested_tuple(tup)

      # we maybe be using a tuple to represent multiple dimensions here
      # ...but iloc should handle the tuple as simple integer-location
      # instead of checking it as multiindex representation (GH 13797)
      first_axis = self.obj._get_axis(0)
      if self.name != "iloc" and isinstance(first_axis, ABCMultiIndex):
          found = self._handle_lowerdim_multi_index_axis0(tup)
          if found is not None:
              return found

      if len(tup) > self.ndim:
          raise IndexingError("Too many indexers. handle elsewhere")

      for pos, key in enumerate(tup):
          if not (is_label_like(key) or isinstance(key, tuple)):
              continue

          section = self._getitem_axis(key, axis=pos)

          # we have yielded a scalar ?
          if not is_list_like_indexer(section):
              return section

          before, after = tup[:pos], tup[pos + 1 :]
          if section.ndim == self.ndim:
              # we're in the middle of slicing through a MultiIndex
              # revise the key wrt to `section` by inserting an _NS
              remaining = before + (_NS,) + after
          else:
              remaining = before + after

              # unfortunately need an odious kludge here because of
              # DataFrame transposing convention
              if (
                  isinstance(section, ABCDataFrame)
                  and pos > 0
                  and len(remaining) == 2
              ):
                  remaining = remaining[1], remaining[0]

              if len(remaining) == 1:
                  remaining = remaining[0]

          # Slices should return views, but calling iloc/loc with a null
          # slice returns a new object.
          if com.is_null_slice(remaining):
              return section
          # This is an elided recursive call to iloc/loc/etc'
          return getattr(section, self.name)[remaining]

      raise IndexingError("not applicable")
  def _getitem_nested_tuple(self, tup: Tuple):
      """
      Resolve a tuple key that itself contains nested tuples.

      With more entries than dimensions, the whole tuple is looked up on a
      single axis.  Otherwise the entries are applied one after another,
      each on the object produced by the previous one.
      """
      # we have a nested tuple so have at least 1 multi-index level
      # we should be able to match up the dimensionality here

      # we have too many indexers for our dim, but have at least 1
      # multi-index dimension, try to see if we have something like
      # a tuple passed to a series with a multi-index
      if len(tup) > self.ndim:
          found = self._handle_lowerdim_multi_index_axis0(tup)
          if found is not None:
              return found

          # this is a series with a multi-index specified a tuple of
          # selectors
          return self._getitem_axis(tup, axis=self.axis or 0)

      # handle the multi-axis by taking sections and reducing
      # this is iterative
      result = self.obj
      axis = 0
      for key in tup:
          if com.is_null_slice(key):
              axis += 1
              continue

          ndim_before = result.ndim
          result = getattr(result, self.name)._getitem_axis(key, axis=axis)

          # if we have a scalar, we are done
          if is_scalar(result) or not hasattr(result, "ndim"):
              break

          # Only move on to the next axis when the dimensionality did not
          # drop; after a reduction the same axis number is used again
          # (GH 7199).
          if result.ndim >= ndim_before:
              axis += 1

      return result
  1168. # TODO: remove once geopandas no longer needs __getitem__
  def _getitem_axis(self, key, axis: int):
      """
      Index ``self.obj`` along ``axis`` with a single key.

      Slices go to ``_get_slice_axis`` and list-likes to
      ``_getitem_iterable`` (except a tuple on a MultiIndex axis);
      everything else is looked up as one label or location.

      Raises
      ------
      ValueError
          If a list-like key has more than one dimension.
      """
      if is_iterator(key):
          key = list(key)
      self._validate_key(key, axis)

      labels = self.obj._get_axis(axis)

      if isinstance(key, slice):
          return self._get_slice_axis(key, axis=axis)

      # a tuple on a MultiIndex axis is a single (nested) label, not a list
      tuple_on_multiindex = isinstance(key, tuple) and isinstance(
          labels, ABCMultiIndex
      )
      if is_list_like_indexer(key) and not tuple_on_multiindex:
          if hasattr(key, "ndim") and key.ndim > 1:
              raise ValueError("Cannot index with multidimensional key")
          return self._getitem_iterable(key, axis=axis)

      # maybe coerce a float scalar to integer
      key = labels._maybe_cast_indexer(key)

      if is_integer(key):
          if axis == 0 and isinstance(labels, ABCMultiIndex):
              try:
                  return self._get_label(key, axis=axis)
              except (KeyError, TypeError):
                  # propagate only when the first level holds integers
                  if self.obj.index.levels[0].is_integer():
                      raise

          # this is the fallback! (for a non-float, non-integer index)
          if not (labels.is_floating() or labels.is_integer()):
              return self._get_loc(key, axis=axis)

      return self._get_label(key, axis=axis)
  def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
      """
      Transform a list-like of keys into a new index and an indexer.

      Parameters
      ----------
      key : list-like
          Targeted labels.
      axis : int
          Dimension on which the indexing is being made.
      raise_missing : bool, default False
          Forwarded to ``_validate_read_indexer``.

      Returns
      -------
      keyarr : Index
          New index for the result.
      values : array-like
          Indexer for the return object, -1 denotes keys not found.

      Raises
      ------
      KeyError
          Propagated from ``_validate_read_indexer``.
      """
      target = self.obj
      axis_index = target._get_axis(axis)

      # Have the index compute an indexer or return None
      # if it cannot handle:
      indexer, keyarr = axis_index._convert_listlike_indexer(key, kind=self.name)

      # Shortcut: only taken when every requested label was found.
      if indexer is not None and (indexer != -1).all():
          self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)
          return axis_index[indexer], indexer

      unique_path = axis_index.is_unique and not getattr(
          axis_index, "is_overlapping", False
      )
      if unique_path:
          # If we are trying to get actual keys from empty Series, we
          # patiently wait for a KeyError later on - otherwise, convert
          if len(axis_index) or not len(key):
              key = self._convert_for_reindex(key, axis)
          indexer = axis_index.get_indexer_for(key)
          keyarr = axis_index.reindex(keyarr)[0]
      else:
          keyarr, indexer, _ = axis_index._reindex_non_unique(keyarr)

      axis_number = target._get_axis_number(axis)
      self._validate_read_indexer(
          keyarr, indexer, axis_number, raise_missing=raise_missing
      )
      return keyarr, indexer
  def _getitem_iterable(self, key, axis: int):
      """
      Index the current object with an iterable key.

      The iterable key can be a boolean indexer or a collection of keys.

      Parameters
      ----------
      key : iterable
          Targeted labels or boolean indexer.
      axis : int
          Dimension on which the indexing is being made.

      Returns
      -------
      scalar, DataFrame, or Series: indexed value(s).

      Notes
      -----
      Errors raised by ``_validate_key``, ``check_bool_indexer`` and
      ``_get_listlike_indexer`` propagate unchanged.
      """
      # caller is responsible for ensuring non-None axis
      self._validate_key(key, axis)

      labels = self.obj._get_axis(axis)

      if not com.is_bool_indexer(key):
          # A collection of keys
          keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
          return self.obj._reindex_with_indexers(
              {axis: [keyarr, indexer]}, copy=True, allow_dups=True
          )

      # A boolean indexer: take the positions where the mask is set
      mask = check_bool_indexer(labels, key)
      (positions,) = mask.nonzero()
      return self.obj._take_with_is_copy(positions, axis=axis)
  1279. def _validate_read_indexer(
  1280. self, key, indexer, axis: int, raise_missing: bool = False
  1281. ):
  1282. """
  1283. Check that indexer can be used to return a result.
  1284. e.g. at least one element was found,
  1285. unless the list of keys was actually empty.
  1286. Parameters
  1287. ----------
  1288. key : list-like
  1289. Targeted labels (only used to show correct error message).
  1290. indexer: array-like of booleans
  1291. Indices corresponding to the key,
  1292. (with -1 indicating not found).
  1293. axis: int
  1294. Dimension on which the indexing is being made.
  1295. raise_missing: bool
  1296. Whether to raise a KeyError if some labels are not found. Will be
  1297. removed in the future, and then this method will always behave as
  1298. if raise_missing=True.
  1299. Raises
  1300. ------
  1301. KeyError
  1302. If at least one key was requested but none was found, and
  1303. raise_missing=True.
  1304. """
  1305. ax = self.obj._get_axis(axis)
  1306. if len(key) == 0:
  1307. return
  1308. # Count missing values:
  1309. missing = (indexer < 0).sum()
  1310. if missing:
  1311. if missing == len(indexer):
  1312. axis_name = self.obj._get_axis_name(axis)
  1313. raise KeyError(f"None of [{key}] are in the [{axis_name}]")
  1314. # We (temporarily) allow for some missing keys with .loc, except in
  1315. # some cases (e.g. setting) in which "raise_missing" will be False
  1316. if not (self.name == "loc" and not raise_missing):
  1317. not_found = list(set(key) - set(ax))
  1318. raise KeyError(f"{not_found} not in index")
  1319. # we skip the warning on Categorical/Interval
  1320. # as this check is actually done (check for
  1321. # non-missing values), but a bit later in the
  1322. # code, so we want to avoid warning & then
  1323. # just raising
  1324. if not (ax.is_categorical() or ax.is_interval()):
  1325. raise KeyError(
  1326. "Passing list-likes to .loc or [] with any missing labels "
  1327. "is no longer supported, see "
  1328. "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501
  1329. )
  def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False):
      """
      Convert indexing key into something we can use to do actual fancy
      indexing on a ndarray.

      Examples
      ix[:5] -> slice(0, 5)
      ix[[1,2,3]] -> [1,2,3]
      ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

      Label-based lookup is preferred: the key is first tried as a label
      and is only treated as a position when it is an integer and the axis
      does not hold integers.

      A key that cannot be located may be returned wrapped as
      ``{"key": obj}``.  ``raise_missing`` is not read by this
      implementation; list-likes are always resolved with
      ``raise_missing=True``.
      """
      labels = self.obj._get_axis(axis)

      if isinstance(obj, slice):
          return self._convert_slice_indexer(obj, axis)

      # try to find out correct indexer, if not type correct raise
      try:
          obj = self._convert_scalar_indexer(obj, axis)
      except TypeError:
          # but we will allow setting
          pass

      # see if we are positional in nature
      axis_is_integer = labels.is_integer()
      treat_as_position = is_integer(obj) and not axis_is_integer

      # if we are a label return me
      try:
          return labels.get_loc(obj)
      except LookupError:
          if isinstance(obj, tuple) and isinstance(labels, ABCMultiIndex):
              # a complete tuple is handed back as a new key; a partial one
              # is an error
              if len(obj) == labels.nlevels:
                  return {"key": obj}
              raise
      except TypeError:
          pass
      except ValueError:
          if not treat_as_position:
              raise

      # a positional
      if treat_as_position:
          # if we are setting and its not a valid location
          # its an insert which fails by definition
          if self.name == "loc":
              # always valid
              return {"key": obj}

          out_of_range = obj >= self.obj.shape[axis]
          if out_of_range and not isinstance(labels, ABCMultiIndex):
              # a positional
              raise ValueError("cannot set by positional indexing with enlargement")

          return obj

      if is_nested_tuple(obj, labels):
          return labels.get_locs(obj)

      if is_list_like_indexer(obj):
          if com.is_bool_indexer(obj):
              mask = check_bool_indexer(labels, obj)
              (positions,) = mask.nonzero()
              return positions

          # When setting, missing keys are not allowed, even with .loc:
          return self._get_listlike_indexer(obj, axis, raise_missing=True)[1]

      try:
          return labels.get_loc(obj)
      except LookupError:
          # allow a not found key only if we are a setter
          if not is_list_like_indexer(obj):
              return {"key": obj}
          raise
  def _get_slice_axis(self, slice_obj: slice, axis: int):
      """
      Slice ``self.obj`` along ``axis``.

      When ``need_slice`` reports that ``slice_obj`` selects everything, a
      shallow copy of the object is returned instead.
      """
      # caller is responsible for ensuring non-None axis
      if need_slice(slice_obj):
          positional = self._convert_slice_indexer(slice_obj, axis)
          return self._slice(positional, axis=axis, kind="iloc")

      return self.obj.copy(deep=False)
  class _LocationIndexer(_NDFrameIndexer):
      """
      ``__getitem__`` dispatch plus boolean and slice helpers.

      ``_is_scalar_access``, ``_getitem_scalar`` and ``_getitem_axis`` raise
      ``NotImplementedError`` here and are meant to be overridden.
      """

      def __getitem__(self, key):
          if type(key) is not tuple:
              # we by definition only have the 0th axis
              axis = self.axis or 0

              resolved = com.apply_if_callable(key, self.obj)
              return self._getitem_axis(resolved, axis=axis)

          # evaluate any callable entry against the object first
          key = tuple(com.apply_if_callable(part, self.obj) for part in key)
          if self._is_scalar_access(key):
              try:
                  return self._getitem_scalar(key)
              except (KeyError, IndexError, AttributeError):
                  # fast path failed; use the general tuple handling
                  pass
          return self._getitem_tuple(key)

      def _is_scalar_access(self, key: Tuple):
          raise NotImplementedError()

      def _getitem_scalar(self, key):
          raise NotImplementedError()

      def _getitem_axis(self, key, axis: int):
          raise NotImplementedError()

      def _getbool_axis(self, key, axis: int):
          """
          Take the entries along ``axis`` at which the boolean ``key`` is set.
          """
          # caller is responsible for ensuring non-None axis
          axis_labels = self.obj._get_axis(axis)
          mask = check_bool_indexer(axis_labels, key)
          positions = mask.nonzero()[0]
          return self.obj._take_with_is_copy(positions, axis=axis)

      def _get_slice_axis(self, slice_obj: slice, axis: int):
          """
          Slice along ``axis`` using the bounds of ``slice_obj`` as labels.
          """
          # caller is responsible for ensuring non-None axis
          target = self.obj
          if not need_slice(slice_obj):
              return target.copy(deep=False)

          axis_labels = target._get_axis(axis)
          indexer = axis_labels.slice_indexer(
              slice_obj.start, slice_obj.stop, slice_obj.step, kind=self.name
          )

          if not isinstance(indexer, slice):
              # DatetimeIndex overrides Index.slice_indexer and may
              # return a DatetimeIndex instead of a slice object.
              return self.obj._take_with_is_copy(indexer, axis=axis)

          return self._slice(indexer, axis=axis, kind="iloc")
  @Appender(IndexingMixin.loc.__doc__)
  class _LocIndexer(_LocationIndexer):
      # No class docstring on purpose: the documentation text is attached by
      # the ``Appender`` decorator above.
      _valid_types = (
          "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
          "endpoints included! Can be slices of integers if the "
          "index is integers), listlike of labels, boolean"
      )

      @Appender(_NDFrameIndexer._validate_key.__doc__)
      def _validate_key(self, key, axis: int):
          # Accepted here without further checks:
          #   - slices (of labels, or of integers when those are labels)
          #   - boolean indexers
          #   - collections of labels (their presence is checked later)
          # Only a non-list-like key is passed to _convert_scalar_indexer.
          needs_scalar_check = not (
              isinstance(key, slice)
              or com.is_bool_indexer(key)
              or is_list_like_indexer(key)
          )
          if needs_scalar_check:
              self._convert_scalar_indexer(key, axis)

      def _is_scalar_access(self, key: Tuple) -> bool:
          """
          Tell whether ``key`` can be served by the scalar fast path.

          Returns
          -------
          bool
          """
          # this is a shortcut accessor to both .loc and .iloc
          # that provide the equivalent access of .at and .iat
          # a) avoid getting things via sections and (to minimize dtype changes)
          # b) provide a performant path
          if len(key) != self.ndim:
              return False

          for axis_no, part in enumerate(key):
              if not is_scalar(part):
                  return False

              ax = self.obj.axes[axis_no]
              if isinstance(ax, ABCMultiIndex):
                  return False

              # partial string indexing, df.loc['2000', 'A']
              # should not be considered scalar
              if isinstance(part, str) and ax._supports_partial_string_indexing:
                  return False

              if not ax.is_unique:
                  return False

          return True

      def _getitem_scalar(self, key):
          """
          Fast path: fetch one value through ``self.obj._get_value``.
          """
          return self.obj._get_value(*key)

      def _get_partial_string_timestamp_match_key(self, key, labels):
          """
          Translate any partial string timestamp matches in key, returning the
          new key.

          Only applies when ``labels`` is a MultiIndex; otherwise ``key`` is
          returned unchanged.

          (GH 10331)
          """
          if not isinstance(labels, ABCMultiIndex):
              return key

          levels = labels.levels

          if isinstance(key, str) and levels[0]._supports_partial_string_indexing:
              # Convert key '2016-01-01' to
              # ('2016-01-01'[, slice(None, None, None)]+)
              key = (key,) + (slice(None),) * (len(levels) - 1)

          if isinstance(key, tuple):
              # Convert (..., '2016-01-01', ...) in tuple to
              # (..., slice('2016-01-01', '2016-01-01', None), ...)
              key = tuple(
                  slice(part, part, None)
                  if (
                      isinstance(part, str)
                      and levels[pos]._supports_partial_string_indexing
                  )
                  else part
                  for pos, part in enumerate(key)
              )

          return key

      def _getitem_axis(self, key, axis: int):
          """
          Label-based lookup of ``key`` along ``axis``.

          Raises
          ------
          NotImplementedError
              For a DataFrame or multidimensional key on a MultiIndex axis.
          ValueError
              For a multidimensional list-like key otherwise.
          """
          key = item_from_zerodim(key)
          if is_iterator(key):
              key = list(key)

          labels = self.obj._get_axis(axis)
          key = self._get_partial_string_timestamp_match_key(key, labels)

          if isinstance(key, slice):
              self._validate_key(key, axis)
              return self._get_slice_axis(key, axis=axis)

          if com.is_bool_indexer(key):
              return self._getbool_axis(key, axis=axis)

          if is_list_like_indexer(key):
              on_multiindex = isinstance(labels, ABCMultiIndex)

              # convert various list-like indexers
              # to a list of keys
              # we will use the *values* of the object
              # and NOT the index if its a PandasObject
              if on_multiindex:
                  if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1:
                      # Series, or 0,1 ndim ndarray
                      # GH 14730
                      key = list(key)
                  elif isinstance(key, ABCDataFrame):
                      # GH 15438
                      raise NotImplementedError(
                          "Indexing a MultiIndex with a "
                          "DataFrame key is not "
                          "implemented"
                      )
                  elif hasattr(key, "ndim") and key.ndim > 1:
                      raise NotImplementedError(
                          "Indexing a MultiIndex with a "
                          "multidimensional key is not "
                          "implemented"
                      )

                  # wrap a non-empty, non-tuple collection whose first entry
                  # is not a tuple into a one-element tuple
                  if (
                      not isinstance(key, tuple)
                      and len(key)
                      and not isinstance(key[0], tuple)
                  ):
                      key = tuple([key])

              # an iterable multi-selection
              if not (on_multiindex and isinstance(key, tuple)):
                  if hasattr(key, "ndim") and key.ndim > 1:
                      raise ValueError("Cannot index with multidimensional key")

                  return self._getitem_iterable(key, axis=axis)

              # nested tuple slicing
              if is_nested_tuple(key, labels):
                  locs = labels.get_locs(key)
                  positional = [slice(None)] * self.ndim
                  positional[axis] = locs
                  return self.obj.iloc[tuple(positional)]

          # fall thru to straight lookup
          self._validate_key(key, axis)
          return self._get_label(key, axis=axis)
  @Appender(IndexingMixin.iloc.__doc__)
  class _iLocIndexer(_LocationIndexer):
      # No class docstring on purpose: the documentation text is attached by
      # the ``Appender`` decorator above.
      _valid_types = (
          "integer, integer slice (START point is INCLUDED, END "
          "point is EXCLUDED), listlike of integers, boolean array"
      )
      # slices are resolved by the _NDFrameIndexer implementation rather than
      # the one defined on _LocationIndexer
      _get_slice_axis = _NDFrameIndexer._get_slice_axis

      def _validate_key(self, key, axis: int):
          """
          Check that ``key`` is a valid positional indexer for ``axis``.

          Raises
          ------
          NotImplementedError
              For a boolean indexer carrying an integer-typed ``index``.
          ValueError
              For a boolean indexer carrying any other ``Index``, or for a
              key of an unsupported type.
          IndexingError
              For a tuple key.
          IndexError
              For a list-like that is non-numeric or out of bounds (and,
              via ``_validate_integer``, for an out-of-bounds integer).
          """
          if com.is_bool_indexer(key):
              if hasattr(key, "index") and isinstance(key.index, Index):
                  if key.index.inferred_type == "integer":
                      raise NotImplementedError(
                          "iLocation based boolean "
                          "indexing on an integer type "
                          "is not available"
                      )
                  raise ValueError(
                      "iLocation based boolean indexing cannot use "
                      "an indexable as a mask"
                  )
              return

          if isinstance(key, slice):
              return
          elif is_integer(key):
              self._validate_integer(key, axis)
          elif isinstance(key, tuple):
              # a tuple should already have been caught by this point
              # so don't treat a tuple as a valid indexer
              raise IndexingError("Too many indexers")
          elif is_list_like_indexer(key):
              arr = np.array(key)
              len_axis = len(self.obj._get_axis(axis))

              # check that the key has a numeric dtype
              if not is_numeric_dtype(arr.dtype):
                  raise IndexError(f".iloc requires numeric indexers, got {arr}")

              # check that the key does not exceed the maximum size of the index
              if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
                  raise IndexError("positional indexers are out-of-bounds")
          else:
              raise ValueError(f"Can only index by location with a [{self._valid_types}]")

      def _has_valid_setitem_indexer(self, indexer):
          """Delegate to ``_has_valid_positional_setitem_indexer``."""
          self._has_valid_positional_setitem_indexer(indexer)

      def _is_scalar_access(self, key: Tuple) -> bool:
          """
          Tell whether ``key`` can be served by the scalar fast path.

          Returns
          -------
          bool
          """
          # this is a shortcut accessor to both .loc and .iloc
          # that provide the equivalent access of .at and .iat
          # a) avoid getting things via sections and (to minimize dtype changes)
          # b) provide a performant path
          if len(key) != self.ndim:
              return False

          for i, k in enumerate(key):
              if not is_integer(k):
                  return False

              ax = self.obj.axes[i]
              if not ax.is_unique:
                  return False

          return True

      def _getitem_scalar(self, key):
          """
          Fast path: fetch one value by position through
          ``self.obj._get_value(..., takeable=True)``.
          """
          return self.obj._get_value(*key, takeable=True)

      def _validate_integer(self, key: int, axis: int) -> None:
          """
          Check that 'key' is a valid position in the desired axis.

          Parameters
          ----------
          key : int
              Requested position.
          axis : int
              Desired axis.

          Raises
          ------
          IndexError
              If 'key' is not a valid position in axis 'axis'.
          """
          len_axis = len(self.obj._get_axis(axis))
          if key >= len_axis or key < -len_axis:
              raise IndexError("single positional indexer is out-of-bounds")

      def _getitem_tuple(self, tup: Tuple):
          """
          Resolve a tuple of positional keys, one entry per axis.
          """
          self._has_valid_tuple(tup)
          try:
              return self._getitem_lowerdim(tup)
          except IndexingError:
              # not a lower-dimensional lookup; apply the keys axis by axis
              pass

          retval = self.obj
          axis = 0
          for key in tup:
              if com.is_null_slice(key):
                  axis += 1
                  continue

              retval = getattr(retval, self.name)._getitem_axis(key, axis=axis)

              # if the dim was reduced, then pass a lower-dim the next time
              if retval.ndim < self.ndim:
                  # TODO: this is never reached in tests; can we confirm that
                  # it is impossible?
                  axis -= 1

              # try to get for the next axis
              axis += 1

          return retval

      def _get_list_axis(self, key, axis: int):
          """
          Take the entries at the positions in ``key`` along ``axis``.

          Parameters
          ----------
          key : list-like positional indexer
          axis : int

          Returns
          -------
          Result of ``self.obj._take_with_is_copy(key, axis=axis)``.

          Raises
          ------
          IndexError
              If the take fails with an IndexError; re-raised with a uniform
              message and the original error attached as its cause.
          """
          try:
              return self.obj._take_with_is_copy(key, axis=axis)
          except IndexError as err:
              # re-raise with different error message
              raise IndexError("positional indexers are out-of-bounds") from err

      def _getitem_axis(self, key, axis: int):
          """
          Positional lookup of ``key`` along ``axis``.

          Raises
          ------
          TypeError
              If a scalar key is not an integer.
          IndexError
              If an integer key is out of bounds.
          """
          if isinstance(key, slice):
              return self._get_slice_axis(key, axis=axis)

          if isinstance(key, list):
              key = np.asarray(key)

          if com.is_bool_indexer(key):
              self._validate_key(key, axis)
              return self._getbool_axis(key, axis=axis)

          # a list of integers
          elif is_list_like_indexer(key):
              return self._get_list_axis(key, axis=axis)

          # a single integer
          else:
              key = item_from_zerodim(key)
              if not is_integer(key):
                  raise TypeError("Cannot index by location index with a non-integer key")

              # validate the location
              self._validate_integer(key, axis)

              return self._get_loc(key, axis=axis)

      # raise_missing is included for compat with the parent class signature
      def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False):
          """
          Much simpler as we only have to deal with our valid types.

          Slices and floats are converted by the respective helper; any other
          key is validated and returned unchanged.

          Raises
          ------
          ValueError
              If ``_validate_key`` rejects ``obj`` with a ValueError; the
              original error is attached as the cause.
          """
          # may need to convert a float key
          if isinstance(obj, slice):
              return self._convert_slice_indexer(obj, axis)

          elif is_float(obj):
              return self._convert_scalar_indexer(obj, axis)

          try:
              self._validate_key(obj, axis)
              return obj
          except ValueError as err:
              raise ValueError(
                  f"Can only index by location with a [{self._valid_types}]"
              ) from err
  1737. class _ScalarAccessIndexer(_NDFrameIndexerBase):
  1738. """
  1739. Access scalars quickly.
  1740. """
  1741. def _convert_key(self, key, is_setter: bool = False):
  1742. raise AbstractMethodError(self)
  1743. def __getitem__(self, key):
  1744. if not isinstance(key, tuple):
  1745. # we could have a convertible item here (e.g. Timestamp)
  1746. if not is_list_like_indexer(key):
  1747. key = tuple([key])
  1748. else:
  1749. raise ValueError("Invalid call for scalar access (getting)!")
  1750. key = self._convert_key(key)
  1751. return self.obj._get_value(*key, takeable=self._takeable)
  1752. def __setitem__(self, key, value):
  1753. if isinstance(key, tuple):
  1754. key = tuple(com.apply_if_callable(x, self.obj) for x in key)
  1755. else:
  1756. # scalar callable may return tuple
  1757. key = com.apply_if_callable(key, self.obj)
  1758. if not isinstance(key, tuple):
  1759. key = _tuplify(self.ndim, key)
  1760. if len(key) != self.ndim:
  1761. raise ValueError("Not enough indexers for scalar access (setting)!")
  1762. key = list(self._convert_key(key, is_setter=True))
  1763. key.append(value)
  1764. self.obj._set_value(*key, takeable=self._takeable)
  1765. @Appender(IndexingMixin.at.__doc__)
  1766. class _AtIndexer(_ScalarAccessIndexer):
  1767. _takeable = False
  1768. def _convert_key(self, key, is_setter: bool = False):
  1769. """
  1770. Require they keys to be the same type as the index. (so we don't
  1771. fallback)
  1772. """
  1773. # allow arbitrary setting
  1774. if is_setter:
  1775. return list(key)
  1776. for ax, i in zip(self.obj.axes, key):
  1777. if ax.is_integer():
  1778. if not is_integer(i):
  1779. raise ValueError(
  1780. "At based indexing on an integer index "
  1781. "can only have integer indexers"
  1782. )
  1783. else:
  1784. if is_integer(i) and not ax.holds_integer():
  1785. raise ValueError(
  1786. "At based indexing on an non-integer "
  1787. "index can only have non-integer "
  1788. "indexers"
  1789. )
  1790. return key
  1791. @Appender(IndexingMixin.iat.__doc__)
  1792. class _iAtIndexer(_ScalarAccessIndexer):
  1793. _takeable = True
  1794. def _convert_key(self, key, is_setter: bool = False):
  1795. """
  1796. Require integer args. (and convert to label arguments)
  1797. """
  1798. for a, i in zip(self.obj.axes, key):
  1799. if not is_integer(i):
  1800. raise ValueError("iAt based indexing can only have integer indexers")
  1801. return key
  1802. def _tuplify(ndim: int, loc: Hashable) -> Tuple[Union[Hashable, slice], ...]:
  1803. """
  1804. Given an indexer for the first dimension, create an equivalent tuple
  1805. for indexing over all dimensions.
  1806. Parameters
  1807. ----------
  1808. ndim : int
  1809. loc : object
  1810. Returns
  1811. -------
  1812. tuple
  1813. """
  1814. _tup: List[Union[Hashable, slice]]
  1815. _tup = [slice(None, None) for _ in range(ndim)]
  1816. _tup[0] = loc
  1817. return tuple(_tup)
  1818. def convert_to_index_sliceable(obj, key):
  1819. """
  1820. If we are index sliceable, then return my slicer, otherwise return None.
  1821. """
  1822. idx = obj.index
  1823. if isinstance(key, slice):
  1824. return idx._convert_slice_indexer(key, kind="getitem")
  1825. elif isinstance(key, str):
  1826. # we are an actual column
  1827. if key in obj._data.items:
  1828. return None
  1829. # We might have a datetimelike string that we can translate to a
  1830. # slice here via partial string indexing
  1831. if idx._supports_partial_string_indexing:
  1832. try:
  1833. return idx._get_string_slice(key)
  1834. except (KeyError, ValueError, NotImplementedError):
  1835. return None
  1836. return None
  1837. def check_bool_indexer(index: Index, key) -> np.ndarray:
  1838. """
  1839. Check if key is a valid boolean indexer for an object with such index and
  1840. perform reindexing or conversion if needed.
  1841. This function assumes that is_bool_indexer(key) == True.
  1842. Parameters
  1843. ----------
  1844. index : Index
  1845. Index of the object on which the indexing is done.
  1846. key : list-like
  1847. Boolean indexer to check.
  1848. Returns
  1849. -------
  1850. np.array
  1851. Resulting key.
  1852. Raises
  1853. ------
  1854. IndexError
  1855. If the key does not have the same length as index.
  1856. IndexingError
  1857. If the index of the key is unalignable to index.
  1858. """
  1859. result = key
  1860. if isinstance(key, ABCSeries) and not key.index.equals(index):
  1861. result = result.reindex(index)
  1862. mask = isna(result._values)
  1863. if mask.any():
  1864. raise IndexingError(
  1865. "Unalignable boolean Series provided as "
  1866. "indexer (index of the boolean Series and of "
  1867. "the indexed object do not match)."
  1868. )
  1869. result = result.astype(bool)._values
  1870. elif is_object_dtype(key):
  1871. # key might be object-dtype bool, check_array_indexer needs bool array
  1872. result = np.asarray(result, dtype=bool)
  1873. result = check_array_indexer(index, result)
  1874. else:
  1875. result = check_array_indexer(index, result)
  1876. return result
  1877. def convert_missing_indexer(indexer):
  1878. """
  1879. Reverse convert a missing indexer, which is a dict
  1880. return the scalar indexer and a boolean indicating if we converted
  1881. """
  1882. if isinstance(indexer, dict):
  1883. # a missing key (but not a tuple indexer)
  1884. indexer = indexer["key"]
  1885. if isinstance(indexer, bool):
  1886. raise KeyError("cannot use a single bool to index into setitem")
  1887. return indexer, True
  1888. return indexer, False
  1889. def convert_from_missing_indexer_tuple(indexer, axes):
  1890. """
  1891. Create a filtered indexer that doesn't have any missing indexers.
  1892. """
  1893. def get_indexer(_i, _idx):
  1894. return axes[_i].get_loc(_idx["key"]) if isinstance(_idx, dict) else _idx
  1895. return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer))
  1896. def maybe_convert_ix(*args):
  1897. """
  1898. We likely want to take the cross-product.
  1899. """
  1900. ixify = True
  1901. for arg in args:
  1902. if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)):
  1903. ixify = False
  1904. if ixify:
  1905. return np.ix_(*args)
  1906. else:
  1907. return args
  1908. def is_nested_tuple(tup, labels) -> bool:
  1909. """
  1910. Returns
  1911. -------
  1912. bool
  1913. """
  1914. # check for a compatible nested tuple and multiindexes among the axes
  1915. if not isinstance(tup, tuple):
  1916. return False
  1917. for i, k in enumerate(tup):
  1918. if is_list_like(k) or isinstance(k, slice):
  1919. return isinstance(labels, ABCMultiIndex)
  1920. return False
  1921. def is_label_like(key) -> bool:
  1922. """
  1923. Returns
  1924. -------
  1925. bool
  1926. """
  1927. # select a label or row
  1928. return not isinstance(key, slice) and not is_list_like_indexer(key)
  1929. def need_slice(obj) -> bool:
  1930. """
  1931. Returns
  1932. -------
  1933. bool
  1934. """
  1935. return (
  1936. obj.start is not None
  1937. or obj.stop is not None
  1938. or (obj.step is not None and obj.step != 1)
  1939. )
  1940. def _non_reducing_slice(slice_):
  1941. """
  1942. Ensurse that a slice doesn't reduce to a Series or Scalar.
  1943. Any user-paseed `subset` should have this called on it
  1944. to make sure we're always working with DataFrames.
  1945. """
  1946. # default to column slice, like DataFrame
  1947. # ['A', 'B'] -> IndexSlices[:, ['A', 'B']]
  1948. kinds = (ABCSeries, np.ndarray, Index, list, str)
  1949. if isinstance(slice_, kinds):
  1950. slice_ = IndexSlice[:, slice_]
  1951. def pred(part) -> bool:
  1952. """
  1953. Returns
  1954. -------
  1955. bool
  1956. True if slice does *not* reduce,
  1957. False if `part` is a tuple.
  1958. """
  1959. # true when slice does *not* reduce, False when part is a tuple,
  1960. # i.e. MultiIndex slice
  1961. return (isinstance(part, slice) or is_list_like(part)) and not isinstance(
  1962. part, tuple
  1963. )
  1964. if not is_list_like(slice_):
  1965. if not isinstance(slice_, slice):
  1966. # a 1-d slice, like df.loc[1]
  1967. slice_ = [[slice_]]
  1968. else:
  1969. # slice(a, b, c)
  1970. slice_ = [slice_] # to tuplize later
  1971. else:
  1972. slice_ = [part if pred(part) else [part] for part in slice_]
  1973. return tuple(slice_)
  1974. def _maybe_numeric_slice(df, slice_, include_bool=False):
  1975. """
  1976. Want nice defaults for background_gradient that don't break
  1977. with non-numeric data. But if slice_ is passed go with that.
  1978. """
  1979. if slice_ is None:
  1980. dtypes = [np.number]
  1981. if include_bool:
  1982. dtypes.append(bool)
  1983. slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns]
  1984. return slice_
  1985. def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj) -> bool:
  1986. """
  1987. Returns
  1988. -------
  1989. bool
  1990. True if we have an equal len settable.
  1991. """
  1992. if not len(labels) == 1 or not np.iterable(value) or is_scalar(plane_indexer[0]):
  1993. return False
  1994. item = labels[0]
  1995. index = obj[item].index
  1996. values_len = len(value)
  1997. # equal len list/ndarray
  1998. if len(index) == values_len:
  1999. return True
  2000. elif lplane_indexer == values_len:
  2001. return True
  2002. return False