common.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892
  1. import gc
  2. from typing import Optional, Type
  3. import numpy as np
  4. import pytest
  5. from pandas._libs.tslib import iNaT
  6. from pandas.core.dtypes.dtypes import CategoricalDtype
  7. import pandas as pd
  8. from pandas import (
  9. CategoricalIndex,
  10. DatetimeIndex,
  11. Index,
  12. Int64Index,
  13. IntervalIndex,
  14. MultiIndex,
  15. PeriodIndex,
  16. RangeIndex,
  17. Series,
  18. TimedeltaIndex,
  19. UInt64Index,
  20. isna,
  21. )
  22. import pandas._testing as tm
  23. from pandas.core.indexes.base import InvalidIndexError
  24. from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
class Base:
    """ base class for index sub-class tests """

    # Index class under test; None here (presumably overridden by the concrete
    # test classes -- confirm against the subclasses).
    _holder: Optional[Type[Index]] = None
    # Attribute names compared between an index and its ``.values`` array in
    # test_ndarray_compat_properties.
    _compat_props = ["shape", "ndim", "size", "nbytes"]
  29. def test_pickle_compat_construction(self):
  30. # need an object to create with
  31. msg = (
  32. r"Index\(\.\.\.\) must be called with a collection of some"
  33. r" kind, None was passed|"
  34. r"__new__\(\) missing 1 required positional argument: 'data'|"
  35. r"__new__\(\) takes at least 2 arguments \(1 given\)"
  36. )
  37. with pytest.raises(TypeError, match=msg):
  38. self._holder()
  39. def test_to_series(self):
  40. # assert that we are creating a copy of the index
  41. idx = self.create_index()
  42. s = idx.to_series()
  43. assert s.values is not idx.values
  44. assert s.index is not idx
  45. assert s.name == idx.name
  46. def test_to_series_with_arguments(self):
  47. # GH18699
  48. # index kwarg
  49. idx = self.create_index()
  50. s = idx.to_series(index=idx)
  51. assert s.values is not idx.values
  52. assert s.index is idx
  53. assert s.name == idx.name
  54. # name kwarg
  55. idx = self.create_index()
  56. s = idx.to_series(name="__test")
  57. assert s.values is not idx.values
  58. assert s.index is not idx
  59. assert s.name != idx.name
  60. @pytest.mark.parametrize("name", [None, "new_name"])
  61. def test_to_frame(self, name):
  62. # see GH-15230, GH-22580
  63. idx = self.create_index()
  64. if name:
  65. idx_name = name
  66. else:
  67. idx_name = idx.name or 0
  68. df = idx.to_frame(name=idx_name)
  69. assert df.index is idx
  70. assert len(df.columns) == 1
  71. assert df.columns[0] == idx_name
  72. assert df[idx_name].values is not idx.values
  73. df = idx.to_frame(index=False, name=idx_name)
  74. assert df.index is not idx
  75. def test_shift(self):
  76. # GH8083 test the base class for shift
  77. idx = self.create_index()
  78. msg = "Not supported for type {}".format(type(idx).__name__)
  79. with pytest.raises(NotImplementedError, match=msg):
  80. idx.shift(1)
  81. with pytest.raises(NotImplementedError, match=msg):
  82. idx.shift(1, 2)
  83. def test_constructor_name_unhashable(self):
  84. # GH#29069 check that name is hashable
  85. # See also same-named test in tests.series.test_constructors
  86. idx = self.create_index()
  87. with pytest.raises(TypeError, match="Index.name must be a hashable type"):
  88. type(idx)(idx, name=[])
  89. def test_create_index_existing_name(self):
  90. # GH11193, when an existing index is passed, and a new name is not
  91. # specified, the new index should inherit the previous object name
  92. expected = self.create_index()
  93. if not isinstance(expected, MultiIndex):
  94. expected.name = "foo"
  95. result = pd.Index(expected)
  96. tm.assert_index_equal(result, expected)
  97. result = pd.Index(expected, name="bar")
  98. expected.name = "bar"
  99. tm.assert_index_equal(result, expected)
  100. else:
  101. expected.names = ["foo", "bar"]
  102. result = pd.Index(expected)
  103. tm.assert_index_equal(
  104. result,
  105. Index(
  106. Index(
  107. [
  108. ("foo", "one"),
  109. ("foo", "two"),
  110. ("bar", "one"),
  111. ("baz", "two"),
  112. ("qux", "one"),
  113. ("qux", "two"),
  114. ],
  115. dtype="object",
  116. ),
  117. names=["foo", "bar"],
  118. ),
  119. )
  120. result = pd.Index(expected, names=["A", "B"])
  121. tm.assert_index_equal(
  122. result,
  123. Index(
  124. Index(
  125. [
  126. ("foo", "one"),
  127. ("foo", "two"),
  128. ("bar", "one"),
  129. ("baz", "two"),
  130. ("qux", "one"),
  131. ("qux", "two"),
  132. ],
  133. dtype="object",
  134. ),
  135. names=["A", "B"],
  136. ),
  137. )
  138. def test_numeric_compat(self):
  139. idx = self.create_index()
  140. with pytest.raises(TypeError, match="cannot perform __mul__"):
  141. idx * 1
  142. with pytest.raises(TypeError, match="cannot perform __rmul__"):
  143. 1 * idx
  144. div_err = "cannot perform __truediv__"
  145. with pytest.raises(TypeError, match=div_err):
  146. idx / 1
  147. div_err = div_err.replace(" __", " __r")
  148. with pytest.raises(TypeError, match=div_err):
  149. 1 / idx
  150. with pytest.raises(TypeError, match="cannot perform __floordiv__"):
  151. idx // 1
  152. with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
  153. 1 // idx
  154. def test_logical_compat(self):
  155. idx = self.create_index()
  156. with pytest.raises(TypeError, match="cannot perform all"):
  157. idx.all()
  158. with pytest.raises(TypeError, match="cannot perform any"):
  159. idx.any()
  160. def test_boolean_context_compat(self):
  161. # boolean context compat
  162. idx = self.create_index()
  163. with pytest.raises(ValueError, match="The truth value of a"):
  164. if idx:
  165. pass
  166. def test_reindex_base(self):
  167. idx = self.create_index()
  168. expected = np.arange(idx.size, dtype=np.intp)
  169. actual = idx.get_indexer(idx)
  170. tm.assert_numpy_array_equal(expected, actual)
  171. with pytest.raises(ValueError, match="Invalid fill method"):
  172. idx.get_indexer(idx, method="invalid")
  173. def test_get_indexer_consistency(self, indices):
  174. # See GH 16819
  175. if isinstance(indices, IntervalIndex):
  176. return
  177. if indices.is_unique or isinstance(indices, CategoricalIndex):
  178. indexer = indices.get_indexer(indices[0:2])
  179. assert isinstance(indexer, np.ndarray)
  180. assert indexer.dtype == np.intp
  181. else:
  182. e = "Reindexing only valid with uniquely valued Index objects"
  183. with pytest.raises(InvalidIndexError, match=e):
  184. indices.get_indexer(indices[0:2])
  185. indexer, _ = indices.get_indexer_non_unique(indices[0:2])
  186. assert isinstance(indexer, np.ndarray)
  187. assert indexer.dtype == np.intp
  188. def test_ndarray_compat_properties(self):
  189. idx = self.create_index()
  190. assert idx.T.equals(idx)
  191. assert idx.transpose().equals(idx)
  192. values = idx.values
  193. for prop in self._compat_props:
  194. assert getattr(idx, prop) == getattr(values, prop)
  195. # test for validity
  196. idx.nbytes
  197. idx.values.nbytes
  198. def test_repr_roundtrip(self):
  199. idx = self.create_index()
  200. tm.assert_index_equal(eval(repr(idx)), idx)
  201. def test_str(self):
  202. # test the string repr
  203. idx = self.create_index()
  204. idx.name = "foo"
  205. assert "'foo'" in str(idx)
  206. assert type(idx).__name__ in str(idx)
  207. def test_repr_max_seq_item_setting(self):
  208. # GH10182
  209. idx = self.create_index()
  210. idx = idx.repeat(50)
  211. with pd.option_context("display.max_seq_items", None):
  212. repr(idx)
  213. assert "..." not in str(idx)
  214. def test_copy_name(self, indices):
  215. # gh-12309: Check that the "name" argument
  216. # passed at initialization is honored.
  217. if isinstance(indices, MultiIndex):
  218. return
  219. first = type(indices)(indices, copy=True, name="mario")
  220. second = type(first)(first, copy=False)
  221. # Even though "copy=False", we want a new object.
  222. assert first is not second
  223. # Not using tm.assert_index_equal() since names differ.
  224. assert indices.equals(first)
  225. assert first.name == "mario"
  226. assert second.name == "mario"
  227. s1 = Series(2, index=first)
  228. s2 = Series(3, index=second[:-1])
  229. if not isinstance(indices, CategoricalIndex):
  230. # See gh-13365
  231. s3 = s1 * s2
  232. assert s3.index.name == "mario"
  233. def test_ensure_copied_data(self, indices):
  234. # Check the "copy" argument of each Index.__new__ is honoured
  235. # GH12309
  236. init_kwargs = {}
  237. if isinstance(indices, PeriodIndex):
  238. # Needs "freq" specification:
  239. init_kwargs["freq"] = indices.freq
  240. elif isinstance(indices, (RangeIndex, MultiIndex, CategoricalIndex)):
  241. # RangeIndex cannot be initialized from data
  242. # MultiIndex and CategoricalIndex are tested separately
  243. return
  244. index_type = type(indices)
  245. result = index_type(indices.values, copy=True, **init_kwargs)
  246. tm.assert_index_equal(indices, result)
  247. tm.assert_numpy_array_equal(
  248. indices._ndarray_values, result._ndarray_values, check_same="copy"
  249. )
  250. if isinstance(indices, PeriodIndex):
  251. # .values an object array of Period, thus copied
  252. result = index_type(ordinal=indices.asi8, copy=False, **init_kwargs)
  253. tm.assert_numpy_array_equal(
  254. indices._ndarray_values, result._ndarray_values, check_same="same"
  255. )
  256. elif isinstance(indices, IntervalIndex):
  257. # checked in test_interval.py
  258. pass
  259. else:
  260. result = index_type(indices.values, copy=False, **init_kwargs)
  261. tm.assert_numpy_array_equal(
  262. indices.values, result.values, check_same="same"
  263. )
  264. tm.assert_numpy_array_equal(
  265. indices._ndarray_values, result._ndarray_values, check_same="same"
  266. )
  267. def test_memory_usage(self, indices):
  268. indices._engine.clear_mapping()
  269. result = indices.memory_usage()
  270. if indices.empty:
  271. # we report 0 for no-length
  272. assert result == 0
  273. return
  274. # non-zero length
  275. indices.get_loc(indices[0])
  276. result2 = indices.memory_usage()
  277. result3 = indices.memory_usage(deep=True)
  278. # RangeIndex, IntervalIndex
  279. # don't have engines
  280. if not isinstance(indices, (RangeIndex, IntervalIndex)):
  281. assert result2 > result
  282. if indices.inferred_type == "object":
  283. assert result3 > result2
  284. def test_argsort(self, request, indices):
  285. # separately tested
  286. if isinstance(indices, CategoricalIndex):
  287. return
  288. result = indices.argsort()
  289. expected = np.array(indices).argsort()
  290. tm.assert_numpy_array_equal(result, expected, check_dtype=False)
  291. def test_numpy_argsort(self, indices):
  292. result = np.argsort(indices)
  293. expected = indices.argsort()
  294. tm.assert_numpy_array_equal(result, expected)
  295. # these are the only two types that perform
  296. # pandas compatibility input validation - the
  297. # rest already perform separate (or no) such
  298. # validation via their 'values' attribute as
  299. # defined in pandas.core.indexes/base.py - they
  300. # cannot be changed at the moment due to
  301. # backwards compatibility concerns
  302. if isinstance(type(indices), (CategoricalIndex, RangeIndex)):
  303. msg = "the 'axis' parameter is not supported"
  304. with pytest.raises(ValueError, match=msg):
  305. np.argsort(indices, axis=1)
  306. msg = "the 'kind' parameter is not supported"
  307. with pytest.raises(ValueError, match=msg):
  308. np.argsort(indices, kind="mergesort")
  309. msg = "the 'order' parameter is not supported"
  310. with pytest.raises(ValueError, match=msg):
  311. np.argsort(indices, order=("a", "b"))
  312. def test_take(self, indices):
  313. indexer = [4, 3, 0, 2]
  314. if len(indices) < 5:
  315. # not enough elements; ignore
  316. return
  317. result = indices.take(indexer)
  318. expected = indices[indexer]
  319. assert result.equals(expected)
  320. if not isinstance(indices, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
  321. # GH 10791
  322. with pytest.raises(AttributeError):
  323. indices.freq
  324. def test_take_invalid_kwargs(self):
  325. idx = self.create_index()
  326. indices = [1, 2]
  327. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  328. with pytest.raises(TypeError, match=msg):
  329. idx.take(indices, foo=2)
  330. msg = "the 'out' parameter is not supported"
  331. with pytest.raises(ValueError, match=msg):
  332. idx.take(indices, out=indices)
  333. msg = "the 'mode' parameter is not supported"
  334. with pytest.raises(ValueError, match=msg):
  335. idx.take(indices, mode="clip")
  336. def test_repeat(self):
  337. rep = 2
  338. i = self.create_index()
  339. expected = pd.Index(i.values.repeat(rep), name=i.name)
  340. tm.assert_index_equal(i.repeat(rep), expected)
  341. i = self.create_index()
  342. rep = np.arange(len(i))
  343. expected = pd.Index(i.values.repeat(rep), name=i.name)
  344. tm.assert_index_equal(i.repeat(rep), expected)
  345. def test_numpy_repeat(self):
  346. rep = 2
  347. i = self.create_index()
  348. expected = i.repeat(rep)
  349. tm.assert_index_equal(np.repeat(i, rep), expected)
  350. msg = "the 'axis' parameter is not supported"
  351. with pytest.raises(ValueError, match=msg):
  352. np.repeat(i, rep, axis=0)
  353. @pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
  354. def test_where(self, klass):
  355. i = self.create_index()
  356. cond = [True] * len(i)
  357. result = i.where(klass(cond))
  358. expected = i
  359. tm.assert_index_equal(result, expected)
  360. cond = [False] + [True] * len(i[1:])
  361. expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype)
  362. result = i.where(klass(cond))
  363. tm.assert_index_equal(result, expected)
  364. @pytest.mark.parametrize("case", [0.5, "xxx"])
  365. @pytest.mark.parametrize(
  366. "method", ["intersection", "union", "difference", "symmetric_difference"]
  367. )
  368. def test_set_ops_error_cases(self, case, method, indices):
  369. # non-iterable input
  370. msg = "Input must be Index or array-like"
  371. with pytest.raises(TypeError, match=msg):
  372. getattr(indices, method)(case)
  373. def test_intersection_base(self, indices):
  374. if isinstance(indices, CategoricalIndex):
  375. return
  376. first = indices[:5]
  377. second = indices[:3]
  378. intersect = first.intersection(second)
  379. assert tm.equalContents(intersect, second)
  380. # GH 10149
  381. cases = [klass(second.values) for klass in [np.array, Series, list]]
  382. for case in cases:
  383. result = first.intersection(case)
  384. assert tm.equalContents(result, second)
  385. if isinstance(indices, MultiIndex):
  386. msg = "other must be a MultiIndex or a list of tuples"
  387. with pytest.raises(TypeError, match=msg):
  388. first.intersection([1, 2, 3])
  389. def test_union_base(self, indices):
  390. first = indices[3:]
  391. second = indices[:5]
  392. everything = indices
  393. union = first.union(second)
  394. assert tm.equalContents(union, everything)
  395. # GH 10149
  396. cases = [klass(second.values) for klass in [np.array, Series, list]]
  397. for case in cases:
  398. if not isinstance(indices, CategoricalIndex):
  399. result = first.union(case)
  400. assert tm.equalContents(result, everything)
  401. if isinstance(indices, MultiIndex):
  402. msg = "other must be a MultiIndex or a list of tuples"
  403. with pytest.raises(TypeError, match=msg):
  404. first.union([1, 2, 3])
  405. @pytest.mark.parametrize("sort", [None, False])
  406. def test_difference_base(self, sort, indices):
  407. if isinstance(indices, CategoricalIndex):
  408. return
  409. first = indices[2:]
  410. second = indices[:4]
  411. answer = indices[4:]
  412. result = first.difference(second, sort)
  413. assert tm.equalContents(result, answer)
  414. # GH 10149
  415. cases = [klass(second.values) for klass in [np.array, Series, list]]
  416. for case in cases:
  417. if isinstance(indices, (DatetimeIndex, TimedeltaIndex)):
  418. assert type(result) == type(answer)
  419. tm.assert_numpy_array_equal(
  420. result.sort_values().asi8, answer.sort_values().asi8
  421. )
  422. else:
  423. result = first.difference(case, sort)
  424. assert tm.equalContents(result, answer)
  425. if isinstance(indices, MultiIndex):
  426. msg = "other must be a MultiIndex or a list of tuples"
  427. with pytest.raises(TypeError, match=msg):
  428. first.difference([1, 2, 3], sort)
  429. def test_symmetric_difference(self, indices):
  430. if isinstance(indices, CategoricalIndex):
  431. return
  432. first = indices[1:]
  433. second = indices[:-1]
  434. answer = indices[[0, -1]]
  435. result = first.symmetric_difference(second)
  436. assert tm.equalContents(result, answer)
  437. # GH 10149
  438. cases = [klass(second.values) for klass in [np.array, Series, list]]
  439. for case in cases:
  440. result = first.symmetric_difference(case)
  441. assert tm.equalContents(result, answer)
  442. if isinstance(indices, MultiIndex):
  443. msg = "other must be a MultiIndex or a list of tuples"
  444. with pytest.raises(TypeError, match=msg):
  445. first.symmetric_difference([1, 2, 3])
  446. def test_insert_base(self, indices):
  447. result = indices[1:4]
  448. if not len(indices):
  449. return
  450. # test 0th element
  451. assert indices[0:4].equals(result.insert(0, indices[0]))
  452. def test_delete_base(self, indices):
  453. if not len(indices):
  454. return
  455. if isinstance(indices, RangeIndex):
  456. # tested in class
  457. return
  458. expected = indices[1:]
  459. result = indices.delete(0)
  460. assert result.equals(expected)
  461. assert result.name == expected.name
  462. expected = indices[:-1]
  463. result = indices.delete(-1)
  464. assert result.equals(expected)
  465. assert result.name == expected.name
  466. with pytest.raises((IndexError, ValueError)):
  467. # either depending on numpy version
  468. indices.delete(len(indices))
  469. def test_equals(self, indices):
  470. if isinstance(indices, IntervalIndex):
  471. # IntervalIndex tested separately
  472. return
  473. assert indices.equals(indices)
  474. assert indices.equals(indices.copy())
  475. assert indices.equals(indices.astype(object))
  476. assert not indices.equals(list(indices))
  477. assert not indices.equals(np.array(indices))
  478. # Cannot pass in non-int64 dtype to RangeIndex
  479. if not isinstance(indices, RangeIndex):
  480. same_values = Index(indices, dtype=object)
  481. assert indices.equals(same_values)
  482. assert same_values.equals(indices)
  483. if indices.nlevels == 1:
  484. # do not test MultiIndex
  485. assert not indices.equals(Series(indices))
  486. def test_equals_op(self):
  487. # GH9947, GH10637
  488. index_a = self.create_index()
  489. if isinstance(index_a, PeriodIndex):
  490. pytest.skip("Skip check for PeriodIndex")
  491. n = len(index_a)
  492. index_b = index_a[0:-1]
  493. index_c = index_a[0:-1].append(index_a[-2:-1])
  494. index_d = index_a[0:1]
  495. msg = "Lengths must match|could not be broadcast"
  496. with pytest.raises(ValueError, match=msg):
  497. index_a == index_b
  498. expected1 = np.array([True] * n)
  499. expected2 = np.array([True] * (n - 1) + [False])
  500. tm.assert_numpy_array_equal(index_a == index_a, expected1)
  501. tm.assert_numpy_array_equal(index_a == index_c, expected2)
  502. # test comparisons with numpy arrays
  503. array_a = np.array(index_a)
  504. array_b = np.array(index_a[0:-1])
  505. array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
  506. array_d = np.array(index_a[0:1])
  507. with pytest.raises(ValueError, match=msg):
  508. index_a == array_b
  509. tm.assert_numpy_array_equal(index_a == array_a, expected1)
  510. tm.assert_numpy_array_equal(index_a == array_c, expected2)
  511. # test comparisons with Series
  512. series_a = Series(array_a)
  513. series_b = Series(array_b)
  514. series_c = Series(array_c)
  515. series_d = Series(array_d)
  516. with pytest.raises(ValueError, match=msg):
  517. index_a == series_b
  518. tm.assert_numpy_array_equal(index_a == series_a, expected1)
  519. tm.assert_numpy_array_equal(index_a == series_c, expected2)
  520. # cases where length is 1 for one of them
  521. with pytest.raises(ValueError, match="Lengths must match"):
  522. index_a == index_d
  523. with pytest.raises(ValueError, match="Lengths must match"):
  524. index_a == series_d
  525. with pytest.raises(ValueError, match="Lengths must match"):
  526. index_a == array_d
  527. msg = "Can only compare identically-labeled Series objects"
  528. with pytest.raises(ValueError, match=msg):
  529. series_a == series_d
  530. with pytest.raises(ValueError, match="Lengths must match"):
  531. series_a == array_d
  532. # comparing with a scalar should broadcast; note that we are excluding
  533. # MultiIndex because in this case each item in the index is a tuple of
  534. # length 2, and therefore is considered an array of length 2 in the
  535. # comparison instead of a scalar
  536. if not isinstance(index_a, MultiIndex):
  537. expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
  538. # assuming the 2nd to last item is unique in the data
  539. item = index_a[-2]
  540. tm.assert_numpy_array_equal(index_a == item, expected3)
  541. tm.assert_series_equal(series_a == item, Series(expected3))
  542. def test_hasnans_isnans(self, indices):
  543. # GH 11343, added tests for hasnans / isnans
  544. if isinstance(indices, MultiIndex):
  545. return
  546. # cases in indices doesn't include NaN
  547. idx = indices.copy(deep=True)
  548. expected = np.array([False] * len(idx), dtype=bool)
  549. tm.assert_numpy_array_equal(idx._isnan, expected)
  550. assert idx.hasnans is False
  551. idx = indices.copy(deep=True)
  552. values = np.asarray(idx.values)
  553. if len(indices) == 0:
  554. return
  555. elif isinstance(indices, DatetimeIndexOpsMixin):
  556. values[1] = iNaT
  557. elif isinstance(indices, (Int64Index, UInt64Index)):
  558. return
  559. else:
  560. values[1] = np.nan
  561. if isinstance(indices, PeriodIndex):
  562. idx = type(indices)(values, freq=indices.freq)
  563. else:
  564. idx = type(indices)(values)
  565. expected = np.array([False] * len(idx), dtype=bool)
  566. expected[1] = True
  567. tm.assert_numpy_array_equal(idx._isnan, expected)
  568. assert idx.hasnans is True
  569. def test_fillna(self, indices):
  570. # GH 11343
  571. if len(indices) == 0:
  572. pass
  573. elif isinstance(indices, MultiIndex):
  574. idx = indices.copy(deep=True)
  575. msg = "isna is not defined for MultiIndex"
  576. with pytest.raises(NotImplementedError, match=msg):
  577. idx.fillna(idx[0])
  578. else:
  579. idx = indices.copy(deep=True)
  580. result = idx.fillna(idx[0])
  581. tm.assert_index_equal(result, idx)
  582. assert result is not idx
  583. msg = "'value' must be a scalar, passed: "
  584. with pytest.raises(TypeError, match=msg):
  585. idx.fillna([idx[0]])
  586. idx = indices.copy(deep=True)
  587. values = np.asarray(idx.values)
  588. if isinstance(indices, DatetimeIndexOpsMixin):
  589. values[1] = iNaT
  590. elif isinstance(indices, (Int64Index, UInt64Index)):
  591. return
  592. else:
  593. values[1] = np.nan
  594. if isinstance(indices, PeriodIndex):
  595. idx = type(indices)(values, freq=indices.freq)
  596. else:
  597. idx = type(indices)(values)
  598. expected = np.array([False] * len(idx), dtype=bool)
  599. expected[1] = True
  600. tm.assert_numpy_array_equal(idx._isnan, expected)
  601. assert idx.hasnans is True
  602. def test_nulls(self, indices):
  603. # this is really a smoke test for the methods
  604. # as these are adequately tested for function elsewhere
  605. if len(indices) == 0:
  606. tm.assert_numpy_array_equal(indices.isna(), np.array([], dtype=bool))
  607. elif isinstance(indices, MultiIndex):
  608. idx = indices.copy()
  609. msg = "isna is not defined for MultiIndex"
  610. with pytest.raises(NotImplementedError, match=msg):
  611. idx.isna()
  612. elif not indices.hasnans:
  613. tm.assert_numpy_array_equal(
  614. indices.isna(), np.zeros(len(indices), dtype=bool)
  615. )
  616. tm.assert_numpy_array_equal(
  617. indices.notna(), np.ones(len(indices), dtype=bool)
  618. )
  619. else:
  620. result = isna(indices)
  621. tm.assert_numpy_array_equal(indices.isna(), result)
  622. tm.assert_numpy_array_equal(indices.notna(), ~result)
  623. def test_empty(self):
  624. # GH 15270
  625. index = self.create_index()
  626. assert not index.empty
  627. assert index[:0].empty
  628. def test_join_self_unique(self, join_type):
  629. index = self.create_index()
  630. if index.is_unique:
  631. joined = index.join(index, how=join_type)
  632. assert (index == joined).all()
  633. def test_map(self):
  634. # callable
  635. index = self.create_index()
  636. # we don't infer UInt64
  637. if isinstance(index, pd.UInt64Index):
  638. expected = index.astype("int64")
  639. else:
  640. expected = index
  641. result = index.map(lambda x: x)
  642. tm.assert_index_equal(result, expected)
  643. @pytest.mark.parametrize(
  644. "mapper",
  645. [
  646. lambda values, index: {i: e for e, i in zip(values, index)},
  647. lambda values, index: pd.Series(values, index),
  648. ],
  649. )
  650. def test_map_dictlike(self, mapper):
  651. index = self.create_index()
  652. if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
  653. pytest.skip("skipping tests for {}".format(type(index)))
  654. identity = mapper(index.values, index)
  655. # we don't infer to UInt64 for a dict
  656. if isinstance(index, pd.UInt64Index) and isinstance(identity, dict):
  657. expected = index.astype("int64")
  658. else:
  659. expected = index
  660. result = index.map(identity)
  661. tm.assert_index_equal(result, expected)
  662. # empty mappable
  663. expected = pd.Index([np.nan] * len(index))
  664. result = index.map(mapper(expected, index))
  665. tm.assert_index_equal(result, expected)
  666. def test_map_str(self):
  667. # GH 31202
  668. index = self.create_index()
  669. result = index.map(str)
  670. expected = Index([str(x) for x in index], dtype=object)
  671. tm.assert_index_equal(result, expected)
  672. def test_putmask_with_wrong_mask(self):
  673. # GH18368
  674. index = self.create_index()
  675. with pytest.raises(ValueError):
  676. index.putmask(np.ones(len(index) + 1, np.bool), 1)
  677. with pytest.raises(ValueError):
  678. index.putmask(np.ones(len(index) - 1, np.bool), 1)
  679. with pytest.raises(ValueError):
  680. index.putmask("foo", 1)
  681. @pytest.mark.parametrize("copy", [True, False])
  682. @pytest.mark.parametrize("name", [None, "foo"])
  683. @pytest.mark.parametrize("ordered", [True, False])
  684. def test_astype_category(self, copy, name, ordered):
  685. # GH 18630
  686. index = self.create_index()
  687. if name:
  688. index = index.rename(name)
  689. # standard categories
  690. dtype = CategoricalDtype(ordered=ordered)
  691. result = index.astype(dtype, copy=copy)
  692. expected = CategoricalIndex(index.values, name=name, ordered=ordered)
  693. tm.assert_index_equal(result, expected)
  694. # non-standard categories
  695. dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered)
  696. result = index.astype(dtype, copy=copy)
  697. expected = CategoricalIndex(index.values, name=name, dtype=dtype)
  698. tm.assert_index_equal(result, expected)
  699. if ordered is False:
  700. # dtype='category' defaults to ordered=False, so only test once
  701. result = index.astype("category", copy=copy)
  702. expected = CategoricalIndex(index.values, name=name)
  703. tm.assert_index_equal(result, expected)
  704. def test_is_unique(self):
  705. # initialize a unique index
  706. index = self.create_index().drop_duplicates()
  707. assert index.is_unique is True
  708. # empty index should be unique
  709. index_empty = index[:0]
  710. assert index_empty.is_unique is True
  711. # test basic dupes
  712. index_dup = index.insert(0, index[0])
  713. assert index_dup.is_unique is False
  714. # single NA should be unique
  715. index_na = index.insert(0, np.nan)
  716. assert index_na.is_unique is True
  717. # multiple NA should not be unique
  718. index_na_dup = index_na.insert(0, np.nan)
  719. assert index_na_dup.is_unique is False
  720. def test_engine_reference_cycle(self):
  721. # GH27585
  722. index = self.create_index()
  723. nrefs_pre = len(gc.get_referrers(index))
  724. index._engine
  725. assert len(gc.get_referrers(index)) == nrefs_pre
  726. def test_getitem_2d_deprecated(self):
  727. # GH#30588
  728. idx = self.create_index()
  729. with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
  730. res = idx[:, None]
  731. assert isinstance(res, np.ndarray), type(res)