# test_internals.py
  1. from collections import OrderedDict
  2. from datetime import date, datetime
  3. import itertools
  4. import operator
  5. import re
  6. import numpy as np
  7. import pytest
  8. from pandas._libs.internals import BlockPlacement
  9. import pandas as pd
  10. from pandas import Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Series
  11. import pandas._testing as tm
  12. import pandas.core.algorithms as algos
  13. from pandas.core.arrays import DatetimeArray, SparseArray, TimedeltaArray
  14. from pandas.core.internals import BlockManager, SingleBlockManager, make_block
  15. @pytest.fixture
  16. def mgr():
  17. return create_mgr(
  18. "a: f8; b: object; c: f8; d: object; e: f8;"
  19. "f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;"
  20. "k: M8[ns, US/Eastern]; l: M8[ns, CET];"
  21. )
  22. def assert_block_equal(left, right):
  23. tm.assert_numpy_array_equal(left.values, right.values)
  24. assert left.dtype == right.dtype
  25. assert isinstance(left.mgr_locs, BlockPlacement)
  26. assert isinstance(right.mgr_locs, BlockPlacement)
  27. tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array)
  28. def get_numeric_mat(shape):
  29. arr = np.arange(shape[0])
  30. return np.lib.stride_tricks.as_strided(
  31. x=arr, shape=shape, strides=(arr.itemsize,) + (0,) * (len(shape) - 1)
  32. ).copy()
# Default length used by the factories below: the default ``item_shape`` is
# ``(N,)`` and the default ``num_rows`` of ``create_single_mgr`` is ``N``.
N = 10
  34. def create_block(typestr, placement, item_shape=None, num_offset=0):
  35. """
  36. Supported typestr:
  37. * float, f8, f4, f2
  38. * int, i8, i4, i2, i1
  39. * uint, u8, u4, u2, u1
  40. * complex, c16, c8
  41. * bool
  42. * object, string, O
  43. * datetime, dt, M8[ns], M8[ns, tz]
  44. * timedelta, td, m8[ns]
  45. * sparse (SparseArray with fill_value=0.0)
  46. * sparse_na (SparseArray with fill_value=np.nan)
  47. * category, category2
  48. """
  49. placement = BlockPlacement(placement)
  50. num_items = len(placement)
  51. if item_shape is None:
  52. item_shape = (N,)
  53. shape = (num_items,) + item_shape
  54. mat = get_numeric_mat(shape)
  55. if typestr in (
  56. "float",
  57. "f8",
  58. "f4",
  59. "f2",
  60. "int",
  61. "i8",
  62. "i4",
  63. "i2",
  64. "i1",
  65. "uint",
  66. "u8",
  67. "u4",
  68. "u2",
  69. "u1",
  70. ):
  71. values = mat.astype(typestr) + num_offset
  72. elif typestr in ("complex", "c16", "c8"):
  73. values = 1.0j * (mat.astype(typestr) + num_offset)
  74. elif typestr in ("object", "string", "O"):
  75. values = np.reshape(
  76. ["A{i:d}".format(i=i) for i in mat.ravel() + num_offset], shape
  77. )
  78. elif typestr in ("b", "bool"):
  79. values = np.ones(shape, dtype=np.bool_)
  80. elif typestr in ("datetime", "dt", "M8[ns]"):
  81. values = (mat * 1e9).astype("M8[ns]")
  82. elif typestr.startswith("M8[ns"):
  83. # datetime with tz
  84. m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr)
  85. assert m is not None, "incompatible typestr -> {0}".format(typestr)
  86. tz = m.groups()[0]
  87. assert num_items == 1, "must have only 1 num items for a tz-aware"
  88. values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)
  89. elif typestr in ("timedelta", "td", "m8[ns]"):
  90. values = (mat * 1).astype("m8[ns]")
  91. elif typestr in ("category",):
  92. values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
  93. elif typestr in ("category2",):
  94. values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"])
  95. elif typestr in ("sparse", "sparse_na"):
  96. # FIXME: doesn't support num_rows != 10
  97. assert shape[-1] == 10
  98. assert all(s == 1 for s in shape[:-1])
  99. if typestr.endswith("_na"):
  100. fill_value = np.nan
  101. else:
  102. fill_value = 0.0
  103. values = SparseArray(
  104. [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6],
  105. fill_value=fill_value,
  106. )
  107. arr = values.sp_values.view()
  108. arr += num_offset - 1
  109. else:
  110. raise ValueError(f'Unsupported typestr: "{typestr}"')
  111. return make_block(values, placement=placement, ndim=len(shape))
  112. def create_single_mgr(typestr, num_rows=None):
  113. if num_rows is None:
  114. num_rows = N
  115. return SingleBlockManager(
  116. create_block(typestr, placement=slice(0, num_rows), item_shape=()),
  117. np.arange(num_rows),
  118. )
  119. def create_mgr(descr, item_shape=None):
  120. """
  121. Construct BlockManager from string description.
  122. String description syntax looks similar to np.matrix initializer. It looks
  123. like this::
  124. a,b,c: f8; d,e,f: i8
  125. Rules are rather simple:
  126. * see list of supported datatypes in `create_block` method
  127. * components are semicolon-separated
  128. * each component is `NAME,NAME,NAME: DTYPE_ID`
  129. * whitespace around colons & semicolons are removed
  130. * components with same DTYPE_ID are combined into single block
  131. * to force multiple blocks with same dtype, use '-SUFFIX'::
  132. 'a:f8-1; b:f8-2; c:f8-foobar'
  133. """
  134. if item_shape is None:
  135. item_shape = (N,)
  136. offset = 0
  137. mgr_items = []
  138. block_placements = OrderedDict()
  139. for d in descr.split(";"):
  140. d = d.strip()
  141. if not len(d):
  142. continue
  143. names, blockstr = d.partition(":")[::2]
  144. blockstr = blockstr.strip()
  145. names = names.strip().split(",")
  146. mgr_items.extend(names)
  147. placement = list(np.arange(len(names)) + offset)
  148. try:
  149. block_placements[blockstr].extend(placement)
  150. except KeyError:
  151. block_placements[blockstr] = placement
  152. offset += len(names)
  153. mgr_items = Index(mgr_items)
  154. blocks = []
  155. num_offset = 0
  156. for blockstr, placement in block_placements.items():
  157. typestr = blockstr.split("-")[0]
  158. blocks.append(
  159. create_block(
  160. typestr, placement, item_shape=item_shape, num_offset=num_offset
  161. )
  162. )
  163. num_offset += len(placement)
  164. return BlockManager(
  165. sorted(blocks, key=lambda b: b.mgr_locs[0]),
  166. [mgr_items] + [np.arange(n) for n in item_shape],
  167. )
  168. class TestBlock:
  169. def setup_method(self, method):
  170. # self.fblock = get_float_ex() # a,c,e
  171. # self.cblock = get_complex_ex() #
  172. # self.oblock = get_obj_ex()
  173. # self.bool_block = get_bool_ex()
  174. # self.int_block = get_int_ex()
  175. self.fblock = create_block("float", [0, 2, 4])
  176. self.cblock = create_block("complex", [7])
  177. self.oblock = create_block("object", [1, 3])
  178. self.bool_block = create_block("bool", [5])
  179. self.int_block = create_block("int", [6])
  180. def test_constructor(self):
  181. int32block = create_block("i4", [0])
  182. assert int32block.dtype == np.int32
  183. def test_pickle(self):
  184. def _check(blk):
  185. assert_block_equal(tm.round_trip_pickle(blk), blk)
  186. _check(self.fblock)
  187. _check(self.cblock)
  188. _check(self.oblock)
  189. _check(self.bool_block)
  190. def test_mgr_locs(self):
  191. assert isinstance(self.fblock.mgr_locs, BlockPlacement)
  192. tm.assert_numpy_array_equal(
  193. self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64)
  194. )
  195. def test_attrs(self):
  196. assert self.fblock.shape == self.fblock.values.shape
  197. assert self.fblock.dtype == self.fblock.values.dtype
  198. assert len(self.fblock) == len(self.fblock.values)
  199. def test_merge(self):
  200. avals = tm.randn(2, 10)
  201. bvals = tm.randn(2, 10)
  202. ref_cols = Index(["e", "a", "b", "d", "f"])
  203. ablock = make_block(avals, ref_cols.get_indexer(["e", "b"]))
  204. bblock = make_block(bvals, ref_cols.get_indexer(["a", "d"]))
  205. merged = ablock.merge(bblock)
  206. tm.assert_numpy_array_equal(
  207. merged.mgr_locs.as_array, np.array([0, 1, 2, 3], dtype=np.int64)
  208. )
  209. tm.assert_numpy_array_equal(merged.values[[0, 2]], np.array(avals))
  210. tm.assert_numpy_array_equal(merged.values[[1, 3]], np.array(bvals))
  211. # TODO: merge with mixed type?
  212. def test_copy(self):
  213. cop = self.fblock.copy()
  214. assert cop is not self.fblock
  215. assert_block_equal(self.fblock, cop)
  216. def test_reindex_index(self):
  217. pass
  218. def test_reindex_cast(self):
  219. pass
  220. def test_insert(self):
  221. pass
  222. def test_delete(self):
  223. newb = self.fblock.copy()
  224. newb.delete(0)
  225. assert isinstance(newb.mgr_locs, BlockPlacement)
  226. tm.assert_numpy_array_equal(
  227. newb.mgr_locs.as_array, np.array([2, 4], dtype=np.int64)
  228. )
  229. assert (newb.values[0] == 1).all()
  230. newb = self.fblock.copy()
  231. newb.delete(1)
  232. assert isinstance(newb.mgr_locs, BlockPlacement)
  233. tm.assert_numpy_array_equal(
  234. newb.mgr_locs.as_array, np.array([0, 4], dtype=np.int64)
  235. )
  236. assert (newb.values[1] == 2).all()
  237. newb = self.fblock.copy()
  238. newb.delete(2)
  239. tm.assert_numpy_array_equal(
  240. newb.mgr_locs.as_array, np.array([0, 2], dtype=np.int64)
  241. )
  242. assert (newb.values[1] == 1).all()
  243. newb = self.fblock.copy()
  244. with pytest.raises(Exception):
  245. newb.delete(3)
  246. class TestDatetimeBlock:
  247. def test_can_hold_element(self):
  248. block = create_block("datetime", [0])
  249. # We will check that block._can_hold_element iff arr.__setitem__ works
  250. arr = pd.array(block.values.ravel())
  251. # coerce None
  252. assert block._can_hold_element(None)
  253. arr[0] = None
  254. assert arr[0] is pd.NaT
  255. # coerce different types of datetime objects
  256. vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)]
  257. for val in vals:
  258. assert block._can_hold_element(val)
  259. arr[0] = val
  260. val = date(2010, 10, 10)
  261. assert not block._can_hold_element(val)
  262. with pytest.raises(TypeError):
  263. arr[0] = val
  264. class TestBlockManager:
  265. def test_constructor_corner(self):
  266. pass
  267. def test_attrs(self):
  268. mgr = create_mgr("a,b,c: f8-1; d,e,f: f8-2")
  269. assert mgr.nblocks == 2
  270. assert len(mgr) == 6
  271. def test_is_mixed_dtype(self):
  272. assert not create_mgr("a,b:f8").is_mixed_type
  273. assert not create_mgr("a:f8-1; b:f8-2").is_mixed_type
  274. assert create_mgr("a,b:f8; c,d: f4").is_mixed_type
  275. assert create_mgr("a,b:f8; c,d: object").is_mixed_type
  276. def test_duplicate_ref_loc_failure(self):
  277. tmp_mgr = create_mgr("a:bool; a: f8")
  278. axes, blocks = tmp_mgr.axes, tmp_mgr.blocks
  279. blocks[0].mgr_locs = np.array([0])
  280. blocks[1].mgr_locs = np.array([0])
  281. # test trying to create block manager with overlapping ref locs
  282. with pytest.raises(AssertionError):
  283. BlockManager(blocks, axes)
  284. blocks[0].mgr_locs = np.array([0])
  285. blocks[1].mgr_locs = np.array([1])
  286. mgr = BlockManager(blocks, axes)
  287. mgr.iget(1)
  288. def test_contains(self, mgr):
  289. assert "a" in mgr
  290. assert "baz" not in mgr
  291. def test_pickle(self, mgr):
  292. mgr2 = tm.round_trip_pickle(mgr)
  293. tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  294. # share ref_items
  295. # assert mgr2.blocks[0].ref_items is mgr2.blocks[1].ref_items
  296. # GH2431
  297. assert hasattr(mgr2, "_is_consolidated")
  298. assert hasattr(mgr2, "_known_consolidated")
  299. # reset to False on load
  300. assert not mgr2._is_consolidated
  301. assert not mgr2._known_consolidated
  302. def test_non_unique_pickle(self):
  303. mgr = create_mgr("a,a,a:f8")
  304. mgr2 = tm.round_trip_pickle(mgr)
  305. tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  306. mgr = create_mgr("a: f8; a: i8")
  307. mgr2 = tm.round_trip_pickle(mgr)
  308. tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  309. def test_categorical_block_pickle(self):
  310. mgr = create_mgr("a: category")
  311. mgr2 = tm.round_trip_pickle(mgr)
  312. tm.assert_frame_equal(DataFrame(mgr), DataFrame(mgr2))
  313. smgr = create_single_mgr("category")
  314. smgr2 = tm.round_trip_pickle(smgr)
  315. tm.assert_series_equal(Series(smgr), Series(smgr2))
  316. def test_get(self):
  317. cols = Index(list("abc"))
  318. values = np.random.rand(3, 3)
  319. block = make_block(values=values.copy(), placement=np.arange(3))
  320. mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])
  321. tm.assert_almost_equal(mgr.get("a").internal_values(), values[0])
  322. tm.assert_almost_equal(mgr.get("b").internal_values(), values[1])
  323. tm.assert_almost_equal(mgr.get("c").internal_values(), values[2])
  324. def test_set(self):
  325. mgr = create_mgr("a,b,c: int", item_shape=(3,))
  326. mgr.set("d", np.array(["foo"] * 3))
  327. mgr.set("b", np.array(["bar"] * 3))
  328. tm.assert_numpy_array_equal(mgr.get("a").internal_values(), np.array([0] * 3))
  329. tm.assert_numpy_array_equal(
  330. mgr.get("b").internal_values(), np.array(["bar"] * 3, dtype=np.object_)
  331. )
  332. tm.assert_numpy_array_equal(mgr.get("c").internal_values(), np.array([2] * 3))
  333. tm.assert_numpy_array_equal(
  334. mgr.get("d").internal_values(), np.array(["foo"] * 3, dtype=np.object_)
  335. )
  336. def test_set_change_dtype(self, mgr):
  337. mgr.set("baz", np.zeros(N, dtype=bool))
  338. mgr.set("baz", np.repeat("foo", N))
  339. assert mgr.get("baz").dtype == np.object_
  340. mgr2 = mgr.consolidate()
  341. mgr2.set("baz", np.repeat("foo", N))
  342. assert mgr2.get("baz").dtype == np.object_
  343. mgr2.set("quux", tm.randn(N).astype(int))
  344. assert mgr2.get("quux").dtype == np.int_
  345. mgr2.set("quux", tm.randn(N))
  346. assert mgr2.get("quux").dtype == np.float_
  347. def test_set_change_dtype_slice(self): # GH8850
  348. cols = MultiIndex.from_tuples([("1st", "a"), ("2nd", "b"), ("3rd", "c")])
  349. df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols)
  350. df["2nd"] = df["2nd"] * 2.0
  351. blocks = df._to_dict_of_blocks()
  352. assert sorted(blocks.keys()) == ["float64", "int64"]
  353. tm.assert_frame_equal(
  354. blocks["float64"], DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])
  355. )
  356. tm.assert_frame_equal(blocks["int64"], DataFrame([[3], [6]], columns=cols[2:]))
  357. def test_copy(self, mgr):
  358. cp = mgr.copy(deep=False)
  359. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  360. # view assertion
  361. assert cp_blk.equals(blk)
  362. if isinstance(blk.values, np.ndarray):
  363. assert cp_blk.values.base is blk.values.base
  364. else:
  365. # DatetimeTZBlock has DatetimeIndex values
  366. assert cp_blk.values._data.base is blk.values._data.base
  367. cp = mgr.copy(deep=True)
  368. for blk, cp_blk in zip(mgr.blocks, cp.blocks):
  369. # copy assertion we either have a None for a base or in case of
  370. # some blocks it is an array (e.g. datetimetz), but was copied
  371. assert cp_blk.equals(blk)
  372. if not isinstance(cp_blk.values, np.ndarray):
  373. assert cp_blk.values._data.base is not blk.values._data.base
  374. else:
  375. assert cp_blk.values.base is None and blk.values.base is None
  376. def test_sparse(self):
  377. mgr = create_mgr("a: sparse-1; b: sparse-2")
  378. # what to test here?
  379. assert mgr.as_array().dtype == np.float64
  380. def test_sparse_mixed(self):
  381. mgr = create_mgr("a: sparse-1; b: sparse-2; c: f8")
  382. assert len(mgr.blocks) == 3
  383. assert isinstance(mgr, BlockManager)
  384. # what to test here?
  385. def test_as_array_float(self):
  386. mgr = create_mgr("c: f4; d: f2; e: f8")
  387. assert mgr.as_array().dtype == np.float64
  388. mgr = create_mgr("c: f4; d: f2")
  389. assert mgr.as_array().dtype == np.float32
  390. def test_as_array_int_bool(self):
  391. mgr = create_mgr("a: bool-1; b: bool-2")
  392. assert mgr.as_array().dtype == np.bool_
  393. mgr = create_mgr("a: i8-1; b: i8-2; c: i4; d: i2; e: u1")
  394. assert mgr.as_array().dtype == np.int64
  395. mgr = create_mgr("c: i4; d: i2; e: u1")
  396. assert mgr.as_array().dtype == np.int32
  397. def test_as_array_datetime(self):
  398. mgr = create_mgr("h: datetime-1; g: datetime-2")
  399. assert mgr.as_array().dtype == "M8[ns]"
  400. def test_as_array_datetime_tz(self):
  401. mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]")
  402. assert mgr.get("h").dtype == "datetime64[ns, US/Eastern]"
  403. assert mgr.get("g").dtype == "datetime64[ns, CET]"
  404. assert mgr.as_array().dtype == "object"
  405. @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
  406. def test_astype(self, t):
  407. # coerce all
  408. mgr = create_mgr("c: f4; d: f2; e: f8")
  409. t = np.dtype(t)
  410. tmgr = mgr.astype(t)
  411. assert tmgr.get("c").dtype.type == t
  412. assert tmgr.get("d").dtype.type == t
  413. assert tmgr.get("e").dtype.type == t
  414. # mixed
  415. mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")
  416. t = np.dtype(t)
  417. tmgr = mgr.astype(t, errors="ignore")
  418. assert tmgr.get("c").dtype.type == t
  419. assert tmgr.get("e").dtype.type == t
  420. assert tmgr.get("f").dtype.type == t
  421. assert tmgr.get("g").dtype.type == t
  422. assert tmgr.get("a").dtype.type == np.object_
  423. assert tmgr.get("b").dtype.type == np.object_
  424. if t != np.int64:
  425. assert tmgr.get("d").dtype.type == np.datetime64
  426. else:
  427. assert tmgr.get("d").dtype.type == t
  428. def test_convert(self):
  429. def _compare(old_mgr, new_mgr):
  430. """ compare the blocks, numeric compare ==, object don't """
  431. old_blocks = set(old_mgr.blocks)
  432. new_blocks = set(new_mgr.blocks)
  433. assert len(old_blocks) == len(new_blocks)
  434. # compare non-numeric
  435. for b in old_blocks:
  436. found = False
  437. for nb in new_blocks:
  438. if (b.values == nb.values).all():
  439. found = True
  440. break
  441. assert found
  442. for b in new_blocks:
  443. found = False
  444. for ob in old_blocks:
  445. if (b.values == ob.values).all():
  446. found = True
  447. break
  448. assert found
  449. # noops
  450. mgr = create_mgr("f: i8; g: f8")
  451. new_mgr = mgr.convert()
  452. _compare(mgr, new_mgr)
  453. # convert
  454. mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
  455. mgr.set("a", np.array(["1"] * N, dtype=np.object_))
  456. mgr.set("b", np.array(["2."] * N, dtype=np.object_))
  457. mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
  458. new_mgr = mgr.convert(numeric=True)
  459. assert new_mgr.get("a").dtype == np.int64
  460. assert new_mgr.get("b").dtype == np.float64
  461. assert new_mgr.get("foo").dtype == np.object_
  462. assert new_mgr.get("f").dtype == np.int64
  463. assert new_mgr.get("g").dtype == np.float64
  464. mgr = create_mgr(
  465. "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
  466. )
  467. mgr.set("a", np.array(["1"] * N, dtype=np.object_))
  468. mgr.set("b", np.array(["2."] * N, dtype=np.object_))
  469. mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
  470. new_mgr = mgr.convert(numeric=True)
  471. assert new_mgr.get("a").dtype == np.int64
  472. assert new_mgr.get("b").dtype == np.float64
  473. assert new_mgr.get("foo").dtype == np.object_
  474. assert new_mgr.get("f").dtype == np.int32
  475. assert new_mgr.get("bool").dtype == np.bool_
  476. assert new_mgr.get("dt").dtype.type, np.datetime64
  477. assert new_mgr.get("i").dtype == np.int64
  478. assert new_mgr.get("g").dtype == np.float64
  479. assert new_mgr.get("h").dtype == np.float16
  480. def test_interleave(self):
  481. # self
  482. for dtype in ["f8", "i8", "object", "bool", "complex", "M8[ns]", "m8[ns]"]:
  483. mgr = create_mgr("a: {0}".format(dtype))
  484. assert mgr.as_array().dtype == dtype
  485. mgr = create_mgr("a: {0}; b: {0}".format(dtype))
  486. assert mgr.as_array().dtype == dtype
  487. # will be converted according the actual dtype of the underlying
  488. mgr = create_mgr("a: category")
  489. assert mgr.as_array().dtype == "i8"
  490. mgr = create_mgr("a: category; b: category")
  491. assert mgr.as_array().dtype == "i8"
  492. mgr = create_mgr("a: category; b: category2")
  493. assert mgr.as_array().dtype == "object"
  494. mgr = create_mgr("a: category2")
  495. assert mgr.as_array().dtype == "object"
  496. mgr = create_mgr("a: category2; b: category2")
  497. assert mgr.as_array().dtype == "object"
  498. # combinations
  499. mgr = create_mgr("a: f8")
  500. assert mgr.as_array().dtype == "f8"
  501. mgr = create_mgr("a: f8; b: i8")
  502. assert mgr.as_array().dtype == "f8"
  503. mgr = create_mgr("a: f4; b: i8")
  504. assert mgr.as_array().dtype == "f8"
  505. mgr = create_mgr("a: f4; b: i8; d: object")
  506. assert mgr.as_array().dtype == "object"
  507. mgr = create_mgr("a: bool; b: i8")
  508. assert mgr.as_array().dtype == "object"
  509. mgr = create_mgr("a: complex")
  510. assert mgr.as_array().dtype == "complex"
  511. mgr = create_mgr("a: f8; b: category")
  512. assert mgr.as_array().dtype == "object"
  513. mgr = create_mgr("a: M8[ns]; b: category")
  514. assert mgr.as_array().dtype == "object"
  515. mgr = create_mgr("a: M8[ns]; b: bool")
  516. assert mgr.as_array().dtype == "object"
  517. mgr = create_mgr("a: M8[ns]; b: i8")
  518. assert mgr.as_array().dtype == "object"
  519. mgr = create_mgr("a: m8[ns]; b: bool")
  520. assert mgr.as_array().dtype == "object"
  521. mgr = create_mgr("a: m8[ns]; b: i8")
  522. assert mgr.as_array().dtype == "object"
  523. mgr = create_mgr("a: M8[ns]; b: m8[ns]")
  524. assert mgr.as_array().dtype == "object"
  525. def test_interleave_non_unique_cols(self):
  526. df = DataFrame(
  527. [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]],
  528. columns=["x", "x"],
  529. index=[1, 2],
  530. )
  531. df_unique = df.copy()
  532. df_unique.columns = ["x", "y"]
  533. assert df_unique.values.shape == df.values.shape
  534. tm.assert_numpy_array_equal(df_unique.values[0], df.values[0])
  535. tm.assert_numpy_array_equal(df_unique.values[1], df.values[1])
  536. def test_consolidate(self):
  537. pass
  538. def test_consolidate_ordering_issues(self, mgr):
  539. mgr.set("f", tm.randn(N))
  540. mgr.set("d", tm.randn(N))
  541. mgr.set("b", tm.randn(N))
  542. mgr.set("g", tm.randn(N))
  543. mgr.set("h", tm.randn(N))
  544. # we have datetime/tz blocks in mgr
  545. cons = mgr.consolidate()
  546. assert cons.nblocks == 4
  547. cons = mgr.consolidate().get_numeric_data()
  548. assert cons.nblocks == 1
  549. assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement)
  550. tm.assert_numpy_array_equal(
  551. cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64)
  552. )
  553. def test_reindex_index(self):
  554. # TODO: should this be pytest.skip?
  555. pass
  556. def test_reindex_items(self):
  557. # mgr is not consolidated, f8 & f8-2 blocks
  558. mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2")
  559. reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
  560. assert reindexed.nblocks == 2
  561. tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"]))
  562. tm.assert_almost_equal(
  563. mgr.get("g").internal_values(), reindexed.get("g").internal_values()
  564. )
  565. tm.assert_almost_equal(
  566. mgr.get("c").internal_values(), reindexed.get("c").internal_values()
  567. )
  568. tm.assert_almost_equal(
  569. mgr.get("a").internal_values(), reindexed.get("a").internal_values()
  570. )
  571. tm.assert_almost_equal(
  572. mgr.get("d").internal_values(), reindexed.get("d").internal_values()
  573. )
  574. def test_get_numeric_data(self):
  575. mgr = create_mgr(
  576. "int: int; float: float; complex: complex;"
  577. "str: object; bool: bool; obj: object; dt: datetime",
  578. item_shape=(3,),
  579. )
  580. mgr.set("obj", np.array([1, 2, 3], dtype=np.object_))
  581. numeric = mgr.get_numeric_data()
  582. tm.assert_index_equal(
  583. numeric.items, pd.Index(["int", "float", "complex", "bool"])
  584. )
  585. tm.assert_almost_equal(
  586. mgr.get("float").internal_values(), numeric.get("float").internal_values()
  587. )
  588. # Check sharing
  589. numeric.set("float", np.array([100.0, 200.0, 300.0]))
  590. tm.assert_almost_equal(
  591. mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
  592. )
  593. numeric2 = mgr.get_numeric_data(copy=True)
  594. tm.assert_index_equal(
  595. numeric.items, pd.Index(["int", "float", "complex", "bool"])
  596. )
  597. numeric2.set("float", np.array([1000.0, 2000.0, 3000.0]))
  598. tm.assert_almost_equal(
  599. mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
  600. )
  601. def test_get_bool_data(self):
  602. mgr = create_mgr(
  603. "int: int; float: float; complex: complex;"
  604. "str: object; bool: bool; obj: object; dt: datetime",
  605. item_shape=(3,),
  606. )
  607. mgr.set("obj", np.array([True, False, True], dtype=np.object_))
  608. bools = mgr.get_bool_data()
  609. tm.assert_index_equal(bools.items, pd.Index(["bool"]))
  610. tm.assert_almost_equal(
  611. mgr.get("bool").internal_values(), bools.get("bool").internal_values()
  612. )
  613. bools.set("bool", np.array([True, False, True]))
  614. tm.assert_numpy_array_equal(
  615. mgr.get("bool").internal_values(), np.array([True, False, True])
  616. )
  617. # Check sharing
  618. bools2 = mgr.get_bool_data(copy=True)
  619. bools2.set("bool", np.array([False, True, False]))
  620. tm.assert_numpy_array_equal(
  621. mgr.get("bool").internal_values(), np.array([True, False, True])
  622. )
  623. def test_unicode_repr_doesnt_raise(self):
  624. repr(create_mgr("b,\u05d0: object"))
  625. def test_missing_unicode_key(self):
  626. df = DataFrame({"a": [1]})
  627. try:
  628. df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
  629. except KeyError:
  630. pass # this is the expected exception
  631. def test_equals(self):
  632. # unique items
  633. bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2")
  634. bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
  635. assert bm1.equals(bm2)
  636. bm1 = create_mgr("a,a,a: i8-1; b,b,b: i8-2")
  637. bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
  638. assert bm1.equals(bm2)
  639. def test_equals_block_order_different_dtypes(self):
  640. # GH 9330
  641. mgr_strings = [
  642. "a:i8;b:f8", # basic case
  643. "a:i8;b:f8;c:c8;d:b", # many types
  644. "a:i8;e:dt;f:td;g:string", # more types
  645. "a:i8;b:category;c:category2;d:category2", # categories
  646. "c:sparse;d:sparse_na;b:f8", # sparse
  647. ]
  648. for mgr_string in mgr_strings:
  649. bm = create_mgr(mgr_string)
  650. block_perms = itertools.permutations(bm.blocks)
  651. for bm_perm in block_perms:
  652. bm_this = BlockManager(bm_perm, bm.axes)
  653. assert bm.equals(bm_this)
  654. assert bm_this.equals(bm)
  655. def test_single_mgr_ctor(self):
  656. mgr = create_single_mgr("f8", num_rows=5)
  657. assert mgr.as_array().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0]
  658. def test_validate_bool_args(self):
  659. invalid_values = [1, "True", [1, 2, 3], 5.0]
  660. bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2")
  661. for value in invalid_values:
  662. with pytest.raises(ValueError):
  663. bm1.replace_list([1], [2], inplace=value)
  class TestIndexing:
      """Data-driven checks of block manager indexing against ndarray results."""

      # Nosetests-style data-driven tests.
      #
      # Every test applies an indexing routine to each block manager in
      # MANAGERS and compares the outcome to the result of the same operation
      # on the np.ndarray returned by ``as_array()``.
      #
      # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of tests
      # and are disabled.

      MANAGERS = [
          create_single_mgr("f8", N),
          create_single_mgr("i8", N),
          # 2-dim
          create_mgr("a,b,c,d,e,f: f8", item_shape=(N,)),
          create_mgr("a,b,c,d,e,f: i8", item_shape=(N,)),
          create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N,)),
          create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N,)),
          # 3-dim
          create_mgr("a,b,c,d,e,f: f8", item_shape=(N, N)),
          create_mgr("a,b,c,d,e,f: i8", item_shape=(N, N)),
          create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N, N)),
          create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N, N)),
      ]

      # MANAGERS = [MANAGERS[6]]

      def test_get_slice(self):
          """Compare ``get_slice`` with direct ndarray indexing on every axis."""

          def check_slice(manager, axis, key):
              dense = manager.as_array()

              # A boolean ndarray key may be shorter than the axis; pad it
              # with False entries up to the full axis length.
              if isinstance(key, np.ndarray):
                  labels = manager.axes[axis]
                  if len(labels) and len(key) and len(key) != len(labels):
                      padding = np.zeros(len(labels) - len(key), dtype=bool)
                      key = np.concatenate([key, padding])

              result = manager.get_slice(key, axis=axis)
              dense_key = (slice(None),) * axis + (key,)

              tm.assert_numpy_array_equal(
                  dense[dense_key], result.as_array(), check_dtype=False
              )
              tm.assert_index_equal(manager.axes[axis][key], result.axes[axis])

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  size = manager.shape[axis]

                  # slice
                  slices = (
                      slice(None),
                      slice(3),
                      slice(100),
                      slice(1, 4),
                      slice(3, 0, -2),
                  )
                  for slc in slices:
                      check_slice(manager, axis, slc)

                  # boolean mask
                  masks = [
                      np.array([], dtype=np.bool_),
                      np.ones(size, dtype=np.bool_),
                      np.zeros(size, dtype=np.bool_),
                  ]
                  if size >= 3:
                      masks.append(np.arange(size) % 3 == 0)
                      masks.append(np.array([True, True, False], dtype=np.bool_))
                  for mask in masks:
                      check_slice(manager, axis, mask)

                  # fancy indexer
                  indexers = [[], list(range(size))]
                  if size >= 3:
                      indexers.append([0, 1, 2])
                      indexers.append([-1, -2, -3])
                  for indexer in indexers:
                      check_slice(manager, axis, indexer)

      def test_take(self):
          """Compare ``take`` with ``np.take`` on the dense array for every axis."""

          def check_take(manager, axis, indexer):
              dense = manager.as_array()
              result = manager.take(indexer, axis)

              tm.assert_numpy_array_equal(
                  np.take(dense, indexer, axis), result.as_array(), check_dtype=False
              )
              tm.assert_index_equal(
                  manager.axes[axis].take(indexer), result.axes[axis]
              )

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  size = manager.shape[axis]

                  # take/fancy indexer
                  indexers = [[], [0, 0, 0], list(range(size))]
                  if size >= 3:
                      indexers.append([0, 1, 2])
                      indexers.append([-1, -2, -3])
                  for indexer in indexers:
                      check_take(manager, axis, indexer)

      def test_reindex_axis(self):
          """Compare ``reindex_axis`` with ``algos.take_nd`` on the dense array."""

          def check_reindex_axis(manager, axis, new_labels, fill_value):
              dense = manager.as_array()
              indexer = manager.axes[axis].get_indexer_for(new_labels)
              result = manager.reindex_axis(new_labels, axis, fill_value=fill_value)

              tm.assert_numpy_array_equal(
                  algos.take_nd(dense, indexer, axis, fill_value=fill_value),
                  result.as_array(),
                  check_dtype=False,
              )
              tm.assert_index_equal(result.axes[axis], new_labels)

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  for fill_value in (None, np.nan, 100.0):
                      labels = manager.axes[axis]
                      targets = [
                          pd.Index([]),
                          labels,
                          labels[[0, 0, 0]],
                          pd.Index(["foo", "bar", "baz"]),
                          pd.Index(["foo", labels[0], "baz"]),
                      ]
                      if manager.shape[axis] >= 3:
                          targets.append(labels[:-3])
                          targets.append(labels[-3::-1])
                          targets.append(labels[[0, 1, 2, 0, 1, 2]])

                      for target in targets:
                          check_reindex_axis(manager, axis, target, fill_value)

      def test_reindex_indexer(self):
          """Compare ``reindex_indexer`` with ``algos.take_nd`` on the dense array."""

          def check_reindex_indexer(manager, axis, new_labels, indexer, fill_value):
              dense = manager.as_array()
              expected = algos.take_nd(dense, indexer, axis, fill_value=fill_value)
              result = manager.reindex_indexer(
                  new_labels, indexer, axis, fill_value=fill_value
              )

              tm.assert_numpy_array_equal(
                  expected, result.as_array(), check_dtype=False
              )
              tm.assert_index_equal(result.axes[axis], new_labels)

          for manager in self.MANAGERS:
              for axis in range(manager.ndim):
                  for fill_value in (None, np.nan, 100.0):
                      labels = manager.axes[axis]
                      size = manager.shape[axis]

                      # (new_labels, indexer) pairs, in the order they are checked
                      cases = [
                          (pd.Index([]), []),
                          (labels, np.arange(size)),
                          (pd.Index(["foo"] * size), np.arange(size)),
                          (labels[::-1], np.arange(size)),
                          (labels, np.arange(size)[::-1]),
                          (pd.Index(["foo", "bar", "baz"]), [0, 0, 0]),
                          (pd.Index(["foo", "bar", "baz"]), [-1, 0, -1]),
                          (pd.Index(["foo", labels[0], "baz"]), [-1, -1, -1]),
                      ]
                      if size >= 3:
                          cases.append((pd.Index(["foo", "bar", "baz"]), [0, 1, 2]))

                      for new_labels, indexer in cases:
                          check_reindex_indexer(
                              manager, axis, new_labels, indexer, fill_value
                          )

      # test_get_slice(slice_like, axis)
      # take(indexer, axis)
      # reindex_axis(new_labels, axis)
      # reindex_indexer(new_labels, indexer, axis)
  class TestBlockPlacement:
      """Checks of ``BlockPlacement`` built from slices and integer lists."""

      def test_slice_len(self):
          """``len`` of a slice-backed placement equals the number of positions."""
          cases = [
              (slice(0, 4), 4),
              (slice(0, 4, 2), 2),
              (slice(0, 3, 2), 2),
              (slice(0, 1, 2), 1),
              (slice(1, 0, -1), 1),
          ]
          for slc, expected_len in cases:
              assert len(BlockPlacement(slc)) == expected_len

      def test_zero_step_raises(self):
          """A slice with step 0 is rejected with ValueError."""
          for slc in (slice(1, 1, 0), slice(1, 2, 0)):
              with pytest.raises(ValueError):
                  BlockPlacement(slc)

      def test_unbounded_slice_raises(self):
          """Slices with an open or negative bound raise "unbounded slice"."""
          unbounded = [
              slice(None, None),
              slice(10, None),
              slice(None, None, -1),
              slice(None, 10, -1),
              # These are "unbounded" because negative index will change depending on
              # container shape.
              slice(-1, None),
              slice(None, -1),
              slice(-1, -1),
              slice(-1, None, -1),
              slice(None, -1, -1),
              slice(-1, -1, -1),
          ]
          for slc in unbounded:
              with pytest.raises(ValueError, match="unbounded slice"):
                  BlockPlacement(slc)

      def test_not_slice_like_slices(self):
          """Each slice below yields a placement that is not slice-like."""
          slices = [
              slice(0, 0),
              slice(100, 0),
              slice(100, 100, -1),
              slice(0, 100, -1),
              slice(0, 0),
              slice(100, 100),
          ]
          for slc in slices:
              assert not BlockPlacement(slc).is_slice_like

      def test_array_to_slice_conversion(self):
          """Evenly spaced integer lists are exposed through ``as_slice``."""
          cases = [
              ([0], slice(0, 1, 1)),
              ([100], slice(100, 101, 1)),
              ([0, 1, 2], slice(0, 3, 1)),
              ([0, 5, 10], slice(0, 15, 5)),
              ([0, 100], slice(0, 200, 100)),
              ([2, 1], slice(2, 0, -1)),
          ]
          for arr, expected_slice in cases:
              assert BlockPlacement(arr).as_slice == expected_slice

      def test_not_slice_like_arrays(self):
          """Each integer list below yields a placement that is not slice-like."""
          arrays = [
              [],
              [-1],
              [-1, -2, -3],
              [-10],
              [-1],
              [-1, 0, 1, 2],
              [-2, 0, 2, 4],
              [1, 0, -1],
              [1, 1, 1],
          ]
          for arr in arrays:
              assert not BlockPlacement(arr).is_slice_like

      def test_slice_iter(self):
          """Iterating a slice-backed placement yields its positions in order."""
          cases = [
              (slice(0, 3), [0, 1, 2]),
              (slice(0, 0), []),
              (slice(3, 0), []),
          ]
          for slc, expected in cases:
              assert list(BlockPlacement(slc)) == expected

      def test_slice_to_array_conversion(self):
          """``as_array`` of a slice-backed placement is an int64 position array."""
          cases = [
              (slice(0, 3), [0, 1, 2]),
              (slice(0, 0), []),
              (slice(3, 0), []),
              (slice(3, 0, -1), [3, 2, 1]),
          ]
          for slc, positions in cases:
              tm.assert_numpy_array_equal(
                  BlockPlacement(slc).as_array, np.asarray(positions, dtype=np.int64)
              )

      def test_blockplacement_add(self):
          """``add`` accepts both an int and an array of per-position offsets."""
          placement = BlockPlacement(slice(0, 5))

          assert placement.add(1).as_slice == slice(1, 6, 1)
          assert placement.add(np.arange(5)).as_slice == slice(0, 10, 2)
          assert list(placement.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5]

      def test_blockplacement_add_int(self):
          """Adding an int shifts every position; the -10 cases raise ValueError."""
          cases = [
              (slice(0, 0), 0, []),
              (slice(1, 4), 0, [1, 2, 3]),
              (slice(3, 0, -1), 0, [3, 2, 1]),
              ([1, 2, 4], 0, [1, 2, 4]),
              (slice(0, 0), 10, []),
              (slice(1, 4), 10, [11, 12, 13]),
              (slice(3, 0, -1), 10, [13, 12, 11]),
              ([1, 2, 4], 10, [11, 12, 14]),
              (slice(0, 0), -1, []),
              (slice(1, 4), -1, [0, 1, 2]),
              ([1, 2, 4], -1, [0, 1, 3]),
          ]
          for val, inc, expected in cases:
              assert list(BlockPlacement(val).add(inc)) == expected

          for val in (slice(1, 4), [1, 2, 4]):
              with pytest.raises(ValueError):
                  BlockPlacement(val).add(-10)
  class DummyElement:
      """Small test double pairing a raw value with a dtype."""

      def __init__(self, value, dtype):
          self.value = value
          # the dtype spec is normalised to an ``np.dtype`` on construction
          self.dtype = np.dtype(dtype)

      def __array__(self):
          """Return the value as an ndarray of the stored dtype."""
          return np.array(self.value, dtype=self.dtype)

      def __str__(self) -> str:
          template = "DummyElement({}, {})"
          return template.format(self.value, self.dtype)

      def __repr__(self) -> str:
          # repr and str are deliberately the same text
          return str(self)

      def astype(self, dtype, copy=False):
          """Replace the stored dtype in place and return this same object.

          ``dtype`` is stored as given and ``copy`` is not used.
          """
          self.dtype = dtype
          return self

      def view(self, dtype):
          """Return a new element wrapping ``self.value.view(dtype)``."""
          reinterpreted = self.value.view(dtype)
          return type(self)(reinterpreted, dtype)

      def any(self, axis=None):
          """Return the truthiness of the value; ``axis`` is not used."""
          return True if self.value else False
  class TestCanHoldElement:
      """Binary operators between a one-column frame and a scalar, per dtype."""

      @pytest.mark.parametrize(
          "value, dtype",
          [
              (1, "i8"),
              (1.0, "f8"),
              (2 ** 63, "f8"),
              (1j, "complex128"),
              (2 ** 63, "complex128"),
              (True, "bool"),
              (np.timedelta64(20, "ns"), "<m8[ns]"),
              (np.datetime64(20, "ns"), "<M8[ns]"),
          ],
      )
      @pytest.mark.parametrize(
          "op",
          [
              operator.add,
              operator.sub,
              operator.mul,
              operator.truediv,
              operator.mod,
              operator.pow,
          ],
          ids=lambda x: x.__name__,
      )
      def test_binop_other(self, op, value, dtype):
          """Apply ``op`` to a frame and a scalar and check raise-or-dtypes."""
          combo = (op, dtype)

          # operator/dtype pairs that are skipped outright
          skip = {
              (operator.add, "bool"),
              (operator.sub, "bool"),
              (operator.mul, "bool"),
              (operator.truediv, "bool"),
              (operator.mod, "i8"),
              (operator.mod, "complex128"),
              (operator.pow, "bool"),
          }
          if combo in skip:
              pytest.skip("Invalid combination {},{}".format(op, dtype))

          # operator/dtype pairs for which the operation must raise TypeError
          invalid = {
              (operator.pow, "<M8[ns]"),
              (operator.mod, "<M8[ns]"),
              (operator.truediv, "<M8[ns]"),
              (operator.mul, "<M8[ns]"),
              (operator.add, "<M8[ns]"),
              (operator.pow, "<m8[ns]"),
              (operator.mul, "<m8[ns]"),
          }

          elem = DummyElement(value, dtype)
          frame = pd.DataFrame({"A": [elem.value, elem.value]}, dtype=elem.dtype)

          if combo in invalid:
              with pytest.raises(TypeError):
                  op(frame, elem.value)
              return

          # FIXME: Since dispatching to Series, this test no longer
          # asserts anything meaningful
          result = op(frame, elem.value).dtypes
          expected = op(frame, value).dtypes
          tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "typestr, holder",
      [
          ("category", Categorical),
          ("M8[ns]", DatetimeArray),
          ("M8[ns, US/Central]", DatetimeArray),
          ("m8[ns]", TimedeltaArray),
          ("sparse", SparseArray),
      ],
  )
  def test_holder(typestr, holder):
      """The block created for ``typestr`` reports ``holder`` as its ``_holder``."""
      block = create_block(typestr, [1])
      assert block._holder is holder
  def test_validate_ndim():
      """``make_block`` raises ValueError when 1-D values are given ``ndim=2``."""
      one_dim_values = np.array([1.0, 2.0])
      expected_msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]"

      with pytest.raises(ValueError, match=expected_msg):
          make_block(one_dim_values, slice(2), ndim=2)
  def test_block_shape():
      """Reindexed plain and categorical Series share the same block indexer."""
      target = pd.Index([0, 1, 2, 3, 4])
      plain = pd.Series([1, 2, 3]).reindex(target)
      categorical = pd.Series(pd.Categorical([1, 2, 3])).reindex(target)

      plain_indexer = plain._data.blocks[0].mgr_locs.indexer
      categorical_indexer = categorical._data.blocks[0].mgr_locs.indexer
      assert plain_indexer == categorical_indexer
  def test_make_block_no_pandas_array():
      """``make_block`` gives an integer, non-extension block in all three cases."""
      # https://github.com/pandas-dev/pandas/pull/24866
      arr = pd.arrays.PandasArray(np.array([1, 2]))

      cases = [
          (arr, {}),  # PandasArray, no dtype
          (arr, {"dtype": arr.dtype}),  # PandasArray, PandasDtype
          (arr.to_numpy(), {"dtype": arr.dtype}),  # ndarray, PandasDtype
      ]
      for values, extra in cases:
          result = make_block(values, slice(len(arr)), **extra)
          assert result.is_integer is True
          assert result.is_extension is False
  def test_dataframe_not_equal():
      """Frames whose column contents are swapped must not compare equal."""
      # see GH28839
      numbers = [1, 2]
      letters = ["s", "d"]

      left = pd.DataFrame({"a": numbers, "b": letters})
      right = pd.DataFrame({"a": letters, "b": numbers})

      assert left.equals(right) is False