test_operators.py 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911
  1. from decimal import Decimal
  2. import operator
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. from pandas import DataFrame, MultiIndex, Series
  7. import pandas._testing as tm
  8. import pandas.core.common as com
  9. from pandas.tests.frame.common import _check_mixed_float
  class TestDataFrameUnaryOperators:
      """Exercise the unary operators ``-``, ``+`` and ``~`` on frames and columns."""

      # __pos__, __neg__, __inv__
      @pytest.mark.parametrize(
          "df,expected",
          [
              (pd.DataFrame({"a": [-1, 1]}), pd.DataFrame({"a": [1, -1]})),
              (pd.DataFrame({"a": [False, True]}), pd.DataFrame({"a": [True, False]})),
              (
                  pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}),
                  pd.DataFrame({"a": pd.Series(pd.to_timedelta([1, -1]))}),
              ),
          ],
      )
      def test_neg_numeric(self, df, expected):
          """``-x`` on int, bool and timedelta data matches ``expected``."""
          negated_frame = -df
          tm.assert_frame_equal(negated_frame, expected)

          # The single column must behave like the whole frame.
          negated_column = -df["a"]
          tm.assert_series_equal(negated_column, expected["a"])

      @pytest.mark.parametrize(
          "df, expected",
          [
              (np.array([1, 2], dtype=object), np.array([-1, -2], dtype=object)),
              ([Decimal("1.0"), Decimal("2.0")], [Decimal("-1.0"), Decimal("-2.0")]),
          ],
      )
      def test_neg_object(self, df, expected):
          """``-x`` on object-dtype values (ints, Decimals) negates each value."""
          # GH#21380
          # The parametrized arguments are raw values; wrap both in one-column frames.
          frame = pd.DataFrame({"a": df})
          wanted = pd.DataFrame({"a": expected})

          tm.assert_frame_equal(-frame, wanted)
          tm.assert_series_equal(-frame["a"], wanted["a"])

      @pytest.mark.parametrize(
          "df",
          [
              pd.DataFrame({"a": ["a", "b"]}),
              pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}),
          ],
      )
      def test_neg_raises(self, df):
          """``-x`` raises TypeError for string and datetime data."""
          # Same expectation for the frame and for its only column.
          for operand in (df, df["a"]):
              with pytest.raises(TypeError):
                  operator.neg(operand)

      def test_invert(self, float_frame):
          """On a boolean mask, ``-mask`` and ``~mask`` give the same frame."""
          mask = float_frame < 0
          tm.assert_frame_equal(-mask, ~mask)

      def test_invert_mixed(self):
          """``~`` on a frame of bool and int columns inverts each block by dtype."""
          shape = (10, 5)

          def _side_by_side(bool_values, int_values):
              # bool columns first, int columns second, relabelled 0..n-1
              return pd.concat(
                  [pd.DataFrame(bool_values), pd.DataFrame(int_values)],
                  axis=1,
                  ignore_index=True,
              )

          df = _side_by_side(np.zeros(shape, dtype="bool"), np.zeros(shape, dtype=int))
          result = ~df

          # ~False is True; ~0 is -1 for integers
          expected = _side_by_side(
              np.ones(shape, dtype="bool"), -np.ones(shape, dtype=int)
          )
          tm.assert_frame_equal(result, expected)

      @pytest.mark.parametrize(
          "df",
          [
              pd.DataFrame({"a": [-1, 1]}),
              pd.DataFrame({"a": [False, True]}),
              pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}),
          ],
      )
      def test_pos_numeric(self, df):
          """``+x`` leaves int, bool and timedelta data unchanged."""
          # GH#16073
          column = df["a"]
          tm.assert_frame_equal(+df, df)
          tm.assert_series_equal(+column, column)

      @pytest.mark.parametrize(
          "df",
          [
              # numpy changing behavior in the future
              pytest.param(
                  pd.DataFrame({"a": ["a", "b"]}),
                  marks=[pytest.mark.filterwarnings("ignore")],
              ),
              pd.DataFrame({"a": np.array([-1, 2], dtype=object)}),
              pd.DataFrame({"a": [Decimal("-1.0"), Decimal("2.0")]}),
          ],
      )
      def test_pos_object(self, df):
          """``+x`` leaves object-dtype data (strings, ints, Decimals) unchanged."""
          # GH#21380
          column = df["a"]
          tm.assert_frame_equal(+df, df)
          tm.assert_series_equal(+column, column)

      @pytest.mark.parametrize(
          "df", [pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])})]
      )
      def test_pos_raises(self, df):
          """``+x`` raises TypeError for datetime data."""
          for operand in (df, df["a"]):
              with pytest.raises(TypeError):
                  operator.pos(operand)
  class TestDataFrameLogicalOperators:
      """Tests for the binary logical operators ``&``, ``|`` and ``^`` on DataFrame."""

      # &, |, ^

      def test_logical_ops_empty_frame(self):
          """Logical ops on frames without columns, or with an all-missing column."""
          # GH#5808
          # empty frames, non-mixed dtype
          no_columns = DataFrame(index=[1])
          for logical_op in (operator.and_, operator.or_):
              tm.assert_frame_equal(logical_op(no_columns, no_columns), no_columns)

          # Operating with a longer column-less frame yields the longer frame.
          longer = DataFrame(index=[1, 2])
          tm.assert_frame_equal(no_columns & longer, longer)

          # A column created without data and-ed with itself gives False.
          one_missing_col = DataFrame(index=[1], columns=["A"])
          expected = DataFrame(False, index=[1], columns=["A"])
          tm.assert_frame_equal(one_missing_col & one_missing_col, expected)

      def test_logical_ops_bool_frame(self):
          """``&`` and ``|`` of an all-True frame with itself return that frame."""
          # GH#5808
          all_true = DataFrame(True, index=[1], columns=["A"])
          for logical_op in (operator.and_, operator.or_):
              tm.assert_frame_equal(logical_op(all_true, all_true), all_true)

      def test_logical_ops_int_frame(self):
          """``int | bool`` gives the bool frame, for frames and for their columns."""
          # GH#5808
          ones = DataFrame(1, index=[1], columns=["A"])
          all_true = DataFrame(True, index=[1], columns=["A"])

          tm.assert_frame_equal(ones | all_true, all_true)

          # Check that this matches Series behavior
          tm.assert_series_equal(ones["A"] | all_true["A"], all_true["A"])

      def test_logical_ops_invalid(self):
          """``|`` between a float or string frame and a bool frame raises TypeError."""
          # GH#5808
          for fill_value in (1.0, "foo"):
              left = DataFrame(fill_value, index=[1], columns=["A"])
              right = DataFrame(True, index=[1], columns=["A"])
              with pytest.raises(TypeError):
                  operator.or_(left, right)

      def test_logical_operators(self):
          """Logical ops on bool frames agree with the same op on the raw values."""
          left = DataFrame(
              {
                  "a": {"a": True, "b": False, "c": False, "d": True, "e": True},
                  "b": {"a": False, "b": True, "c": False, "d": False, "e": False},
                  "c": {"a": False, "b": False, "c": True, "d": False, "e": False},
                  "d": {"a": True, "b": False, "c": False, "d": True, "e": True},
                  "e": {"a": True, "b": False, "c": False, "d": True, "e": True},
              }
          )
          right = DataFrame(
              {
                  "a": {"a": True, "b": False, "c": True, "d": False, "e": False},
                  "b": {"a": False, "b": True, "c": False, "d": False, "e": False},
                  "c": {"a": True, "b": False, "c": True, "d": False, "e": False},
                  "d": {"a": False, "b": False, "c": False, "d": True, "e": False},
                  "e": {"a": False, "b": False, "c": False, "d": False, "e": True},
              }
          )

          # Binary operators: compare against the op applied to the ndarrays.
          for binary_op in (operator.and_, operator.or_, operator.xor):
              result = binary_op(left, right)
              expected = DataFrame(
                  binary_op(left.values, right.values),
                  index=left.index,
                  columns=left.columns,
              )
              assert result.values.dtype == np.bool_
              tm.assert_frame_equal(result, expected)

          # TODO: belongs elsewhere
          # Unary inversion gets the same treatment on the left frame only.
          result = operator.inv(left)
          expected = DataFrame(
              operator.inv(left.values), index=left.index, columns=left.columns
          )
          assert result.values.dtype == np.bool_
          tm.assert_frame_equal(result, expected)

      def test_logical_with_nas(self):
          """``|`` between a column holding NaN and a bool column returns bools."""
          frame = DataFrame({"a": [np.nan, False], "b": [True, True]})
          with_nan = frame["a"]
          all_true = frame["b"]

          # GH4947
          # bool comparisons should return bool
          tm.assert_series_equal(with_nan | all_true, Series([False, True]))

          # GH4604, automatic casting here
          tm.assert_series_equal(
              with_nan.fillna(False) | all_true, Series([True, True])
          )

          tm.assert_series_equal(
              with_nan.fillna(False, downcast=False) | all_true, Series([True, True])
          )

      @pytest.mark.parametrize(
          "left, right, op, expected",
          [
              (
                  [True, False, np.nan],
                  [True, False, True],
                  operator.and_,
                  [True, False, False],
              ),
              (
                  [True, False, True],
                  [True, False, np.nan],
                  operator.and_,
                  [True, False, False],
              ),
              (
                  [True, False, np.nan],
                  [True, False, True],
                  operator.or_,
                  [True, False, False],
              ),
              (
                  [True, False, True],
                  [True, False, np.nan],
                  operator.or_,
                  [True, False, True],
              ),
          ],
      )
      def test_logical_operators_nans(self, left, right, op, expected):
          """``&`` / ``|`` on frames holding a NaN match the parametrized results."""
          # GH 13896
          left_frame = DataFrame(left)
          right_frame = DataFrame(right)
          wanted = DataFrame(expected)

          tm.assert_frame_equal(op(left_frame, right_frame), wanted)
  class TestDataFrameOperators:
      """Arithmetic, comparison, alignment and in-place operator tests for DataFrame."""

      @pytest.mark.parametrize(
          "op", [operator.add, operator.sub, operator.mul, operator.truediv]
      )
      def test_operators_none_as_na(self, op):
          """Arithmetic on an object-dtype frame gives None where a NaN-filled copy gives NA."""
          df = DataFrame(
              {"col1": [2, 5.0, 123, None], "col2": [1, 2, 3, 4]}, dtype=object
          )

          # since filling converts dtypes from object, changed expected to be
          # object
          filled = df.fillna(np.nan)
          result = op(df, 3)
          expected = op(filled, 3).astype(object)
          expected[com.isna(expected)] = None
          tm.assert_frame_equal(result, expected)

          result = op(df, df)
          expected = op(filled, filled).astype(object)
          expected[com.isna(expected)] = None
          tm.assert_frame_equal(result, expected)

          # A missing value on either side still yields a missing result.
          result = op(df, df.fillna(7))
          tm.assert_frame_equal(result, expected)

          result = op(df.fillna(7), df)
          tm.assert_frame_equal(result, expected, check_dtype=False)

      @pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])
      # TODO: not sure what's correct here.
      @pytest.mark.filterwarnings("ignore:elementwise:FutureWarning")
      def test_logical_typeerror_with_non_valid(self, op, res, float_frame):
          """Comparing a float frame with a string: ``==`` all False, ``!=`` all True."""
          # we are comparing floats vs a string
          result = getattr(float_frame, op)("foo")
          assert bool(result.all().all()) is res

      def test_binary_ops_align(self):
          """Flex arithmetic with ``level=`` aligns a Series on one MultiIndex level."""
          # test aligning binary ops

          # GH 6681
          index = MultiIndex.from_product(
              [list("abc"), ["one", "two", "three"], [1, 2, 3]],
              names=["first", "second", "third"],
          )

          df = DataFrame(
              np.arange(27 * 3).reshape(27, 3),
              index=index,
              columns=["value1", "value2", "value3"],
          ).sort_index()

          idx = pd.IndexSlice
          for op in ["add", "sub", "mul", "div", "truediv"]:
              # Names with no counterpart in the ``operator`` module are skipped.
              opa = getattr(operator, op, None)
              if opa is None:
                  continue

              x = Series([1.0, 10.0, 100.0], [1, 2, 3])
              result = getattr(df, op)(x, level="third", axis=0)

              expected = pd.concat(
                  [opa(df.loc[idx[:, :, i], :], v) for i, v in x.items()]
              ).sort_index()
              tm.assert_frame_equal(result, expected)

              x = Series([1.0, 10.0], ["two", "three"])
              result = getattr(df, op)(x, level="second", axis=0)

              # "one" is absent from x, so reindex to bring those rows back.
              expected = (
                  pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.items()])
                  .reindex_like(df)
                  .sort_index()
              )
              tm.assert_frame_equal(result, expected)

          # GH9463 (alignment level of dataframe with series)

          midx = MultiIndex.from_product([["A", "B"], ["a", "b"]])
          df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx)
          s = pd.Series({"a": 1, "b": 2})

          df2 = df.copy()
          df2.columns.names = ["lvl0", "lvl1"]
          s2 = s.copy()
          s2.index.name = "lvl1"

          # different cases of integer/string level names:
          res1 = df.mul(s, axis=1, level=1)
          res2 = df.mul(s2, axis=1, level=1)
          res3 = df2.mul(s, axis=1, level=1)
          res4 = df2.mul(s2, axis=1, level=1)
          res5 = df2.mul(s, axis=1, level="lvl1")
          res6 = df2.mul(s2, axis=1, level="lvl1")

          exp = DataFrame(
              np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"), columns=midx
          )

          for res in [res1, res2]:
              tm.assert_frame_equal(res, exp)

          exp.columns.names = ["lvl0", "lvl1"]
          for res in [res3, res4, res5, res6]:
              tm.assert_frame_equal(res, exp)

      def test_dti_tz_convert_to_utc(self):
          """Adding frames indexed in different time zones gives a UTC-indexed result."""
          base = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
          idx1 = base.tz_convert("Asia/Tokyo")[:2]
          idx2 = base.tz_convert("US/Eastern")[1:]

          df1 = DataFrame({"A": [1, 2]}, index=idx1)
          df2 = DataFrame({"A": [1, 1]}, index=idx2)
          # Only the middle timestamp is present in both frames.
          exp = DataFrame({"A": [np.nan, 3, np.nan]}, index=base)
          tm.assert_frame_equal(df1 + df2, exp)

      def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame):
          """Frame + frame aligns on both axes, fills gaps with NaN and upcasts."""
          frame_copy = float_frame.reindex(float_frame.index[::2])

          del frame_copy["D"]
          # Write through a single indexer on the frame itself: a chained
          # ``frame_copy["C"][:5] = ...`` may assign into a temporary copy.
          frame_copy.iloc[:5, frame_copy.columns.get_loc("C")] = np.nan

          added = float_frame + frame_copy

          indexer = added["A"].dropna().index
          exp = (float_frame["A"] * 2).copy()

          tm.assert_series_equal(added["A"].dropna(), exp.loc[indexer])

          exp.loc[~exp.index.isin(indexer)] = np.nan
          tm.assert_series_equal(added["A"], exp.loc[added["A"].index])

          assert np.isnan(added["C"].reindex(frame_copy.index)[:5]).all()

          # "D" exists on one side only, so it is all-NaN in either operand order.
          assert np.isnan(added["D"]).all()

          self_added = float_frame + float_frame
          tm.assert_index_equal(self_added.index, float_frame.index)

          added_rev = frame_copy + float_frame
          assert np.isnan(added_rev["D"]).all()

          # corner cases

          # empty
          plus_empty = float_frame + DataFrame()
          assert np.isnan(plus_empty.values).all()

          empty_plus = DataFrame() + float_frame
          assert np.isnan(empty_plus.values).all()

          empty_empty = DataFrame() + DataFrame()
          assert empty_empty.empty

          # out of order
          reverse = float_frame.reindex(columns=float_frame.columns[::-1])

          tm.assert_frame_equal(reverse + float_frame, float_frame * 2)

          # mix vs float64, upcast
          added = float_frame + mixed_float_frame
          _check_mixed_float(added, dtype="float64")
          added = mixed_float_frame + float_frame
          _check_mixed_float(added, dtype="float64")

          # mix vs mix
          added = mixed_float_frame + mixed_float_frame
          _check_mixed_float(added, dtype=dict(C=None))

          # with int
          added = float_frame + mixed_int_frame
          _check_mixed_float(added, dtype="float64")

      def test_combine_series(
          self, float_frame, mixed_float_frame, mixed_int_frame, datetime_frame
      ):
          """Frame + Series: column-wise by default, row-wise via ``axis="index"``."""
          # Series
          series = float_frame.xs(float_frame.index[0])

          added = float_frame + series

          for key, s in added.items():
              tm.assert_series_equal(s, float_frame[key] + series[key])

          # A Series with an extra label adds an all-NaN column for that label.
          larger_series = series.to_dict()
          larger_series["E"] = 1
          larger_series = Series(larger_series)
          larger_added = float_frame + larger_series

          for key, s in float_frame.items():
              tm.assert_series_equal(larger_added[key], s + series[key])
          assert "E" in larger_added
          assert np.isnan(larger_added["E"]).all()

          # no upcast needed
          added = mixed_float_frame + series
          _check_mixed_float(added)

          # vs mix (upcast) as needed
          added = mixed_float_frame + series.astype("float32")
          _check_mixed_float(added, dtype=dict(C=None))
          added = mixed_float_frame + series.astype("float16")
          _check_mixed_float(added, dtype=dict(C=None))

          # FIXME: don't leave commented-out
          # these raise with numexpr.....as we are adding an int64 to an
          # uint64....weird vs int

          # added = mixed_int_frame + (100*series).astype('int64')
          # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C =
          # 'int64', D = 'int64'))
          # added = mixed_int_frame + (100*series).astype('int32')
          # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C =
          # 'int32', D = 'int64'))

          # TimeSeries
          ts = datetime_frame["A"]

          # 10890
          # we no longer allow auto timeseries broadcasting
          # and require explicit broadcasting
          added = datetime_frame.add(ts, axis="index")

          for key, col in datetime_frame.items():
              result = col + ts
              tm.assert_series_equal(added[key], result, check_names=False)
              assert added[key].name == key
              # Series + Series keeps the name only when both names agree.
              if col.name == ts.name:
                  assert result.name == "A"
              else:
                  assert result.name is None

          smaller_frame = datetime_frame[:-5]
          smaller_added = smaller_frame.add(ts, axis="index")

          tm.assert_index_equal(smaller_added.index, datetime_frame.index)

          smaller_ts = ts[:-5]
          smaller_added2 = datetime_frame.add(smaller_ts, axis="index")
          tm.assert_frame_equal(smaller_added, smaller_added2)

          # length 0, result is all-nan
          result = datetime_frame.add(ts[:0], axis="index")
          expected = DataFrame(
              np.nan, index=datetime_frame.index, columns=datetime_frame.columns
          )
          tm.assert_frame_equal(result, expected)

          # Frame is all-nan
          result = datetime_frame[:0].add(ts, axis="index")
          expected = DataFrame(
              np.nan, index=datetime_frame.index, columns=datetime_frame.columns
          )
          tm.assert_frame_equal(result, expected)

          # empty but with non-empty index
          frame = datetime_frame[:1].reindex(columns=[])
          result = frame.mul(ts, axis="index")
          assert len(result) == len(ts)

      def test_combineFunc(self, float_frame, mixed_float_frame):
          """Frame * scalar matches the same multiplication on the raw values."""
          result = float_frame * 2
          tm.assert_numpy_array_equal(result.values, float_frame.values * 2)

          # vs mix
          result = mixed_float_frame * 2
          for c, s in result.items():
              tm.assert_numpy_array_equal(s.values, mixed_float_frame[c].values * 2)
          _check_mixed_float(result, dtype=dict(C=None))

          # An empty frame stays empty.
          result = DataFrame() * 2
          assert result.index.equals(DataFrame().index)
          assert len(result.columns) == 0

      def test_comparisons(self, simple_frame, float_frame):
          """Each comparison operator matches numpy and rejects bad operands."""
          df1 = tm.makeTimeDataFrame()
          df2 = tm.makeTimeDataFrame()

          row = simple_frame.xs("a")
          ndim_5 = np.ones(df1.shape + (1, 1, 1))

          def test_comp(func):
              # frame vs frame
              result = func(df1, df2)
              tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values))

              # operands with more than two dimensions are rejected
              with pytest.raises(ValueError, match="dim must be <= 2"):
                  func(df1, ndim_5)

              # frame vs one of its rows
              result2 = func(simple_frame, row)
              tm.assert_numpy_array_equal(
                  result2.values, func(simple_frame.values, row.values)
              )

              # frame vs scalar
              result3 = func(float_frame, 0)
              tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0))

              # frames whose labels differ cannot be compared
              msg = "Can only compare identically-labeled DataFrame"
              with pytest.raises(ValueError, match=msg):
                  func(simple_frame, simple_frame[:2])

          test_comp(operator.eq)
          test_comp(operator.ne)
          test_comp(operator.lt)
          test_comp(operator.gt)
          test_comp(operator.ge)
          test_comp(operator.le)

      def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne):
          """Comparing a frame of strings with a number raises TypeError."""
          # GH 11565
          df = DataFrame(
              {x: {"x": "foo", "y": "bar", "z": "baz"} for x in ["a", "b", "c"]}
          )

          f = getattr(operator, compare_operators_no_eq_ne)
          with pytest.raises(TypeError):
              f(df, 0)

      def test_comparison_protected_from_errstate(self):
          """``frame < 0`` with a NaN present works under ``np.errstate(invalid="raise")``."""
          missing_df = tm.makeDataFrame()
          # Single-step setter so the NaN is guaranteed to land in missing_df;
          # a chained ``missing_df.iloc[0]["A"] = ...`` may write to a temporary
          # row, which would leave the test checking nothing.
          missing_df.iloc[0, missing_df.columns.get_loc("A")] = np.nan
          with np.errstate(invalid="ignore"):
              expected = missing_df.values < 0
          with np.errstate(invalid="raise"):
              result = (missing_df < 0).values
          tm.assert_numpy_array_equal(result, expected)

      def test_boolean_comparison(self):
          """Comparisons with arrays, lists and tuples broadcast or raise by shape."""
          # GH 4576
          # boolean comparisons with a tuple/list give unexpected results
          df = DataFrame(np.arange(6).reshape((3, 2)))
          b = np.array([2, 2])
          b_r = np.atleast_2d([2, 2])
          b_c = b_r.T
          lst = [2, 2, 2]
          tup = tuple(lst)

          # gt
          expected = DataFrame([[False, False], [False, True], [True, True]])
          result = df > b
          tm.assert_frame_equal(result, expected)

          result = df.values > b
          tm.assert_numpy_array_equal(result, expected.values)

          msg1d = "Unable to coerce to Series, length must be 2: given 3"
          msg2d = "Unable to coerce to DataFrame, shape must be"
          msg2db = "operands could not be broadcast together with shapes"
          with pytest.raises(ValueError, match=msg1d):
              # wrong shape
              df > lst

          with pytest.raises(ValueError, match=msg1d):
              # wrong shape
              df > tup

          # broadcasts like ndarray (GH#23000)
          result = df > b_r
          tm.assert_frame_equal(result, expected)

          result = df.values > b_r
          tm.assert_numpy_array_equal(result, expected.values)

          with pytest.raises(ValueError, match=msg2d):
              df > b_c

          with pytest.raises(ValueError, match=msg2db):
              df.values > b_c

          # ==
          expected = DataFrame([[False, False], [True, False], [False, False]])
          result = df == b
          tm.assert_frame_equal(result, expected)

          with pytest.raises(ValueError, match=msg1d):
              df == lst

          with pytest.raises(ValueError, match=msg1d):
              df == tup

          # broadcasts like ndarray (GH#23000)
          result = df == b_r
          tm.assert_frame_equal(result, expected)

          result = df.values == b_r
          tm.assert_numpy_array_equal(result, expected.values)

          with pytest.raises(ValueError, match=msg2d):
              df == b_c

          assert df.values.shape != b_c.shape

          # with alignment
          df = DataFrame(
              np.arange(6).reshape((3, 2)), columns=list("AB"), index=list("abc")
          )
          expected.index = df.index
          expected.columns = df.columns

          with pytest.raises(ValueError, match=msg1d):
              df == lst

          with pytest.raises(ValueError, match=msg1d):
              df == tup

      def test_combine_generic(self, float_frame):
          """``combine`` with ``np.add`` sums shared cells; the unshared column is NaN."""
          df1 = float_frame
          df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]]

          combined = df1.combine(df2, np.add)
          combined2 = df2.combine(df1, np.add)
          # "D" is missing from df2, whichever frame is the caller.
          assert combined["D"].isna().all()
          assert combined2["D"].isna().all()

          chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]]
          chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]]

          exp = (
              float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk)
              * 2
          )
          tm.assert_frame_equal(chunk, exp)
          tm.assert_frame_equal(chunk2, exp)

      def test_inplace_ops_alignment(self):
          """``+=`` / ``-=`` on a column subset align like the out-of-place ops."""
          # inplace ops / ops alignment
          # GH 8511

          columns = list("abcdefg")
          X_orig = DataFrame(
              np.arange(10 * len(columns)).reshape(-1, len(columns)),
              columns=columns,
              index=range(10),
          )
          Z = 100 * X_orig.iloc[:, 1:-1].copy()
          # Same labels as ``subs`` but in a different order, to force alignment.
          block1 = list("bedcf")
          subs = list("bcdef")

          # add
          X = X_orig.copy()
          result1 = (X[block1] + Z).reindex(columns=subs)

          X[block1] += Z
          result2 = X.reindex(columns=subs)

          X = X_orig.copy()
          result3 = (X[block1] + Z[block1]).reindex(columns=subs)

          X[block1] += Z[block1]
          result4 = X.reindex(columns=subs)

          tm.assert_frame_equal(result1, result2)
          tm.assert_frame_equal(result1, result3)
          tm.assert_frame_equal(result1, result4)

          # sub
          X = X_orig.copy()
          result1 = (X[block1] - Z).reindex(columns=subs)

          X[block1] -= Z
          result2 = X.reindex(columns=subs)

          X = X_orig.copy()
          result3 = (X[block1] - Z[block1]).reindex(columns=subs)

          X[block1] -= Z[block1]
          result4 = X.reindex(columns=subs)

          tm.assert_frame_equal(result1, result2)
          tm.assert_frame_equal(result1, result3)
          tm.assert_frame_equal(result1, result4)

      def test_inplace_ops_identity(self):
          """Augmented assignment keeps the same object, even when the dtype changes."""
          # GH 5104
          # make sure that we are actually changing the object
          s_orig = Series([1, 2, 3])
          df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5))

          # no dtype change
          s = s_orig.copy()
          s2 = s
          s += 1
          tm.assert_series_equal(s, s2)
          tm.assert_series_equal(s_orig + 1, s)
          assert s is s2
          assert s._data is s2._data

          df = df_orig.copy()
          df2 = df
          df += 1
          tm.assert_frame_equal(df, df2)
          tm.assert_frame_equal(df_orig + 1, df)
          assert df is df2
          assert df._data is df2._data

          # dtype change
          s = s_orig.copy()
          s2 = s
          s += 1.5
          tm.assert_series_equal(s, s2)
          tm.assert_series_equal(s_orig + 1.5, s)

          df = df_orig.copy()
          df2 = df
          df += 1.5
          tm.assert_frame_equal(df, df2)
          tm.assert_frame_equal(df_orig + 1.5, df)
          assert df is df2
          assert df._data is df2._data

          # mixed dtype
          arr = np.random.randint(0, 10, size=5)
          df_orig = DataFrame({"A": arr.copy(), "B": "foo"})
          df = df_orig.copy()
          df2 = df
          df["A"] += 1
          expected = DataFrame({"A": arr.copy() + 1, "B": "foo"})
          tm.assert_frame_equal(df, expected)
          tm.assert_frame_equal(df2, expected)
          assert df._data is df2._data

          df = df_orig.copy()
          df2 = df
          df["A"] += 1.5
          expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"})
          tm.assert_frame_equal(df, expected)
          tm.assert_frame_equal(df2, expected)
          assert df._data is df2._data

      @pytest.mark.parametrize(
          "op",
          [
              "add",
              "and",
              "div",
              "floordiv",
              "mod",
              "mul",
              "or",
              "pow",
              "sub",
              "truediv",
              "xor",
          ],
      )
      def test_inplace_ops_identity2(self, op):
          """Each ``__i<op>__`` updates the frame in place and returns that same frame."""
          if op == "div":
              # NOTE(review): presumably skipped because Python 3 has no
              # __div__/__idiv__ dunders -- confirm before dropping the param.
              return

          df = DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]})

          operand = 2
          if op in ("and", "or", "xor"):
              # cannot use floats for boolean ops
              df["a"] = [True, False, True]

          df_copy = df.copy()
          iop = "__i{}__".format(op)
          op = "__{}__".format(op)

          # no id change and value is correct
          result = getattr(df, iop)(operand)
          expected = getattr(df_copy, op)(operand)
          tm.assert_frame_equal(df, expected)
          # An augmented assignment rebinds the name to whatever the in-place
          # dunder returns, so identity is preserved only if it returns ``df``.
          assert result is df

      def test_alignment_non_pandas(self):
          """``_align_method_FRAME`` wraps list-likes and arrays, or raises on bad shape."""
          index = ["A", "B", "C"]
          columns = ["X", "Y", "Z"]
          df = pd.DataFrame(np.random.randn(3, 3), index=index, columns=columns)

          align = pd.core.ops._align_method_FRAME
          # 1-D inputs of matching length become a Series labelled by the chosen axis.
          for val in [
              [1, 2, 3],
              (1, 2, 3),
              np.array([1, 2, 3], dtype=np.int64),
              range(1, 4),
          ]:

              tm.assert_series_equal(
                  align(df, val, "index"), Series([1, 2, 3], index=df.index)
              )
              tm.assert_series_equal(
                  align(df, val, "columns"), Series([1, 2, 3], index=df.columns)
              )

          # length mismatch
          msg = "Unable to coerce to Series, length must be 3: given 2"
          for val in [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]:

              with pytest.raises(ValueError, match=msg):
                  align(df, val, "index")

              with pytest.raises(ValueError, match=msg):
                  align(df, val, "columns")

          # 2-D input of matching shape becomes a frame with df's labels.
          val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
          tm.assert_frame_equal(
              align(df, val, "index"), DataFrame(val, index=df.index, columns=df.columns)
          )
          tm.assert_frame_equal(
              align(df, val, "columns"),
              DataFrame(val, index=df.index, columns=df.columns),
          )

          # shape mismatch
          msg = "Unable to coerce to DataFrame, shape must be"
          val = np.array([[1, 2, 3], [4, 5, 6]])
          with pytest.raises(ValueError, match=msg):
              align(df, val, "index")

          with pytest.raises(ValueError, match=msg):
              align(df, val, "columns")

          # 3-D input is rejected outright.
          val = np.zeros((3, 3, 3))
          with pytest.raises(ValueError):
              align(df, val, "index")
          with pytest.raises(ValueError):
              align(df, val, "columns")

      def test_no_warning(self, all_arithmetic_operators):
          """Flex arithmetic between a frame and its NaN-bearing column emits no warning."""
          df = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, None]})
          b = df["B"]
          with tm.assert_produces_warning(None):
              getattr(df, all_arithmetic_operators)(b, 0)