test_setitem.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. import numpy as np
  2. from numpy.random import randn
  3. import pytest
  4. import pandas as pd
  5. from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna
  6. import pandas._testing as tm
  7. import pandas.core.common as com
  8. class TestMultiIndexSetItem:
  9. def test_setitem_multiindex(self):
  10. for index_fn in ("loc",):
  11. def assert_equal(a, b):
  12. assert a == b
  13. def check(target, indexers, value, compare_fn, expected=None):
  14. fn = getattr(target, index_fn)
  15. fn.__setitem__(indexers, value)
  16. result = fn.__getitem__(indexers)
  17. if expected is None:
  18. expected = value
  19. compare_fn(result, expected)
  20. # GH7190
  21. index = MultiIndex.from_product(
  22. [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
  23. )
  24. t, n = 0, 2
  25. df = DataFrame(
  26. np.nan,
  27. columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
  28. index=index,
  29. )
  30. check(target=df, indexers=((t, n), "X"), value=0, compare_fn=assert_equal)
  31. df = DataFrame(
  32. -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index
  33. )
  34. check(target=df, indexers=((t, n), "X"), value=1, compare_fn=assert_equal)
  35. df = DataFrame(
  36. columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index
  37. )
  38. check(target=df, indexers=((t, n), "X"), value=2, compare_fn=assert_equal)
  39. # gh-7218: assigning with 0-dim arrays
  40. df = DataFrame(
  41. -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index
  42. )
  43. check(
  44. target=df,
  45. indexers=((t, n), "X"),
  46. value=np.array(3),
  47. compare_fn=assert_equal,
  48. expected=3,
  49. )
  50. # GH5206
  51. df = DataFrame(
  52. np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
  53. )
  54. df["F"] = 99
  55. row_selection = df["A"] % 2 == 0
  56. col_selection = ["B", "C"]
  57. df.loc[row_selection, col_selection] = df["F"]
  58. output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
  59. tm.assert_frame_equal(df.loc[row_selection, col_selection], output)
  60. check(
  61. target=df,
  62. indexers=(row_selection, col_selection),
  63. value=df["F"],
  64. compare_fn=tm.assert_frame_equal,
  65. expected=output,
  66. )
  67. # GH11372
  68. idx = MultiIndex.from_product(
  69. [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
  70. )
  71. cols = MultiIndex.from_product(
  72. [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
  73. )
  74. df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols)
  75. subidx = MultiIndex.from_tuples(
  76. [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))]
  77. )
  78. subcols = MultiIndex.from_tuples(
  79. [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))]
  80. )
  81. vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols)
  82. check(
  83. target=df,
  84. indexers=(subidx, subcols),
  85. value=vals,
  86. compare_fn=tm.assert_frame_equal,
  87. )
  88. # set all columns
  89. vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols)
  90. check(
  91. target=df,
  92. indexers=(subidx, slice(None, None, None)),
  93. value=vals,
  94. compare_fn=tm.assert_frame_equal,
  95. )
  96. # identity
  97. copy = df.copy()
  98. check(
  99. target=df,
  100. indexers=(df.index, df.columns),
  101. value=df,
  102. compare_fn=tm.assert_frame_equal,
  103. expected=copy,
  104. )
  105. def test_multiindex_setitem(self):
  106. # GH 3738
  107. # setting with a multi-index right hand side
  108. arrays = [
  109. np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
  110. np.array(["one", "two", "one", "one", "two", "one"]),
  111. np.arange(0, 6, 1),
  112. ]
  113. df_orig = DataFrame(
  114. np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"]
  115. ).sort_index()
  116. expected = df_orig.loc[["bar"]] * 2
  117. df = df_orig.copy()
  118. df.loc[["bar"]] *= 2
  119. tm.assert_frame_equal(df.loc[["bar"]], expected)
  120. # raise because these have differing levels
  121. with pytest.raises(TypeError):
  122. df.loc["bar"] *= 2
  123. # from SO
  124. # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
  125. df_orig = DataFrame.from_dict(
  126. {
  127. "price": {
  128. ("DE", "Coal", "Stock"): 2,
  129. ("DE", "Gas", "Stock"): 4,
  130. ("DE", "Elec", "Demand"): 1,
  131. ("FR", "Gas", "Stock"): 5,
  132. ("FR", "Solar", "SupIm"): 0,
  133. ("FR", "Wind", "SupIm"): 0,
  134. }
  135. }
  136. )
  137. df_orig.index = MultiIndex.from_tuples(
  138. df_orig.index, names=["Sit", "Com", "Type"]
  139. )
  140. expected = df_orig.copy()
  141. expected.iloc[[0, 2, 3]] *= 2
  142. idx = pd.IndexSlice
  143. df = df_orig.copy()
  144. df.loc[idx[:, :, "Stock"], :] *= 2
  145. tm.assert_frame_equal(df, expected)
  146. df = df_orig.copy()
  147. df.loc[idx[:, :, "Stock"], "price"] *= 2
  148. tm.assert_frame_equal(df, expected)
  149. def test_multiindex_assignment(self):
  150. # GH3777 part 2
  151. # mixed dtype
  152. df = DataFrame(
  153. np.random.randint(5, 10, size=9).reshape(3, 3),
  154. columns=list("abc"),
  155. index=[[4, 4, 8], [8, 10, 12]],
  156. )
  157. df["d"] = np.nan
  158. arr = np.array([0.0, 1.0])
  159. df.loc[4, "d"] = arr
  160. tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
  161. # single dtype
  162. df = DataFrame(
  163. np.random.randint(5, 10, size=9).reshape(3, 3),
  164. columns=list("abc"),
  165. index=[[4, 4, 8], [8, 10, 12]],
  166. )
  167. df.loc[4, "c"] = arr
  168. exp = Series(arr, index=[8, 10], name="c", dtype="float64")
  169. tm.assert_series_equal(df.loc[4, "c"], exp)
  170. # scalar ok
  171. df.loc[4, "c"] = 10
  172. exp = Series(10, index=[8, 10], name="c", dtype="float64")
  173. tm.assert_series_equal(df.loc[4, "c"], exp)
  174. # invalid assignments
  175. with pytest.raises(ValueError):
  176. df.loc[4, "c"] = [0, 1, 2, 3]
  177. with pytest.raises(ValueError):
  178. df.loc[4, "c"] = [0]
  179. # groupby example
  180. NUM_ROWS = 100
  181. NUM_COLS = 10
  182. col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
  183. index_cols = col_names[:5]
  184. df = DataFrame(
  185. np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
  186. dtype=np.int64,
  187. columns=col_names,
  188. )
  189. df = df.set_index(index_cols).sort_index()
  190. grp = df.groupby(level=index_cols[:4])
  191. df["new_col"] = np.nan
  192. f_index = np.arange(5)
  193. def f(name, df2):
  194. return Series(np.arange(df2.shape[0]), name=df2.index.values[0]).reindex(
  195. f_index
  196. )
  197. # TODO(wesm): unused?
  198. # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T
  199. # we are actually operating on a copy here
  200. # but in this case, that's ok
  201. for name, df2 in grp:
  202. new_vals = np.arange(df2.shape[0])
  203. df.loc[name, "new_col"] = new_vals
  204. def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data):
  205. ymd = multiindex_year_month_day_dataframe_random_data
  206. s = ymd["A"]
  207. s[2000, 3] = np.nan
  208. assert isna(s.values[42:65]).all()
  209. assert notna(s.values[:42]).all()
  210. assert notna(s.values[65:]).all()
  211. s[2000, 3, 10] = np.nan
  212. assert isna(s[49])
  213. def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
  214. frame = multiindex_dataframe_random_data
  215. df = frame.T.copy()
  216. values = df.values
  217. result = df[df > 0]
  218. expected = df.where(df > 0)
  219. tm.assert_frame_equal(result, expected)
  220. df[df > 0] = 5
  221. values[values > 0] = 5
  222. tm.assert_almost_equal(df.values, values)
  223. df[df == 5] = 0
  224. values[values == 5] = 0
  225. tm.assert_almost_equal(df.values, values)
  226. # a df that needs alignment first
  227. df[df[:-1] < 0] = 2
  228. np.putmask(values[:-1], values[:-1] < 0, 2)
  229. tm.assert_almost_equal(df.values, values)
  230. with pytest.raises(TypeError, match="boolean values only"):
  231. df[df * 0] = 2
  232. def test_frame_getitem_setitem_multislice(self):
  233. levels = [["t1", "t2"], ["a", "b", "c"]]
  234. codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
  235. midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
  236. df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
  237. result = df.loc[:, "value"]
  238. tm.assert_series_equal(df["value"], result)
  239. result = df.loc[df.index[1:3], "value"]
  240. tm.assert_series_equal(df["value"][1:3], result)
  241. result = df.loc[:, :]
  242. tm.assert_frame_equal(df, result)
  243. result = df
  244. df.loc[:, "value"] = 10
  245. result["value"] = 10
  246. tm.assert_frame_equal(df, result)
  247. df.loc[:, :] = 10
  248. tm.assert_frame_equal(df, result)
  249. def test_frame_setitem_multi_column(self):
  250. df = DataFrame(randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]])
  251. cp = df.copy()
  252. cp["a"] = cp["b"]
  253. tm.assert_frame_equal(cp["a"], cp["b"])
  254. # set with ndarray
  255. cp = df.copy()
  256. cp["a"] = cp["b"].values
  257. tm.assert_frame_equal(cp["a"], cp["b"])
  258. # ---------------------------------------
  259. # #1803
  260. columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
  261. df = DataFrame(index=[1, 3, 5], columns=columns)
  262. # Works, but adds a column instead of updating the two existing ones
  263. df["A"] = 0.0 # Doesn't work
  264. assert (df["A"].values == 0).all()
  265. # it broadcasts
  266. df["B", "1"] = [1, 2, 3]
  267. df["A"] = df["B", "1"]
  268. sliced_a1 = df["A", "1"]
  269. sliced_a2 = df["A", "2"]
  270. sliced_b1 = df["B", "1"]
  271. tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
  272. tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
  273. assert sliced_a1.name == ("A", "1")
  274. assert sliced_a2.name == ("A", "2")
  275. assert sliced_b1.name == ("B", "1")
  276. def test_getitem_setitem_tuple_plus_columns(
  277. self, multiindex_year_month_day_dataframe_random_data
  278. ):
  279. # GH #1013
  280. ymd = multiindex_year_month_day_dataframe_random_data
  281. df = ymd[:5]
  282. result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
  283. expected = df.loc[2000, 1, 6][["A", "B", "C"]]
  284. tm.assert_series_equal(result, expected)
  285. def test_getitem_setitem_slice_integers(self):
  286. index = MultiIndex(
  287. levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
  288. )
  289. frame = DataFrame(
  290. np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"]
  291. )
  292. res = frame.loc[1:2]
  293. exp = frame.reindex(frame.index[2:])
  294. tm.assert_frame_equal(res, exp)
  295. frame.loc[1:2] = 7
  296. assert (frame.loc[1:2] == 7).values.all()
  297. series = Series(np.random.randn(len(index)), index=index)
  298. res = series.loc[1:2]
  299. exp = series.reindex(series.index[2:])
  300. tm.assert_series_equal(res, exp)
  301. series.loc[1:2] = 7
  302. assert (series.loc[1:2] == 7).values.all()
  303. def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
  304. frame = multiindex_dataframe_random_data
  305. dft = frame.T
  306. s = dft["foo", "two"]
  307. dft["foo", "two"] = s > s.median()
  308. tm.assert_series_equal(dft["foo", "two"], s > s.median())
  309. # assert isinstance(dft._data.blocks[1].items, MultiIndex)
  310. reindexed = dft.reindex(columns=[("foo", "two")])
  311. tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
  312. def test_set_column_scalar_with_loc(self, multiindex_dataframe_random_data):
  313. frame = multiindex_dataframe_random_data
  314. subset = frame.index[[1, 4, 5]]
  315. frame.loc[subset] = 99
  316. assert (frame.loc[subset].values == 99).all()
  317. col = frame["B"]
  318. col[subset] = 97
  319. assert (frame.loc[subset, "B"] == 97).all()
  320. def test_nonunique_assignment_1750(self):
  321. df = DataFrame(
  322. [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
  323. )
  324. df = df.set_index(["A", "B"])
  325. ix = MultiIndex.from_tuples([(1, 1)])
  326. df.loc[ix, "C"] = "_"
  327. assert (df.xs((1, 1))["C"] == "_").all()
  328. def test_astype_assignment_with_dups(self):
  329. # GH 4686
  330. # assignment with dups that has a dtype change
  331. cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
  332. df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
  333. index = df.index.copy()
  334. df["A"] = df["A"].astype(np.float64)
  335. tm.assert_index_equal(df.index, index)
  336. def test_setitem_nonmonotonic(self):
  337. # https://github.com/pandas-dev/pandas/issues/31449
  338. index = pd.MultiIndex.from_tuples(
  339. [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
  340. )
  341. df = pd.DataFrame(data=[0, 1, 2], index=index, columns=["e"])
  342. df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
  343. expected = pd.DataFrame({"e": [99, 1, 100]}, index=index)
  344. tm.assert_frame_equal(df, expected)
  345. def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
  346. # this works because we are modifying the underlying array
  347. # really a no-no
  348. df = multiindex_dataframe_random_data.T
  349. df["foo"].values[:] = 0
  350. assert (df["foo"].values == 0).all()
  351. def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
  352. # will raise/warn as its chained assignment
  353. df = multiindex_dataframe_random_data.T
  354. msg = "A value is trying to be set on a copy of a slice from a DataFrame"
  355. with pytest.raises(com.SettingWithCopyError, match=msg):
  356. df["foo"]["one"] = 2
  357. def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
  358. frame = multiindex_dataframe_random_data.T
  359. expected = frame
  360. df = frame.copy()
  361. msg = "A value is trying to be set on a copy of a slice from a DataFrame"
  362. with pytest.raises(com.SettingWithCopyError, match=msg):
  363. df["foo"]["one"] = 2
  364. result = df
  365. tm.assert_frame_equal(result, expected)