test_partial.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. """
  2. test setting *parts* of objects both positionally and label based
  3. TODO: these should be split among the indexer tests
  4. """
  5. import numpy as np
  6. import pytest
  7. import pandas as pd
  8. from pandas import DataFrame, Index, Series, date_range
  9. import pandas._testing as tm
  10. class TestPartialSetting:
  11. def test_partial_setting(self):
  12. # GH2578, allow ix and friends to partially set
  13. # series
  14. s_orig = Series([1, 2, 3])
  15. s = s_orig.copy()
  16. s[5] = 5
  17. expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
  18. tm.assert_series_equal(s, expected)
  19. s = s_orig.copy()
  20. s.loc[5] = 5
  21. expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
  22. tm.assert_series_equal(s, expected)
  23. s = s_orig.copy()
  24. s[5] = 5.0
  25. expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
  26. tm.assert_series_equal(s, expected)
  27. s = s_orig.copy()
  28. s.loc[5] = 5.0
  29. expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
  30. tm.assert_series_equal(s, expected)
  31. # iloc/iat raise
  32. s = s_orig.copy()
  33. with pytest.raises(IndexError):
  34. s.iloc[3] = 5.0
  35. with pytest.raises(IndexError):
  36. s.iat[3] = 5.0
  37. # ## frame ##
  38. df_orig = DataFrame(
  39. np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
  40. )
  41. # iloc/iat raise
  42. df = df_orig.copy()
  43. with pytest.raises(IndexError):
  44. df.iloc[4, 2] = 5.0
  45. with pytest.raises(IndexError):
  46. df.iat[4, 2] = 5.0
  47. # row setting where it exists
  48. expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]}))
  49. df = df_orig.copy()
  50. df.iloc[1] = df.iloc[2]
  51. tm.assert_frame_equal(df, expected)
  52. expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]}))
  53. df = df_orig.copy()
  54. df.loc[1] = df.loc[2]
  55. tm.assert_frame_equal(df, expected)
  56. # like 2578, partial setting with dtype preservation
  57. expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]}))
  58. df = df_orig.copy()
  59. df.loc[3] = df.loc[2]
  60. tm.assert_frame_equal(df, expected)
  61. # single dtype frame, overwrite
  62. expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]}))
  63. df = df_orig.copy()
  64. df.loc[:, "B"] = df.loc[:, "A"]
  65. tm.assert_frame_equal(df, expected)
  66. # mixed dtype frame, overwrite
  67. expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])}))
  68. df = df_orig.copy()
  69. df["B"] = df["B"].astype(np.float64)
  70. df.loc[:, "B"] = df.loc[:, "A"]
  71. tm.assert_frame_equal(df, expected)
  72. # single dtype frame, partial setting
  73. expected = df_orig.copy()
  74. expected["C"] = df["A"]
  75. df = df_orig.copy()
  76. df.loc[:, "C"] = df.loc[:, "A"]
  77. tm.assert_frame_equal(df, expected)
  78. # mixed frame, partial setting
  79. expected = df_orig.copy()
  80. expected["C"] = df["A"]
  81. df = df_orig.copy()
  82. df.loc[:, "C"] = df.loc[:, "A"]
  83. tm.assert_frame_equal(df, expected)
  84. # GH 8473
  85. dates = date_range("1/1/2000", periods=8)
  86. df_orig = DataFrame(
  87. np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]
  88. )
  89. expected = pd.concat(
  90. [df_orig, DataFrame({"A": 7}, index=[dates[-1] + dates.freq])], sort=True
  91. )
  92. df = df_orig.copy()
  93. df.loc[dates[-1] + dates.freq, "A"] = 7
  94. tm.assert_frame_equal(df, expected)
  95. df = df_orig.copy()
  96. df.at[dates[-1] + dates.freq, "A"] = 7
  97. tm.assert_frame_equal(df, expected)
  98. exp_other = DataFrame({0: 7}, index=[dates[-1] + dates.freq])
  99. expected = pd.concat([df_orig, exp_other], axis=1)
  100. df = df_orig.copy()
  101. df.loc[dates[-1] + dates.freq, 0] = 7
  102. tm.assert_frame_equal(df, expected)
  103. df = df_orig.copy()
  104. df.at[dates[-1] + dates.freq, 0] = 7
  105. tm.assert_frame_equal(df, expected)
  106. def test_partial_setting_mixed_dtype(self):
  107. # in a mixed dtype environment, try to preserve dtypes
  108. # by appending
  109. df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])
  110. s = df.loc[1].copy()
  111. s.name = 2
  112. expected = df.append(s)
  113. df.loc[2] = df.loc[1]
  114. tm.assert_frame_equal(df, expected)
  115. # columns will align
  116. df = DataFrame(columns=["A", "B"])
  117. df.loc[0] = Series(1, index=range(4))
  118. tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0]))
  119. # columns will align
  120. df = DataFrame(columns=["A", "B"])
  121. df.loc[0] = Series(1, index=["B"])
  122. exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
  123. tm.assert_frame_equal(df, exp)
  124. # list-like must conform
  125. df = DataFrame(columns=["A", "B"])
  126. with pytest.raises(ValueError):
  127. df.loc[0] = [1, 2, 3]
  128. # TODO: #15657, these are left as object and not coerced
  129. df = DataFrame(columns=["A", "B"])
  130. df.loc[3] = [6, 7]
  131. exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
  132. tm.assert_frame_equal(df, exp)
  133. def test_series_partial_set(self):
  134. # partial set with new index
  135. # Regression from GH4825
  136. ser = Series([0.1, 0.2], index=[1, 2])
  137. # loc equiv to .reindex
  138. expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
  139. with pytest.raises(KeyError, match="with any missing labels"):
  140. result = ser.loc[[3, 2, 3]]
  141. result = ser.reindex([3, 2, 3])
  142. tm.assert_series_equal(result, expected, check_index_type=True)
  143. expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
  144. with pytest.raises(KeyError, match="with any missing labels"):
  145. result = ser.loc[[3, 2, 3, "x"]]
  146. result = ser.reindex([3, 2, 3, "x"])
  147. tm.assert_series_equal(result, expected, check_index_type=True)
  148. expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
  149. result = ser.loc[[2, 2, 1]]
  150. tm.assert_series_equal(result, expected, check_index_type=True)
  151. expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
  152. with pytest.raises(KeyError, match="with any missing labels"):
  153. result = ser.loc[[2, 2, "x", 1]]
  154. result = ser.reindex([2, 2, "x", 1])
  155. tm.assert_series_equal(result, expected, check_index_type=True)
  156. # raises as nothing in in the index
  157. msg = (
  158. r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are"
  159. r" in the \[index\]\""
  160. )
  161. with pytest.raises(KeyError, match=msg):
  162. ser.loc[[3, 3, 3]]
  163. expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
  164. with pytest.raises(KeyError, match="with any missing labels"):
  165. ser.loc[[2, 2, 3]]
  166. result = ser.reindex([2, 2, 3])
  167. tm.assert_series_equal(result, expected, check_index_type=True)
  168. s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
  169. expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
  170. with pytest.raises(KeyError, match="with any missing labels"):
  171. s.loc[[3, 4, 4]]
  172. result = s.reindex([3, 4, 4])
  173. tm.assert_series_equal(result, expected, check_index_type=True)
  174. s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
  175. expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
  176. with pytest.raises(KeyError, match="with any missing labels"):
  177. s.loc[[5, 3, 3]]
  178. result = s.reindex([5, 3, 3])
  179. tm.assert_series_equal(result, expected, check_index_type=True)
  180. s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
  181. expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
  182. with pytest.raises(KeyError, match="with any missing labels"):
  183. s.loc[[5, 4, 4]]
  184. result = s.reindex([5, 4, 4])
  185. tm.assert_series_equal(result, expected, check_index_type=True)
  186. s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
  187. expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
  188. with pytest.raises(KeyError, match="with any missing labels"):
  189. s.loc[[7, 2, 2]]
  190. result = s.reindex([7, 2, 2])
  191. tm.assert_series_equal(result, expected, check_index_type=True)
  192. s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
  193. expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
  194. with pytest.raises(KeyError, match="with any missing labels"):
  195. s.loc[[4, 5, 5]]
  196. result = s.reindex([4, 5, 5])
  197. tm.assert_series_equal(result, expected, check_index_type=True)
  198. # iloc
  199. expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
  200. result = ser.iloc[[1, 1, 0, 0]]
  201. tm.assert_series_equal(result, expected, check_index_type=True)
  202. def test_series_partial_set_with_name(self):
  203. # GH 11497
  204. idx = Index([1, 2], dtype="int64", name="idx")
  205. ser = Series([0.1, 0.2], index=idx, name="s")
  206. # loc
  207. with pytest.raises(KeyError, match="with any missing labels"):
  208. ser.loc[[3, 2, 3]]
  209. with pytest.raises(KeyError, match="with any missing labels"):
  210. ser.loc[[3, 2, 3, "x"]]
  211. exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
  212. expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
  213. result = ser.loc[[2, 2, 1]]
  214. tm.assert_series_equal(result, expected, check_index_type=True)
  215. with pytest.raises(KeyError, match="with any missing labels"):
  216. ser.loc[[2, 2, "x", 1]]
  217. # raises as nothing in in the index
  218. msg = (
  219. r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64',"
  220. r" name='idx'\)\] are in the \[index\]\""
  221. )
  222. with pytest.raises(KeyError, match=msg):
  223. ser.loc[[3, 3, 3]]
  224. with pytest.raises(KeyError, match="with any missing labels"):
  225. ser.loc[[2, 2, 3]]
  226. idx = Index([1, 2, 3], dtype="int64", name="idx")
  227. with pytest.raises(KeyError, match="with any missing labels"):
  228. Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]
  229. idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
  230. with pytest.raises(KeyError, match="with any missing labels"):
  231. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]
  232. idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
  233. with pytest.raises(KeyError, match="with any missing labels"):
  234. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]
  235. idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
  236. with pytest.raises(KeyError, match="with any missing labels"):
  237. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]
  238. idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
  239. with pytest.raises(KeyError, match="with any missing labels"):
  240. Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]
  241. # iloc
  242. exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
  243. expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
  244. result = ser.iloc[[1, 1, 0, 0]]
  245. tm.assert_series_equal(result, expected, check_index_type=True)
  246. def test_partial_set_invalid(self):
  247. # GH 4940
  248. # allow only setting of 'valid' values
  249. orig = tm.makeTimeDataFrame()
  250. df = orig.copy()
  251. # don't allow not string inserts
  252. with pytest.raises(TypeError):
  253. df.loc[100.0, :] = df.iloc[0]
  254. with pytest.raises(TypeError):
  255. df.loc[100, :] = df.iloc[0]
  256. # allow object conversion here
  257. df = orig.copy()
  258. df.loc["a", :] = df.iloc[0]
  259. exp = orig.append(Series(df.iloc[0], name="a"))
  260. tm.assert_frame_equal(df, exp)
  261. tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
  262. assert df.index.dtype == "object"
  263. def test_partial_set_empty_series(self):
  264. # GH5226
  265. # partially set with an empty object series
  266. s = Series(dtype=object)
  267. s.loc[1] = 1
  268. tm.assert_series_equal(s, Series([1], index=[1]))
  269. s.loc[3] = 3
  270. tm.assert_series_equal(s, Series([1, 3], index=[1, 3]))
  271. s = Series(dtype=object)
  272. s.loc[1] = 1.0
  273. tm.assert_series_equal(s, Series([1.0], index=[1]))
  274. s.loc[3] = 3.0
  275. tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3]))
  276. s = Series(dtype=object)
  277. s.loc["foo"] = 1
  278. tm.assert_series_equal(s, Series([1], index=["foo"]))
  279. s.loc["bar"] = 3
  280. tm.assert_series_equal(s, Series([1, 3], index=["foo", "bar"]))
  281. s.loc[3] = 4
  282. tm.assert_series_equal(s, Series([1, 3, 4], index=["foo", "bar", 3]))
  283. def test_partial_set_empty_frame(self):
  284. # partially set with an empty object
  285. # frame
  286. df = DataFrame()
  287. with pytest.raises(ValueError):
  288. df.loc[1] = 1
  289. with pytest.raises(ValueError):
  290. df.loc[1] = Series([1], index=["foo"])
  291. with pytest.raises(ValueError):
  292. df.loc[:, 1] = 1
  293. # these work as they don't really change
  294. # anything but the index
  295. # GH5632
  296. expected = DataFrame(columns=["foo"], index=Index([], dtype="object"))
  297. def f():
  298. df = DataFrame(index=Index([], dtype="object"))
  299. df["foo"] = Series([], dtype="object")
  300. return df
  301. tm.assert_frame_equal(f(), expected)
  302. def f():
  303. df = DataFrame()
  304. df["foo"] = Series(df.index)
  305. return df
  306. tm.assert_frame_equal(f(), expected)
  307. def f():
  308. df = DataFrame()
  309. df["foo"] = df.index
  310. return df
  311. tm.assert_frame_equal(f(), expected)
  312. expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
  313. expected["foo"] = expected["foo"].astype("float64")
  314. def f():
  315. df = DataFrame(index=Index([], dtype="int64"))
  316. df["foo"] = []
  317. return df
  318. tm.assert_frame_equal(f(), expected)
  319. def f():
  320. df = DataFrame(index=Index([], dtype="int64"))
  321. df["foo"] = Series(np.arange(len(df)), dtype="float64")
  322. return df
  323. tm.assert_frame_equal(f(), expected)
  324. def f():
  325. df = DataFrame(index=Index([], dtype="int64"))
  326. df["foo"] = range(len(df))
  327. return df
  328. expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
  329. expected["foo"] = expected["foo"].astype("float64")
  330. tm.assert_frame_equal(f(), expected)
  331. df = DataFrame()
  332. tm.assert_index_equal(df.columns, Index([], dtype=object))
  333. df2 = DataFrame()
  334. df2[1] = Series([1], index=["foo"])
  335. df.loc[:, 1] = Series([1], index=["foo"])
  336. tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
  337. tm.assert_frame_equal(df, df2)
  338. # no index to start
  339. expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
  340. df = DataFrame(columns=["A", "B"])
  341. df[0] = Series(1, index=range(4))
  342. df.dtypes
  343. str(df)
  344. tm.assert_frame_equal(df, expected)
  345. df = DataFrame(columns=["A", "B"])
  346. df.loc[:, 0] = Series(1, index=range(4))
  347. df.dtypes
  348. str(df)
  349. tm.assert_frame_equal(df, expected)
  350. def test_partial_set_empty_frame_row(self):
  351. # GH5720, GH5744
  352. # don't create rows when empty
  353. expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
  354. expected["A"] = expected["A"].astype("int64")
  355. expected["B"] = expected["B"].astype("float64")
  356. expected["New"] = expected["New"].astype("float64")
  357. df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
  358. y = df[df.A > 5]
  359. y["New"] = np.nan
  360. tm.assert_frame_equal(y, expected)
  361. # tm.assert_frame_equal(y,expected)
  362. expected = DataFrame(columns=["a", "b", "c c", "d"])
  363. expected["d"] = expected["d"].astype("int64")
  364. df = DataFrame(columns=["a", "b", "c c"])
  365. df["d"] = 3
  366. tm.assert_frame_equal(df, expected)
  367. tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))
  368. # reindex columns is ok
  369. df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
  370. y = df[df.A > 5]
  371. result = y.reindex(columns=["A", "B", "C"])
  372. expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64"))
  373. expected["A"] = expected["A"].astype("int64")
  374. expected["B"] = expected["B"].astype("float64")
  375. expected["C"] = expected["C"].astype("float64")
  376. tm.assert_frame_equal(result, expected)
  377. def test_partial_set_empty_frame_set_series(self):
  378. # GH 5756
  379. # setting with empty Series
  380. df = DataFrame(Series(dtype=object))
  381. tm.assert_frame_equal(df, DataFrame({0: Series(dtype=object)}))
  382. df = DataFrame(Series(name="foo", dtype=object))
  383. tm.assert_frame_equal(df, DataFrame({"foo": Series(dtype=object)}))
  384. def test_partial_set_empty_frame_empty_copy_assignment(self):
  385. # GH 5932
  386. # copy on empty with assignment fails
  387. df = DataFrame(index=[0])
  388. df = df.copy()
  389. df["a"] = 0
  390. expected = DataFrame(0, index=[0], columns=["a"])
  391. tm.assert_frame_equal(df, expected)
  392. def test_partial_set_empty_frame_empty_consistencies(self):
  393. # GH 6171
  394. # consistency on empty frames
  395. df = DataFrame(columns=["x", "y"])
  396. df["x"] = [1, 2]
  397. expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan]))
  398. tm.assert_frame_equal(df, expected, check_dtype=False)
  399. df = DataFrame(columns=["x", "y"])
  400. df["x"] = ["1", "2"]
  401. expected = DataFrame(dict(x=["1", "2"], y=[np.nan, np.nan]), dtype=object)
  402. tm.assert_frame_equal(df, expected)
  403. df = DataFrame(columns=["x", "y"])
  404. df.loc[0, "x"] = 1
  405. expected = DataFrame(dict(x=[1], y=[np.nan]))
  406. tm.assert_frame_equal(df, expected, check_dtype=False)