test_query_eval.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178
  1. from io import StringIO
  2. import operator
  3. import numpy as np
  4. import pytest
  5. import pandas.util._test_decorators as td
  6. import pandas as pd
  7. from pandas import DataFrame, Index, MultiIndex, Series, date_range
  8. import pandas._testing as tm
  9. from pandas.core.computation.check import _NUMEXPR_INSTALLED
  10. PARSERS = "python", "pandas"
  11. ENGINES = "python", pytest.param("numexpr", marks=td.skip_if_no_ne)
  12. @pytest.fixture(params=PARSERS, ids=lambda x: x)
  13. def parser(request):
  14. return request.param
  15. @pytest.fixture(params=ENGINES, ids=lambda x: x)
  16. def engine(request):
  17. return request.param
  18. def skip_if_no_pandas_parser(parser):
  19. if parser != "pandas":
  20. pytest.skip(f"cannot evaluate with parser {repr(parser)}")
  21. class TestCompat:
  22. def setup_method(self, method):
  23. self.df = DataFrame({"A": [1, 2, 3]})
  24. self.expected1 = self.df[self.df.A > 0]
  25. self.expected2 = self.df.A + 1
  26. def test_query_default(self):
  27. # GH 12749
  28. # this should always work, whether _NUMEXPR_INSTALLED or not
  29. df = self.df
  30. result = df.query("A>0")
  31. tm.assert_frame_equal(result, self.expected1)
  32. result = df.eval("A+1")
  33. tm.assert_series_equal(result, self.expected2, check_names=False)
  34. def test_query_None(self):
  35. df = self.df
  36. result = df.query("A>0", engine=None)
  37. tm.assert_frame_equal(result, self.expected1)
  38. result = df.eval("A+1", engine=None)
  39. tm.assert_series_equal(result, self.expected2, check_names=False)
  40. def test_query_python(self):
  41. df = self.df
  42. result = df.query("A>0", engine="python")
  43. tm.assert_frame_equal(result, self.expected1)
  44. result = df.eval("A+1", engine="python")
  45. tm.assert_series_equal(result, self.expected2, check_names=False)
  46. def test_query_numexpr(self):
  47. df = self.df
  48. if _NUMEXPR_INSTALLED:
  49. result = df.query("A>0", engine="numexpr")
  50. tm.assert_frame_equal(result, self.expected1)
  51. result = df.eval("A+1", engine="numexpr")
  52. tm.assert_series_equal(result, self.expected2, check_names=False)
  53. else:
  54. with pytest.raises(ImportError):
  55. df.query("A>0", engine="numexpr")
  56. with pytest.raises(ImportError):
  57. df.eval("A+1", engine="numexpr")
  58. class TestDataFrameEval:
  59. def test_ops(self):
  60. # tst ops and reversed ops in evaluation
  61. # GH7198
  62. # smaller hits python, larger hits numexpr
  63. for n in [4, 4000]:
  64. df = DataFrame(1, index=range(n), columns=list("abcd"))
  65. df.iloc[0] = 2
  66. m = df.mean()
  67. for op_str, op, rop in [
  68. ("+", "__add__", "__radd__"),
  69. ("-", "__sub__", "__rsub__"),
  70. ("*", "__mul__", "__rmul__"),
  71. ("/", "__truediv__", "__rtruediv__"),
  72. ]:
  73. base = DataFrame( # noqa
  74. np.tile(m.values, n).reshape(n, -1), columns=list("abcd")
  75. )
  76. expected = eval("base{op}df".format(op=op_str))
  77. # ops as strings
  78. result = eval("m{op}df".format(op=op_str))
  79. tm.assert_frame_equal(result, expected)
  80. # these are commutative
  81. if op in ["+", "*"]:
  82. result = getattr(df, op)(m)
  83. tm.assert_frame_equal(result, expected)
  84. # these are not
  85. elif op in ["-", "/"]:
  86. result = getattr(df, rop)(m)
  87. tm.assert_frame_equal(result, expected)
  88. # GH7192: Note we need a large number of rows to ensure this
  89. # goes through the numexpr path
  90. df = DataFrame(dict(A=np.random.randn(25000)))
  91. df.iloc[0:5] = np.nan
  92. expected = 1 - np.isnan(df.iloc[0:25])
  93. result = (1 - np.isnan(df)).iloc[0:25]
  94. tm.assert_frame_equal(result, expected)
  95. def test_query_non_str(self):
  96. # GH 11485
  97. df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "b"]})
  98. msg = "expr must be a string to be evaluated"
  99. with pytest.raises(ValueError, match=msg):
  100. df.query(lambda x: x.B == "b")
  101. with pytest.raises(ValueError, match=msg):
  102. df.query(111)
  103. def test_query_empty_string(self):
  104. # GH 13139
  105. df = pd.DataFrame({"A": [1, 2, 3]})
  106. msg = "expr cannot be an empty string"
  107. with pytest.raises(ValueError, match=msg):
  108. df.query("")
  109. def test_eval_resolvers_as_list(self):
  110. # GH 14095
  111. df = DataFrame(np.random.randn(10, 2), columns=list("ab"))
  112. dict1 = {"a": 1}
  113. dict2 = {"b": 2}
  114. assert df.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"]
  115. assert pd.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"]
  116. class TestDataFrameQueryWithMultiIndex:
  117. def test_query_with_named_multiindex(self, parser, engine):
  118. skip_if_no_pandas_parser(parser)
  119. a = np.random.choice(["red", "green"], size=10)
  120. b = np.random.choice(["eggs", "ham"], size=10)
  121. index = MultiIndex.from_arrays([a, b], names=["color", "food"])
  122. df = DataFrame(np.random.randn(10, 2), index=index)
  123. ind = Series(
  124. df.index.get_level_values("color").values, index=index, name="color"
  125. )
  126. # equality
  127. res1 = df.query('color == "red"', parser=parser, engine=engine)
  128. res2 = df.query('"red" == color', parser=parser, engine=engine)
  129. exp = df[ind == "red"]
  130. tm.assert_frame_equal(res1, exp)
  131. tm.assert_frame_equal(res2, exp)
  132. # inequality
  133. res1 = df.query('color != "red"', parser=parser, engine=engine)
  134. res2 = df.query('"red" != color', parser=parser, engine=engine)
  135. exp = df[ind != "red"]
  136. tm.assert_frame_equal(res1, exp)
  137. tm.assert_frame_equal(res2, exp)
  138. # list equality (really just set membership)
  139. res1 = df.query('color == ["red"]', parser=parser, engine=engine)
  140. res2 = df.query('["red"] == color', parser=parser, engine=engine)
  141. exp = df[ind.isin(["red"])]
  142. tm.assert_frame_equal(res1, exp)
  143. tm.assert_frame_equal(res2, exp)
  144. res1 = df.query('color != ["red"]', parser=parser, engine=engine)
  145. res2 = df.query('["red"] != color', parser=parser, engine=engine)
  146. exp = df[~ind.isin(["red"])]
  147. tm.assert_frame_equal(res1, exp)
  148. tm.assert_frame_equal(res2, exp)
  149. # in/not in ops
  150. res1 = df.query('["red"] in color', parser=parser, engine=engine)
  151. res2 = df.query('"red" in color', parser=parser, engine=engine)
  152. exp = df[ind.isin(["red"])]
  153. tm.assert_frame_equal(res1, exp)
  154. tm.assert_frame_equal(res2, exp)
  155. res1 = df.query('["red"] not in color', parser=parser, engine=engine)
  156. res2 = df.query('"red" not in color', parser=parser, engine=engine)
  157. exp = df[~ind.isin(["red"])]
  158. tm.assert_frame_equal(res1, exp)
  159. tm.assert_frame_equal(res2, exp)
  160. def test_query_with_unnamed_multiindex(self, parser, engine):
  161. skip_if_no_pandas_parser(parser)
  162. a = np.random.choice(["red", "green"], size=10)
  163. b = np.random.choice(["eggs", "ham"], size=10)
  164. index = MultiIndex.from_arrays([a, b])
  165. df = DataFrame(np.random.randn(10, 2), index=index)
  166. ind = Series(df.index.get_level_values(0).values, index=index)
  167. res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine)
  168. res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine)
  169. exp = df[ind == "red"]
  170. tm.assert_frame_equal(res1, exp)
  171. tm.assert_frame_equal(res2, exp)
  172. # inequality
  173. res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine)
  174. res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine)
  175. exp = df[ind != "red"]
  176. tm.assert_frame_equal(res1, exp)
  177. tm.assert_frame_equal(res2, exp)
  178. # list equality (really just set membership)
  179. res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine)
  180. res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine)
  181. exp = df[ind.isin(["red"])]
  182. tm.assert_frame_equal(res1, exp)
  183. tm.assert_frame_equal(res2, exp)
  184. res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine)
  185. res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine)
  186. exp = df[~ind.isin(["red"])]
  187. tm.assert_frame_equal(res1, exp)
  188. tm.assert_frame_equal(res2, exp)
  189. # in/not in ops
  190. res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine)
  191. res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine)
  192. exp = df[ind.isin(["red"])]
  193. tm.assert_frame_equal(res1, exp)
  194. tm.assert_frame_equal(res2, exp)
  195. res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine)
  196. res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine)
  197. exp = df[~ind.isin(["red"])]
  198. tm.assert_frame_equal(res1, exp)
  199. tm.assert_frame_equal(res2, exp)
  200. # ## LEVEL 1
  201. ind = Series(df.index.get_level_values(1).values, index=index)
  202. res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine)
  203. res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine)
  204. exp = df[ind == "eggs"]
  205. tm.assert_frame_equal(res1, exp)
  206. tm.assert_frame_equal(res2, exp)
  207. # inequality
  208. res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine)
  209. res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine)
  210. exp = df[ind != "eggs"]
  211. tm.assert_frame_equal(res1, exp)
  212. tm.assert_frame_equal(res2, exp)
  213. # list equality (really just set membership)
  214. res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine)
  215. res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine)
  216. exp = df[ind.isin(["eggs"])]
  217. tm.assert_frame_equal(res1, exp)
  218. tm.assert_frame_equal(res2, exp)
  219. res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine)
  220. res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine)
  221. exp = df[~ind.isin(["eggs"])]
  222. tm.assert_frame_equal(res1, exp)
  223. tm.assert_frame_equal(res2, exp)
  224. # in/not in ops
  225. res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine)
  226. res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine)
  227. exp = df[ind.isin(["eggs"])]
  228. tm.assert_frame_equal(res1, exp)
  229. tm.assert_frame_equal(res2, exp)
  230. res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine)
  231. res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine)
  232. exp = df[~ind.isin(["eggs"])]
  233. tm.assert_frame_equal(res1, exp)
  234. tm.assert_frame_equal(res2, exp)
  235. def test_query_with_partially_named_multiindex(self, parser, engine):
  236. skip_if_no_pandas_parser(parser)
  237. a = np.random.choice(["red", "green"], size=10)
  238. b = np.arange(10)
  239. index = MultiIndex.from_arrays([a, b])
  240. index.names = [None, "rating"]
  241. df = DataFrame(np.random.randn(10, 2), index=index)
  242. res = df.query("rating == 1", parser=parser, engine=engine)
  243. ind = Series(
  244. df.index.get_level_values("rating").values, index=index, name="rating"
  245. )
  246. exp = df[ind == 1]
  247. tm.assert_frame_equal(res, exp)
  248. res = df.query("rating != 1", parser=parser, engine=engine)
  249. ind = Series(
  250. df.index.get_level_values("rating").values, index=index, name="rating"
  251. )
  252. exp = df[ind != 1]
  253. tm.assert_frame_equal(res, exp)
  254. res = df.query('ilevel_0 == "red"', parser=parser, engine=engine)
  255. ind = Series(df.index.get_level_values(0).values, index=index)
  256. exp = df[ind == "red"]
  257. tm.assert_frame_equal(res, exp)
  258. res = df.query('ilevel_0 != "red"', parser=parser, engine=engine)
  259. ind = Series(df.index.get_level_values(0).values, index=index)
  260. exp = df[ind != "red"]
  261. tm.assert_frame_equal(res, exp)
  262. def test_query_multiindex_get_index_resolvers(self):
  263. df = tm.makeCustomDataframe(
  264. 10, 3, r_idx_nlevels=2, r_idx_names=["spam", "eggs"]
  265. )
  266. resolvers = df._get_index_resolvers()
  267. def to_series(mi, level):
  268. level_values = mi.get_level_values(level)
  269. s = level_values.to_series()
  270. s.index = mi
  271. return s
  272. col_series = df.columns.to_series()
  273. expected = {
  274. "index": df.index,
  275. "columns": col_series,
  276. "spam": to_series(df.index, "spam"),
  277. "eggs": to_series(df.index, "eggs"),
  278. "C0": col_series,
  279. }
  280. for k, v in resolvers.items():
  281. if isinstance(v, Index):
  282. assert v.is_(expected[k])
  283. elif isinstance(v, Series):
  284. tm.assert_series_equal(v, expected[k])
  285. else:
  286. raise AssertionError("object must be a Series or Index")
  287. @td.skip_if_no_ne
  288. class TestDataFrameQueryNumExprPandas:
  289. @classmethod
  290. def setup_class(cls):
  291. cls.engine = "numexpr"
  292. cls.parser = "pandas"
  293. @classmethod
  294. def teardown_class(cls):
  295. del cls.engine, cls.parser
  296. def test_date_query_with_attribute_access(self):
  297. engine, parser = self.engine, self.parser
  298. skip_if_no_pandas_parser(parser)
  299. df = DataFrame(np.random.randn(5, 3))
  300. df["dates1"] = date_range("1/1/2012", periods=5)
  301. df["dates2"] = date_range("1/1/2013", periods=5)
  302. df["dates3"] = date_range("1/1/2014", periods=5)
  303. res = df.query(
  304. "@df.dates1 < 20130101 < @df.dates3", engine=engine, parser=parser
  305. )
  306. expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
  307. tm.assert_frame_equal(res, expec)
  308. def test_date_query_no_attribute_access(self):
  309. engine, parser = self.engine, self.parser
  310. df = DataFrame(np.random.randn(5, 3))
  311. df["dates1"] = date_range("1/1/2012", periods=5)
  312. df["dates2"] = date_range("1/1/2013", periods=5)
  313. df["dates3"] = date_range("1/1/2014", periods=5)
  314. res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
  315. expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
  316. tm.assert_frame_equal(res, expec)
  317. def test_date_query_with_NaT(self):
  318. engine, parser = self.engine, self.parser
  319. n = 10
  320. df = DataFrame(np.random.randn(n, 3))
  321. df["dates1"] = date_range("1/1/2012", periods=n)
  322. df["dates2"] = date_range("1/1/2013", periods=n)
  323. df["dates3"] = date_range("1/1/2014", periods=n)
  324. df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
  325. df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT
  326. res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
  327. expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
  328. tm.assert_frame_equal(res, expec)
  329. def test_date_index_query(self):
  330. engine, parser = self.engine, self.parser
  331. n = 10
  332. df = DataFrame(np.random.randn(n, 3))
  333. df["dates1"] = date_range("1/1/2012", periods=n)
  334. df["dates3"] = date_range("1/1/2014", periods=n)
  335. df.set_index("dates1", inplace=True, drop=True)
  336. res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
  337. expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
  338. tm.assert_frame_equal(res, expec)
  339. def test_date_index_query_with_NaT(self):
  340. engine, parser = self.engine, self.parser
  341. n = 10
  342. df = DataFrame(np.random.randn(n, 3))
  343. df["dates1"] = date_range("1/1/2012", periods=n)
  344. df["dates3"] = date_range("1/1/2014", periods=n)
  345. df.iloc[0, 0] = pd.NaT
  346. df.set_index("dates1", inplace=True, drop=True)
  347. res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
  348. expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
  349. tm.assert_frame_equal(res, expec)
  350. def test_date_index_query_with_NaT_duplicates(self):
  351. engine, parser = self.engine, self.parser
  352. n = 10
  353. d = {}
  354. d["dates1"] = date_range("1/1/2012", periods=n)
  355. d["dates3"] = date_range("1/1/2014", periods=n)
  356. df = DataFrame(d)
  357. df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
  358. df.set_index("dates1", inplace=True, drop=True)
  359. res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
  360. expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)]
  361. tm.assert_frame_equal(res, expec)
  362. def test_date_query_with_non_date(self):
  363. engine, parser = self.engine, self.parser
  364. n = 10
  365. df = DataFrame(
  366. {"dates": date_range("1/1/2012", periods=n), "nondate": np.arange(n)}
  367. )
  368. result = df.query("dates == nondate", parser=parser, engine=engine)
  369. assert len(result) == 0
  370. result = df.query("dates != nondate", parser=parser, engine=engine)
  371. tm.assert_frame_equal(result, df)
  372. for op in ["<", ">", "<=", ">="]:
  373. with pytest.raises(TypeError):
  374. df.query(
  375. "dates {op} nondate".format(op=op), parser=parser, engine=engine
  376. )
  377. def test_query_syntax_error(self):
  378. engine, parser = self.engine, self.parser
  379. df = DataFrame({"i": range(10), "+": range(3, 13), "r": range(4, 14)})
  380. with pytest.raises(SyntaxError):
  381. df.query("i - +", engine=engine, parser=parser)
  382. def test_query_scope(self):
  383. from pandas.core.computation.ops import UndefinedVariableError
  384. engine, parser = self.engine, self.parser
  385. skip_if_no_pandas_parser(parser)
  386. df = DataFrame(np.random.randn(20, 2), columns=list("ab"))
  387. a, b = 1, 2 # noqa
  388. res = df.query("a > b", engine=engine, parser=parser)
  389. expected = df[df.a > df.b]
  390. tm.assert_frame_equal(res, expected)
  391. res = df.query("@a > b", engine=engine, parser=parser)
  392. expected = df[a > df.b]
  393. tm.assert_frame_equal(res, expected)
  394. # no local variable c
  395. with pytest.raises(
  396. UndefinedVariableError, match="local variable 'c' is not defined"
  397. ):
  398. df.query("@a > b > @c", engine=engine, parser=parser)
  399. # no column named 'c'
  400. with pytest.raises(UndefinedVariableError, match="name 'c' is not defined"):
  401. df.query("@a > b > c", engine=engine, parser=parser)
  402. def test_query_doesnt_pickup_local(self):
  403. from pandas.core.computation.ops import UndefinedVariableError
  404. engine, parser = self.engine, self.parser
  405. n = m = 10
  406. df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc"))
  407. # we don't pick up the local 'sin'
  408. with pytest.raises(UndefinedVariableError, match="name 'sin' is not defined"):
  409. df.query("sin > 5", engine=engine, parser=parser)
  410. def test_query_builtin(self):
  411. from pandas.core.computation.engines import NumExprClobberingError
  412. engine, parser = self.engine, self.parser
  413. n = m = 10
  414. df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc"))
  415. df.index.name = "sin"
  416. msg = "Variables in expression.+"
  417. with pytest.raises(NumExprClobberingError, match=msg):
  418. df.query("sin > 5", engine=engine, parser=parser)
  419. def test_query(self):
  420. engine, parser = self.engine, self.parser
  421. df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
  422. tm.assert_frame_equal(
  423. df.query("a < b", engine=engine, parser=parser), df[df.a < df.b]
  424. )
  425. tm.assert_frame_equal(
  426. df.query("a + b > b * c", engine=engine, parser=parser),
  427. df[df.a + df.b > df.b * df.c],
  428. )
  429. def test_query_index_with_name(self):
  430. engine, parser = self.engine, self.parser
  431. df = DataFrame(
  432. np.random.randint(10, size=(10, 3)),
  433. index=Index(range(10), name="blob"),
  434. columns=["a", "b", "c"],
  435. )
  436. res = df.query("(blob < 5) & (a < b)", engine=engine, parser=parser)
  437. expec = df[(df.index < 5) & (df.a < df.b)]
  438. tm.assert_frame_equal(res, expec)
  439. res = df.query("blob < b", engine=engine, parser=parser)
  440. expec = df[df.index < df.b]
  441. tm.assert_frame_equal(res, expec)
  442. def test_query_index_without_name(self):
  443. engine, parser = self.engine, self.parser
  444. df = DataFrame(
  445. np.random.randint(10, size=(10, 3)),
  446. index=range(10),
  447. columns=["a", "b", "c"],
  448. )
  449. # "index" should refer to the index
  450. res = df.query("index < b", engine=engine, parser=parser)
  451. expec = df[df.index < df.b]
  452. tm.assert_frame_equal(res, expec)
  453. # test against a scalar
  454. res = df.query("index < 5", engine=engine, parser=parser)
  455. expec = df[df.index < 5]
  456. tm.assert_frame_equal(res, expec)
  457. def test_nested_scope(self):
  458. engine = self.engine
  459. parser = self.parser
  460. skip_if_no_pandas_parser(parser)
  461. df = DataFrame(np.random.randn(5, 3))
  462. df2 = DataFrame(np.random.randn(5, 3))
  463. expected = df[(df > 0) & (df2 > 0)]
  464. result = df.query("(@df > 0) & (@df2 > 0)", engine=engine, parser=parser)
  465. tm.assert_frame_equal(result, expected)
  466. result = pd.eval("df[df > 0 and df2 > 0]", engine=engine, parser=parser)
  467. tm.assert_frame_equal(result, expected)
  468. result = pd.eval(
  469. "df[df > 0 and df2 > 0 and df[df > 0] > 0]", engine=engine, parser=parser
  470. )
  471. expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
  472. tm.assert_frame_equal(result, expected)
  473. result = pd.eval("df[(df>0) & (df2>0)]", engine=engine, parser=parser)
  474. expected = df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser)
  475. tm.assert_frame_equal(result, expected)
  476. def test_nested_raises_on_local_self_reference(self):
  477. from pandas.core.computation.ops import UndefinedVariableError
  478. df = DataFrame(np.random.randn(5, 3))
  479. # can't reference ourself b/c we're a local so @ is necessary
  480. with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"):
  481. df.query("df > 0", engine=self.engine, parser=self.parser)
  482. def test_local_syntax(self):
  483. skip_if_no_pandas_parser(self.parser)
  484. engine, parser = self.engine, self.parser
  485. df = DataFrame(np.random.randn(100, 10), columns=list("abcdefghij"))
  486. b = 1
  487. expect = df[df.a < b]
  488. result = df.query("a < @b", engine=engine, parser=parser)
  489. tm.assert_frame_equal(result, expect)
  490. expect = df[df.a < df.b]
  491. result = df.query("a < b", engine=engine, parser=parser)
  492. tm.assert_frame_equal(result, expect)
  493. def test_chained_cmp_and_in(self):
  494. skip_if_no_pandas_parser(self.parser)
  495. engine, parser = self.engine, self.parser
  496. cols = list("abc")
  497. df = DataFrame(np.random.randn(100, len(cols)), columns=cols)
  498. res = df.query(
  499. "a < b < c and a not in b not in c", engine=engine, parser=parser
  500. )
  501. ind = (
  502. (df.a < df.b) & (df.b < df.c) & ~df.b.isin(df.a) & ~df.c.isin(df.b)
  503. ) # noqa
  504. expec = df[ind]
  505. tm.assert_frame_equal(res, expec)
  506. def test_local_variable_with_in(self):
  507. engine, parser = self.engine, self.parser
  508. skip_if_no_pandas_parser(parser)
  509. a = Series(np.random.randint(3, size=15), name="a")
  510. b = Series(np.random.randint(10, size=15), name="b")
  511. df = DataFrame({"a": a, "b": b})
  512. expected = df.loc[(df.b - 1).isin(a)]
  513. result = df.query("b - 1 in a", engine=engine, parser=parser)
  514. tm.assert_frame_equal(expected, result)
  515. b = Series(np.random.randint(10, size=15), name="b")
  516. expected = df.loc[(b - 1).isin(a)]
  517. result = df.query("@b - 1 in a", engine=engine, parser=parser)
  518. tm.assert_frame_equal(expected, result)
  519. def test_at_inside_string(self):
  520. engine, parser = self.engine, self.parser
  521. skip_if_no_pandas_parser(parser)
  522. c = 1 # noqa
  523. df = DataFrame({"a": ["a", "a", "b", "b", "@c", "@c"]})
  524. result = df.query('a == "@c"', engine=engine, parser=parser)
  525. expected = df[df.a == "@c"]
  526. tm.assert_frame_equal(result, expected)
  527. def test_query_undefined_local(self):
  528. from pandas.core.computation.ops import UndefinedVariableError
  529. engine, parser = self.engine, self.parser
  530. skip_if_no_pandas_parser(parser)
  531. df = DataFrame(np.random.rand(10, 2), columns=list("ab"))
  532. with pytest.raises(
  533. UndefinedVariableError, match="local variable 'c' is not defined"
  534. ):
  535. df.query("a == @c", engine=engine, parser=parser)
  536. def test_index_resolvers_come_after_columns_with_the_same_name(self):
  537. n = 1 # noqa
  538. a = np.r_[20:101:20]
  539. df = DataFrame({"index": a, "b": np.random.randn(a.size)})
  540. df.index.name = "index"
  541. result = df.query("index > 5", engine=self.engine, parser=self.parser)
  542. expected = df[df["index"] > 5]
  543. tm.assert_frame_equal(result, expected)
  544. df = DataFrame({"index": a, "b": np.random.randn(a.size)})
  545. result = df.query("ilevel_0 > 5", engine=self.engine, parser=self.parser)
  546. expected = df.loc[df.index[df.index > 5]]
  547. tm.assert_frame_equal(result, expected)
  548. df = DataFrame({"a": a, "b": np.random.randn(a.size)})
  549. df.index.name = "a"
  550. result = df.query("a > 5", engine=self.engine, parser=self.parser)
  551. expected = df[df.a > 5]
  552. tm.assert_frame_equal(result, expected)
  553. result = df.query("index > 5", engine=self.engine, parser=self.parser)
  554. expected = df.loc[df.index[df.index > 5]]
  555. tm.assert_frame_equal(result, expected)
  556. def test_inf(self):
  557. n = 10
  558. df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)})
  559. df.loc[::2, 0] = np.inf
  560. ops = "==", "!="
  561. d = dict(zip(ops, (operator.eq, operator.ne)))
  562. for op, f in d.items():
  563. q = "a {op} inf".format(op=op)
  564. expected = df[f(df.a, np.inf)]
  565. result = df.query(q, engine=self.engine, parser=self.parser)
  566. tm.assert_frame_equal(result, expected)
  567. @td.skip_if_no_ne
  568. class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas):
  569. @classmethod
  570. def setup_class(cls):
  571. super().setup_class()
  572. cls.engine = "numexpr"
  573. cls.parser = "python"
  574. def test_date_query_no_attribute_access(self):
  575. engine, parser = self.engine, self.parser
  576. df = DataFrame(np.random.randn(5, 3))
  577. df["dates1"] = date_range("1/1/2012", periods=5)
  578. df["dates2"] = date_range("1/1/2013", periods=5)
  579. df["dates3"] = date_range("1/1/2014", periods=5)
  580. res = df.query(
  581. "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
  582. )
  583. expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
  584. tm.assert_frame_equal(res, expec)
  585. def test_date_query_with_NaT(self):
  586. engine, parser = self.engine, self.parser
  587. n = 10
  588. df = DataFrame(np.random.randn(n, 3))
  589. df["dates1"] = date_range("1/1/2012", periods=n)
  590. df["dates2"] = date_range("1/1/2013", periods=n)
  591. df["dates3"] = date_range("1/1/2014", periods=n)
  592. df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
  593. df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT
  594. res = df.query(
  595. "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
  596. )
  597. expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
  598. tm.assert_frame_equal(res, expec)
  599. def test_date_index_query(self):
  600. engine, parser = self.engine, self.parser
  601. n = 10
  602. df = DataFrame(np.random.randn(n, 3))
  603. df["dates1"] = date_range("1/1/2012", periods=n)
  604. df["dates3"] = date_range("1/1/2014", periods=n)
  605. df.set_index("dates1", inplace=True, drop=True)
  606. res = df.query(
  607. "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
  608. )
  609. expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
  610. tm.assert_frame_equal(res, expec)
  611. def test_date_index_query_with_NaT(self):
  612. engine, parser = self.engine, self.parser
  613. n = 10
  614. df = DataFrame(np.random.randn(n, 3))
  615. df["dates1"] = date_range("1/1/2012", periods=n)
  616. df["dates3"] = date_range("1/1/2014", periods=n)
  617. df.iloc[0, 0] = pd.NaT
  618. df.set_index("dates1", inplace=True, drop=True)
  619. res = df.query(
  620. "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
  621. )
  622. expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
  623. tm.assert_frame_equal(res, expec)
  def test_date_index_query_with_NaT_duplicates(self):
      """
      A chained comparison over an index with repeated NaT must raise.

      A random subset of ``dates1`` is replaced by NaT before the column
      becomes the index; the chained query is expected to raise
      ``NotImplementedError``.
      """
      size = 10
      frame = DataFrame(np.random.randn(size, 3))
      frame["dates1"] = date_range("1/1/2012", periods=size)
      frame["dates3"] = date_range("1/1/2014", periods=size)

      nat_rows = np.random.rand(size) > 0.5
      frame.loc[nat_rows, "dates1"] = pd.NaT
      frame.set_index("dates1", inplace=True, drop=True)

      with pytest.raises(NotImplementedError):
          frame.query(
              "index < 20130101 < dates3",
              engine=self.engine,
              parser=self.parser,
          )
  def test_nested_scope(self):
      """
      Check which names from this function's scope are visible to eval/query.

      ``pd.eval`` is expected to resolve the local names ``x``, ``df`` and
      ``df2``. ``DataFrame.query`` is expected to reject the ``@``-prefixed
      form with ``SyntaxError`` and the bare ``df`` name with
      ``UndefinedVariableError``.

      The locals ``x``, ``df`` and ``df2`` are referenced by name inside the
      expression strings, so they must keep these exact names.
      """
      from pandas.core.computation.ops import UndefinedVariableError

      engine = self.engine
      parser = self.parser

      # smoke test
      x = 1  # noqa  (only referenced from inside the expression string)
      result = pd.eval("x + 1", engine=engine, parser=parser)
      assert result == 2

      df = DataFrame(np.random.randn(5, 3))
      df2 = DataFrame(np.random.randn(5, 3))

      # don't have the pandas parser
      # NOTE(review): presumably the "@" prefix is a pandas-parser feature,
      # hence the SyntaxError here -- confirm against the class setup.
      with pytest.raises(SyntaxError):
          df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser)

      # Without the prefix, query() does not find the local name ``df``.
      with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"):
          df.query("(df>0) & (df2>0)", engine=engine, parser=parser)

      # pd.eval resolves both frames from the local scope.
      expected = df[(df > 0) & (df2 > 0)]
      result = pd.eval("df[(df > 0) & (df2 > 0)]", engine=engine, parser=parser)
      tm.assert_frame_equal(expected, result)

      # Same check with an additional nested indexing expression.
      expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
      result = pd.eval(
          "df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]", engine=engine, parser=parser
      )
      tm.assert_frame_equal(expected, result)
  class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas):
      """Subclass of the NumExpr/pandas suite using engine "python", parser "pandas"."""

      @classmethod
      def setup_class(cls):
          """Run the parent setup, then override engine and parser."""
          super().setup_class()
          cls.engine, cls.parser = "python", "pandas"

      def test_query_builtin(self):
          """An index named ``sin`` can be referenced by that name in a query."""
          size = 10
          values = np.random.randint(size, size=(size, 3))
          frame = DataFrame(values, columns=list("abc"))
          frame.index.name = "sin"

          wanted = frame[frame.index > 5]
          actual = frame.query("sin > 5", engine=self.engine, parser=self.parser)
          tm.assert_frame_equal(wanted, actual)
  class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython):
      """Subclass of the NumExpr/python suite using "python" for engine and parser."""

      @classmethod
      def setup_class(cls):
          """Run the parent setup, then set engine and parser to "python"."""
          super().setup_class()
          cls.engine = "python"
          cls.parser = "python"

      def test_query_builtin(self):
          """An index named ``sin`` can be referenced by that name in a query."""
          size = 10
          frame = DataFrame(
              np.random.randint(size, size=(size, 3)), columns=list("abc")
          )
          frame.index.name = "sin"

          wanted = frame[frame.index > 5]
          actual = frame.query("sin > 5", engine=self.engine, parser=self.parser)
          tm.assert_frame_equal(wanted, actual)
  class TestDataFrameQueryStrings:
      """``DataFrame.query`` tests that involve string data or string literals."""

      def test_str_query_method(self, parser, engine):
          """
          Compare a string column with a string literal using ``==`` and ``!=``.

          With a parser other than "pandas" every variant is expected to raise
          ``NotImplementedError``; with the pandas parser the result must match
          boolean indexing and ``isin``.
          """
          df = DataFrame(np.random.randn(10, 1), columns=["b"])
          df["strings"] = Series(list("aabbccddee"))
          expect = df[df.strings == "a"]

          if parser != "pandas":
              col = "strings"
              lst = '"a"'

              # Column-vs-literal twice, then literal-vs-column twice, each
              # pair once with "==" and once with "!=".
              operands = [col] * 2 + [lst] * 2
              ops = 2 * ["==", "!="]
              msg = r"'(Not)?In' nodes are not implemented"

              # The loop variables use names distinct from the zipped lists so
              # the lists are not rebound while being iterated.
              for lhs, op, rhs in zip(operands, ops, reversed(operands)):
                  ex = f"{lhs} {op} {rhs}"
                  with pytest.raises(NotImplementedError, match=msg):
                      df.query(
                          ex,
                          engine=engine,
                          parser=parser,
                          local_dict={"strings": df.strings},
                      )
          else:
              res = df.query('"a" == strings', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)

              res = df.query('strings == "a"', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)
              tm.assert_frame_equal(res, df[df.strings.isin(["a"])])

              expect = df[df.strings != "a"]
              res = df.query('strings != "a"', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)

              res = df.query('"a" != strings', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)
              tm.assert_frame_equal(res, df[~df.strings.isin(["a"])])

      def test_str_list_query_method(self, parser, engine):
          """
          Compare a string column with a list literal using ``==`` and ``!=``.

          With the pandas parser the comparison must behave like ``isin`` (or
          its negation); any other parser is expected to raise
          ``NotImplementedError``.
          """
          df = DataFrame(np.random.randn(10, 1), columns=["b"])
          df["strings"] = Series(list("aabbccddee"))
          expect = df[df.strings.isin(["a", "b"])]

          if parser != "pandas":
              col = "strings"
              lst = '["a", "b"]'

              operands = [col] * 2 + [lst] * 2
              ops = 2 * ["==", "!="]

              # Distinct loop names: the zipped lists are not shadowed.
              for lhs, op, rhs in zip(operands, ops, reversed(operands)):
                  ex = f"{lhs} {op} {rhs}"
                  with pytest.raises(NotImplementedError):
                      df.query(ex, engine=engine, parser=parser)
          else:
              res = df.query('strings == ["a", "b"]', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)

              res = df.query('["a", "b"] == strings', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)

              expect = df[~df.strings.isin(["a", "b"])]

              res = df.query('strings != ["a", "b"]', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)

              res = df.query('["a", "b"] != strings', engine=engine, parser=parser)
              tm.assert_frame_equal(res, expect)

      def test_query_with_string_columns(self, parser, engine):
          """
          Use ``in`` between two string columns.

          The pandas parser must match ``isin``; other parsers are expected to
          raise ``NotImplementedError``.
          """
          df = DataFrame(
              {
                  "a": list("aaaabbbbcccc"),
                  "b": list("aabbccddeeff"),
                  "c": np.random.randint(5, size=12),
                  "d": np.random.randint(9, size=12),
              }
          )
          if parser == "pandas":
              res = df.query("a in b", parser=parser, engine=engine)
              expec = df[df.a.isin(df.b)]
              tm.assert_frame_equal(res, expec)

              res = df.query("a in b and c < d", parser=parser, engine=engine)
              expec = df[df.a.isin(df.b) & (df.c < df.d)]
              tm.assert_frame_equal(res, expec)
          else:
              with pytest.raises(NotImplementedError):
                  df.query("a in b", parser=parser, engine=engine)

              with pytest.raises(NotImplementedError):
                  df.query("a in b and c < d", parser=parser, engine=engine)

      def test_object_array_eq_ne(self, parser, engine):
          """Element-wise ``==`` and ``!=`` between two string columns."""
          df = DataFrame(
              {
                  "a": list("aaaabbbbcccc"),
                  "b": list("aabbccddeeff"),
                  "c": np.random.randint(5, size=12),
                  "d": np.random.randint(9, size=12),
              }
          )
          res = df.query("a == b", parser=parser, engine=engine)
          exp = df[df.a == df.b]
          tm.assert_frame_equal(res, exp)

          res = df.query("a != b", parser=parser, engine=engine)
          exp = df[df.a != df.b]
          tm.assert_frame_equal(res, exp)

      def test_query_with_nested_strings(self, parser, engine):
          """Query for a string literal that itself contains double quotes."""
          skip_if_no_pandas_parser(parser)

          # The reader below splits on runs of two or more whitespace
          # characters, so fields are joined with three spaces. The single
          # spaces inside the event and timestamp fields must not split.
          rows = [
              ("id", "event", "timestamp"),
              ("1", '"page 1 load"', "1/1/2014 0:00:01"),
              ("1", '"page 1 exit"', "1/1/2014 0:00:31"),
              ("2", '"page 2 load"', "1/1/2014 0:01:01"),
              ("2", '"page 2 exit"', "1/1/2014 0:01:31"),
              ("3", '"page 3 load"', "1/1/2014 0:02:01"),
              ("3", '"page 3 exit"', "1/1/2014 0:02:31"),
              ("4", '"page 1 load"', "2/1/2014 1:00:01"),
              ("4", '"page 1 exit"', "2/1/2014 1:00:31"),
              ("5", '"page 2 load"', "2/1/2014 1:01:01"),
              ("5", '"page 2 exit"', "2/1/2014 1:01:31"),
              ("6", '"page 3 load"', "2/1/2014 1:02:01"),
              ("6", '"page 3 exit"', "2/1/2014 1:02:31"),
          ]
          raw = "\n".join("   ".join(row) for row in rows) + "\n"

          df = pd.read_csv(
              StringIO(raw), sep=r"\s{2,}", engine="python", parse_dates=["timestamp"]
          )
          expected = df[df.event == '"page 1 load"']
          res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine)
          tm.assert_frame_equal(expected, res)

      def test_query_with_nested_special_character(self, parser, engine):
          """A string literal containing ``&`` is compared as plain text."""
          skip_if_no_pandas_parser(parser)
          df = DataFrame({"a": ["a", "b", "test & test"], "b": [1, 2, 3]})
          res = df.query('a == "test & test"', parser=parser, engine=engine)
          expec = df[df.a == "test & test"]
          tm.assert_frame_equal(res, expec)

      def test_query_lex_compare_strings(self, parser, engine):
          """Ordering comparisons between a string column and a string literal."""
          a = Series(np.random.choice(list("abcde"), 20))
          b = Series(np.arange(a.size))
          df = DataFrame({"X": a, "Y": b})

          ops = {"<": operator.lt, ">": operator.gt, "<=": operator.le, ">=": operator.ge}

          for op, func in ops.items():
              res = df.query(f'X {op} "d"', engine=engine, parser=parser)
              expected = df[func(df.X, "d")]
              tm.assert_frame_equal(res, expected)

      def test_query_single_element_booleans(self, parser, engine):
          """Combine boolean columns with ``&`` on a one-row frame."""
          columns = "bid", "bidsize", "ask", "asksize"
          data = np.random.randint(2, size=(1, len(columns))).astype(bool)
          df = DataFrame(data, columns=columns)
          res = df.query("bid & ask", engine=engine, parser=parser)
          expected = df[df.bid & df.ask]
          tm.assert_frame_equal(res, expected)

      def test_query_string_scalar_variable(self, parser, engine):
          """Compare a string column with a local string referenced via ``@``."""
          skip_if_no_pandas_parser(parser)
          df = DataFrame(
              {
                  "Symbol": ["BUD US", "BUD US", "IBM US", "IBM US"],
                  "Price": [109.70, 109.72, 183.30, 183.35],
              }
          )
          e = df[df.Symbol == "BUD US"]
          # Referenced only from inside the query string, hence the noqa.
          symb = "BUD US"  # noqa
          r = df.query("Symbol == @symb", parser=parser, engine=engine)
          tm.assert_frame_equal(e, r)
  class TestDataFrameEvalWithFrame:
      """``DataFrame.eval`` tests sharing a random frame with columns a, b, c."""

      def setup_method(self, method):
          """Create a fresh 10x3 random frame for the test about to run."""
          self.frame = DataFrame(np.random.randn(10, 3), columns=list("abc"))

      def teardown_method(self, method):
          """Drop the frame created in ``setup_method``."""
          del self.frame

      def test_simple_expr(self, parser, engine):
          """``a + b`` evaluates to the sum of the two columns."""
          frame = self.frame
          actual = frame.eval("a + b", engine=engine, parser=parser)
          tm.assert_series_equal(actual, frame.a + frame.b)

      def test_bool_arith_expr(self, parser, engine):
          """A boolean-filtered column can be added to an unfiltered one."""
          frame = self.frame
          actual = frame.eval("a[a < 1] + b", engine=engine, parser=parser)
          wanted = frame.a[frame.a < 1] + frame.b
          tm.assert_series_equal(actual, wanted)

      @pytest.mark.parametrize("op", ["+", "-", "*", "/"])
      def test_invalid_type_for_operator_raises(self, parser, engine, op):
          """Arithmetic between an integer and a string column raises TypeError."""
          mixed = DataFrame({"a": [1, 2], "b": ["c", "d"]})
          pattern = r"unsupported operand type\(s\) for .+: '.+' and '.+'"
          expression = "a {0} b".format(op)

          with pytest.raises(TypeError, match=pattern):
              mixed.eval(expression, engine=engine, parser=parser)
  class TestDataFrameQueryBacktickQuoting:
      """Tests for referring to column names through backticks in query/eval."""

      @pytest.fixture(scope="class")
      def df(self):
          """
          Yields a dataframe with column names that may or may not need
          escaping by backticks.

          The columns ``it's``, ``that's``, ``☺`` and ``foo#bar`` cannot be
          referenced through backticks; the tests in this class expect a
          SyntaxError for them.
          """
          yield DataFrame(
              {
                  "A": [1, 2, 3],
                  "B B": [3, 2, 1],
                  "C C": [4, 5, 6],
                  # Two spaces: must differ from the "C C" key above, otherwise
                  # the dict literal silently keeps only the later entry.
                  "C  C": [7, 4, 3],
                  "C_C": [8, 9, 10],
                  "D_D D": [11, 1, 101],
                  "E.E": [6, 3, 5],
                  "F-F": [8, 1, 10],
                  "1e1": [2, 4, 8],
                  "def": [10, 11, 2],
                  "A (x)": [4, 1, 3],
                  "B(x)": [1, 1, 5],
                  "B (x)": [2, 7, 4],
                  " &^ :!€$?(} > <++*'' ": [2, 5, 6],
                  "": [10, 11, 1],
                  " A": [4, 7, 9],
                  " ": [1, 2, 1],
                  "it's": [6, 3, 1],
                  "that's": [9, 1, 8],
                  "☺": [8, 7, 6],
                  "foo#bar": [2, 4, 5],
                  1: [5, 7, 9],
              }
          )

      def test_single_backtick_variable_query(self, df):
          """One backtick-quoted name containing a space, used in a query."""
          res = df.query("1 < `B B`")
          expect = df[1 < df["B B"]]
          tm.assert_frame_equal(res, expect)

      def test_two_backtick_variables_query(self, df):
          """Two backtick-quoted names combined with ``and`` in a query."""
          res = df.query("1 < `B B` and 4 < `C C`")
          expect = df[(1 < df["B B"]) & (4 < df["C C"])]
          tm.assert_frame_equal(res, expect)

      def test_single_backtick_variable_expr(self, df):
          """A plain name plus a backtick-quoted name in ``eval``."""
          res = df.eval("A + `B B`")
          expect = df["A"] + df["B B"]
          tm.assert_series_equal(res, expect)

      def test_two_backtick_variables_expr(self, df):
          """Two backtick-quoted names added in ``eval``."""
          res = df.eval("`B B` + `C C`")
          expect = df["B B"] + df["C C"]
          tm.assert_series_equal(res, expect)

      def test_already_underscore_variable(self, df):
          """A name with an underscore may still be backtick-quoted."""
          res = df.eval("`C_C` + A")
          expect = df["C_C"] + df["A"]
          tm.assert_series_equal(res, expect)

      def test_same_name_but_underscores(self, df):
          """``C_C`` and ```C C``` refer to two different columns."""
          res = df.eval("C_C + `C C`")
          expect = df["C_C"] + df["C C"]
          tm.assert_series_equal(res, expect)

      def test_mixed_underscores_and_spaces(self, df):
          """A name mixing an underscore and a space."""
          res = df.eval("A + `D_D D`")
          expect = df["A"] + df["D_D D"]
          tm.assert_series_equal(res, expect)

      def test_backtick_quote_name_with_no_spaces(self, df):
          """Backtick-quoting a name that contains no space."""
          res = df.eval("A + `C_C`")
          expect = df["A"] + df["C_C"]
          tm.assert_series_equal(res, expect)

      def test_special_characters(self, df):
          """Names containing ``.`` and ``-``."""
          res = df.eval("`E.E` + `F-F` - A")
          expect = df["E.E"] + df["F-F"] - df["A"]
          tm.assert_series_equal(res, expect)

      def test_start_with_digit(self, df):
          """A name that starts with a digit."""
          res = df.eval("A + `1e1`")
          expect = df["A"] + df["1e1"]
          tm.assert_series_equal(res, expect)

      def test_keyword(self, df):
          """A name that is a Python keyword."""
          res = df.eval("A + `def`")
          expect = df["A"] + df["def"]
          tm.assert_series_equal(res, expect)

      def test_unneeded_quoting(self, df):
          """Backticks around a name that is already a valid identifier."""
          res = df.query("`A` > 2")
          expect = df[df["A"] > 2]
          tm.assert_frame_equal(res, expect)

      def test_parenthesis(self, df):
          """A name containing parentheses."""
          res = df.query("`A (x)` > 2")
          expect = df[df["A (x)"] > 2]
          tm.assert_frame_equal(res, expect)

      def test_empty_string(self, df):
          """The empty string as a column name."""
          res = df.query("`` > 5")
          expect = df[df[""] > 5]
          tm.assert_frame_equal(res, expect)

      def test_multiple_spaces(self, df):
          """A name with two consecutive spaces, distinct from ``C C``."""
          res = df.query("`C  C` > 5")
          expect = df[df["C  C"] > 5]
          tm.assert_frame_equal(res, expect)

      def test_start_with_spaces(self, df):
          """Names that begin with, or consist only of, whitespace."""
          res = df.eval("` A` + ` `")
          expect = df[" A"] + df[" "]
          tm.assert_series_equal(res, expect)

      def test_lots_of_operators_string(self, df):
          """A name made up mostly of operator and punctuation characters."""
          res = df.query("` &^ :!€$?(} > <++*'' ` > 4")
          expect = df[df[" &^ :!€$?(} > <++*'' "] > 4]
          tm.assert_frame_equal(res, expect)

      def test_failing_quote(self, df):
          """Names containing a single quote are expected to raise SyntaxError."""
          with pytest.raises(SyntaxError):
              df.query("`it's` > `that's`")

      def test_failing_character_outside_range(self, df):
          """The ``☺`` column name is expected to raise SyntaxError."""
          with pytest.raises(SyntaxError):
              df.query("`☺` > 4")

      def test_failing_hashtag(self, df):
          """A name containing ``#`` is expected to raise SyntaxError."""
          with pytest.raises(SyntaxError):
              df.query("`foo#bar` > 4")